Fix a nesting issue in some matrix-vector cases.

This commit is contained in:
Gael Guennebaud 2015-10-08 17:36:57 +02:00
parent dd934ad057
commit 8d00a953af
2 changed files with 10 additions and 4 deletions

View File

@ -350,10 +350,11 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
// Column-major, non-vectorized matrix-vector kernel: for every k in [0, rhs.rows())
// it adds (alpha * k-th rhs coefficient) * lhs.col(k) to dest.
// NOTE(review): this text looks like a rendered diff hunk without +/- markers, so the
// pre-commit and post-commit versions of the changed lines appear back to back below;
// only one line of each pair is expected to exist in the real file -- confirm against the repository.
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
// actual_rhs is constructed from rhs through nested_eval<Rhs,1>; presumably this lets a
// costly rhs expression be evaluated once before the loop reads it -- TODO confirm against nested_eval.
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
const Index size = rhs.rows();
for(Index k=0; k<size; ++k)
// Presumably the pre-commit statement: reads the coefficient from rhs directly.
dest += (alpha*rhs.coeff(k)) * lhs.col(k);
// Presumably the post-commit statement: same update, but reads the coefficient from actual_rhs.
dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
}
};
@ -362,10 +363,10 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
// Row-major, non-vectorized matrix-vector kernel: for every row i in [0, dest.rows())
// it adds alpha * sum(lhs.row(i) .* transposed rhs) to dest.coeffRef(i).
// NOTE(review): this text looks like a rendered diff hunk without +/- markers, so the
// pre-commit and post-commit versions of the changed lines appear back to back below;
// only one line of each pair is expected to exist in the real file -- confirm against the repository.
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
// actual_rhs is constructed from rhs through nested_eval<Rhs,Lhs::RowsAtCompileTime>; presumably
// the second argument tells nested_eval how many times each rhs coefficient will be read
// (once per lhs row) -- TODO confirm against nested_eval.
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
const Index rows = dest.rows();
for(Index i=0; i<rows; ++i)
// Presumably the pre-commit statement: uses rhs directly inside the per-row reduction.
dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum();
// Presumably the post-commit statement: same reduction, but against actual_rhs.
dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
}
};

View File

@ -107,6 +107,11 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 );
VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 );
VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 );
VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 );
VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 );
VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 );
VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 );
}
void test_product_notemporary()