bug #1166: fix shortcomming in gemv when the destination is not a vector at compile-time.

This commit is contained in:
Gael Guennebaud 2016-02-15 21:43:07 +01:00
parent 8e1f1ba6a6
commit f6f057bb7d
2 changed files with 23 additions and 4 deletions

View File

@ -213,15 +213,18 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs);
// make sure Dest is a compile-time vector type (bug 1166)
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
@ -314,7 +317,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhsPtr, 1),
dest.data(), dest.innerStride(),
dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
actualAlpha);
}
};

View File

@ -144,6 +144,22 @@ template<typename MatrixType> void product(const MatrixType& m)
VERIFY_IS_APPROX(res.col(r).noalias() = square.adjoint() * square.col(r), (square.adjoint() * square.col(r)).eval());
VERIFY_IS_APPROX(res.col(r).noalias() = square * square.col(r), (square * square.col(r)).eval());
// vector at runtime (see bug 1166)
{
RowSquareMatrixType ref(square);
ColSquareMatrixType ref2(square2);
ref = res = square;
VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose()));
VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose()));
VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square));
VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square));
ref2 = res2 = square2;
VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose()));
VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose()));
VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2, (ref2.row(0) = m1.row(0) * square2));
VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2, (ref2.row(0) = m1.row(0) * square2));
}
// inner product
{
Scalar x = square2.row(c) * square2.col(c2);