Make the column-major matrix * vector product use pointers only

This commit is contained in:
Gael Guennebaud 2010-07-11 16:01:48 +02:00
parent ff96c94043
commit 8e3c4283f5
5 changed files with 19 additions and 16 deletions

View File

@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
{ {
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess) bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
/*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha);
} }
}; };
@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run( <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(), actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
actualRhs, actualRhs.innerStride(), actualRhs.data(), actualRhs.innerStride(),
actualDest, 1, actualDest, 1,
actualAlpha); actualAlpha);

View File

@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run( ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
r, actualPanelWidth, r, actualPanelWidth,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(), &(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
other.segment(startBlock, actualPanelWidth), other.innerStride(), &other.coeff(startBlock), other.innerStride(),
&(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1)); &(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
} }
} }

View File

@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
} }
// FIXME // FIXME
// #ifdef EIGEN_HAS_FUSE_CJMADD #ifndef EIGEN_HAS_FUSE_CJMADD
#define EIGEN_HAS_FUSE_CJMADD
#endif
#ifdef EIGEN_HAS_FUSE_CJMADD
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
// #else #else
//#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T)); #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); #endif
// #endif
// optimized GEneral packed Block * packed Panel product kernel // optimized GEneral packed Block * packed Panel product kernel
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs> template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend");
const RhsScalar* blB = unpackedB; const RhsScalar* blB = unpackedB;
for(Index k=0; k<depth; k++) for(Index k=0; k<depth; k++)
{ {
#ifndef EIGEN_HAS_FUSE_CJMADD
RhsPacket T0; RhsPacket T0;
#endif
MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0); MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
blB += RhsPacketSize; blB += RhsPacketSize;
blA += LhsPacketSize; blA += LhsPacketSize;

View File

@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket; typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket; typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
template<typename RhsType>
EIGEN_DONT_INLINE static void run( EIGEN_DONT_INLINE static void run(
Index rows, Index cols, Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride, const LhsScalar* lhs, Index lhsStride,
const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr, const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr, ResScalar* res, Index resIncr,
ResScalar alpha) ResScalar alpha)
{ {
EIGEN_UNUSED_VARIABLE(rhsIncr);
ei_internal_assert(resIncr==1); ei_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS #ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined #error _EIGEN_ACCUMULATE_PACKETS has already been defined
@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run(
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{ {
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]), ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]), RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]); ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps a lot generating better binary code // this helps a lot generating better binary code
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run(
{ {
for (Index i=start; i<end; ++i) for (Index i=start; i<end; ++i)
{ {
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]); RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
const LhsScalar* lhs0 = lhs + i*lhsStride; const LhsScalar* lhs0 = lhs + i*lhsStride;
if (Vectorizable) if (Vectorizable)

View File

@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run( ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
r, actualPanelWidth, r, actualPanelWidth,
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(), &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
rhs.segment(pi, actualPanelWidth), rhs.innerStride(), &rhs.coeff(pi), rhs.innerStride(),
&res.coeffRef(s), res.innerStride(), alpha); &res.coeffRef(s), res.innerStride(), alpha);
} }
} }