mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
make the colmaj * vector product use pointers only
This commit is contained in:
parent
ff96c94043
commit
8e3c4283f5
@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
|
||||
{
|
||||
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
|
||||
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||
/*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha);
|
||||
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
|
||||
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
||||
actualRhs, actualRhs.innerStride(),
|
||||
actualRhs.data(), actualRhs.innerStride(),
|
||||
actualDest, 1,
|
||||
actualAlpha);
|
||||
|
||||
|
@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
|
||||
ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
|
||||
r, actualPanelWidth,
|
||||
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
|
||||
other.segment(startBlock, actualPanelWidth), other.innerStride(),
|
||||
&other.coeff(startBlock), other.innerStride(),
|
||||
&(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
|
||||
}
|
||||
}
|
||||
|
@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
||||
}
|
||||
|
||||
// FIXME
|
||||
// #ifdef EIGEN_HAS_FUSE_CJMADD
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
#define EIGEN_HAS_FUSE_CJMADD
|
||||
#endif
|
||||
#ifdef EIGEN_HAS_FUSE_CJMADD
|
||||
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||
// #else
|
||||
//#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
|
||||
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T);
|
||||
// #endif
|
||||
#else
|
||||
#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
|
||||
#endif
|
||||
|
||||
// optimized GEneral packed Block * packed Panel product kernel
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
|
||||
@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend");
|
||||
const RhsScalar* blB = unpackedB;
|
||||
for(Index k=0; k<depth; k++)
|
||||
{
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
RhsPacket T0;
|
||||
#endif
|
||||
MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
|
||||
blB += RhsPacketSize;
|
||||
blA += LhsPacketSize;
|
||||
|
@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
||||
|
||||
template<typename RhsType>
|
||||
EIGEN_DONT_INLINE static void run(
|
||||
Index rows, Index cols,
|
||||
const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr,
|
||||
const RhsScalar* rhs, Index rhsIncr,
|
||||
ResScalar* res, Index resIncr,
|
||||
ResScalar alpha)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(rhsIncr);
|
||||
ei_internal_assert(resIncr==1);
|
||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||
@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run(
|
||||
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||
{
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]), ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]),
|
||||
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]);
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
|
||||
ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
|
||||
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
|
||||
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
|
||||
|
||||
// this helps a lot generating better binary code
|
||||
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||
@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run(
|
||||
{
|
||||
for (Index i=start; i<end; ++i)
|
||||
{
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]);
|
||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
|
||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
||||
|
||||
if (Vectorizable)
|
||||
|
@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
|
||||
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
|
||||
r, actualPanelWidth,
|
||||
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
|
||||
rhs.segment(pi, actualPanelWidth), rhs.innerStride(),
|
||||
&rhs.coeff(pi), rhs.innerStride(),
|
||||
&res.coeffRef(s), res.innerStride(), alpha);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user