mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
make colmaj * vector uses pointers only
This commit is contained in:
parent
ff96c94043
commit
8e3c4283f5
@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
|
|||||||
{
|
{
|
||||||
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
|
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
|
||||||
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||||
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)
|
bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
|
||||||
/*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
|
|||||||
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
|
||||||
actualLhs.rows(), actualLhs.cols(),
|
actualLhs.rows(), actualLhs.cols(),
|
||||||
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
|
||||||
actualRhs, actualRhs.innerStride(),
|
actualRhs.data(), actualRhs.innerStride(),
|
||||||
actualDest, 1,
|
actualDest, 1,
|
||||||
actualAlpha);
|
actualAlpha);
|
||||||
|
|
||||||
|
@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
|
|||||||
ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
|
ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
|
||||||
r, actualPanelWidth,
|
r, actualPanelWidth,
|
||||||
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
|
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
|
||||||
other.segment(startBlock, actualPanelWidth), other.innerStride(),
|
&other.coeff(startBlock), other.innerStride(),
|
||||||
&(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
|
&(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
|||||||
}
|
}
|
||||||
|
|
||||||
// FIXME
|
// FIXME
|
||||||
// #ifdef EIGEN_HAS_FUSE_CJMADD
|
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||||
|
#define EIGEN_HAS_FUSE_CJMADD
|
||||||
|
#endif
|
||||||
|
#ifdef EIGEN_HAS_FUSE_CJMADD
|
||||||
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||||
// #else
|
#else
|
||||||
//#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
|
#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
|
||||||
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T);
|
#endif
|
||||||
// #endif
|
|
||||||
|
|
||||||
// optimized GEneral packed Block * packed Panel product kernel
|
// optimized GEneral packed Block * packed Panel product kernel
|
||||||
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
|
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
|
||||||
@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend");
|
|||||||
const RhsScalar* blB = unpackedB;
|
const RhsScalar* blB = unpackedB;
|
||||||
for(Index k=0; k<depth; k++)
|
for(Index k=0; k<depth; k++)
|
||||||
{
|
{
|
||||||
|
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||||
RhsPacket T0;
|
RhsPacket T0;
|
||||||
|
#endif
|
||||||
MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
|
MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
|
||||||
blB += RhsPacketSize;
|
blB += RhsPacketSize;
|
||||||
blA += LhsPacketSize;
|
blA += LhsPacketSize;
|
||||||
|
@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
|
|||||||
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
||||||
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
||||||
|
|
||||||
template<typename RhsType>
|
|
||||||
EIGEN_DONT_INLINE static void run(
|
EIGEN_DONT_INLINE static void run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const LhsScalar* lhs, Index lhsStride,
|
const LhsScalar* lhs, Index lhsStride,
|
||||||
const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr,
|
const RhsScalar* rhs, Index rhsIncr,
|
||||||
ResScalar* res, Index resIncr,
|
ResScalar* res, Index resIncr,
|
||||||
ResScalar alpha)
|
ResScalar alpha)
|
||||||
{
|
{
|
||||||
EIGEN_UNUSED_VARIABLE(rhsIncr);
|
|
||||||
ei_internal_assert(resIncr==1);
|
ei_internal_assert(resIncr==1);
|
||||||
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||||
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||||
@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run(
|
|||||||
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||||
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||||
{
|
{
|
||||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]), ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]),
|
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
|
||||||
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]);
|
ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
|
||||||
|
ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
|
||||||
|
ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
|
||||||
|
|
||||||
// this helps a lot generating better binary code
|
// this helps a lot generating better binary code
|
||||||
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||||
@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run(
|
|||||||
{
|
{
|
||||||
for (Index i=start; i<end; ++i)
|
for (Index i=start; i<end; ++i)
|
||||||
{
|
{
|
||||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]);
|
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
|
||||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
||||||
|
|
||||||
if (Vectorizable)
|
if (Vectorizable)
|
||||||
|
@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
|
|||||||
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
|
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
|
||||||
r, actualPanelWidth,
|
r, actualPanelWidth,
|
||||||
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
|
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
|
||||||
rhs.segment(pi, actualPanelWidth), rhs.innerStride(),
|
&rhs.coeff(pi), rhs.innerStride(),
|
||||||
&res.coeffRef(s), res.innerStride(), alpha);
|
&res.coeffRef(s), res.innerStride(), alpha);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user