mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
bug #51: make general_matrix_matrix_triangular_product use L3-blocking helper so that general symmetric rank-updates and general-matrix-to-triangular products do not trigger dynamic memory allocation for fixed size matrices.
This commit is contained in:
parent
c10021c00a
commit
e58827d2ed
@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
|
||||
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
|
||||
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
|
||||
{
|
||||
general_matrix_matrix_triangular_product<Index,
|
||||
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
|
||||
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
||||
ColMajor, UpLo==Lower?Upper:Lower>
|
||||
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
|
||||
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
|
||||
}
|
||||
};
|
||||
|
||||
@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
|
||||
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha)
|
||||
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
|
||||
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
|
||||
{
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
@ -73,12 +75,20 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
|
||||
Index mc = size; // cache block size along the M direction
|
||||
Index nc = size; // cache block size along the N direction
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1);
|
||||
|
||||
kc = blocking.kc();
|
||||
mc = (std::min)(size,blocking.mc());
|
||||
nc = (std::min)(size,blocking.nc());
|
||||
|
||||
// !!! mc must be a multiple of nr:
|
||||
if(mc > Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*size;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
@ -256,13 +266,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
|
||||
|
||||
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
enum {
|
||||
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
|
||||
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
|
||||
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
|
||||
};
|
||||
|
||||
Index size = mat.cols();
|
||||
Index depth = actualLhs.cols();
|
||||
|
||||
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
|
||||
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
|
||||
|
||||
BlockingType blocking(size, size, depth, 1, false);
|
||||
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
|
||||
::run(mat.cols(), actualLhs.cols(),
|
||||
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo>
|
||||
::run(size, depth,
|
||||
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
|
||||
mat.data(), mat.outerStride(), actualAlpha);
|
||||
mat.data(), mat.outerStride(), actualAlpha, blocking);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
|
||||
|
||||
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
|
||||
|
||||
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
|
||||
enum {
|
||||
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
|
||||
OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
|
||||
};
|
||||
|
||||
Index size = mat.cols();
|
||||
Index depth = actualOther.cols();
|
||||
|
||||
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
|
||||
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
|
||||
|
||||
BlockingType blocking(size, size, depth, 1, false);
|
||||
|
||||
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
|
||||
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
|
||||
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
|
||||
::run(mat.cols(), actualOther.cols(),
|
||||
Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
|
||||
Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
|
||||
IsRowMajor ? RowMajor : ColMajor, UpLo>
|
||||
::run(size, depth,
|
||||
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
|
||||
mat.data(), mat.outerStride(), actualAlpha);
|
||||
mat.data(), mat.outerStride(), actualAlpha, blocking);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -81,11 +81,12 @@ template<typename MatrixType> void nomalloc(const MatrixType& m)
|
||||
m2.template selfadjointView<Lower>().rankUpdate(m1.row(0),-1);
|
||||
|
||||
// The following fancy matrix-matrix products are not safe yet regarding static allocation
|
||||
// m1 += m1.template triangularView<Upper>() * m2.col(;
|
||||
// m1.template selfadjointView<Lower>().rankUpdate(m2);
|
||||
// m1 += m1.template triangularView<Upper>() * m2;
|
||||
// m1.col(1) += m1.template triangularView<Upper>() * m2.col(0);
|
||||
m2.template selfadjointView<Lower>().rankUpdate(m1);
|
||||
m2 += m2.template triangularView<Upper>() * m1;
|
||||
m2.template triangularView<Upper>() = m2 * m2;
|
||||
// m1 += m1.template selfadjointView<Lower>() * m2;
|
||||
// VERIFY_IS_APPROX(m1,m1);
|
||||
VERIFY_IS_APPROX(m2,m2);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
|
Loading…
Reference in New Issue
Block a user