mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-31 19:00:35 +08:00
remove dynamic allocation for fixed size object and triangular matrix-matrix products
This commit is contained in:
parent
8994f9962a
commit
57b5804974
@ -76,7 +76,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||
const Scalar* lhs, Index lhsStride,
|
||||
const Scalar* rhs, Index rhsStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha)
|
||||
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
product_triangular_matrix_matrix<Scalar, Index,
|
||||
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
|
||||
@ -86,7 +86,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
||||
ConjugateLhs,
|
||||
ColMajor>
|
||||
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
|
||||
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
|
||||
}
|
||||
};
|
||||
|
||||
@ -111,7 +111,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
const Scalar* _lhs, Index lhsStride,
|
||||
const Scalar* _rhs, Index rhsStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha)
|
||||
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
// strip zeros
|
||||
Index diagSize = (std::min)(_rows,_depth);
|
||||
@ -122,15 +122,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
Index nc = cols; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
Index kc = blocking.kc(); // cache block size along the K direction
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@ -188,7 +189,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
|
||||
|
||||
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
|
||||
// GEBP with remaining micro panel
|
||||
if (lengthTarget>0)
|
||||
@ -198,7 +199,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
|
||||
|
||||
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset);
|
||||
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -212,7 +213,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
|
||||
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -239,7 +240,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
const Scalar* _lhs, Index lhsStride,
|
||||
const Scalar* _rhs, Index rhsStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha)
|
||||
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||
{
|
||||
// strip zeros
|
||||
Index diagSize = (std::min)(_cols,_depth);
|
||||
@ -250,16 +251,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
Index mc = rows; // cache block size along the M direction
|
||||
Index nc = cols; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||
Index kc = blocking.kc(); // cache block size along the K direction
|
||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||
|
||||
std::size_t sizeA = kc*mc;
|
||||
std::size_t sizeB = kc*cols;
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*cols;
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
|
||||
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
|
||||
|
||||
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
|
||||
triangularBuffer.setZero();
|
||||
@ -347,13 +348,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||
alpha,
|
||||
actual_kc, actual_kc, // strides
|
||||
blockOffset, blockOffset,// offsets
|
||||
allocatedBlockB); // workspace
|
||||
blockW); // workspace
|
||||
}
|
||||
}
|
||||
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
|
||||
blockA, geb, actual_mc, actual_kc, rs,
|
||||
alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
-1, -1, 0, 0, blockW);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -386,17 +387,28 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
||||
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(m_rhs);
|
||||
|
||||
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
||||
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
|
||||
|
||||
enum { IsLower = (Mode&Lower) == Lower };
|
||||
Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
|
||||
Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
|
||||
Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
|
||||
: ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
|
||||
|
||||
BlockingType blocking(stripedRows, stripedCols, stripedDepth);
|
||||
|
||||
internal::product_triangular_matrix_matrix<Scalar, Index,
|
||||
Mode, LhsIsTriangular,
|
||||
(internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
(internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
||||
::run(
|
||||
lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes
|
||||
stripedRows, stripedCols, stripedDepth, // sizes
|
||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha // alpha
|
||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
||||
actualAlpha, blocking
|
||||
);
|
||||
}
|
||||
};
|
||||
|
@ -82,11 +82,11 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
LowUp = IsLower ? Lower : Upper \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
|
||||
{ \
|
||||
if (ConjLhs || IsZeroDiag) { \
|
||||
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
|
||||
return; \
|
||||
}\
|
||||
Index size = (std::min)(_rows,_cols); \
|
||||
@ -167,11 +167,11 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
|
||||
LowUp = IsLower ? Lower : Upper \
|
||||
}; \
|
||||
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
|
||||
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
|
||||
{ \
|
||||
if (IsZeroDiag) { \
|
||||
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
|
||||
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
|
||||
return; \
|
||||
}\
|
||||
Index size = (std::min)(_rows,_cols); \
|
||||
|
@ -167,7 +167,7 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
|
||||
{
|
||||
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
|
||||
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar);
|
||||
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&);
|
||||
static functype func[32];
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
@ -236,9 +236,15 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
|
||||
matrix(b,*m,*n,*ldb).setZero();
|
||||
|
||||
if(SIDE(*side)==LEFT)
|
||||
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha);
|
||||
{
|
||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
|
||||
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);
|
||||
}
|
||||
else
|
||||
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha);
|
||||
{
|
||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
|
||||
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ void ctms_decompositions()
|
||||
X = hQR.solve(B);
|
||||
x = hQR.solve(b);
|
||||
Eigen::ColPivHouseholderQR<Matrix> cpQR; cpQR.compute(A);
|
||||
// FIXME X = cpQR.solve(B);
|
||||
X = cpQR.solve(B);
|
||||
x = cpQR.solve(b);
|
||||
Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
|
||||
// FIXME X = fpQR.solve(B);
|
||||
|
Loading…
x
Reference in New Issue
Block a user