mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
Speedup PartialPivLU for small matrices by passing compile-time sizes when available.
This change set also makes a better use of Map<>+OuterStride and Ref<> yielding surprising speed up for small dynamic sizes as well. The table below reports times in micro seconds for 10 random matrices: | ------ float --------- | ------- double ------- | size | before after ratio | before after ratio | fixed 1 | 0.34 0.11 2.93 | 0.35 0.11 3.06 | fixed 2 | 0.81 0.24 3.38 | 0.91 0.25 3.60 | fixed 3 | 1.49 0.49 3.04 | 1.68 0.55 3.01 | fixed 4 | 2.31 0.70 3.28 | 2.45 1.08 2.27 | fixed 5 | 3.49 1.11 3.13 | 3.84 2.24 1.71 | fixed 6 | 4.76 1.64 2.88 | 4.87 2.84 1.71 | dyn 1 | 0.50 0.40 1.23 | 0.51 0.40 1.26 | dyn 2 | 1.08 0.85 1.27 | 1.04 0.69 1.49 | dyn 3 | 1.76 1.26 1.40 | 1.84 1.14 1.60 | dyn 4 | 2.57 1.75 1.46 | 2.67 1.66 1.60 | dyn 5 | 3.80 2.64 1.43 | 4.00 2.48 1.61 | dyn 6 | 5.06 3.43 1.47 | 5.15 3.21 1.60 |
This commit is contained in:
parent
21eb97d3e0
commit
ab6e6edc32
@ -331,17 +331,15 @@ PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
|
|||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
/** \internal This is the blocked version of fullpivlu_unblocked() */
|
/** \internal This is the blocked version of fullpivlu_unblocked() */
|
||||||
template<typename Scalar, int StorageOrder, typename PivIndex>
|
template<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic>
|
||||||
struct partial_lu_impl
|
struct partial_lu_impl
|
||||||
{
|
{
|
||||||
// FIXME add a stride to Map, so that the following mapping becomes easier,
|
static const int UnBlockedBound = 16;
|
||||||
// another option would be to create an expression being able to automatically
|
static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;
|
||||||
// warp any Map, Matrix, and Block expressions as a unique type, but since that's exactly
|
static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;
|
||||||
// a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix,
|
typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;
|
||||||
// and Block.
|
typedef Ref<MatrixType> MatrixTypeRef;
|
||||||
typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
|
typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;
|
||||||
typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
|
|
||||||
typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
|
|
||||||
typedef typename MatrixType::RealScalar RealScalar;
|
typedef typename MatrixType::RealScalar RealScalar;
|
||||||
|
|
||||||
/** \internal performs the LU decomposition in-place of the matrix \a lu
|
/** \internal performs the LU decomposition in-place of the matrix \a lu
|
||||||
@ -354,7 +352,7 @@ struct partial_lu_impl
|
|||||||
*
|
*
|
||||||
* \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
|
* \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
|
||||||
*/
|
*/
|
||||||
static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
|
static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
|
||||||
{
|
{
|
||||||
typedef scalar_score_coeff_op<Scalar> Scoring;
|
typedef scalar_score_coeff_op<Scalar> Scoring;
|
||||||
typedef typename Scoring::result_type Score;
|
typedef typename Scoring::result_type Score;
|
||||||
@ -417,13 +415,12 @@ struct partial_lu_impl
|
|||||||
*/
|
*/
|
||||||
static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
|
static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
|
||||||
{
|
{
|
||||||
MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
|
MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride));
|
||||||
MatrixType lu(lu1,0,0,rows,cols);
|
|
||||||
|
|
||||||
const Index size = (std::min)(rows,cols);
|
const Index size = (std::min)(rows,cols);
|
||||||
|
|
||||||
// if the matrix is too small, no blocking:
|
// if the matrix is too small, no blocking:
|
||||||
if(size<=16)
|
if(UnBlockedAtCompileTime || size<=UnBlockedBound)
|
||||||
{
|
{
|
||||||
return unblocked_lu(lu, row_transpositions, nb_transpositions);
|
return unblocked_lu(lu, row_transpositions, nb_transpositions);
|
||||||
}
|
}
|
||||||
@ -449,12 +446,12 @@ struct partial_lu_impl
|
|||||||
// A00 | A01 | A02
|
// A00 | A01 | A02
|
||||||
// lu = A_0 | A_1 | A_2 = A10 | A11 | A12
|
// lu = A_0 | A_1 | A_2 = A10 | A11 | A12
|
||||||
// A20 | A21 | A22
|
// A20 | A21 | A22
|
||||||
BlockType A_0(lu,0,0,rows,k);
|
BlockType A_0 = lu.block(0,0,rows,k);
|
||||||
BlockType A_2(lu,0,k+bs,rows,tsize);
|
BlockType A_2 = lu.block(0,k+bs,rows,tsize);
|
||||||
BlockType A11(lu,k,k,bs,bs);
|
BlockType A11 = lu.block(k,k,bs,bs);
|
||||||
BlockType A12(lu,k,k+bs,bs,tsize);
|
BlockType A12 = lu.block(k,k+bs,bs,tsize);
|
||||||
BlockType A21(lu,k+bs,k,trows,bs);
|
BlockType A21 = lu.block(k+bs,k,trows,bs);
|
||||||
BlockType A22(lu,k+bs,k+bs,trows,tsize);
|
BlockType A22 = lu.block(k+bs,k+bs,trows,tsize);
|
||||||
|
|
||||||
PivIndex nb_transpositions_in_panel;
|
PivIndex nb_transpositions_in_panel;
|
||||||
// recursively call the blocked LU algorithm on [A11^T A21^T]^T
|
// recursively call the blocked LU algorithm on [A11^T A21^T]^T
|
||||||
@ -497,7 +494,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
|
|||||||
eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
|
eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
|
||||||
|
|
||||||
partial_lu_impl
|
partial_lu_impl
|
||||||
<typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
|
< typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor,
|
||||||
|
typename TranspositionType::StorageIndex,
|
||||||
|
EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)>
|
||||||
::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
|
::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user