mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-18 14:34:17 +08:00
bug #1562: optimize evaluation of small products of the form s*A*B by rewriting them as: s*(A.lazyProduct(B)) to save a costly temporary. Measured speedup from 2x to 5x...
This commit is contained in:
parent
a7b313a16c
commit
d428a199ab
@ -396,7 +396,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
||||
// but easier on the compiler side
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
|
||||
}
|
||||
|
||||
|
||||
template<typename Dst>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
@ -410,6 +410,32 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
||||
// dst.noalias() -= lhs.lazyProduct(rhs);
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
||||
}
|
||||
|
||||
// Catch "dst {,+,-}= (s*A)*B" and evaluate it lazily by moving out the scalar factor:
|
||||
// dst {,+,-}= s * (A.lazyProduct(B))
|
||||
// This is a huge benefit for heap-allocated matrix types as it save one costly allocation.
|
||||
// For them, this strategy is also faster than simply by-passing the heap allocation through
|
||||
// stack allocation.
|
||||
// For fixed sizes matrices, this is less obvious, it is sometimes x2 faster, but sometimes x3 slower,
|
||||
// and the behavior depends also a lot on the compiler... so let's be conservative and enable them for dynamic-size only,
|
||||
// that is when coming from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
|
||||
template<typename Dst, typename Scalar1, typename Scalar2, typename Plain1, typename Xpr2, typename Func>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
|
||||
{
|
||||
call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
|
||||
}
|
||||
|
||||
// Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above
|
||||
// overload more specialized.
|
||||
template<typename Dst, typename LhsT, typename Func>
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
|
||||
{
|
||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
|
||||
}
|
||||
|
||||
|
||||
// template<typename Dst>
|
||||
// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||
|
@ -431,10 +431,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
// to determine the following heuristic.
|
||||
// EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
|
||||
// unless it has been specialized by the user or for a given architecture.
|
||||
// Note that the condition rhs.rows()>0 was required because lazy produc is (was?) not happy with empty inputs.
|
||||
// Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
|
||||
// I'm not sure it is still required.
|
||||
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
||||
lazyproduct::evalTo(dst, lhs, rhs);
|
||||
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
|
||||
else
|
||||
{
|
||||
dst.setZero();
|
||||
@ -446,7 +446,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
||||
lazyproduct::addTo(dst, lhs, rhs);
|
||||
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
||||
else
|
||||
scaleAndAddTo(dst,lhs, rhs, Scalar(1));
|
||||
}
|
||||
@ -455,7 +455,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
|
||||
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
|
||||
lazyproduct::subTo(dst, lhs, rhs);
|
||||
lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
||||
else
|
||||
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
|
||||
}
|
||||
|
@ -111,6 +111,17 @@ template<typename MatrixType> void product(const MatrixType& m)
|
||||
vcres.noalias() -= m1.transpose() * v1;
|
||||
VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1);
|
||||
|
||||
// test scaled products
|
||||
res = square;
|
||||
res.noalias() = s1 * m1 * m2.transpose();
|
||||
VERIFY_IS_APPROX(res, ((s1*m1).eval() * m2.transpose()));
|
||||
res = square;
|
||||
res.noalias() += s1 * m1 * m2.transpose();
|
||||
VERIFY_IS_APPROX(res, square + ((s1*m1).eval() * m2.transpose()));
|
||||
res = square;
|
||||
res.noalias() -= s1 * m1 * m2.transpose();
|
||||
VERIFY_IS_APPROX(res, square - ((s1*m1).eval() * m2.transpose()));
|
||||
|
||||
// test d ?= a+b*c rules
|
||||
res.noalias() = square + m1 * m2.transpose();
|
||||
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
|
||||
|
@ -35,6 +35,8 @@ void test_product_large()
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
CALL_SUBTEST_1( product(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_2( product(MatrixXd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_2( product(MatrixXd(internal::random<int>(1,10), internal::random<int>(1,10))) );
|
||||
|
||||
CALL_SUBTEST_3( product(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_4( product(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_5( product(Matrix<float,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
|
Loading…
Reference in New Issue
Block a user