Implement temporary-free path for "D.nolias() ?= C + A*B". (I thought it was already implemented)

This commit is contained in:
Gael Guennebaud 2015-10-09 15:28:09 +02:00
parent a4cc4c1e5e
commit 6536b4bad7
5 changed files with 47 additions and 19 deletions

View File

@ -187,6 +187,37 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
}
};
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product
{
typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
{
call_assignment_no_alias(dst, src.lhs(), func);
call_assignment_no_alias(dst, src.rhs(), Func2());
}
};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
{};
//----------------------------------------
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>

View File

@ -54,25 +54,8 @@ template <typename MatrixType> void run_nesting_ops_2(const MatrixType& _m)
if((MatrixType::SizeAtCompileTime==Dynamic))
{
VERIFY_EVALUATION_COUNT( use_n_times<10>(m1), 0 );
if(!NumTraits<Scalar>::IsComplex)
{
VERIFY_EVALUATION_COUNT( use_n_times<3>(2*m1), 0 );
VERIFY_EVALUATION_COUNT( use_n_times<4>(2*m1), 1 );
}
else
{
VERIFY_EVALUATION_COUNT( use_n_times<1>(2*m1), 0 );
VERIFY_EVALUATION_COUNT( use_n_times<2>(2*m1), 1 );
}
VERIFY_EVALUATION_COUNT( use_n_times<2>(m1+m1), 0 );
VERIFY_EVALUATION_COUNT( use_n_times<3>(m1+m1), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<1>(m1*m1.transpose()), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<2>(m1*m1.transpose()), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 2 ); // FIXME should already be 1 thanks the already existing rule
VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 2 );
VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView<Lower>().solve(m1.col(0))), 1 );
VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView<Lower>().solve(m1.col(0))), 1 );

View File

@ -111,6 +111,15 @@ template<typename MatrixType> void product(const MatrixType& m)
vcres.noalias() -= m1.transpose() * v1;
VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1);
// test d ?= a+b*c rules
res.noalias() = square + m1 * m2.transpose();
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
res.noalias() += square + m1 * m2.transpose();
VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose()));
res.noalias() -= square + m1 * m2.transpose();
VERIFY_IS_APPROX(res, square + m1 * m2.transpose());
tm1 = m1;
VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1);
VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1);

View File

@ -47,6 +47,10 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0);
VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1);
VERIFY_EVALUATION_COUNT( m3.noalias() = (s1 * m1).adjoint() * s2 * m2, 0);

View File

@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed