From fc9480cbb38db2c7585e3ac5d14719237a85d2d7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 16 Aug 2009 10:55:10 +0200 Subject: [PATCH] bugfix in compute_matrix_flags, optimization in LU, improve doc, and workaround aliasing detection in MatrixBase_eval snippet (not very nice but I don't know how to do it in a better way) --- Eigen/src/Core/NoAlias.h | 52 +++++++---- Eigen/src/Core/ProductBase.h | 6 +- Eigen/src/Core/util/XprHelper.h | 2 +- Eigen/src/LU/LU.h | 2 +- Eigen/src/LU/PartialLU.h | 2 +- doc/I01_TopicLazyEvaluation.dox | 2 +- doc/I02_HiPerformance.dox | 154 +++++++------------------------ doc/snippets/MatrixBase_eval.cpp | 2 +- 8 files changed, 77 insertions(+), 145 deletions(-) diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index 5c2b6e3ca..a45493ddc 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -31,8 +31,9 @@ * * \param ExpressionType the type of the object on which to do the lazy assignment * - * This class represents an expression with a special assignment operator (operator=) + * This class represents an expression with special assignment operators * assuming no aliasing between the target expression and the source expression. + * More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression. * It is the return type of MatrixBase::noalias() * and most of the time this is the only way it is used. * @@ -44,44 +45,61 @@ class NoAlias public: NoAlias(ExpressionType& expression) : m_expression(expression) {} - /** Behaves like MatrixBase::lazyAssign() */ + /** Behaves like MatrixBase::lazyAssign(other) + * \sa MatrixBase::lazyAssign() */ template ExpressionType& operator=(const MatrixBase& other) - { - return m_expression.lazyAssign(other.derived()); - } + { return m_expression.lazyAssign(other.derived()); } - // TODO could be removed if we decide that += is noalias by default + /** \sa MatrixBase::operator+= */ template ExpressionType& operator+=(const MatrixBase& other) - { - return m_expression.lazyAssign(m_expression + other.derived()); - } + { return m_expression.lazyAssign(m_expression + other.derived()); } - // TODO could be removed if we decide that += is noalias by default + /** \sa MatrixBase::operator-= */ template ExpressionType& operator-=(const MatrixBase& other) - { - return m_expression.lazyAssign(m_expression - other.derived()); - } + { return m_expression.lazyAssign(m_expression - other.derived()); } - // TODO could be removed if we decide that += is noalias by default +#ifndef EIGEN_PARSED_BY_DOXYGEN template ExpressionType& operator+=(const ProductBase& other) { other.derived().addTo(m_expression); return m_expression; } - // TODO could be removed if we decide that += is noalias by default template ExpressionType& operator-=(const ProductBase& other) { other.derived().subTo(m_expression); return m_expression; } +#endif protected: ExpressionType& m_expression; }; - /** \returns a pseudo expression of \c *this with an operator= assuming - * no aliasing between \c *this and the source expression + * no aliasing between \c *this and the source expression. + * + * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag. + * Currently, even though several expressions may alias, only product + * expressions have this flag. Therefore, noalias() is only usefull when + * the source expression contains a matrix product. + * + * Here are some examples where noalias is usefull: + * \code + * D.noalias() = A * B; + * D.noalias() += A.transpose() * B; + * D.noalias() -= 2 * A * B.adjoint(); + * \endcode + * + * On the other hand the following example will lead to a \b wrong result: + * \code + * A.noalias() = A * B; + * \endcode + * because the result matrix A is also an operand of the matrix product. Therefore, + * there is no alternative than evaluating A * B in a temporary, that is the default + * behavior when you write: + * \code + * A = A * B; + * \endcode * * \sa class NoAlias */ diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 5e259b7f7..b2c4cd989 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -119,10 +119,8 @@ class ProductBase : public MatrixBase return res; } - const Flagged lazy() const - { - return *this; - } + EIGEN_DEPRECATED const Flagged lazy() const + { return *this; } const _LhsNested& lhs() const { return m_lhs; } const _RhsNested& rhs() const { return m_rhs; } diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index d392e27ba..871259b08 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -90,7 +90,7 @@ class ei_compute_matrix_flags : MaxRows==1 ? MaxCols : row_major_bit ? MaxCols : MaxRows, is_big = inner_max_size == Dynamic, - is_packet_size_multiple = Rows==Dynamic || Cols==Dynamic || ((Cols*Rows) % ei_packet_traits::size) == 0, + is_packet_size_multiple = MaxRows==Dynamic || MaxCols==Dynamic || ((MaxCols*MaxRows) % ei_packet_traits::size) == 0, aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0, packet_access_bit = ei_packet_traits::size > 1 && aligned_bit ? PacketAccessBit : 0 }; diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h index f072ff906..d32fe08a1 100644 --- a/Eigen/src/LU/LU.h +++ b/Eigen/src/LU/LU.h @@ -437,7 +437,7 @@ LU& LU::compute(const MatrixType& matrix) if(k \code m1 += m2 * m3; \endcode \code temp = m2 * m3; m1 += temp; \endcode -\code m1 += (m2 * m3).lazy(); \endcode -Use .lazy() to tell Eigen the result and right-hand-sides do not alias. +\code m1.noalias() += m2 * m3; \endcode +Use .noalias() to tell Eigen the result and right-hand-sides do not alias. + Otherwise the product m2 * m3 is evaluated into a temporary. -\code m1 += (s1 * (m2 * m3)).lazy(); \endcode -\code temp = (m2 * m3).lazy(); m1 += s1 * temp; \endcode -\code m1 += (s1 * m2 * m3).lazy(); \endcode -This is because m2 * m3 is immediately evaluated by the scalar product.
- Make sure the matrix product is the top most expression. + + +\code m1.noalias() += s1 * (m2 * m3); \endcode +This is a special feature of Eigen. Here the product between a scalar + and a matrix product does not evaluate the matrix product but instead it + returns a matrix product expression tracking the scalar scaling factor.
+ Without this optimization, the matrix product would be evaluated into a + temporary as in the next example. -\code m1 += s1 * (m2 * m3).lazy(); \endcode -\code m1 += s1 * m2 * m3; // using a naive product \endcode -\code m1 += (s1 * m2 * m3).lazy(); \endcode -Even though this expression is evaluated without temporary, it is actually even - worse than the previous case because here the .lazy() enforces Eigen to use a - naive (and slow) evaluation of the product. +\code m1.noalias() += (m2 * m3).transpose(); \endcode +\code temp = m2 * m3; m1 += temp.transpose(); \endcode +\code m1.noalias() += m3.adjoint() * m3.adjoint(); \endcode +This is because the product expression has the EvalBeforeNesting bit which + enforces the evaluation of the product by the Tranpose expression. \code m1 = m1 + m2 * m3; \endcode @@ -78,112 +81,25 @@ handled by a single GEMM-like call are correctly detected. and so the matrix product will be immediately evaluated. -\code m1 += ((s1*m2).block(....) * m3).lazy(); \endcode -\code temp = (s1*m2).block(....); m1 += (temp * m3).lazy(); \endcode -\code m1 += (s1 * m2.block(....) * m3).lazy(); \endcode +\code m1.noalias() = m4 + m2 * m3; \endcode +\code temp = m2 * m3; m1 = m4 + temp; \endcode +\code m1 = m4; m1.noalias() += m2 * m3; \endcode +First of all, here the .noalias() in the first expression is useless because + m2*m3 will be evaluated anyway. However, note how this expression can be rewritten + so that no temporary is evaluated. (tip: for very small fixed size matrix + it is slighlty better to rewrite it like this: m1.noalias() = m2 * m3; m1 += m4; + + +\code m1.noalias() += ((s1*m2).block(....) * m3); \endcode +\code temp = (s1*m2).block(....); m1 += temp * m3; \endcode +\code m1.noalias() += s1 * m2.block(....) * m3; \endcode This is because our expression analyzer is currently not able to extract trivial expressions nested in a Block expression. Therefore the nested scalar multiple cannot be properly extracted. -Of course all these remarks hold for all other kind of products that we will describe in the following paragraphs. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
BLAS equivalent routineEfficient version
(compile to a single optimized evaluation)
Less efficient equivalent version
(requires multiple evaluations)
comments
GEMMm1 = s1 * m2 * m3m1 = s1 * (m2 * m3)This is because m2 * m3 is evaluated by the scalar product.
GEMMm1 += s1 * m2.adjoint() * m3m1 += (s1 * m2).adjoint() * m3This is because our expression analyzer stops at the first transpose expression and cannot extract the nested scalar multiple.
GEMMm1 += m2.adjoint() * m3m1 += m2.conjugate().transpose() * m3For the same reason. Use .adjoint() or .transpose().conjugate()
GEMMm1 -= (-(s0*m2).conjugate()*s1) * (s2 * m3.adjoint() * s3)Note that s0 is automatically conjugated during the simplification of the expression.
SYRm.sefadjointView().rankUpdate(v,s)Computes m += s * v * v.adjoint()
SYR2m.sefadjointView().rankUpdate(u,v,s)Computes m += s * u * v.adjoint() + s * v * u.adjoint()
SYRKm1.sefadjointView().rankUpdate(m2.adjoint(),s)Computes m1 += s * m2.adjoint() * m2
SYMM/HEMMm3 -= s1 * m1.sefadjointView() * m2.adjoint()
SYMM/HEMMm3 += s1 * m2.transpose() * m1.conjugate().sefadjointView()
TRMMm3 -= s1 * m1.triangularView() * m2.adjoint()
TRSV / TRSMm1.adjoint().triangularView().solveInPlace(m2)
+Of course all these remarks hold for all other kind of products involving triangular or selfadjoint matrices. */ diff --git a/doc/snippets/MatrixBase_eval.cpp b/doc/snippets/MatrixBase_eval.cpp index 732293043..d70424562 100644 --- a/doc/snippets/MatrixBase_eval.cpp +++ b/doc/snippets/MatrixBase_eval.cpp @@ -4,7 +4,7 @@ m = M; cout << "Here is the matrix m:" << endl << m << endl; cout << "Now we want to replace m by its own transpose." << endl; cout << "If we do m = m.transpose(), then m becomes:" << endl; -m = m.transpose(); +m = m.transpose() * 1; cout << m << endl << "which is wrong!" << endl; cout << "Now let us instead do m = m.transpose().eval(). Then m becomes" << endl; m = M;