diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 5ea59e3db..0145fc7b3 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -58,7 +58,7 @@ private: MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MaySliceVectorize = MightVectorize && int(InnerMaxSize)==Dynamic /* slice vectorization can be slow, so we only + MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block in a fixed-size matrix */ }; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index c1bb99141..b50d2cdd5 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -323,7 +323,7 @@ template class MatrixBase typename OtherDerived::Eval solveTriangular(const MatrixBase& other) const; template - void solveTriangularInPlace(MatrixBase& other) const; + void solveTriangularInPlace(MatrixBase* p_other) const; template diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 5d6ee54eb..5f6cf7c28 100755 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -37,21 +37,21 @@ template -struct ei_trisolve_selector; +struct ei_solve_triangular_selector; // transform a Part xpr to a Flagged xpr template -struct ei_trisolve_selector,Rhs,TriangularPart,StorageOrder> +struct ei_solve_triangular_selector,Rhs,TriangularPart,StorageOrder> { static void run(const Part& lhs, Rhs& other) { - ei_trisolve_selector,Rhs>::run(lhs._expression(), other); + ei_solve_triangular_selector,Rhs>::run(lhs._expression(), other); } }; // forward substitution, row-major template -struct ei_trisolve_selector +struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; static void run(const Lhs& lhs, Rhs& other) @@ -74,7 +74,7 @@ struct ei_trisolve_selector // backward substitution, row-major template -struct ei_trisolve_selector +struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; static void run(const Lhs& lhs, Rhs& other) @@ -101,7 +101,7 @@ struct ei_trisolve_selector // FIXME the Lower and Upper specialization could be merged using a small helper class // performing reflexions on the coordinates... template -struct ei_trisolve_selector +struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; typedef typename ei_packet_traits::type Packet; @@ -168,7 +168,7 @@ struct ei_trisolve_selector // backward substitution, col-major // see the previous specialization for details on the algorithm template -struct ei_trisolve_selector +struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; static void run(const Lhs& lhs, Rhs& other) @@ -214,40 +214,58 @@ struct ei_trisolve_selector /** "in-place" version of MatrixBase::solveTriangular() where the result is written in \a other * - * \sa solveTriangular() + * See MatrixBase:solveTriangular() for the details. */ template template -void MatrixBase::solveTriangularInPlace(MatrixBase& other) const +void MatrixBase::solveTriangularInPlace(MatrixBase* p_other) const { + ei_assert(p_other!=0); ei_assert(derived().cols() == derived().rows()); - ei_assert(derived().cols() == other.rows()); + ei_assert(derived().cols() == p_other->rows()); ei_assert(!(Flags & ZeroDiagBit)); ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit)); - ei_trisolve_selector::run(derived(), other.derived()); + ei_solve_triangular_selector::run(derived(), p_other->derived()); } /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. * - * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other - * It works as a forward (resp. backward) substitution if \c *this is an upper (resp. lower) - * triangular matrix. + * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other. + * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the + * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this + * is an upper (resp. lower) triangular matrix. * - * It is required that \c *this be marked as either an upper or a lower triangular matrix, as - * can be done by marked(), and as is automatically the case with expressions such as those returned + * It is required that \c *this be marked as either an upper or a lower triangular matrix, which + * can be done by marked(), and that is automatically the case with expressions such as those returned * by extract(). + * + * \addexample SolveTriangular \label How to solve a triangular system (aka. how to multiply the inverse of a triangular matrix by another one) + * * Example: \include MatrixBase_marked.cpp * Output: \verbinclude MatrixBase_marked.out + * + * This function is essentially a wrapper to the faster solveTriangularInPlace() function creating + * a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it. + * Therefore, if \a other is not needed anymore, it is quite faster to call solveTriangularInPlace() + * instead of solveTriangular(). + * + * For users comming from BLAS, this function (and more specifically solveTriangularInPlace()) offer + * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines. * - * \sa marked(), extract() + * \b Tips: to perform a \em "right-inverse-multiply" you can simply transpose the operation, e.g.: + * \code + * M * T^1 <=> T.transpose().solveTriangularInPlace(M.transpose()); + * \endcode + * + * \sa solveTriangularInPlace(), marked(), extract() */ template template typename OtherDerived::Eval MatrixBase::solveTriangular(const MatrixBase& other) const { typename OtherDerived::Eval res(other); - solveTriangularInPlace(res); + solveTriangularInPlace(&res); return res; } diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h index 1267ec386..0f68f563e 100644 --- a/Eigen/src/LU/LU.h +++ b/Eigen/src/LU/LU.h @@ -399,7 +399,7 @@ void LU::computeKernel(Matrix() - .solveTriangularInPlace(y); + .solveTriangularInPlace(&y); for(int i = 0; i < m_rank; i++) result->row(m_q.coeff(i)) = y.row(i); @@ -451,7 +451,7 @@ bool LU::solve( l.setZero(); l.corner(Eigen::TopLeft,rows,smalldim) = m_lu.corner(Eigen::TopLeft,rows,smalldim); - l.template marked().solveTriangularInPlace(c); + l.template marked().solveTriangularInPlace(&c); // Step 3 if(!isSurjective()) @@ -468,7 +468,7 @@ bool LU::solve( d(c.corner(TopLeft, m_rank, c.cols())); m_lu.corner(TopLeft, m_rank, m_rank) .template marked() - .solveTriangularInPlace(d); + .solveTriangularInPlace(&d); // Step 4 result->resize(m_lu.cols(), b.cols());