* Start of the LU module, with matrix inversion already there and

fully optimized.
* Even if LargeBit is set, only parallelize for large enough objects
  (controlled by EIGEN_PARALLELIZATION_TRESHOLD).
This commit is contained in:
Benoit Jacob 2008-04-14 08:20:24 +00:00
parent ab4046970b
commit ea3ccb1e8c
14 changed files with 360 additions and 23 deletions

View File

@ -10,7 +10,7 @@
#endif
#endif
#ifndef EIGEN_DONT_USE_OPENMP
#ifndef EIGEN_DONT_PARALLELIZE
#ifdef _OPENMP
#define EIGEN_USE_OPENMP
#include <omp.h>

12
Eigen/LU Normal file
View File

@ -0,0 +1,12 @@
#ifndef EIGEN_LU_H
#define EIGEN_LU_H
#include "Core"
namespace Eigen {
#include "Eigen/src/LU/Inverse.h"
} // namespace Eigen
#endif // EIGEN_LU_H

View File

@ -1 +1,2 @@
ADD_SUBDIRECTORY(Core)
ADD_SUBDIRECTORY(LU)

View File

@ -135,6 +135,11 @@ Derived& MatrixBase<Derived>
}
}
template<typename T1, typename T2> bool ei_should_parallelize_assignment(const T1& t, const T2&)
{
return (T1::Flags & T2::Flags & LargeBit) && t.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
}
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false>
{
@ -157,7 +162,7 @@ struct ei_assignment_impl<Derived, OtherDerived, false>
for(int j = 0; j < dst.cols(); j++) \
for(int i = 0; i < dst.rows(); i++) \
dst.coeffRef(i, j) = src.coeff(i, j);
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
@ -168,7 +173,7 @@ struct ei_assignment_impl<Derived, OtherDerived, false>
for(int i = 0; i < dst.rows(); i++) \
for(int j = 0; j < dst.cols(); j++) \
dst.coeffRef(i, j) = src.coeff(i, j);
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}
@ -198,7 +203,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
for(int i = 0; i < dst.rows(); i++) \
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
@ -207,7 +212,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
for(int j = 0; j < dst.cols(); j++) \
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src))
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}

View File

@ -67,11 +67,11 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
: (BlockRows==Dynamic ? MatrixType::MaxRowsAtCompileTime : BlockRows),
MaxColsAtCompileTime = ColsAtCompileTime == 1 ? 1
: (BlockCols==Dynamic ? MatrixType::MaxColsAtCompileTime : BlockCols),
FlagsLargeBit = ((RowsAtCompileTime != Dynamic && MatrixType::RowsAtCompileTime == Dynamic)
|| (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic))
? ~LargeBit
: ~(unsigned int)0,
Flags = MatrixType::Flags & FlagsLargeBit & ~VectorizableBit,
FlagsMaskLargeBit = ((RowsAtCompileTime != Dynamic && MatrixType::RowsAtCompileTime == Dynamic)
|| (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic))
? ~LargeBit
: ~(unsigned int)0,
Flags = MatrixType::Flags & FlagsMaskLargeBit & ~VectorizableBit,
CoeffReadCost = MatrixType::CoeffReadCost
};
};

View File

@ -95,11 +95,8 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Flags, _MaxRows, _MaxCols> >
};
};
template<typename _Scalar, int _Rows, int _Cols,
unsigned int _Flags = EIGEN_DEFAULT_MATRIX_FLAGS,
int _MaxRows = _Rows, int _MaxCols = _Cols>
class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols,
_Flags, _MaxRows, _MaxCols> >
template<typename _Scalar, int _Rows, int _Cols, unsigned int _Flags, int _MaxRows, int _MaxCols>
class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Flags, _MaxRows, _MaxCols> >
{
public:

View File

@ -506,6 +506,15 @@ template<typename Derived> class MatrixBase
{ return *static_cast<Derived*>(const_cast<MatrixBase*>(this)); }
//@}
/** \name LU module
*
* \code #include <Eigen/LU> \endcode
*/
//@{
const Inverse<Derived, true> inverse() const;
const Inverse<Derived, false> quickInverse() const;
//@}
private:
PacketScalar _packetCoeff(int , int) const { ei_internal_assert(false && "_packetCoeff not defined"); }

View File

@ -280,6 +280,8 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
{
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
const bool should_parallelize = (Flags & DestDerived::Flags & LargeBit)
&& res.size() >= EIGEN_PARALLELIZATION_TRESHOLD;
#ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
{
@ -318,7 +320,7 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
} \
}
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
@ -345,7 +347,7 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
} \
}
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}

View File

@ -29,7 +29,11 @@ template<typename T> struct ei_traits;
template<typename Lhs, typename Rhs> struct ei_product_eval_mode;
template<typename T> struct NumTraits;
template<typename _Scalar, int _Rows, int _Cols, unsigned int _Flags, int _MaxRows, int _MaxCols> class Matrix;
template<typename _Scalar, int _Rows, int _Cols,
unsigned int _Flags = EIGEN_DEFAULT_MATRIX_FLAGS,
int _MaxRows = _Rows, int _MaxCols = _Cols>
class Matrix;
template<typename ExpressionType> class Lazy;
template<typename ExpressionType> class Temporary;
template<typename MatrixType> class Minor;
@ -47,7 +51,6 @@ template<typename MatrixType> class DiagonalCoeffs;
template<typename MatrixType> class Identity;
template<typename MatrixType> class Map;
template<typename Derived> class Eval;
template<typename Derived> class EvalOMP;
template<int Direction, typename UnaryOp, typename MatrixType> class PartialRedux;
template<typename Scalar> struct ei_scalar_sum_op;
@ -70,4 +73,6 @@ template<typename Scalar> struct ei_scalar_quotient1_op;
template<typename Scalar> struct ei_scalar_min_op;
template<typename Scalar> struct ei_scalar_max_op;
template<typename ExpressionType, bool CheckExistence = true> class Inverse;
#endif // EIGEN_FORWARDDECLARATIONS_H

View File

@ -37,6 +37,10 @@
#define EIGEN_UNROLLING_LIMIT 400
#endif
#ifndef EIGEN_PARALLELIZATION_TRESHOLD
#define EIGEN_PARALLELIZATION_TRESHOLD 2000
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
#else

View File

@ -182,5 +182,4 @@ template<typename T> struct ei_packet_traits
enum {size=1};
};
#endif // EIGEN_META_H

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_LU_SRCS "*.h")
INSTALL(FILES
${Eigen_LU_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU
)

296
Eigen/src/LU/Inverse.h Normal file
View File

@ -0,0 +1,296 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_INVERSE_H
#define EIGEN_INVERSE_H
/** \class Inverse
*
* \brief Inverse of a matrix
*
* \param ExpressionType the type of the matrix/expression of which we are taking the inverse
* \param CheckExistence whether or not to check the existence of the inverse while computing it
*
* This class represents the inverse of a matrix. It is the return
* type of MatrixBase::inverse() and most of the time this is the only way it
* is used.
*
* \sa MatrixBase::inverse(), MatrixBase::quickInverse()
*/
template<typename ExpressionType, bool CheckExistence>
struct ei_traits<Inverse<ExpressionType, CheckExistence> >
{
typedef typename ExpressionType::Scalar Scalar;
typedef typename ExpressionType::Eval MatrixType;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = MatrixType::Flags,
CoeffReadCost = MatrixType::CoeffReadCost
};
};
template<typename ExpressionType, bool CheckExistence> class Inverse : ei_no_assignment_operator,
public MatrixBase<Inverse<ExpressionType, CheckExistence> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Inverse)
typedef typename ei_traits<Inverse>::MatrixType MatrixType;
Inverse(const ExpressionType& xpr)
: m_exists(true),
m_inverse(MatrixType::identity(xpr.rows(), xpr.cols()))
{
ei_assert(xpr.rows() == xpr.cols());
_compute(xpr);
}
/** \returns whether or not the inverse exists.
*
* \note This method is only available if CheckExistence is set to true, which is the default value.
* For instance, when using quickInverse(), this method is not available.
*/
bool exists() const { assert(CheckExistence); return m_exists; }
private:
int _rows() const { return m_inverse.rows(); }
int _cols() const { return m_inverse.cols(); }
const Scalar _coeff(int row, int col) const
{
return m_inverse.coeff(row, col);
}
void _compute(const ExpressionType& xpr);
void _compute_not_unrolled(MatrixType& matrix, const RealScalar& max);
template<int Size, int Step, bool Finished = Size==Dynamic> struct _unroll_first_loop;
template<int Size, int Step, bool Finished = Size==Dynamic> struct _unroll_second_loop;
template<int Size, int Step, bool Finished = Size==Dynamic> struct _unroll_third_loop;
protected:
bool m_exists;
MatrixType m_inverse;
};
template<typename ExpressionType, bool CheckExistence>
void Inverse<ExpressionType, CheckExistence>
::_compute_not_unrolled(MatrixType& matrix, const RealScalar& max)
{
const int size = matrix.rows();
for(int k = 0; k < size-1; k++)
{
int rowOfBiggest;
const RealScalar max_in_this_col
= matrix.col(k).end(size-k).cwiseAbs().maxCoeff(&rowOfBiggest);
if(CheckExistence && ei_isMuchSmallerThan(max_in_this_col, max))
{ m_exists = false; return; }
m_inverse.row(k).swap(m_inverse.row(k+rowOfBiggest));
matrix.row(k).swap(matrix.row(k+rowOfBiggest));
const Scalar d = matrix(k,k);
m_inverse.block(k+1, 0, size-k-1, size)
-= matrix.col(k).end(size-k-1) * (m_inverse.row(k) / d);
matrix.corner(BottomRight, size-k-1, size-k)
-= matrix.col(k).end(size-k-1) * (matrix.row(k).end(size-k) / d);
}
for(int k = 0; k < size-1; k++)
{
const Scalar d = static_cast<Scalar>(1)/matrix(k,k);
matrix.row(k).end(size-k) *= d;
m_inverse.row(k) *= d;
}
if(CheckExistence && ei_isMuchSmallerThan(matrix(size-1,size-1), max))
{ m_exists = false; return; }
m_inverse.row(size-1) /= matrix(size-1,size-1);
for(int k = size-1; k >= 1; k--)
{
m_inverse.block(0,0,k,size) -= matrix.col(k).start(k) * m_inverse.row(k);
}
}
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step, bool Finished>
struct Inverse<ExpressionType, CheckExistence>::_unroll_first_loop
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv& object, MatrixType& matrix, const RealScalar& max)
{
MatrixType& inverse = object.m_inverse;
int rowOfBiggest;
const RealScalar max_in_this_col
= matrix.col(Step).template end<Size-Step>().cwiseAbs().maxCoeff(&rowOfBiggest);
if(CheckExistence && ei_isMuchSmallerThan(max_in_this_col, max))
{ object.m_exists = false; return; }
inverse.row(Step).swap(inverse.row(Step+rowOfBiggest));
matrix.row(Step).swap(matrix.row(Step+rowOfBiggest));
const Scalar d = matrix(Step,Step);
inverse.template block<Size-Step-1, Size>(Step+1, 0)
-= matrix.col(Step).template end<Size-Step-1>() * (inverse.row(Step) / d);
matrix.template corner<Size-Step-1, Size-Step>(BottomRight)
-= matrix.col(Step).template end<Size-Step-1>()
* (matrix.row(Step).template end<Size-Step>() / d);
_unroll_first_loop<Size, Step+1, Step >= Size-2>::run(object, matrix, max);
}
};
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step>
struct Inverse<ExpressionType, CheckExistence>::_unroll_first_loop<Step, Size, true>
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv&, MatrixType&, const RealScalar&) {}
};
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step, bool Finished>
struct Inverse<ExpressionType, CheckExistence>::_unroll_second_loop
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv& object, MatrixType& matrix, const RealScalar& max)
{
MatrixType& inverse = object.m_inverse;
if(Step == Size-1)
{
if(CheckExistence && ei_isMuchSmallerThan(matrix(Size-1,Size-1), max))
{ object.m_exists = false; return; }
inverse.row(Size-1) /= matrix(Size-1,Size-1);
}
else
{
const Scalar d = static_cast<Scalar>(1)/matrix(Step,Step);
matrix.row(Step).template end<Size-Step>() *= d;
inverse.row(Step) *= d;
}
_unroll_second_loop<Size, Step+1, Step >= Size-1>::run(object, matrix, max);
}
};
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step>
struct Inverse<ExpressionType, CheckExistence>::_unroll_second_loop <Step, Size, true>
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv&, MatrixType&, const RealScalar&) {}
};
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step, bool Finished>
struct Inverse<ExpressionType, CheckExistence>::_unroll_third_loop
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv& object, MatrixType& matrix, const RealScalar& max)
{
MatrixType& inverse = object.m_inverse;
inverse.template block<Size-Step,Size>(0,0)
-= matrix.col(Size-Step).template start<Size-Step>() * inverse.row(Size-Step);
_unroll_third_loop<Size, Step+1, Step >= Size-1>::run(object, matrix, max);
}
};
template<typename ExpressionType, bool CheckExistence>
template<int Size, int Step>
struct Inverse<ExpressionType, CheckExistence>::_unroll_third_loop<Step, Size, true>
{
typedef Inverse<ExpressionType, CheckExistence> Inv;
typedef typename Inv::MatrixType MatrixType;
typedef typename Inv::RealScalar RealScalar;
static void run(Inv&, MatrixType&, const RealScalar&) {}
};
template<typename ExpressionType, bool CheckExistence>
void Inverse<ExpressionType, CheckExistence>::_compute(const ExpressionType& xpr)
{
MatrixType matrix(xpr);
const RealScalar max = CheckExistence ? matrix.cwiseAbs().maxCoeff()
: static_cast<RealScalar>(0);
if(MatrixType::RowsAtCompileTime <= 4)
{
const int size = MatrixType::RowsAtCompileTime;
_unroll_first_loop<size, 0>::run(*this, matrix, max);
if(CheckExistence && !m_exists) return;
_unroll_second_loop<size, 0>::run(*this, matrix, max);
if(CheckExistence && !m_exists) return;
_unroll_third_loop<size, 1>::run(*this, matrix, max);
}
else
{
_compute_not_unrolled(matrix, max);
}
}
/** \return the matrix inverse of \c *this, if it exists.
*
* Example: \include MatrixBase_inverse.cpp
* Output: \verbinclude MatrixBase_inverse.out
*
* \sa class Inverse
*/
template<typename Derived>
const Inverse<Derived, true>
MatrixBase<Derived>::inverse() const
{
return Inverse<Derived, true>(derived());
}
/** \return the matrix inverse of \c *this, which is assumed to exist.
*
* Example: \include MatrixBase_quickInverse.cpp
* Output: \verbinclude MatrixBase_quickInverse.out
*
* \sa class Inverse
*/
template<typename Derived>
const Inverse<Derived, false>
MatrixBase<Derived>::quickInverse() const
{
return Inverse<Derived, false>(derived());
}
#endif // EIGEN_INVERSE_H

View File

@ -49,7 +49,7 @@ struct unroll_echelon<Derived, Dynamic>
{
static void run(MatrixBase<Derived>& m)
{
for(int k = 0; k < m.diagonal().size(); k++)
for(int k = 0; k < m.diagonal().size() - 1; k++)
{
int rowOfBiggest, colOfBiggest;
int cornerRows = m.rows()-k, cornerCols = m.cols()-k;
@ -63,13 +63,14 @@ struct unroll_echelon<Derived, Dynamic>
}
}
};
using namespace std;
template<typename Derived>
void echelon(MatrixBase<Derived>& m)
{
const int size = DiagonalCoeffs<Derived>::SizeAtCompileTime;
const bool unroll = size <= 4;
unroll_echelon<Derived, unroll ? size : Dynamic>::run(m);
unroll_echelon<Derived, unroll ? size-1 : Dynamic>::run(m);
}
template<typename Derived>
@ -100,7 +101,7 @@ using namespace std;
int main(int, char **)
{
srand((unsigned int)time(0));
const int Rows = 6, Cols = 6;
const int Rows = 6, Cols = 4;
typedef Matrix<double, Rows, Cols> Mat;
const int N = Rows < Cols ? Rows : Cols;