make matrix multiplication do immediate evaluation; add lazyMul() for the old behaviour

some reorganization, especially in MatrixStorage
start playing with loop unrolling, always_inline, and __restrict__
This commit is contained in:
Benoit Jacob 2007-09-29 08:28:36 +00:00
parent 51e29ae4bd
commit ee63e15e2c
10 changed files with 113 additions and 50 deletions

View File

@ -7,8 +7,8 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
if(CMAKE_COMPILER_IS_GNUCXX)
if (CMAKE_SYSTEM_NAME MATCHES Linux)
set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common")
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common")
set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common -fstrict-aliasing")
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
endif (CMAKE_SYSTEM_NAME MATCHES Linux)
endif (CMAKE_COMPILER_IS_GNUCXX)

View File

@ -12,7 +12,7 @@ template<typename Scalar, typename Derived>
EiScalarProduct<Derived>
twice(const EiObject<Scalar, Derived>& m)
{
return static_cast<Scalar>(2) * m;
return 2 * m;
}
int main(int, char**)

View File

@ -29,23 +29,5 @@ int main(int, char **)
cout << "Row 0 of m2, written as a column vector, is:" << endl << m2.row(0) << endl;
cout << "Column 1 of m2 is:" << endl << m2.col(1) << endl;
cout << "The matrix m2 with row 0 and column 1 removed is:" << endl << m2.minor(0,1) << endl;
cout << endl << "Now let us study a tricky issue." << endl;
cout << "Recall that the matrix product m*m is:" << endl << m*m << endl;
cout << "We want to store that into m, i.e. do \"m = m * m;\"" << endl;
cout << "Here we must be very careful. For if we do \"m = m * m;\"," << endl
<< "the matrix m becomes" << endl;
EiMatrix<double,2,2> m_save = m;
m = m * m; // the bogus operation
cout << m << "," << endl;
cout << "which is not what was wanted!" << endl
<< "Explanation: because of the way expression templates work, the matrix m gets" << endl
<< "overwritten _while_ the matrix product m * m is being computed." << endl
<< "This is the counterpart of eliminating temporary objects!" << endl
<< "Anyway, if you want to store m * m into m, you can do this:" << endl
<< " m = (m * m).eval();" << endl;
m = m_save;
m = (m * m).eval();
cout << "And m is now:" << endl << m << endl << "as was expected." << endl;
return 0;
}

View File

@ -33,7 +33,7 @@ template<typename Expression> class EiEval
{
public:
typedef typename Expression::Scalar Scalar;
typedef EiMatrix< Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType;
typedef EiMatrix<Scalar, Expression::RowsAtCompileTime, Expression::ColsAtCompileTime> MatrixType;
typedef Expression Base;
friend class EiObject<Scalar, Expression>;

View File

@ -44,22 +44,22 @@ class EiMatrix : public EiObject<_Scalar, EiMatrix<_Scalar, _Rows, _Cols> >,
static const int RowsAtCompileTime = _Rows, ColsAtCompileTime = _Cols;
const Scalar* array() const
const Scalar* EI_RESTRICT array() const
{ return Storage::m_array; }
Scalar* array()
Scalar* EI_RESTRICT array()
{ return Storage::m_array; }
private:
Ref _ref() const { return Ref(*const_cast<EiMatrix*>(this)); }
const Scalar& _read(int row, int col = 0) const
const Scalar& EI_RESTRICT _read(int row, int col = 0) const
{
EI_CHECK_RANGES(*this, row, col);
return array()[row + col * Storage::_rows()];
}
Scalar& _write(int row, int col = 0)
Scalar& EI_RESTRICT _write(int row, int col = 0)
{
EI_CHECK_RANGES(*this, row, col);
return array()[row + col * Storage::_rows()];

View File

@ -39,7 +39,7 @@ template<typename Lhs, typename Rhs> class EiSum
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
EiSum(const LhsRef& lhs, const RhsRef& rhs)
EiSum(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
@ -79,7 +79,7 @@ template<typename Lhs, typename Rhs> class EiDifference
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
EiDifference(const LhsRef& lhs, const RhsRef& rhs)
EiDifference(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
@ -118,7 +118,7 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct
static const int RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime;
EiMatrixProduct(const LhsRef& lhs, const RhsRef& rhs)
EiMatrixProduct(const LhsRef& EI_RESTRICT lhs, const RhsRef& EI_RESTRICT rhs)
: m_lhs(lhs), m_rhs(rhs)
{
assert(lhs.cols() == rhs.rows());
@ -136,10 +136,17 @@ template<typename Lhs, typename Rhs> class EiMatrixProduct
Scalar _read(int row, int col) const
{
Scalar x = static_cast<Scalar>(0);
for(int i = 0; i < m_lhs.cols(); i++)
x += m_lhs.read(row, i) * m_rhs.read(i, col);
return x;
if(Lhs::ColsAtCompileTime == 3)
{
return m_lhs(row,0) * m_rhs(0,col) + m_lhs(row,1) * m_rhs(1,col) + m_lhs(row,2) * m_rhs(2,col);
}
else
{
Scalar x = static_cast<Scalar>(0);
for(int i = 0; i < m_lhs.cols(); i++)
x += m_lhs.read(row, i) * m_rhs.read(i, col);
return x;
}
}
protected:
@ -161,11 +168,19 @@ operator-(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived
return EiDifference<Derived1, Derived2>(mat1.ref(), mat2.ref());
}
template<typename Scalar, typename Derived>
template<typename OtherDerived>
EiMatrixProduct<Derived, OtherDerived>
EiObject<Scalar, Derived>::lazyMul(const EiObject<Scalar, OtherDerived> &other) const
{
return EiMatrixProduct<Derived, OtherDerived>(ref(), other.ref());
}
template<typename Scalar, typename Derived1, typename Derived2>
EiMatrixProduct<Derived1, Derived2>
EiEval<EiMatrixProduct<Derived1, Derived2> >
operator*(const EiObject<Scalar, Derived1> &mat1, const EiObject<Scalar, Derived2> &mat2)
{
return EiMatrixProduct<Derived1, Derived2>(mat1.ref(), mat2.ref());
return mat1.lazyMul(mat2).eval();
}
template<typename Scalar, typename Derived>

View File

@ -32,7 +32,7 @@ template<typename Scalar,
class EiMatrixStorage
{
protected:
Scalar m_array[RowsAtCompileTime * RowsAtCompileTime];
Scalar EI_RESTRICT m_array[RowsAtCompileTime * RowsAtCompileTime];
void resize(int rows, int cols)
{ assert(rows == RowsAtCompileTime && cols == ColsAtCompileTime); }
@ -54,18 +54,20 @@ class EiMatrixStorage
~EiMatrixStorage() {};
};
template<typename Scalar>
class EiMatrixStorage<Scalar, EiDynamic, 1>
template<typename Scalar, int ColsAtCompileTime>
class EiMatrixStorage<Scalar, EiDynamic, ColsAtCompileTime>
{
protected:
int m_rows;
Scalar *m_array;
Scalar* EI_RESTRICT m_array;
void resize(int rows, int cols)
{ assert(rows > 0 && cols == 1);
if(rows > m_rows) {
{
assert(rows > 0 && cols == ColsAtCompileTime);
if(rows > m_rows)
{
delete[] m_array;
m_array = new Scalar[rows];
m_array = new Scalar[rows * ColsAtCompileTime];
}
m_rows = rows;
}
@ -74,13 +76,48 @@ class EiMatrixStorage<Scalar, EiDynamic, 1>
{ return m_rows; }
int _cols() const
{ return 1; }
{ return ColsAtCompileTime; }
public:
EiMatrixStorage(int rows, int cols) : m_rows(rows)
{
assert(m_rows > 0 && cols == 1);
m_array = new Scalar[m_rows];
assert(m_rows > 0 && cols == ColsAtCompileTime);
m_array = new Scalar[m_rows * ColsAtCompileTime];
}
~EiMatrixStorage()
{ delete[] m_array; }
};
template<typename Scalar, int RowsAtCompileTime>
class EiMatrixStorage<Scalar, RowsAtCompileTime, EiDynamic>
{
protected:
int m_cols;
Scalar* EI_RESTRICT m_array;
void resize(int rows, int cols)
{
assert(rows == RowsAtCompileTime && cols > 0);
if(cols > m_cols)
{
delete[] m_array;
m_array = new Scalar[cols * RowsAtCompileTime];
}
m_cols = cols;
}
int _rows() const
{ return RowsAtCompileTime; }
int _cols() const
{ return m_cols; }
public:
EiMatrixStorage(int rows, int cols) : m_cols(cols)
{
assert(rows == RowsAtCompileTime && cols > 0);
m_array = new Scalar[m_cols * RowsAtCompileTime];
}
~EiMatrixStorage()
@ -92,7 +129,7 @@ class EiMatrixStorage<Scalar, EiDynamic, EiDynamic>
{
protected:
int m_rows, m_cols;
Scalar *m_array;
Scalar* EI_RESTRICT m_array;
void resize(int rows, int cols)
{

View File

@ -36,6 +36,19 @@ template<typename Scalar, typename Derived> class EiObject
template<typename OtherDerived>
void _copy_helper(const EiObject<Scalar, OtherDerived>& other)
{
if(RowsAtCompileTime == 3 && ColsAtCompileTime == 3)
{
write(0,0) = other.read(0,0);
write(1,0) = other.read(1,0);
write(2,0) = other.read(2,0);
write(0,1) = other.read(0,1);
write(1,1) = other.read(1,1);
write(2,1) = other.read(2,1);
write(0,2) = other.read(0,2);
write(1,2) = other.read(1,2);
write(2,2) = other.read(2,2);
}
else
for(int i = 0; i < rows(); i++)
for(int j = 0; j < cols(); j++)
write(i, j) = other.read(i, j);
@ -54,7 +67,7 @@ template<typename Scalar, typename Derived> class EiObject
Ref ref() const
{ return static_cast<const Derived *>(this)->_ref(); }
Scalar& write(int row, int col)
Scalar& EI_RESTRICT write(int row, int col)
{
return static_cast<Derived *>(this)->_write(row, col);
}
@ -86,6 +99,10 @@ template<typename Scalar, typename Derived> class EiObject
EiMinor<Derived> minor(int row, int col);
EiBlock<Derived> block(int startRow, int endRow, int startCol= 0, int endCol = 0);
template<typename OtherDerived>
EiMatrixProduct<Derived, OtherDerived>
lazyMul(const EiObject<Scalar, OtherDerived>& other) const EI_ALWAYS_INLINE;
template<typename OtherDerived>
Derived& operator+=(const EiObject<Scalar, OtherDerived>& other);
template<typename OtherDerived>
@ -110,10 +127,10 @@ template<typename Scalar, typename Derived> class EiObject
Scalar operator()(int row, int col = 0) const
{ return read(row, col); }
Scalar& operator()(int row, int col = 0)
Scalar& EI_RESTRICT operator()(int row, int col = 0)
{ return write(row, col); }
EiEval<Derived> eval() const;
EiEval<Derived> eval() const EI_ALWAYS_INLINE;
};
template<typename Scalar, typename Derived>

View File

@ -68,6 +68,14 @@ const int EiDynamic = -1;
#define EI_UNUSED(x) (void)x
#ifdef __GNUC__
# define EI_ALWAYS_INLINE __attribute__((always_inline))
# define EI_RESTRICT __restrict__
#else
# define EI_ALWAYS_INLINE
# define EI_RESTRICT
#endif
#define EI_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \
template<typename OtherScalar, typename OtherDerived> \
Derived& operator Op(const EiObject<OtherScalar, OtherDerived>& other) \

View File

@ -49,7 +49,11 @@ template<typename MatrixType1,
a -= b + b;
a *= s;
b /= s;
if(rows1 == cols1) a *= b;
if(rows1 == cols1)
{
a *= b;
a.lazyMul(b);
}
MatrixType1 d(rows1, cols1);
MatrixType2 e(rows2, cols2);