* find the proper way of nesting the expression in Flagged:

finally that's more subtle than just using ei_nested, because when
  flagging with NestByValueBit we want to store the expression by value
  already, regardless of whether it already had the NestByValueBit set.
* rename temporary() ----> nestByValue()
* move the old Product.h to disabled/, replace by what was ProductWIP.h
* tweak -O and -g flags for tests and examples
* reorder the tests -- basic things go first
* simplifications, e.g. in many methoeds return derived() and count on
  implicit casting to the actual return type.
* strip some not-really-useful stuff from the heaviest tests
This commit is contained in:
Benoit Jacob 2008-05-28 04:38:16 +00:00
parent 559233c73e
commit aebecae510
15 changed files with 394 additions and 399 deletions

View File

@ -7,7 +7,7 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON)
IF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_SYSTEM_NAME MATCHES Linux)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
IF(TEST_OPENMP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
MESSAGE("Enabling OpenMP in tests/examples")

View File

@ -35,7 +35,9 @@ namespace Eigen {
#include "src/Core/CwiseBinaryOp.h"
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
#include "src/Core/ProductWIP.h"
#include "src/Core/InverseProduct.h"
#include "src/Core/CacheFriendlyProduct.h"
#include "src/Core/Product.h"
#include "src/Core/Block.h"
#include "src/Core/Minor.h"
#include "src/Core/Transpose.h"
@ -51,7 +53,6 @@ namespace Eigen {
#include "src/Core/CommaInitializer.h"
#include "src/Core/Extract.h"
#include "src/Core/Part.h"
#include "src/Core/InverseProduct.h"
} // namespace Eigen

View File

@ -118,7 +118,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::operator-() const
{
return CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise absolute value of \c *this
@ -127,7 +127,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_abs_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::cwiseAbs() const
{
return CwiseUnaryOp<ei_scalar_abs_op<Scalar>,Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise squared absolute value of \c *this
@ -136,7 +136,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_abs2_op<typename ei_traits<Derived>::Scalar>,Derived>
MatrixBase<Derived>::cwiseAbs2() const
{
return CwiseUnaryOp<ei_scalar_abs2_op<Scalar>,Derived>(derived());
return derived();
}
/** \returns an expression of the complex conjugate of *this.
@ -146,7 +146,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::conjugate() const
{
return CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of *this with the \a Scalar type casted to
@ -161,7 +161,7 @@ template<typename NewType>
inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
MatrixBase<Derived>::cast() const
{
return CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, Derived>(derived());
return derived();
}
/** \relates MatrixBase */
@ -201,7 +201,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_sqrt_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseSqrt() const
{
return CwiseUnaryOp<ei_scalar_sqrt_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise exponential of *this. */
@ -209,7 +209,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_exp_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseExp() const
{
return CwiseUnaryOp<ei_scalar_exp_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise logarithm of *this. */
@ -217,7 +217,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_log_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseLog() const
{
return CwiseUnaryOp<ei_scalar_log_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise cosine of *this. */
@ -225,7 +225,7 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_cos_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseCos() const
{
return CwiseUnaryOp<ei_scalar_cos_op<Scalar>, Derived>(derived());
return derived();
}
/** \returns an expression of the coefficient-wise sine of *this. */
@ -233,10 +233,10 @@ template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_sin_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwiseSin() const
{
return CwiseUnaryOp<ei_scalar_sin_op<Scalar>, Derived>(derived());
return derived();
}
/** \relates MatrixBase */
/** \returns an expression of the coefficient-wise power of *this to the given exponent. */
template<typename Derived>
inline const CwiseUnaryOp<ei_scalar_pow_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::cwisePow(const Scalar& exponent) const

View File

@ -95,7 +95,7 @@ template<typename Derived>
inline const DiagonalMatrix<Derived>
MatrixBase<Derived>::asDiagonal() const
{
return DiagonalMatrix<Derived>(derived());
return derived();
}
/** \returns true if *this is approximately equal to a diagonal matrix,

View File

@ -33,7 +33,7 @@
* \param Added the flags added to the expression
* \param Removed the flags removed from the expression (has priority over Added).
*
* This class represents an expression whose flags have been modified
* This class represents an expression whose flags have been modified.
* It is the return type of MatrixBase::flagged()
* and most of the time this is the only way it is used.
*
@ -94,7 +94,11 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
}
protected:
const ExpressionType m_matrix;
const typename ei_meta_if<
Added & ~Removed & NestByValueBit,
ExpressionType,
typename ExpressionType::Nested
>::ret m_matrix;
};
/** \returns an expression of *this with added flags
@ -121,7 +125,7 @@ MatrixBase<Derived>::lazy() const
*/
template<typename Derived>
inline const Flagged<Derived, NestByValueBit, 0>
MatrixBase<Derived>::temporary() const
MatrixBase<Derived>::nestByValue() const
{
return derived();
}

View File

@ -452,7 +452,7 @@ template<typename Derived> class MatrixBase
template<unsigned int Added>
const Flagged<Derived, Added, 0> marked() const;
const Flagged<Derived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit> lazy() const;
const Flagged<Derived, NestByValueBit, 0> temporary() const;
const Flagged<Derived, NestByValueBit, 0> nestByValue() const;
/** \returns number of elements to skip to pass from one row (resp. column) to another
* for a row-major (resp. column-major) matrix.

View File

@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
struct ei_product_unroller<0, Size, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
typename Lhs::Scalar &res)
typename Lhs::Scalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@ -60,12 +60,6 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
template<typename Lhs, typename Rhs>
struct ei_product_unroller<0, Dynamic, Lhs, Rhs>
{
static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller;
@ -119,12 +113,6 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
template<typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
{
static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
{ return product._packetCoeffRowMajor(row,col); }
@ -153,18 +141,74 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode
{
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit)))
&& Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
? CacheFriendlyProduct : NormalProduct };
};
template<typename T> class ei_product_eval_to_column_major
{
typedef typename ei_traits<T>::Scalar _Scalar;
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
_Flags = ei_traits<T>::Flags
};
public:
typedef Matrix<_Scalar,
ei_traits<T>::RowsAtCompileTime,
ei_traits<T>::ColsAtCompileTime,
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
ei_traits<T>::MaxRowsAtCompileTime,
ei_traits<T>::MaxColsAtCompileTime> type;
};
template<typename T, int n=1> struct ei_product_nested_rhs
{
typedef typename ei_meta_if<
(ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
T,
typename ei_meta_if<
((ei_traits<T>::Flags & EvalBeforeNestingBit)
|| (ei_traits<T>::Flags & RowMajorBit)
|| (!(ei_traits<T>::Flags & DirectAccessBit))
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
typename ei_product_eval_to_column_major<T>::type,
const T&
>::ret
>::ret type;
};
template<typename T, int n=1> struct ei_product_nested_lhs
{
typedef typename ei_meta_if<
ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
T,
typename ei_meta_if<
int(ei_traits<T>::Flags) & EvalBeforeNestingBit
|| (!(int(ei_traits<T>::Flags) & DirectAccessBit))
|| (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
typename ei_eval<T>::type,
const T&
>::ret
>::ret type;
};
template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested;
// the cache friendly product evals lhs once only
// FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
typename ei_product_nested_lhs<Lhs,1>::type,
typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
// NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
@ -174,6 +218,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
// the vectorization flags are only used by the normal product,
// the other one is always vectorized !
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
@ -207,6 +253,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
enum {
PacketSize = ei_packet_traits<Scalar>::size
};
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@ -214,12 +264,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
/** \internal */
template<typename DestDerived, int AlignedMode>
void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
#ifdef EIGEN_VECTORIZE
template<typename DestDerived, int AlignedMode>
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
#endif
template<typename DestDerived>
void _cacheFriendlyEval(DestDerived& res) const;
/** \internal */
template<typename DestDerived>
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
private:
@ -252,7 +302,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
{
PacketScalar res;
ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested, PacketScalar>
@ -279,16 +329,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
for(int i = 1; i < m_lhs.cols(); i++)
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
return res;
// const PacketScalar tmp[4];
// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
//
// return
// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
}
template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
friend struct ei_cache_friendly_selector;
protected:
const LhsNested m_lhs;
@ -296,9 +340,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
};
/** \returns the matrix product of \c *this and \a other.
*
* \note This function causes an immediate evaluation. If you want to perform a matrix product
* without immediate evaluation, call .lazy() on one of the matrices before taking the product.
*
* \sa lazy(), operator*=(const MatrixBase&)
*/
@ -322,168 +363,107 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other;
}
/** \internal */
template<typename Derived>
template<typename Lhs,typename Rhs>
inline Derived&
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
{
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
return derived();
}
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
{
product.template _cacheOptimalEval<Derived, Aligned>(derived(),
#ifdef EIGEN_VECTORIZE
typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
#else
ei_meta_false()
#endif
);
product._cacheFriendlyEval(derived());
return derived();
}
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived, int AlignedMode>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
struct ei_cache_friendly_selector
{
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
if (Lhs::Flags&RowMajorBit)
typedef Product<Lhs,Rhs,EvalMode> Prod;
typedef typename Prod::_LhsNested _LhsNested;
typedef typename Prod::_RhsNested _RhsNested;
typedef typename Prod::Scalar Scalar;
static inline void eval(const Prod& product, DestDerived& res)
{
// std::cout << "opt rhs\n";
int j=0;
for(; j<cols4; j+=4)
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
)
{
for(int k=0; k<this->rows(); ++k)
{
const Scalar tmp0 = m_lhs.coeff(k,j );
const Scalar tmp1 = m_lhs.coeff(k,j+1);
const Scalar tmp2 = m_lhs.coeff(k,j+2);
const Scalar tmp3 = m_lhs.coeff(k,j+3);
for (int i=0; i<this->cols(); ++i)
res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i)
+ tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i);
}
res.setZero();
ei_cache_friendly_product<Scalar>(
product._rows(), product._cols(), product.m_lhs.cols(),
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
);
}
for(; j<m_lhs.cols(); ++j)
else
{
for(int k=0; k<this->rows(); ++k)
{
const Scalar tmp = m_rhs.coeff(k,j);
for (int i=0; i<this->cols(); ++i)
res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i);
}
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
else
static inline void eval_and_add(const Prod& product, DestDerived& res)
{
// std::cout << "opt lhs\n";
int j = 0;
for(; j<cols4; j+=4)
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
)
{
for(int k=0; k<this->cols(); ++k)
{
const Scalar tmp0 = m_rhs.coeff(j ,k);
const Scalar tmp1 = m_rhs.coeff(j+1,k);
const Scalar tmp2 = m_rhs.coeff(j+2,k);
const Scalar tmp3 = m_rhs.coeff(j+3,k);
for (int i=0; i<this->rows(); ++i)
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1)
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
}
ei_cache_friendly_product<Scalar>(
product._rows(), product._cols(), product.m_lhs.cols(),
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
);
}
for(; j<m_lhs.cols(); ++j)
else
{
for(int k=0; k<this->cols(); ++k)
{
const Scalar tmp = m_rhs.coeff(j,k);
for (int i=0; i<this->rows(); ++i)
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
}
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
};
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
{
typedef Product<Lhs,Rhs,EvalMode> Prod;
typedef typename Prod::_LhsNested _LhsNested;
typedef typename Prod::_RhsNested _RhsNested;
typedef typename Prod::Scalar Scalar;
static inline void eval(const Prod& product, DestDerived& res)
{
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
static inline void eval_and_add(const Prod& product, DestDerived& res)
{
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
};
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
{
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
::eval(*this, res);
}
#ifdef EIGEN_VECTORIZE
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived, int AlignedMode>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
{
if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0))
|| (_rows() % ei_packet_traits<Scalar>::size != 0))
{
return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false());
}
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
if (Lhs::Flags&RowMajorBit)
{
// std::cout << "packet rhs\n";
int j=0;
for(; j<cols4; j+=4)
{
for(int k=0; k<this->rows(); k++)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
res.template packetCoeff<AlignedMode>(k,i)))))
);
}
}
}
for(; j<m_lhs.cols(); ++j)
{
for(int k=0; k<this->rows(); k++)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
}
}
}
else
{
// std::cout << "packet lhs\n";
int k=0;
for(; k<cols4; k+=4)
{
for(int j=0; j<this->cols(); j+=1)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(i,j,
ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
res.template packetCoeff<AlignedMode>(i,j)))))
);
}
}
}
for(; k<m_lhs.cols(); ++k)
{
for(int j=0; j<this->cols(); j++)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
res.template writePacketCoeff<AlignedMode>(k,j,
ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
}
}
}
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
::eval_and_add(*this, res);
}
#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H

View File

@ -107,7 +107,7 @@ template<typename Derived>
inline Transpose<Derived>
MatrixBase<Derived>::transpose()
{
return Transpose<Derived>(derived());
return derived();
}
/** This is the const version of transpose(). \sa adjoint() */
@ -115,7 +115,7 @@ template<typename Derived>
inline const Transpose<Derived>
MatrixBase<Derived>::transpose() const
{
return Transpose<Derived>(derived());
return derived();
}
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
@ -130,7 +130,7 @@ inline const Transpose<
, NestByValueBit, 0> >
MatrixBase<Derived>::adjoint() const
{
return conjugate().temporary().transpose();
return conjugate().nestByValue();
}
#endif // EIGEN_TRANSPOSE_H

View File

@ -53,16 +53,16 @@ const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeAssigningBit
| LargeBit;
// Possible values for the PartType parameter of part() and the ExtractType parameter of extract()
// Possible values for the Mode parameter of part() and of extract()
const unsigned int Upper = UpperTriangularBit;
const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit;
const unsigned int Lower = LowerTriangularBit;
const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit;
// additional possible values for the PartType parameter of part()
// additional possible values for the Mode parameter of part()
const unsigned int SelfAdjoint = SelfAdjointBit;
// additional possible values for the ExtractType parameter of extract()
// additional possible values for the Mode parameter of extract()
const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;

View File

@ -26,8 +26,6 @@
#ifndef EIGEN_PRODUCT_H
#define EIGEN_PRODUCT_H
#include "CacheFriendlyProduct.h"
template<int Index, int Size, typename Lhs, typename Rhs>
struct ei_product_unroller
{
@ -43,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
struct ei_product_unroller<0, Size, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
typename Lhs::Scalar &res)
typename Lhs::Scalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@ -62,6 +60,12 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
template<typename Lhs, typename Rhs>
struct ei_product_unroller<0, Dynamic, Lhs, Rhs>
{
static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
};
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller;
@ -115,6 +119,12 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
template<typename Lhs, typename Rhs, typename PacketScalar>
struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
{
static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
{ return product._packetCoeffRowMajor(row,col); }
@ -143,74 +153,18 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode
{
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit)))
? CacheFriendlyProduct : NormalProduct };
};
template<typename T> class ei_product_eval_to_column_major
{
typedef typename ei_traits<T>::Scalar _Scalar;
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
_Flags = ei_traits<T>::Flags
};
public:
typedef Matrix<_Scalar,
ei_traits<T>::RowsAtCompileTime,
ei_traits<T>::ColsAtCompileTime,
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
ei_traits<T>::MaxRowsAtCompileTime,
ei_traits<T>::MaxColsAtCompileTime> type;
};
template<typename T, int n=1> struct ei_product_nested_rhs
{
typedef typename ei_meta_if<
(ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
T,
typename ei_meta_if<
((ei_traits<T>::Flags & EvalBeforeNestingBit)
|| (ei_traits<T>::Flags & RowMajorBit)
|| (!(ei_traits<T>::Flags & DirectAccessBit))
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
typename ei_product_eval_to_column_major<T>::type,
const T&
>::ret
>::ret type;
};
template<typename T, int n=1> struct ei_product_nested_lhs
{
typedef typename ei_meta_if<
ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
T,
typename ei_meta_if<
int(ei_traits<T>::Flags) & EvalBeforeNestingBit
|| (!(int(ei_traits<T>::Flags) & DirectAccessBit))
|| (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
typename ei_eval<T>::type,
const T&
>::ret
>::ret type;
};
template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
// the cache friendly product evals lhs once only
// FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
typename ei_product_nested_lhs<Lhs,1>::type,
typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
// NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef typename ei_unref<LhsNested>::type _LhsNested;
typedef typename ei_unref<RhsNested>::type _RhsNested;
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
@ -220,8 +174,6 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
// the vectorization flags are only used by the normal product,
// the other one is always vectorized !
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
@ -255,10 +207,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
enum {
PacketSize = ei_packet_traits<Scalar>::size
};
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
@ -266,12 +214,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
/** \internal */
template<typename DestDerived>
void _cacheFriendlyEval(DestDerived& res) const;
/** \internal */
template<typename DestDerived>
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
template<typename DestDerived, int AlignedMode>
void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
#ifdef EIGEN_VECTORIZE
template<typename DestDerived, int AlignedMode>
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
#endif
private:
@ -304,7 +252,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
{
PacketScalar res;
ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested, PacketScalar>
@ -331,10 +279,16 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
for(int i = 1; i < m_lhs.cols(); i++)
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
return res;
// const PacketScalar tmp[4];
// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
//
// return
// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
}
template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
friend struct ei_cache_friendly_selector;
protected:
const LhsNested m_lhs;
@ -342,6 +296,9 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
};
/** \returns the matrix product of \c *this and \a other.
*
* \note This function causes an immediate evaluation. If you want to perform a matrix product
* without immediate evaluation, call .lazy() on one of the matrices before taking the product.
*
* \sa lazy(), operator*=(const MatrixBase&)
*/
@ -365,107 +322,168 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other;
}
/** \internal */
template<typename Derived>
template<typename Lhs,typename Rhs>
inline Derived&
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
{
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
return derived();
}
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
{
product._cacheFriendlyEval(derived());
product.template _cacheOptimalEval<Derived, Aligned>(derived(),
#ifdef EIGEN_VECTORIZE
typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
#else
ei_meta_false()
#endif
);
return derived();
}
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
struct ei_cache_friendly_selector
{
typedef Product<Lhs,Rhs,EvalMode> Prod;
typedef typename Prod::_LhsNested _LhsNested;
typedef typename Prod::_RhsNested _RhsNested;
typedef typename Prod::Scalar Scalar;
static inline void eval(const Prod& product, DestDerived& res)
{
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
)
{
res.setZero();
ei_cache_friendly_product<Scalar>(
product._rows(), product._cols(), product.m_lhs.cols(),
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
);
}
else
{
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
static inline void eval_and_add(const Prod& product, DestDerived& res)
{
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
)
{
ei_cache_friendly_product<Scalar>(
product._rows(), product._cols(), product.m_lhs.cols(),
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
);
}
else
{
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
}
};
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
{
typedef Product<Lhs,Rhs,EvalMode> Prod;
typedef typename Prod::_LhsNested _LhsNested;
typedef typename Prod::_RhsNested _RhsNested;
typedef typename Prod::Scalar Scalar;
static inline void eval(const Prod& product, DestDerived& res)
{
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
static inline void eval_and_add(const Prod& product, DestDerived& res)
{
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
}
};
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
template<typename DestDerived, int AlignedMode>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
{
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
::eval(*this, res);
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
if (Lhs::Flags&RowMajorBit)
{
// std::cout << "opt rhs\n";
int j=0;
for(; j<cols4; j+=4)
{
for(int k=0; k<this->rows(); ++k)
{
const Scalar tmp0 = m_lhs.coeff(k,j );
const Scalar tmp1 = m_lhs.coeff(k,j+1);
const Scalar tmp2 = m_lhs.coeff(k,j+2);
const Scalar tmp3 = m_lhs.coeff(k,j+3);
for (int i=0; i<this->cols(); ++i)
res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i)
+ tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i);
}
}
for(; j<m_lhs.cols(); ++j)
{
for(int k=0; k<this->rows(); ++k)
{
const Scalar tmp = m_rhs.coeff(k,j);
for (int i=0; i<this->cols(); ++i)
res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i);
}
}
}
else
{
// std::cout << "opt lhs\n";
int j = 0;
for(; j<cols4; j+=4)
{
for(int k=0; k<this->cols(); ++k)
{
const Scalar tmp0 = m_rhs.coeff(j ,k);
const Scalar tmp1 = m_rhs.coeff(j+1,k);
const Scalar tmp2 = m_rhs.coeff(j+2,k);
const Scalar tmp3 = m_rhs.coeff(j+3,k);
for (int i=0; i<this->rows(); ++i)
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1)
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
}
}
for(; j<m_lhs.cols(); ++j)
{
for(int k=0; k<this->cols(); ++k)
{
const Scalar tmp = m_rhs.coeff(j,k);
for (int i=0; i<this->rows(); ++i)
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
}
}
}
}
#ifdef EIGEN_VECTORIZE
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
template<typename DestDerived, int AlignedMode>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
{
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
::eval_and_add(*this, res);
if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0))
|| (_rows() % ei_packet_traits<Scalar>::size != 0))
{
return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false());
}
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
if (Lhs::Flags&RowMajorBit)
{
// std::cout << "packet rhs\n";
int j=0;
for(; j<cols4; j+=4)
{
for(int k=0; k<this->rows(); k++)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
res.template packetCoeff<AlignedMode>(k,i)))))
);
}
}
}
for(; j<m_lhs.cols(); ++j)
{
for(int k=0; k<this->rows(); k++)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
}
}
}
else
{
// std::cout << "packet lhs\n";
int k=0;
for(; k<cols4; k+=4)
{
for(int j=0; j<this->cols(); j+=1)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(i,j,
ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
res.template packetCoeff<AlignedMode>(i,j)))))
);
}
}
}
for(; k<m_lhs.cols(); ++k)
{
for(int j=0; j<this->cols(); j++)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
res.template writePacketCoeff<AlignedMode>(k,j,
ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
}
}
}
}
#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H

View File

@ -1,5 +1,11 @@
IF(BUILD_DOC)
IF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_SYSTEM_NAME MATCHES Linux)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g1")
ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
CONFIGURE_FILE(
${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile

View File

@ -1,5 +1,11 @@
IF(BUILD_TESTS)
IF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_SYSTEM_NAME MATCHES Linux)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O1 -g2")
ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
OPTION(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions" OFF)
# similar to SET_TARGET_PROPERTIES but append the property instead of overwritting it
@ -64,14 +70,14 @@ FIND_PACKAGE(Qt4 REQUIRED)
INCLUDE_DIRECTORIES( ${QT_INCLUDE_DIR} )
EI_ADD_TEST(basicstuff)
EI_ADD_TEST(miscmatrices)
EI_ADD_TEST(linearstructure)
EI_ADD_TEST(cwiseop)
EI_ADD_TEST(product)
EI_ADD_TEST(adjoint)
EI_ADD_TEST(submatrices)
EI_ADD_TEST(miscmatrices)
EI_ADD_TEST(smallvectors)
EI_ADD_TEST(cwiseop)
EI_ADD_TEST(map)
EI_ADD_TEST(linearstructure)
EI_ADD_TEST(product)
EI_ADD_TEST(triangular)
EI_ADD_TEST(cholesky)
# EI_ADD_TEST(determinant)

View File

@ -51,23 +51,15 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
Scalar s1 = ei_random<Scalar>(),
s2 = ei_random<Scalar>();
// check involutivity of adjoint, transpose, conjugate
VERIFY_IS_APPROX(m1.transpose().transpose(), m1);
VERIFY_IS_APPROX(m1.conjugate().conjugate(), m1);
VERIFY_IS_APPROX(m1.adjoint().adjoint(), m1);
// check basic compatibility of adjoint, transpose, conjugate
VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1);
VERIFY_IS_APPROX(m1.adjoint().conjugate().transpose(), m1);
if(!NumTraits<Scalar>::IsComplex)
VERIFY_IS_APPROX(m1.adjoint().transpose(), m1);
// check multiplicative behavior
VERIFY_IS_APPROX((m1.transpose() * m2).transpose(), m2.transpose() * m1);
std::cout << (m1.adjoint() * m2).adjoint() << std::endl;
std::cout << "------------------------------" << std::endl;
std::cout << m2.adjoint() * m1 << std::endl;
VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1);
VERIFY_IS_APPROX((m1.transpose() * m2).conjugate(), m1.adjoint() * m2.conjugate());
VERIFY_IS_APPROX((s1 * m1).transpose(), s1 * m1.transpose());
VERIFY_IS_APPROX((s1 * m1).conjugate(), ei_conj(s1) * m1.conjugate());
VERIFY_IS_APPROX((s1 * m1).adjoint(), ei_conj(s1) * m1.adjoint());
// check basic properties of dot, norm, norm2

View File

@ -62,11 +62,7 @@ template<typename MatrixType> void linearStructure(const MatrixType& m)
VERIFY_IS_APPROX(-m2+m1+m2, m1);
VERIFY_IS_APPROX(m1*s1, s1*m1);
VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2);
VERIFY_IS_APPROX((s1+s2)*m1, m1*s1+m1*s2);
VERIFY_IS_APPROX((m1-m2)*s1, s1*m1-s1*m2);
VERIFY_IS_APPROX((s1-s2)*m1, m1*s1-m1*s2);
VERIFY_IS_APPROX((-m1+m2)*s1, -s1*m1+s1*m2);
VERIFY_IS_APPROX((-s1+s2)*m1, -m1*s1+m1*s2);
m3 = m2; m3 += m1;
VERIFY_IS_APPROX(m3, m1+m2);
m3 = m2; m3 -= m1;

View File

@ -73,14 +73,10 @@ template<typename MatrixType> void product(const MatrixType& m)
VERIFY_IS_APPROX(s1*(square*m1), (s1*square)*m1);
VERIFY_IS_APPROX(s1*(square*m1), square*(m1*s1));
// continue testing Product.h: lazy product
VERIFY_IS_APPROX(square.lazy() * m1, square*m1);
VERIFY_IS_APPROX(square * m1.lazy(), square*m1);
// again, test operator() to check const-qualification
s1 += (square.lazy() * m1)(r,c);
// test Product.h together with Identity.h
VERIFY_IS_APPROX(m1, identity*m1);
VERIFY_IS_APPROX(v1, identity*v1);
// again, test operator() to check const-qualification
VERIFY_IS_APPROX(MatrixType::identity(rows, cols)(r,c), static_cast<Scalar>(r==c));
@ -92,18 +88,14 @@ template<typename MatrixType> void product(const MatrixType& m)
void test_product()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( product(Matrix<float, 1, 1>()) );
CALL_SUBTEST( product(Matrix<float, 3, 3>()) );
CALL_SUBTEST( product(Matrix<float, 4, 2>()) );
CALL_SUBTEST( product(Matrix3i()) );
CALL_SUBTEST( product(Matrix<float, 3, 2>()) );
CALL_SUBTEST( product(Matrix4d()) );
}
for(int i = 0; i < g_repeat; i++) {
int rows = ei_random<int>(1,320);
int cols = ei_random<int>(1,320);
CALL_SUBTEST( product(MatrixXf(rows, cols)) );
CALL_SUBTEST( product(MatrixXd(rows, cols)) );
CALL_SUBTEST( product(MatrixXi(rows, cols)) );
CALL_SUBTEST( product(MatrixXcf(rows, cols)) );
CALL_SUBTEST( product(MatrixXcd(rows, cols)) );
CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) );
CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) );
}
}