Make the explicit vectorization much more flexible:

- support dynamic sizes
 - support arbitrary matrix size when the matrix can be seen as a 1D array
   (except for fixed-size matrices, where the size in bytes must be a multiple of 16;
    this is to allow compact storage of a vector of matrices)
Note that the explicit vectorization is still experimental and far from being completely tested.
This commit is contained in:
Gael Guennebaud 2008-04-25 15:46:18 +00:00
parent 30d47b5250
commit a451835bce
10 changed files with 264 additions and 100 deletions

View File

@ -2,7 +2,7 @@
#define EIGEN_CORE_H
#ifndef EIGEN_DONT_VECTORIZE
#ifdef __SSE2__
// Vectorization requires SSE2 and, when compiling with GCC, at least GCC 4.2.
// The version is compared as a (major, minor) pair: testing __GNUC_MINOR__>=2 on its
// own would reject every x.0 and x.1 release of a later major version (e.g. GCC 5.0).
#if ((defined __SSE2__) && ( (!defined __GNUC__) || (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=2))))
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#include <emmintrin.h>

View File

@ -99,7 +99,11 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
template <typename Derived, typename OtherDerived,
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))>
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))
&& ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
||((Derived::Flags&RowMajorBit)
? Derived::ColsAtCompileTime!=Dynamic && (Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)
: Derived::RowsAtCompileTime!=Dynamic && (Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)) )>
struct ei_assignment_impl;
template<typename Derived>
@ -107,6 +111,7 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other)
{
// std::cout << "lazyAssign = " << Derived::Flags << " " << OtherDerived::Flags << "\n";
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
return derived();
}
@ -178,6 +183,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll)
{
// std::cout << "vectorized unrolled\n";
ei_matrix_assignment_packet_unroller
<Derived, OtherDerived,
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
@ -188,15 +194,61 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
{
if(OtherDerived::Flags&RowMajorBit)
{
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
&& (Derived::ColsAtCompileTime==Dynamic
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
{
  // std::cout << "vectorized linear row major\n";
  // Both sides are traversed as one contiguous 1D array: whole packets first,
  // then the leftover coefficients one by one.
  const int packetSize = ei_packet_traits<typename Derived::Scalar>::size;
  const int size = dst.rows() * dst.cols();
  const int alignedSize = (size/packetSize)*packetSize;
  int index = 0;
  for ( ; index<alignedSize ; index+=packetSize)
  {
    // FIXME the following is not really efficient
    // Row-major storage: linear index == row * cols + col, so the (row, col)
    // pair has to be recovered with dst.cols(), not dst.rows().
    const int i = index/dst.cols();
    const int j = index%dst.cols();
    dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
  }
  // Scalar tail. The division is only evaluated inside the loop, so an empty
  // matrix (cols() == 0) never triggers a division by zero.
  for ( ; index<size ; index++)
  {
    const int i = index/dst.cols();
    const int j = index%dst.cols();
    dst.coeffRef(i, j) = src.coeff(i, j);
  }
}
else
{
// std::cout << "vectorized normal row major\n";
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
}
else
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
&& ( Derived::RowsAtCompileTime==Dynamic
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
{
  // std::cout << "vectorized linear col major\n";
  // Both sides are traversed as one contiguous 1D array: whole packets first,
  // then the leftover coefficients one by one.
  const int packetSize = ei_packet_traits<typename Derived::Scalar>::size;
  const int size = dst.rows() * dst.cols();
  const int alignedSize = (size/packetSize)*packetSize;
  int index = 0;
  for ( ; index<alignedSize ; index+=packetSize)
  {
    // FIXME the following is not really efficient
    // Column-major storage: linear index == col * rows + row.
    const int i = index%dst.rows();
    const int j = index/dst.rows();
    dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
  }
  // Scalar tail. The division is only evaluated inside the loop, so an empty
  // matrix (rows() == 0) never triggers a division by zero.
  for ( ; index<size ; index++)
  {
    const int i = index%dst.rows();
    const int j = index/dst.rows();
    dst.coeffRef(i, j) = src.coeff(i, j);
  }
}
else
{
// std::cout << "vectorized normal col major\n";
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
}
}
}

View File

@ -31,8 +31,8 @@
*
* \param NullaryOp template functor implementing the operator
*
* This class represents an expression of a generic zeroary operator.
* It is the return type of the ones(), zero(), constant() and random() functions,
* This class represents an expression of a generic nullary operator.
* It is the return type of the ones(), zero(), constant(), identity() and random() functions,
* and most of the time this is the only way it is used.
*
* However, if you want to write a function returning such an expression, you
@ -94,12 +94,18 @@ class CwiseNullaryOp : ei_no_assignment_operator,
};
/* \returns an expression of a custom coefficient-wise operator \a func of *this and \a other
/** \returns an expression of a matrix defined by a custom functor \a func
*
* The template parameter \a CustomNullaryOp is the type of the functor
* of the custom operator (see class CwiseNullaryOp for an example)
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* \sa class CwiseNullaryOp, MatrixBase::operator+, MatrixBase::operator-, MatrixBase::cwiseProduct, MatrixBase::cwiseQuotient
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so cwiseCreate(const CustomNullaryOp&)
* should be used instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
@ -109,6 +115,21 @@ MatrixBase<Derived>::cwiseCreate(int rows, int cols, const CustomNullaryOp& func
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
}
/** \returns an expression of a matrix defined by a custom functor \a func
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so cwiseCreate(const CustomNullaryOp&)
* should be used instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived>
@ -119,6 +140,15 @@ MatrixBase<Derived>::cwiseCreate(int size, const CustomNullaryOp& func)
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
}
/** \returns an expression of a matrix defined by a custom functor \a func
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived>
@ -127,7 +157,16 @@ MatrixBase<Derived>::cwiseCreate(const CustomNullaryOp& func)
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows(), cols(), func);
}
/* \returns an expression of the coefficient-wise \< operator of *this and \a other
/** \returns an expression of a constant matrix of value \a value
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so constant(const Scalar&)
* should be used instead.
*
* The functor of the returned expression is ei_scalar_constant_op<Scalar>.
*
* \sa class CwiseNullaryOp
*/
@ -138,6 +177,21 @@ MatrixBase<Derived>::constant(int rows, int cols, const Scalar& value)
return cwiseCreate(rows, cols, ei_scalar_constant_op<Scalar>(value));
}
/** \returns an expression of a constant matrix of value \a value
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so constant(const Scalar&)
* should be used instead.
*
* The functor of the returned expression is ei_scalar_constant_op<Scalar>.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(int size, const Scalar& value)
@ -145,6 +199,15 @@ MatrixBase<Derived>::constant(int size, const Scalar& value)
return cwiseCreate(size, ei_scalar_constant_op<Scalar>(value));
}
/** \returns an expression of a constant matrix of value \a value
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* The functor of the returned expression is ei_scalar_constant_op<Scalar>.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(const Scalar& value)
@ -163,6 +226,10 @@ bool MatrixBase<Derived>::isEqualToConstant
return true;
}
/** Sets all coefficients in this expression to \a value.
*
* \sa class CwiseNullaryOp, zero(), ones()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
{
@ -238,7 +305,7 @@ MatrixBase<Derived>::zero()
* Example: \include MatrixBase_isZero.cpp
* Output: \verbinclude MatrixBase_isZero.out
*
* \sa class Zero, zero()
* \sa class CwiseNullaryOp, zero()
*/
template<typename Derived>
bool MatrixBase<Derived>::isZero
@ -256,7 +323,7 @@ bool MatrixBase<Derived>::isZero
* Example: \include MatrixBase_setZero.cpp
* Output: \verbinclude MatrixBase_setZero.out
*
* \sa class Zero, zero()
* \sa class CwiseNullaryOp, zero()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setZero()
@ -333,7 +400,7 @@ MatrixBase<Derived>::ones()
* Example: \include MatrixBase_isOnes.cpp
* Output: \verbinclude MatrixBase_isOnes.out
*
* \sa class Ones, ones()
* \sa class CwiseNullaryOp, ones()
*/
template<typename Derived>
bool MatrixBase<Derived>::isOnes
@ -347,7 +414,7 @@ bool MatrixBase<Derived>::isOnes
* Example: \include MatrixBase_setOnes.cpp
* Output: \verbinclude MatrixBase_setOnes.out
*
* \sa class Ones, ones()
* \sa class CwiseNullaryOp, ones()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setOnes()
@ -424,7 +491,7 @@ MatrixBase<Derived>::random()
* Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out
*
* \sa class Random, ei_random()
* \sa class CwiseNullaryOp, ei_random()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setRandom()
@ -479,7 +546,7 @@ MatrixBase<Derived>::identity()
* Example: \include MatrixBase_isIdentity.cpp
* Output: \verbinclude MatrixBase_isIdentity.out
*
* \sa class Identity, identity(), identity(int,int), setIdentity()
* \sa class CwiseNullaryOp, identity(), identity(int,int), setIdentity()
*/
template<typename Derived>
bool MatrixBase<Derived>::isIdentity
@ -509,7 +576,7 @@ bool MatrixBase<Derived>::isIdentity
* Example: \include MatrixBase_setIdentity.cpp
* Output: \verbinclude MatrixBase_setIdentity.out
*
* \sa class Identity, identity(), identity(int,int), isIdentity()
* \sa class CwiseNullaryOp, identity(), identity(int,int), isIdentity()
*/
template<typename Derived>
Derived& MatrixBase<Derived>::setIdentity()

View File

@ -72,6 +72,11 @@ template<typename ExpressionType> class Lazy
return m_expression.coeff(row, col);
}
/** \internal \returns the packet read from the wrapped expression at (\a row, \a col) */
PacketScalar _packetCoeff(int row, int col) const
{
  const PacketScalar packet = m_expression.packetCoeff(row, col);
  return packet;
}
protected:
const typename ExpressionType::Nested m_expression;
};

View File

@ -79,7 +79,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _SuggestedFlags, _MaxRows, _MaxCo
ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _SuggestedFlags>::ret,
Flags = ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _SuggestedFlags>::ret,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};

View File

@ -75,11 +75,8 @@ template<typename Derived> class MatrixBase
* it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
SizeAtCompileTime
= ei_traits<Derived>::RowsAtCompileTime == Dynamic
|| ei_traits<Derived>::ColsAtCompileTime == Dynamic
? Dynamic
: ei_traits<Derived>::RowsAtCompileTime * ei_traits<Derived>::ColsAtCompileTime,
SizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime,
ei_traits<Derived>::ColsAtCompileTime>::ret,
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
@ -106,11 +103,8 @@ template<typename Derived> class MatrixBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/
MaxSizeAtCompileTime
= ei_traits<Derived>::MaxRowsAtCompileTime == Dynamic
|| ei_traits<Derived>::MaxColsAtCompileTime == Dynamic
? Dynamic
: ei_traits<Derived>::MaxRowsAtCompileTime * ei_traits<Derived>::MaxColsAtCompileTime,
MaxSizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime,
ei_traits<Derived>::MaxColsAtCompileTime>::ret,
/**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic.

View File

@ -49,6 +49,28 @@ template <typename T, int Size> struct ei_aligned_array<T,Size,false>
T array[Size];
};
/** \internal
  * Allocates an array of \a size elements of type \a T.
  *
  * When EIGEN_VECTORIZE is defined and \a T has a packet size greater than 1, the
  * storage is obtained from _mm_malloc() with a 16-byte alignment; in that case no
  * constructor is run on the elements. Otherwise the array comes from new T[size].
  *
  * \sa ei_aligned_free()
  */
template<typename T>
T* ei_aligned_malloc(size_t size)
{
#ifdef EIGEN_VECTORIZE
  if (ei_packet_traits<T>::size>1)
  {
    void* const raw = _mm_malloc(sizeof(T)*size, 16);
    return static_cast<T*>(raw);
  }
#endif
  return new T[size];
}
/** \internal
  * Releases the array pointed to by \a ptr.
  *
  * Mirrors the branches of ei_aligned_malloc(): when EIGEN_VECTORIZE is defined and
  * \a T has a packet size greater than 1 the memory is given back with _mm_free(),
  * otherwise with delete[].
  *
  * \sa ei_aligned_malloc()
  */
template<typename T>
void ei_aligned_free(T* ptr)
{
#ifdef EIGEN_VECTORIZE
  if (ei_packet_traits<T>::size>1)
  {
    _mm_free(ptr);
    return;
  }
#endif
  delete[] ptr;
}
// purely fixed-size matrix
template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
{
@ -127,7 +149,7 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
int m_cols;
public:
ei_matrix_storage(int size, int rows, int cols)
: m_data(new T[size]), m_rows(rows), m_cols(cols) {}
: m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
// m_data is obtained from ei_aligned_malloc(), which may use _mm_malloc(); it must
// therefore be released with ei_aligned_free() rather than a plain delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
int rows(void) const {return m_rows;}
int cols(void) const {return m_cols;}
@ -135,8 +157,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
{
if(size != m_rows*m_cols)
{
delete[] m_data;
m_data = new T[size];
ei_aligned_free(m_data);
m_data = ei_aligned_malloc<T>(size);
}
m_rows = rows;
m_cols = cols;
@ -151,7 +173,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
T *m_data;
int m_cols;
public:
ei_matrix_storage(int size, int, int cols) : m_data(new T[size]), m_cols(cols) {}
ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
// m_data is obtained from ei_aligned_malloc(), which may use _mm_malloc(); it must
// therefore be released with ei_aligned_free() rather than a plain delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
static int rows(void) {return _Rows;}
int cols(void) const {return m_cols;}
@ -159,8 +181,8 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
{
if(size != _Rows*m_cols)
{
delete[] m_data;
m_data = new T[size];
ei_aligned_free(m_data);
m_data = ei_aligned_malloc<T>(size);
}
m_cols = cols;
}
@ -174,7 +196,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
T *m_data;
int m_rows;
public:
ei_matrix_storage(int size, int rows, int) : m_data(new T[size]), m_rows(rows) {}
ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
// m_data is obtained from ei_aligned_malloc(), which may use _mm_malloc(); it must
// therefore be released with ei_aligned_free() rather than a plain delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
int rows(void) const {return m_rows;}
static int cols(void) {return _Cols;}
@ -182,8 +204,8 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
{
if(size != m_rows*_Cols)
{
delete[] m_data;
m_data = new T[size];
ei_aligned_free(m_data);
m_data = ei_aligned_malloc<T>(size);
}
m_rows = rows;
}

View File

@ -135,7 +135,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
| EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimalProduct ? EvalBeforeNestingBit : 0))
& (
~(RowMajorBit | VectorizableBit)
~(RowMajorBit | VectorizableBit | Like1DArrayBit)
| (
(
!(Lhs::Flags & RowMajorBit) && (Lhs::Flags & VectorizableBit)
@ -178,7 +178,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
/** \internal */
template<typename DestDerived>
void _cacheOptimalEval(DestDerived& res) const;
void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
#ifdef EIGEN_VECTORIZE
template<typename DestDerived>
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
#endif
private:
@ -267,59 +271,29 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
}
template<typename Derived>
template<typename Derived1, typename Derived2>
Derived& MatrixBase<Derived>::lazyAssign(const Product<Derived1,Derived2,CacheOptimalProduct>& product)
template<typename Lhs, typename Rhs>
Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheOptimalProduct>& product)
{
product._cacheOptimalEval(*this);
// Tag dispatch: the second argument is a default-constructed ei_meta_true or
// ei_meta_false object that selects the vectorized or the generic overload of
// _cacheOptimalEval at compile time. An object, not a bare type name, must be
// passed in both preprocessor branches.
product._cacheOptimalEval(*this,
#ifdef EIGEN_VECTORIZE
typename ei_meta_if<(Flags & VectorizableBit)
&& (!(Lhs::Flags & RowMajorBit)
&& (Lhs::RowsAtCompileTime!=Dynamic)
&& (Lhs::RowsAtCompileTime%ei_packet_traits<Scalar>::size==0) ),
ei_meta_true,ei_meta_false>::ret()
#else
ei_meta_false()
#endif
);
return derived();
}
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
{
res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC;
#ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
{
for(int k=0; k<this->cols(); k++)
{
int j=0;
for(; j<cols4; j+=4)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.writePacketCoeff(i,k,\
ei_padd(
res.packetCoeff(i,k),
ei_padd(
ei_padd(
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
ei_padd(
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
)
)
)
);
}
}
for(; j<m_lhs.cols(); ++j)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
for (int i=0; i<this->rows(); ++i)
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
}
}
}
else
#endif // EIGEN_VECTORIZE
{
for(int k=0; k<this->cols(); ++k)
{
@ -344,4 +318,48 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
}
}
#ifdef EIGEN_VECTORIZE
/** \internal
  * Vectorized evaluation of the product into \a res; this overload is selected by
  * passing an ei_meta_true tag.
  *
  * \a res is first set to zero, then for every column k of the result the
  * contributions lhs.col(j) * rhs(j,k) are accumulated packet by packet. The inner
  * dimension is processed four columns at a time; the columns left over are
  * accumulated one at a time.
  *
  * Rows are always stepped by the packet size with no scalar tail, so the number of
  * rows has to be a multiple of ei_packet_traits<Scalar>::size (lazyAssign() only
  * selects this overload when Lhs::RowsAtCompileTime satisfies that).
  */
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
{
  typedef typename ei_packet_traits<Scalar>::type Packet;
  const int packetSize = ei_packet_traits<Scalar>::size;

  res.setZero();
  // largest multiple of 4 that does not exceed the inner dimension
  const int cols4 = m_lhs.cols() & 0xfffffffC;
  for(int k=0; k<this->cols(); k++)
  {
    int j=0;
    for(; j<cols4; j+=4)
    {
      const Packet tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
      const Packet tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
      const Packet tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
      const Packet tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
      for (int i=0; i<this->rows(); i+=packetSize)
      {
        res.writePacketCoeff(i,k,
          ei_padd(
            res.packetCoeff(i,k),
            ei_padd(
              ei_padd(
                ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
                ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
              ei_padd(
                ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
                ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
              )
            )
          )
        );
      }
    }
    // Leftover columns of the inner dimension: each one must be *added* to what has
    // been accumulated so far, and rows must advance by whole packets exactly as in
    // the unrolled loop above (a packet covers packetSize consecutive rows).
    for(; j<m_lhs.cols(); ++j)
    {
      const Packet tmp = ei_pset1(m_rhs.coeff(j,k));
      for (int i=0; i<this->rows(); i+=packetSize)
        res.writePacketCoeff(i,k,
          ei_padd(res.packetCoeff(i,k), ei_pmul(tmp, m_lhs.packetCoeff(i,j))));
    }
  }
}
#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H

View File

@ -71,6 +71,11 @@ template<typename ExpressionType> class Temporary
return m_expression.coeff(row, col);
}
/** \internal \returns the packet read from the wrapped expression at (\a row, \a col) */
PacketScalar _packetCoeff(int row, int col) const
{
  const PacketScalar packet = m_expression.packetCoeff(row, col);
  return packet;
}
protected:
const ExpressionType m_expression;
};

View File

@ -70,6 +70,9 @@ struct ei_meta_if <false, Then, Else> { typedef Else ret; };
template<typename T, typename U> struct ei_is_same_type { enum { ret = 0 }; };
template<typename T> struct ei_is_same_type<T,T> { enum { ret = 1 }; };
// Empty tag types used to choose between function overloads at compile time:
// one of them is selected (e.g. through ei_meta_if) and passed by value as a
// dummy argument.
struct ei_meta_true {};
struct ei_meta_false {};
/** \internal
* Convenient struct to get the result type of a unary or binary functor.
@ -145,19 +148,12 @@ template<typename T> struct ei_packet_traits
enum {size=1};
};
template<typename Scalar, int Rows, int Cols, unsigned int SuggestedFlags>
template<typename Scalar, int Size, unsigned int SuggestedFlags>
class ei_corrected_matrix_flags
{
enum { is_vectorizable
= ei_packet_traits<Scalar>::size > 1
&& Rows!=Dynamic
&& Cols!=Dynamic
&&
(
SuggestedFlags&RowMajorBit
? Cols%ei_packet_traits<Scalar>::size==0
: Rows%ei_packet_traits<Scalar>::size==0
),
&& (Size%ei_packet_traits<Scalar>::size==0),
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
};
@ -168,19 +164,24 @@ class ei_corrected_matrix_flags
};
};
/** \internal
  * Computes the number of coefficients known at compile time:
  * \c ret is \a _Rows * \a _Cols when both dimensions are fixed, and \a Dynamic as
  * soon as one of them is \a Dynamic.
  */
template<int _Rows, int _Cols> struct ei_size_at_compile_time
{
  enum { ret = (_Rows!=Dynamic && _Cols!=Dynamic) ? _Rows * _Cols : Dynamic };
};
template<typename T> class ei_eval
{
typedef typename ei_traits<T>::Scalar _Scalar;
enum { _Rows = ei_traits<T>::RowsAtCompileTime,
_Cols = ei_traits<T>::ColsAtCompileTime,
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
_Flags = ei_traits<T>::Flags
};
public:
typedef Matrix<_Scalar,
_Rows,
_Cols,
ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _Flags>::ret,
ei_traits<T>::RowsAtCompileTime,
ei_traits<T>::ColsAtCompileTime,
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret,
ei_traits<T>::MaxRowsAtCompileTime,
ei_traits<T>::MaxColsAtCompileTime> type;
};