mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-06 19:10:36 +08:00
Make the explicit vectorization much more flexible:
- support dynamic sizes - support arbitrary matrix size when the matrix can be seen as a 1D array (except for fixed size matrices where the size in bytes must be a multiple of 16, this is to allow compact storage of a vector of matrices) Note that the explicit vectorization is still experimental and far from being completely tested.
This commit is contained in:
parent
30d47b5250
commit
a451835bce
@ -2,7 +2,7 @@
|
||||
#define EIGEN_CORE_H
|
||||
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#ifdef __SSE2__
|
||||
// Enable SSE2 vectorization on any non-GCC compiler, or on GCC >= 4.2. The major
// version is tested on its own so that e.g. GCC 5.0 (minor == 0) is not rejected.
#if ((defined __SSE2__) && ( (!defined __GNUC__) || (__GNUC__>4) || (__GNUC__==4 && __GNUC_MINOR__>=2) ))
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_SSE
|
||||
#include <emmintrin.h>
|
||||
|
@ -99,7 +99,11 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
|
||||
|
||||
template <typename Derived, typename OtherDerived,
|
||||
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
|
||||
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))>
|
||||
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))
|
||||
&& ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
||||
||((Derived::Flags&RowMajorBit)
|
||||
? Derived::ColsAtCompileTime!=Dynamic && (Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)
|
||||
: Derived::RowsAtCompileTime!=Dynamic && (Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)) )>
|
||||
struct ei_assignment_impl;
|
||||
|
||||
template<typename Derived>
|
||||
@ -107,6 +111,7 @@ template<typename OtherDerived>
|
||||
Derived& MatrixBase<Derived>
|
||||
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
// std::cout << "lazyAssign = " << Derived::Flags << " " << OtherDerived::Flags << "\n";
|
||||
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
|
||||
return derived();
|
||||
}
|
||||
@ -178,6 +183,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
|
||||
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
if(unroll)
|
||||
{
|
||||
// std::cout << "vectorized unrolled\n";
|
||||
ei_matrix_assignment_packet_unroller
|
||||
<Derived, OtherDerived,
|
||||
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
|
||||
@ -188,15 +194,61 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
|
||||
{
|
||||
if(OtherDerived::Flags&RowMajorBit)
|
||||
{
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
||||
&& (Derived::ColsAtCompileTime==Dynamic
|
||||
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
|
||||
{
|
||||
// std::cout << "vectorized linear row major\n";
|
||||
const int size = dst.rows() * dst.cols();
|
||||
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
|
||||
int index = 0;
|
||||
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
{
|
||||
// FIXME the following is not really efficient
|
||||
int i = index/dst.rows();
|
||||
int j = index%dst.rows();
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
}
|
||||
for(int i = alignedSize/dst.rows(); i < dst.rows(); i++)
|
||||
for(int j = alignedSize%dst.rows(); j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::cout << "vectorized normal row major\n";
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
||||
&& ( Derived::RowsAtCompileTime==Dynamic
|
||||
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
|
||||
{
|
||||
// std::cout << "vectorized linear col major\n";
|
||||
const int size = dst.rows() * dst.cols();
|
||||
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
|
||||
int index = 0;
|
||||
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
{
|
||||
// FIXME the following is not really efficient
|
||||
int i = index%dst.rows();
|
||||
int j = index/dst.rows();
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
}
|
||||
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
|
||||
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::cout << "vectorized normal col major\n";
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,8 +31,8 @@
|
||||
*
|
||||
* \param NullaryOp template functor implementing the operator
|
||||
*
|
||||
* This class represents an expression of a generic zeroary operator.
|
||||
* It is the return type of the ones(), zero(), constant() and random() functions,
|
||||
* This class represents an expression of a generic nullary operator.
|
||||
* It is the return type of the ones(), zero(), constant(), identity() and random() functions,
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to write a function returning such an expression, you
|
||||
@ -94,12 +94,18 @@ class CwiseNullaryOp : ei_no_assignment_operator,
|
||||
};
|
||||
|
||||
|
||||
/* \returns an expression of a custom coefficient-wise operator \a func of *this and \a other
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor
|
||||
* of the custom operator (see class CwiseNullaryOp for an example)
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \sa class CwiseNullaryOp, MatrixBase::operator+, MatrixBase::operator-, MatrixBase::cwiseProduct, MatrixBase::cwiseQuotient
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so cwiseCreate(const CustomNullaryOp&) should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
@ -109,6 +115,21 @@ MatrixBase<Derived>::cwiseCreate(int rows, int cols, const CustomNullaryOp& func
|
||||
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so cwiseCreate(const CustomNullaryOp&) should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
@ -119,6 +140,15 @@ MatrixBase<Derived>::cwiseCreate(int size, const CustomNullaryOp& func)
|
||||
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
const CwiseNullaryOp<CustomNullaryOp, Derived>
|
||||
@ -127,7 +157,16 @@ MatrixBase<Derived>::cwiseCreate(const CustomNullaryOp& func)
|
||||
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows(), cols(), func);
|
||||
}
|
||||
|
||||
/* \returns an expression of the coefficient-wise \< operator of *this and \a other
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so constant(const Scalar&) should be used
|
||||
* instead.
|
||||
*
|
||||
* The returned expression is built on the functor ei_scalar_constant_op<Scalar>.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
@ -138,6 +177,21 @@ MatrixBase<Derived>::constant(int rows, int cols, const Scalar& value)
|
||||
return cwiseCreate(rows, cols, ei_scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so constant(const Scalar&) should be used
|
||||
* instead.
|
||||
*
|
||||
* The returned expression is built on the functor ei_scalar_constant_op<Scalar>.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::constant(int size, const Scalar& value)
|
||||
@ -145,6 +199,15 @@ MatrixBase<Derived>::constant(int size, const Scalar& value)
|
||||
return cwiseCreate(size, ei_scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* The returned expression is built on the functor ei_scalar_constant_op<Scalar>.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::constant(const Scalar& value)
|
||||
@ -163,6 +226,10 @@ bool MatrixBase<Derived>::isEqualToConstant
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to \a value.
|
||||
*
|
||||
* \sa class CwiseNullaryOp, zero(), ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
|
||||
{
|
||||
@ -238,7 +305,7 @@ MatrixBase<Derived>::zero()
|
||||
* Example: \include MatrixBase_isZero.cpp
|
||||
* Output: \verbinclude MatrixBase_isZero.out
|
||||
*
|
||||
* \sa class Zero, zero()
|
||||
* \sa class CwiseNullaryOp, zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isZero
|
||||
@ -256,7 +323,7 @@ bool MatrixBase<Derived>::isZero
|
||||
* Example: \include MatrixBase_setZero.cpp
|
||||
* Output: \verbinclude MatrixBase_setZero.out
|
||||
*
|
||||
* \sa class Zero, zero()
|
||||
* \sa class CwiseNullaryOp, zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Derived& MatrixBase<Derived>::setZero()
|
||||
@ -333,7 +400,7 @@ MatrixBase<Derived>::ones()
|
||||
* Example: \include MatrixBase_isOnes.cpp
|
||||
* Output: \verbinclude MatrixBase_isOnes.out
|
||||
*
|
||||
* \sa class Ones, ones()
|
||||
* \sa class CwiseNullaryOp, ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isOnes
|
||||
@ -347,7 +414,7 @@ bool MatrixBase<Derived>::isOnes
|
||||
* Example: \include MatrixBase_setOnes.cpp
|
||||
* Output: \verbinclude MatrixBase_setOnes.out
|
||||
*
|
||||
* \sa class Ones, ones()
|
||||
* \sa class CwiseNullaryOp, ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Derived& MatrixBase<Derived>::setOnes()
|
||||
@ -424,7 +491,7 @@ MatrixBase<Derived>::random()
|
||||
* Example: \include MatrixBase_setRandom.cpp
|
||||
* Output: \verbinclude MatrixBase_setRandom.out
|
||||
*
|
||||
* \sa class Random, ei_random()
|
||||
* \sa class CwiseNullaryOp, ei_random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Derived& MatrixBase<Derived>::setRandom()
|
||||
@ -479,7 +546,7 @@ MatrixBase<Derived>::identity()
|
||||
* Example: \include MatrixBase_isIdentity.cpp
|
||||
* Output: \verbinclude MatrixBase_isIdentity.out
|
||||
*
|
||||
* \sa class Identity, identity(), identity(int,int), setIdentity()
|
||||
* \sa class CwiseNullaryOp, identity(), identity(int,int), setIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isIdentity
|
||||
@ -509,7 +576,7 @@ bool MatrixBase<Derived>::isIdentity
|
||||
* Example: \include MatrixBase_setIdentity.cpp
|
||||
* Output: \verbinclude MatrixBase_setIdentity.out
|
||||
*
|
||||
* \sa class Identity, identity(), identity(int,int), isIdentity()
|
||||
* \sa class CwiseNullaryOp, identity(), identity(int,int), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
Derived& MatrixBase<Derived>::setIdentity()
|
||||
|
@ -72,6 +72,11 @@ template<typename ExpressionType> class Lazy
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
|
||||
/** \internal Reads the packet starting at (\a row, \a col) by forwarding to the wrapped expression. */
PacketScalar _packetCoeff(int row, int col) const
{
  PacketScalar packet = m_expression.packetCoeff(row, col);
  return packet;
}
|
||||
|
||||
protected:
|
||||
const typename ExpressionType::Nested m_expression;
|
||||
};
|
||||
|
@ -79,7 +79,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _SuggestedFlags, _MaxRows, _MaxCo
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _MaxRows,
|
||||
MaxColsAtCompileTime = _MaxCols,
|
||||
Flags = ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _SuggestedFlags>::ret,
|
||||
Flags = ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _SuggestedFlags>::ret,
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
||||
};
|
||||
};
|
||||
|
@ -75,11 +75,8 @@ template<typename Derived> class MatrixBase
|
||||
* it is set to the \a Dynamic constant.
|
||||
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
|
||||
|
||||
SizeAtCompileTime
|
||||
= ei_traits<Derived>::RowsAtCompileTime == Dynamic
|
||||
|| ei_traits<Derived>::ColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
: ei_traits<Derived>::RowsAtCompileTime * ei_traits<Derived>::ColsAtCompileTime,
|
||||
SizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime,
|
||||
ei_traits<Derived>::ColsAtCompileTime>::ret,
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
@ -106,11 +103,8 @@ template<typename Derived> class MatrixBase
|
||||
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
|
||||
*/
|
||||
|
||||
MaxSizeAtCompileTime
|
||||
= ei_traits<Derived>::MaxRowsAtCompileTime == Dynamic
|
||||
|| ei_traits<Derived>::MaxColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
: ei_traits<Derived>::MaxRowsAtCompileTime * ei_traits<Derived>::MaxColsAtCompileTime,
|
||||
MaxSizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime,
|
||||
ei_traits<Derived>::MaxColsAtCompileTime>::ret,
|
||||
/**< This value is equal to the maximum possible number of coefficients that this expression
|
||||
* might have. If this expression might have an arbitrarily high number of coefficients,
|
||||
* this value is set to \a Dynamic.
|
||||
|
@ -49,6 +49,28 @@ template <typename T, int Size> struct ei_aligned_array<T,Size,false>
|
||||
T array[Size];
|
||||
};
|
||||
|
||||
/** \internal
  * Allocates an array of \a size objects of type \a T.
  *
  * When EIGEN_VECTORIZE is defined and \a T has a packet size greater than 1, the
  * buffer comes from _mm_malloc() with 16-byte alignment; otherwise it comes from
  * new T[size]. Release the result with ei_aligned_free().
  */
template<typename T>
T* ei_aligned_malloc(size_t size)
{
#ifdef EIGEN_VECTORIZE
  // Only scalar types that are processed by packets need the aligned allocator.
  const bool needsAlignedBuffer = ei_packet_traits<T>::size>1;
  if (needsAlignedBuffer)
  {
    void* const raw = _mm_malloc(sizeof(T)*size, 16);
    return static_cast<T*>(raw);
  }
#endif
  return new T[size];
}
|
||||
|
||||
/** \internal
  * Releases a buffer obtained from ei_aligned_malloc<T>().
  *
  * Mirrors the allocation choice: _mm_free() when EIGEN_VECTORIZE is defined and
  * \a T has a packet size greater than 1, delete[] otherwise.
  */
template<typename T>
void ei_aligned_free(T* ptr)
{
#ifdef EIGEN_VECTORIZE
  if (ei_packet_traits<T>::size>1)
  {
    _mm_free(ptr);
    return;
  }
#endif
  delete[] ptr;
}
|
||||
|
||||
// purely fixed-size matrix
|
||||
template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
|
||||
{
|
||||
@ -127,7 +149,7 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
|
||||
int m_cols;
|
||||
public:
|
||||
ei_matrix_storage(int size, int rows, int cols)
|
||||
: m_data(new T[size]), m_rows(rows), m_cols(cols) {}
|
||||
: m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
|
||||
// The buffer is obtained from ei_aligned_malloc<T>(), so it must be released with
// ei_aligned_free() rather than delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
|
||||
int rows(void) const {return m_rows;}
|
||||
int cols(void) const {return m_cols;}
|
||||
@ -135,8 +157,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
|
||||
{
|
||||
if(size != m_rows*m_cols)
|
||||
{
|
||||
delete[] m_data;
|
||||
m_data = new T[size];
|
||||
ei_aligned_free(m_data);
|
||||
m_data = ei_aligned_malloc<T>(size);
|
||||
}
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
@ -151,7 +173,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
|
||||
T *m_data;
|
||||
int m_cols;
|
||||
public:
|
||||
ei_matrix_storage(int size, int, int cols) : m_data(new T[size]), m_cols(cols) {}
|
||||
ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
|
||||
// The buffer is obtained from ei_aligned_malloc<T>(), so it must be released with
// ei_aligned_free() rather than delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
|
||||
static int rows(void) {return _Rows;}
|
||||
int cols(void) const {return m_cols;}
|
||||
@ -159,8 +181,8 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
|
||||
{
|
||||
if(size != _Rows*m_cols)
|
||||
{
|
||||
delete[] m_data;
|
||||
m_data = new T[size];
|
||||
ei_aligned_free(m_data);
|
||||
m_data = ei_aligned_malloc<T>(size);
|
||||
}
|
||||
m_cols = cols;
|
||||
}
|
||||
@ -174,7 +196,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
|
||||
T *m_data;
|
||||
int m_rows;
|
||||
public:
|
||||
ei_matrix_storage(int size, int rows, int) : m_data(new T[size]), m_rows(rows) {}
|
||||
ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
|
||||
// The buffer is obtained from ei_aligned_malloc<T>(), so it must be released with
// ei_aligned_free() rather than delete[].
~ei_matrix_storage() { ei_aligned_free(m_data); }
|
||||
int rows(void) const {return m_rows;}
|
||||
static int cols(void) {return _Cols;}
|
||||
@ -182,8 +204,8 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
|
||||
{
|
||||
if(size != m_rows*_Cols)
|
||||
{
|
||||
delete[] m_data;
|
||||
m_data = new T[size];
|
||||
ei_aligned_free(m_data);
|
||||
m_data = ei_aligned_malloc<T>(size);
|
||||
}
|
||||
m_rows = rows;
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
| EvalBeforeAssigningBit
|
||||
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimalProduct ? EvalBeforeNestingBit : 0))
|
||||
& (
|
||||
~(RowMajorBit | VectorizableBit)
|
||||
~(RowMajorBit | VectorizableBit | Like1DArrayBit)
|
||||
| (
|
||||
(
|
||||
!(Lhs::Flags & RowMajorBit) && (Lhs::Flags & VectorizableBit)
|
||||
@ -178,7 +178,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheOptimalEval(DestDerived& res) const;
|
||||
void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
template<typename DestDerived>
|
||||
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
||||
@ -267,59 +271,29 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename Derived1, typename Derived2>
|
||||
Derived& MatrixBase<Derived>::lazyAssign(const Product<Derived1,Derived2,CacheOptimalProduct>& product)
|
||||
template<typename Lhs, typename Rhs>
|
||||
Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheOptimalProduct>& product)
|
||||
{
|
||||
product._cacheOptimalEval(*this);
|
||||
product._cacheOptimalEval(*this,
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
typename ei_meta_if<(Flags & VectorizableBit)
|
||||
&& (!(Lhs::Flags & RowMajorBit)
|
||||
&& (Lhs::RowsAtCompileTime!=Dynamic)
|
||||
&& (Lhs::RowsAtCompileTime%ei_packet_traits<Scalar>::size==0) ),
|
||||
ei_meta_true,ei_meta_false>::ret()
|
||||
#else
|
||||
ei_meta_false
|
||||
#endif
|
||||
);
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
|
||||
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
|
||||
{
|
||||
res.setZero();
|
||||
const int cols4 = m_lhs.cols() & 0xfffffffC;
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
|
||||
{
|
||||
for(int k=0; k<this->cols(); k++)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.writePacketCoeff(i,k,\
|
||||
ei_padd(
|
||||
res.packetCoeff(i,k),
|
||||
ei_padd(
|
||||
ei_padd(
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
|
||||
ei_padd(
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_VECTORIZE
|
||||
{
|
||||
for(int k=0; k<this->cols(); ++k)
|
||||
{
|
||||
@ -344,4 +318,48 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
|
||||
{
|
||||
res.setZero();
|
||||
const int cols4 = m_lhs.cols() & 0xfffffffC;
|
||||
for(int k=0; k<this->cols(); k++)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.writePacketCoeff(i,k,\
|
||||
ei_padd(
|
||||
res.packetCoeff(i,k),
|
||||
ei_padd(
|
||||
ei_padd(
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
|
||||
ei_padd(
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // EIGEN_VECTORIZE
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
@ -71,6 +71,11 @@ template<typename ExpressionType> class Temporary
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
|
||||
/** \internal Reads the packet starting at (\a row, \a col) by forwarding to the wrapped expression. */
PacketScalar _packetCoeff(int row, int col) const
{
  PacketScalar packet = m_expression.packetCoeff(row, col);
  return packet;
}
|
||||
|
||||
protected:
|
||||
const ExpressionType m_expression;
|
||||
};
|
||||
|
@ -70,6 +70,9 @@ struct ei_meta_if <false, Then, Else> { typedef Else ret; };
|
||||
template<typename T, typename U> struct ei_is_same_type { enum { ret = 0 }; };
|
||||
template<typename T> struct ei_is_same_type<T,T> { enum { ret = 1 }; };
|
||||
|
||||
struct ei_meta_true {};
|
||||
struct ei_meta_false {};
|
||||
|
||||
|
||||
/** \internal
|
||||
* Convenient struct to get the result type of a unary or binary functor.
|
||||
@ -145,19 +148,12 @@ template<typename T> struct ei_packet_traits
|
||||
enum {size=1};
|
||||
};
|
||||
|
||||
template<typename Scalar, int Rows, int Cols, unsigned int SuggestedFlags>
|
||||
template<typename Scalar, int Size, unsigned int SuggestedFlags>
|
||||
class ei_corrected_matrix_flags
|
||||
{
|
||||
enum { is_vectorizable
|
||||
= ei_packet_traits<Scalar>::size > 1
|
||||
&& Rows!=Dynamic
|
||||
&& Cols!=Dynamic
|
||||
&&
|
||||
(
|
||||
SuggestedFlags&RowMajorBit
|
||||
? Cols%ei_packet_traits<Scalar>::size==0
|
||||
: Rows%ei_packet_traits<Scalar>::size==0
|
||||
),
|
||||
&& (Size%ei_packet_traits<Scalar>::size==0),
|
||||
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
|
||||
};
|
||||
|
||||
@ -168,19 +164,24 @@ class ei_corrected_matrix_flags
|
||||
};
|
||||
};
|
||||
|
||||
template<int _Rows, int _Cols> struct ei_size_at_compile_time
|
||||
{
|
||||
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
|
||||
};
|
||||
|
||||
template<typename T> class ei_eval
|
||||
{
|
||||
typedef typename ei_traits<T>::Scalar _Scalar;
|
||||
enum { _Rows = ei_traits<T>::RowsAtCompileTime,
|
||||
_Cols = ei_traits<T>::ColsAtCompileTime,
|
||||
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
|
||||
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
|
||||
_Flags = ei_traits<T>::Flags
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Matrix<_Scalar,
|
||||
_Rows,
|
||||
_Cols,
|
||||
ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _Flags>::ret,
|
||||
ei_traits<T>::RowsAtCompileTime,
|
||||
ei_traits<T>::ColsAtCompileTime,
|
||||
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret,
|
||||
ei_traits<T>::MaxRowsAtCompileTime,
|
||||
ei_traits<T>::MaxColsAtCompileTime> type;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user