* rework Map, allow vectorization

* rework PacketMath and DummyPacketMath, make these actual template
specializations instead of just overriding by non-template inline
functions
* introduce ei_ploadt and ei_pstoret, make use of them in Map and Matrix
* remove Matrix::map() methods, use Map constructors instead.
This commit is contained in:
Benoit Jacob 2008-06-27 01:22:35 +00:00
parent e5d301dc96
commit e27b2b95cf
15 changed files with 220 additions and 216 deletions

View File

@ -348,7 +348,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
{
const int row = rowMajor ? i : index;
const int col = rowMajor ? index : i;
dst.template writePacket<UnAligned>(row, col, src.template packet<UnAligned>(row, col));
dst.template writePacket<Unaligned>(row, col, src.template packet<Unaligned>(row, col));
}
// do the non-vectorizable part of the assignment

View File

@ -168,26 +168,26 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
template<int LoadMode>
inline PacketScalar packet(int row, int col) const
{
return m_matrix.template packet<UnAligned>(row + m_startRow.value(), col + m_startCol.value());
return m_matrix.template packet<Unaligned>(row + m_startRow.value(), col + m_startCol.value());
}
template<int LoadMode>
inline void writePacket(int row, int col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x);
m_matrix.const_cast_derived().template writePacket<Unaligned>(row + m_startRow.value(), col + m_startCol.value(), x);
}
template<int LoadMode>
inline PacketScalar packet(int index) const
{
return m_matrix.template packet<UnAligned>(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
return m_matrix.template packet<Unaligned>(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
template<int LoadMode>
inline void writePacket(int index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<UnAligned>
m_matrix.const_cast_derived().template writePacket<Unaligned>
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), x);
}
@ -195,10 +195,10 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
protected:
const typename MatrixType::Nested m_matrix;
ei_int_if_dynamic<MatrixType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
ei_int_if_dynamic<MatrixType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
ei_int_if_dynamic<RowsAtCompileTime> m_blockRows;
ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
const ei_int_if_dynamic<MatrixType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const ei_int_if_dynamic<MatrixType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const ei_int_if_dynamic<RowsAtCompileTime> m_blockRows;
const ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
};
/** \returns a dynamic-size expression of a block in *this.

View File

@ -214,7 +214,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@ -232,7 +232,7 @@ MatrixBase<Derived>::packet(int row, int col) const
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@ -250,7 +250,7 @@ inline void MatrixBase<Derived>::writePacket
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/
@ -267,7 +267,7 @@ MatrixBase<Derived>::packet(int index) const
* to ensure that a packet really starts there. This method is only available on expressions having the
* PacketAccessBit and the LinearAccessBit.
*
* The \a LoadMode parameter may have the value \a Aligned or \a UnAligned. Its effect is to select
* The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
* starting at an address which is a multiple of the packet size.
*/

View File

@ -30,50 +30,93 @@
// of generic vectorized code. However, at runtime, they should never be
// called, TODO so should we raise an assertion or not?
/** \internal \returns a + b (coeff-wise) */
template <typename Scalar> inline Scalar ei_padd(const Scalar& a, const Scalar& b) { return a + b; }
template <typename Packet> inline Packet
ei_padd(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns a - b (coeff-wise) */
template <typename Scalar> inline Scalar ei_psub(const Scalar& a, const Scalar& b) { return a - b; }
template <typename Packet> inline Packet
ei_psub(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns a * b (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmul(const Scalar& a, const Scalar& b) { return a * b; }
template <typename Packet> inline Packet
ei_pmul(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns a / b (coeff-wise) */
template <typename Scalar> inline Scalar ei_pdiv(const Scalar& a, const Scalar& b) { return a / b; }
/** \internal \returns a * b - c (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmadd(const Scalar& a, const Scalar& b, const Scalar& c)
{ return ei_padd(ei_pmul(a, b),c); }
template <typename Packet> inline Packet
ei_pdiv(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns the min of \a a and \a b (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmin(const Scalar& a, const Scalar& b) { return std::min(a,b); }
template <typename Packet> inline Packet
ei_pmin(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns the max of \a a and \a b (coeff-wise) */
template <typename Scalar> inline Scalar ei_pmax(const Scalar& a, const Scalar& b) { return std::max(a,b); }
template <typename Packet> inline Packet
ei_pmax(const Packet&,
const Packet&) { Packet ret; return ret; }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template <typename Scalar> inline Scalar ei_pload(const Scalar* from) { return *from; }
template <typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pload(const Scalar*) { typename ei_packet_traits<Scalar>::type ret; return ret; }
/** \internal \returns a packet version of \a *from, (un-aligned load) */
template <typename Scalar> inline Scalar ei_ploadu(const Scalar* from) { return *from; }
template <typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_ploadu(const Scalar*) { typename ei_packet_traits<Scalar>::type ret; return ret; }
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
template <typename Scalar> inline Scalar ei_pset1(const Scalar& a) { return a; }
template <typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pset1(const Scalar&) { typename ei_packet_traits<Scalar>::type ret; return ret; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template <typename Scalar> inline void ei_pstore(Scalar* to, const Scalar& from) { (*to) = from; }
template <typename Scalar, typename Packet> inline void ei_pstore(Scalar*, const Packet&) {}
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
template <typename Scalar> inline void ei_pstoreu(Scalar* to, const Scalar& from) { (*to) = from; }
template <typename Scalar, typename Packet> inline void ei_pstoreu(Scalar*, const Packet&) {}
/** \internal \returns the first element of a packet */
template <typename Scalar> inline Scalar ei_pfirst(const Scalar& a) { return a; }
template <typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_pfirst(const Packet&)
{ typename ei_unpacket_traits<Packet>::type ret; return ret; }
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
template <typename Scalar> inline Scalar ei_preduxp(const Scalar* vecs) { return vecs[0]; }
template <typename Packet> inline Packet
ei_preduxp(const Packet*) { Packet ret; return ret; }
/** \internal \returns the sum of the elements of \a a*/
template <typename Scalar> inline Scalar ei_predux(const Scalar& a) { return a; }
template <typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet&)
{ typename ei_unpacket_traits<Packet>::type ret; return ret; }
////////////
/** \internal \returns a * b + c (coeff-wise) */
template <typename Packet> inline Packet
ei_pmadd(const Packet& a,
const Packet& b,
const Packet& c)
{ return ei_padd(ei_pmul(a, b),c); }
/** \internal \returns a packet version of \a *from. If LoadMode equals Aligned, \a from must be 16 bytes aligned */
template <typename Scalar, int LoadMode> inline typename ei_packet_traits<Scalar>::type ei_ploadt(const Scalar* from)
{
if(LoadMode == Aligned)
return ei_pload(from);
else
return ei_ploadu(from);
}
/** \internal copy the packet \a from to \a *to. If StoreMode equals Aligned, \a to must be 16 bytes aligned */
template <typename Scalar, typename Packet, int LoadMode> inline void ei_pstoret(Scalar* to, const Packet& from)
{
if(LoadMode == Aligned)
ei_pstore(to, from);
else
ei_pstoreu(to, from);
}
#endif // EIGEN_DUMMY_PACKET_MATH_H

View File

@ -65,9 +65,6 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */
inline const ExpressionType& _expression() const { return m_matrix; }
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
inline int stride() const { return m_matrix.stride(); }

View File

@ -29,17 +29,19 @@
*
* \brief A matrix or vector expression mapping an existing array of data.
*
* \param Alignment can be either Aligned or Unaligned. Tells whether the array is suitably aligned for
* vectorization on the present CPU architecture. Defaults to Unaligned.
*
* This class represents a matrix or vector expression mapping an existing array of data.
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
* such as plain C arrays or structures from other libraries.
*
* This class is the return type of Matrix::map() and most of the time this is the only
* way it is used.
* This class is the return type of Matrix::map() but can also be used directly.
*
* \sa Matrix::map()
*/
template<typename MatrixType>
struct ei_traits<Map<MatrixType> >
template<typename MatrixType, int Alignment>
struct ei_traits<Map<MatrixType, Alignment> >
{
typedef typename MatrixType::Scalar Scalar;
enum {
@ -47,35 +49,37 @@ struct ei_traits<Map<MatrixType> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = MatrixType::Flags & (HereditaryBits | DirectAccessBit),
Flags = MatrixType::Flags
& (HereditaryBits | LinearAccessBit | DirectAccessBit)
& (Alignment == Aligned ? PacketAccessBit : 0),
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
};
template<typename MatrixType> class Map
: public MatrixBase<Map<MatrixType> >
template<typename MatrixType, int Alignment> class Map
: public MatrixBase<Map<MatrixType, Alignment> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Map)
inline int rows() const { return m_rows; }
inline int cols() const { return m_cols; }
inline int rows() const { return m_rows.value(); }
inline int cols() const { return m_cols.value(); }
inline const Scalar& coeff(int row, int col) const
{
if(Flags & RowMajorBit)
return m_data[col + row * m_cols];
return m_data[col + row * m_cols.value()];
else // column-major
return m_data[row + col * m_rows];
return m_data[row + col * m_rows.value()];
}
inline Scalar& coeffRef(int row, int col)
{
if(Flags & RowMajorBit)
return const_cast<Scalar*>(m_data)[col + row * m_cols];
return const_cast<Scalar*>(m_data)[col + row * m_cols.value()];
else // column-major
return const_cast<Scalar*>(m_data)[row + col * m_rows];
return const_cast<Scalar*>(m_data)[row + col * m_rows.value()];
}
inline const Scalar& coeff(int index) const
@ -88,107 +92,69 @@ template<typename MatrixType> class Map
return m_data[index];
}
public:
inline Map(const Scalar* data, int rows, int cols) : m_data(data), m_rows(rows), m_cols(cols)
template<int LoadMode>
inline PacketScalar packet(int row, int col) const
{
ei_assert(rows > 0
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
return ei_ploadt<Scalar, LoadMode == Aligned ? Alignment : Unaligned>
(m_data + (Flags & RowMajorBit
? col + row * m_cols.value()
: row + col * m_rows.value()));
}
template<int LoadMode>
inline PacketScalar packet(int index) const
{
return ei_ploadt<Scalar, LoadMode == Aligned ? Alignment : Unaligned>(m_data + index);
}
template<int StoreMode>
inline void writePacket(int row, int col, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode == Aligned ? Alignment : Unaligned>
(m_data + (Flags & RowMajorBit
? col + row * m_cols.value()
: row + col * m_rows.value()), x);
}
template<int StoreMode>
inline void writePacket(int index, const PacketScalar& x)
{
ei_pstoret<Scalar, PacketScalar, StoreMode == Aligned ? Alignment : Unaligned>(m_data + index, x);
}
inline Map(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
{
ei_assert(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic);
ei_assert(RowsAtCompileTime > 0 && ColsAtCompileTime > 0);
}
inline Map(const Scalar* data, int size)
: m_data(data),
m_rows(RowsAtCompileTime == Dynamic ? size : RowsAtCompileTime),
m_cols(ColsAtCompileTime == Dynamic ? size : ColsAtCompileTime)
{
ei_assert(size > 0);
ei_assert((RowsAtCompileTime == 1
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == size))
|| (ColsAtCompileTime == 1
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == size)));
}
inline Map(const Scalar* data, int rows, int cols)
: m_data(data), m_rows(rows), m_cols(cols)
{
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
protected:
const Scalar* m_data;
const int m_rows, m_cols;
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
};
/** This is the const version of map(Scalar*,int,int). */
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline const Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data, int rows, int cols)
{
return Map<Matrix>(data, rows, cols);
}
/** This is the const version of map(Scalar*,int). */
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline const Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data, int size)
{
ei_assert(_Cols == 1 || _Rows ==1);
if(_Cols == 1)
return Map<Matrix>(data, size, 1);
else
return Map<Matrix>(data, 1, size);
}
/** This is the const version of map(Scalar*). */
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline const Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(const Scalar* data)
{
return Map<Matrix>(data, _Rows, _Cols);
}
/** \returns an expression of a matrix or vector mapping the given data.
*
* \param data The array of data to map
* \param rows The number of rows of the expression to construct
* \param cols The number of columns of the expression to construct
*
* Example: \include MatrixBase_map_int_int.cpp
* Output: \verbinclude MatrixBase_map_int_int.out
*
* \sa map(const Scalar*, int, int), map(Scalar*, int), map(Scalar*), class Map
*/
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data, int rows, int cols)
{
return Map<Matrix>(data, rows, cols);
}
/** \returns an expression of a vector mapping the given data.
*
* \param data The array of data to map
* \param size The size (number of coefficients) of the expression to construct
*
* \only_for_vectors
*
* Example: \include MatrixBase_map_int.cpp
* Output: \verbinclude MatrixBase_map_int.out
*
* \sa map(const Scalar*, int), map(Scalar*, int, int), map(Scalar*), class Map
*/
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data, int size)
{
ei_assert(_Cols == 1 || _Rows ==1);
if(_Cols == 1)
return Map<Matrix>(data, size, 1);
else
return Map<Matrix>(data, 1, size);
}
/** \returns an expression of a fixed-size matrix or vector mapping the given data.
*
* \param data The array of data to map
*
* Example: \include MatrixBase_map.cpp
* Output: \verbinclude MatrixBase_map.out
*
* \sa map(const Scalar*), map(Scalar*, int), map(Scalar*, int, int), class Map
*/
template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, unsigned int _Flags>
inline Map<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags> >
Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>::map(Scalar* data)
{
return Map<Matrix>(data, _Rows, _Cols);
}
/** Constructor copying an existing array of data. Only useful for dynamic-size matrices:
* for fixed-size matrices, it is redundant to pass the \a rows and \a cols parameters.
* \param data The array of data to copy
@ -202,7 +168,7 @@ inline Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>
::Matrix(const Scalar *data, int rows, int cols)
: m_storage(rows*cols, rows, cols)
{
*this = map(data, rows, cols);
*this = Map<Matrix>(data, rows, cols);
}
/** Constructor copying an existing array of data. Only useful for dynamic-size vectors:
@ -220,7 +186,7 @@ inline Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>
::Matrix(const Scalar *data, int size)
: m_storage(size, RowsAtCompileTime == 1 ? 1 : size, ColsAtCompileTime == 1 ? 1 : size)
{
*this = map(data, size);
*this = Map<Matrix>(data, size);
}
/** Constructor copying an existing array of data.
@ -237,7 +203,7 @@ template<typename _Scalar, int _Rows, int _Cols, int _MaxRows, int _MaxCols, uns
inline Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCols, _Flags>
::Matrix(const Scalar *data)
{
*this = map(data);
*this = Map<Matrix>(data);
}
#endif // EIGEN_MAP_H

View File

@ -102,12 +102,13 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
friend class Eigen::Map<Matrix, Unaligned>;
friend class Eigen::Map<Matrix, Aligned>;
protected:
ei_matrix_storage<Scalar, MaxSizeAtCompileTime, RowsAtCompileTime, ColsAtCompileTime> m_storage;
public:
friend class Map<Matrix>;
inline int rows() const { return m_storage.rows(); }
inline int cols() const { return m_storage.cols(); }
@ -149,50 +150,31 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
template<int LoadMode>
inline PacketScalar packet(int row, int col) const
{
if(Flags & RowMajorBit)
if (LoadMode==Aligned)
return ei_pload(m_storage.data() + col + row * m_storage.cols());
else
return ei_ploadu(m_storage.data() + col + row * m_storage.cols());
else
if (LoadMode==Aligned)
return ei_pload(m_storage.data() + row + col * m_storage.rows());
else
return ei_ploadu(m_storage.data() + row + col * m_storage.rows());
return ei_ploadt<Scalar, LoadMode>
(m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols()
: row + col * m_storage.rows()));
}
template<int LoadMode>
inline PacketScalar packet(int index) const
{
if (LoadMode==Aligned)
return ei_pload(m_storage.data() + index);
else
return ei_ploadu(m_storage.data() + index);
return ei_ploadt<Scalar, LoadMode>(m_storage.data() + index);
}
template<int StoreMode>
inline void writePacket(int row, int col, const PacketScalar& x)
{
ei_internal_assert(Flags & PacketAccessBit);
if(Flags & RowMajorBit)
if (StoreMode==Aligned)
ei_pstore(m_storage.data() + col + row * m_storage.cols(), x);
else
ei_pstoreu(m_storage.data() + col + row * m_storage.cols(), x);
else
if (StoreMode==Aligned)
ei_pstore(m_storage.data() + row + col * m_storage.rows(), x);
else
ei_pstoreu(m_storage.data() + row + col * m_storage.rows(), x);
ei_pstoret<Scalar, PacketScalar, StoreMode>
(m_storage.data() + (Flags & RowMajorBit
? col + row * m_storage.cols()
: row + col * m_storage.rows()), x);
}
template<int StoreMode>
inline void writePacket(int index, const PacketScalar& x)
{
if (StoreMode==Aligned)
ei_pstore(m_storage.data() + index, x);
else
ei_pstoreu(m_storage.data() + index, x);
ei_pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
}
public:
@ -253,19 +235,13 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Matrix, *=)
EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Matrix, /=)
static const Map<Matrix> map(const Scalar* array, int rows, int cols);
static const Map<Matrix> map(const Scalar* array, int size);
static const Map<Matrix> map(const Scalar* array);
static Map<Matrix> map(Scalar* array, int rows, int cols);
static Map<Matrix> map(Scalar* array, int size);
static Map<Matrix> map(Scalar* array);
/** Default constructor, does nothing. Only for fixed-size matrices.
* For dynamic-size matrices and vectors, this constructor is forbidden (guarded by
* an assertion) because it would leave the matrix without an allocated data buffer.
*/
inline explicit Matrix()
{
ei_assert(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic);
ei_assert(RowsAtCompileTime > 0 && ColsAtCompileTime > 0);
}

View File

@ -72,6 +72,11 @@ inline vector int ei_pmax(const vector int a, const vector int b) { r
inline vector float ei_pload(const float* from) { return vec_ld(0, from); }
inline vector int ei_pload(const int* from) { return vec_ld(0, from); }
inline vector float ei_ploadu(const float*)
{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) }
inline vector int ei_ploadu(const int* )
{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) }
inline vector float ei_pset1(const float& from)
{
static float __attribute__(aligned(16)) af[4];
@ -93,6 +98,11 @@ inline vector int ei_pset1(const int& from)
inline void ei_pstore(float* to, const vector float from) { vec_st(from, 0, to); }
inline void ei_pstore(int* to, const vector int from) { vec_st(from, 0, to); }
inline void ei_pstoreu(float*, const vector float)
{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) }
inline void ei_pstoreu(int* , const vector int )
{ EIGEN_STATIC_ASSERT(unaligned_load_and_store_operations_unimplemented_on_AltiVec) }
inline float ei_pfirst(const vector float a)
{
static float __attribute__(aligned(16)) af[4];

View File

@ -33,6 +33,10 @@ template<> struct ei_packet_traits<float> { typedef __m128 type; enum {size=4}
template<> struct ei_packet_traits<double> { typedef __m128d type; enum {size=2}; };
template<> struct ei_packet_traits<int> { typedef __m128i type; enum {size=4}; };
template<> struct ei_unpacket_traits<__m128> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<__m128d> { typedef double type; enum {size=2}; };
template<> struct ei_unpacket_traits<__m128i> { typedef int type; enum {size=4}; };
template<> inline __m128 ei_padd(const __m128& a, const __m128& b) { return _mm_add_ps(a,b); }
template<> inline __m128d ei_padd(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); }
template<> inline __m128i ei_padd(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); }
@ -79,29 +83,29 @@ template<> inline __m128i ei_pmax(const __m128i& a, const __m128i& b)
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); }
inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); }
inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
template<> inline __m128 ei_pload(const float* from) { return _mm_load_ps(from); }
template<> inline __m128d ei_pload(const double* from) { return _mm_load_pd(from); }
template<> inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
inline __m128 ei_ploadu(const float* from) { return _mm_loadu_ps(from); }
inline __m128d ei_ploadu(const double* from) { return _mm_loadu_pd(from); }
inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
template<> inline __m128 ei_ploadu(const float* from) { return _mm_loadu_ps(from); }
template<> inline __m128d ei_ploadu(const double* from) { return _mm_loadu_pd(from); }
template<> inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); }
inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); }
template<> inline __m128 ei_pset1(const float& from) { return _mm_set1_ps(from); }
template<> inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
template<> inline __m128i ei_pset1(const int& from) { return _mm_set1_epi32(from); }
inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); }
inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
inline void ei_pstore(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
template<> inline void ei_pstore(float* to, const __m128& from) { _mm_store_ps(to, from); }
template<> inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
template<> inline void ei_pstore(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
inline void ei_pstoreu(float* to, const __m128& from) { _mm_storeu_ps(to, from); }
inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
inline void ei_pstoreu(int* to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
template<> inline void ei_pstoreu(float* to, const __m128& from) { _mm_storeu_ps(to, from); }
template<> inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
template<> inline void ei_pstoreu(int* to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); }
inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }
inline int ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); }
template<> inline float ei_pfirst(const __m128& a) { return _mm_cvtss_f32(a); }
template<> inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }
template<> inline int ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); }
#ifdef __SSE3__
// TODO implement SSE2 versions as well as integer versions

View File

@ -167,7 +167,7 @@ const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
const unsigned int Diagonal = Upper | Lower;
enum { Aligned=0, UnAligned=1 };
enum { Aligned=0, Unaligned=1 };
enum { ConditionalJumpCost = 5 };
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
enum DirectionType { Vertical, Horizontal };

View File

@ -51,7 +51,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Lhs, typename Rhs, int ProductMode> class Product;
template<typename CoeffsVectorType> class DiagonalMatrix;
template<typename MatrixType> class DiagonalCoeffs;
template<typename MatrixType> class Map;
template<typename MatrixType, int Alignment = Unaligned> class Map;
template<int Direction, typename UnaryOp, typename MatrixType> class PartialRedux;
template<typename MatrixType, unsigned int Mode> class Part;
template<typename MatrixType, unsigned int Mode> class Extract;

View File

@ -45,7 +45,7 @@
#define EIGEN_DEFAULT_MATRIX_FLAGS EIGEN_DEFAULT_MATRIX_STORAGE_ORDER
/** Define a hint size when dealling with large matrices and L2 cache friendlyness
/** Define a hint size when dealing with large matrices and L2 cache friendliness
* More precisely, its square value represents the amount of bytes which can be assumed to stay in L2 cache.
*/
#ifndef EIGEN_TUNE_FOR_L2_CACHE_SIZE
@ -136,15 +136,15 @@ typedef typename Base::PacketScalar PacketScalar; \
typedef typename Eigen::ei_nested<Derived>::type Nested; \
typedef typename Eigen::ei_eval<Derived>::type Eval; \
typedef typename Eigen::Inverse<Eval> InverseType; \
enum { RowsAtCompileTime = Base::RowsAtCompileTime, \
ColsAtCompileTime = Base::ColsAtCompileTime, \
MaxRowsAtCompileTime = Base::MaxRowsAtCompileTime, \
MaxColsAtCompileTime = Base::MaxColsAtCompileTime, \
enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::ei_traits<Derived>::ColsAtCompileTime, \
MaxRowsAtCompileTime = Eigen::ei_traits<Derived>::MaxRowsAtCompileTime, \
MaxColsAtCompileTime = Eigen::ei_traits<Derived>::MaxColsAtCompileTime, \
Flags = Eigen::ei_traits<Derived>::Flags, \
CoeffReadCost = Eigen::ei_traits<Derived>::CoeffReadCost, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime, \
Flags = Base::Flags, \
CoeffReadCost = Base::CoeffReadCost };
IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
_EIGEN_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::MatrixBase<Derived>) \

View File

@ -147,6 +147,13 @@ template<typename T> struct ei_packet_traits
enum {size=1};
};
template<typename T> struct ei_unpacket_traits
{
typedef T type;
enum {size=1};
};
template<typename Scalar, int Rows, int Cols, int MaxRows, int MaxCols, unsigned int SuggestedFlags>
class ei_corrected_matrix_flags
{

View File

@ -58,7 +58,8 @@
you_tried_calling_a_vector_method_on_a_matrix,
you_mixed_vectors_of_different_sizes,
you_mixed_matrices_of_different_sizes,
you_did_a_programming_error
you_did_a_programming_error,
unaligned_load_and_store_operations_unimplemented_on_AltiVec
};
};

View File

@ -31,16 +31,16 @@ template<typename VectorType> void tmap(const VectorType& m)
int size = m.size();
// test Map.h
Scalar* array1 = new Scalar[size];
Scalar* array2 = new Scalar[size];
VectorType::map(array1, size) = VectorType::random(size);
VectorType::map(array2, size) = VectorType::map(array1, size);
VectorType ma1 = VectorType::map(array1, size);
VectorType ma2 = VectorType::map(array2, size);
Scalar* array1 = ei_aligned_malloc<Scalar>(size);
Scalar* array2 = ei_aligned_malloc<Scalar>(size);
Map<VectorType, Aligned>(array1, size) = VectorType::random(size);
Map<VectorType>(array2, size) = Map<VectorType>(array1, size);
VectorType ma1 = Map<VectorType>(array1, size);
VectorType ma2 = Map<VectorType, Aligned>(array2, size);
VERIFY_IS_APPROX(ma1, ma2);
VERIFY_IS_APPROX(ma1, VectorType(array2, size));
delete[] array1;
delete[] array2;
ei_aligned_free(array1);
ei_aligned_free(array2);
}
void test_map()