* Block: row and column expressions in the inner direction now have the Like1D flag.

* Big renaming:
  packetCoeff ---> packet
  VectorizableBit ---> PacketAccessBit
  Like1DArrayBit ---> LinearAccessBit
This commit is contained in:
Benoit Jacob 2008-06-16 14:54:31 +00:00
parent 9857764ae7
commit bb1f4e44f1
22 changed files with 192 additions and 188 deletions

View File

@ -38,7 +38,7 @@ template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -53,7 +53,7 @@ template<typename Scalar> struct ei_scalar_exp_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -68,7 +68,7 @@ template<typename Scalar> struct ei_scalar_log_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_log_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -83,7 +83,7 @@ template<typename Scalar> struct ei_scalar_cos_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -98,7 +98,7 @@ template<typename Scalar> struct ei_scalar_sin_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sin_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -116,7 +116,7 @@ struct ei_scalar_pow_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_pow_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
*
@ -132,107 +132,107 @@ struct ei_scalar_inverse_op {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
// default ei_functor_traits for STL functors:
template<typename T>
struct ei_functor_traits<std::multiplies<T> >
{ enum { Cost = NumTraits<T>::MulCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::divides<T> >
{ enum { Cost = NumTraits<T>::MulCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::plus<T> >
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::minus<T> >
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::negate<T> >
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::logical_or<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::logical_and<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::logical_not<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::greater<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::less<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::greater_equal<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::less_equal<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::equal_to<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::not_equal_to<T> >
{ enum { Cost = 1, IsVectorizable = false }; };
{ enum { Cost = 1, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::binder2nd<T> >
{ enum { Cost = ei_functor_traits<T>::Cost, IsVectorizable = false }; };
{ enum { Cost = ei_functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::binder1st<T> >
{ enum { Cost = ei_functor_traits<T>::Cost, IsVectorizable = false }; };
{ enum { Cost = ei_functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, IsVectorizable = false }; };
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, PacketAccess = false }; };
template<typename T>
struct ei_functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, IsVectorizable = false }; };
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, PacketAccess = false }; };
#ifdef EIGEN_STDEXT_SUPPORT
template<typename T0,typename T1>
struct ei_functor_traits<std::project1st<T0,T1> >
{ enum { Cost = 0, IsVectorizable = false }; };
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct ei_functor_traits<std::project2nd<T0,T1> >
{ enum { Cost = 0, IsVectorizable = false }; };
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct ei_functor_traits<std::select2nd<std::pair<T0,T1> > >
{ enum { Cost = 0, IsVectorizable = false }; };
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct ei_functor_traits<std::select1st<std::pair<T0,T1> > >
{ enum { Cost = 0, IsVectorizable = false }; };
{ enum { Cost = 0, PacketAccess = false }; };
template<typename T0,typename T1>
struct ei_functor_traits<std::unary_compose<T0,T1> >
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost, IsVectorizable = false }; };
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost, PacketAccess = false }; };
template<typename T0,typename T1,typename T2>
struct ei_functor_traits<std::binary_compose<T0,T1,T2> >
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost + ei_functor_traits<T2>::Cost, IsVectorizable = false }; };
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost + ei_functor_traits<T2>::Cost, PacketAccess = false }; };
#endif // EIGEN_STDEXT_SUPPORT

View File

@ -31,7 +31,7 @@ template<typename Scalar> struct ei_scalar_random_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false, IsRepeatable = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
/** \array_module
*

View File

@ -34,8 +34,8 @@
enum {
NoVectorization,
InnerVectorization,
Like1DVectorization,
SlicedVectorization
LinearVectorization,
SliceVectorization
};
enum {
@ -56,18 +56,18 @@ private:
};
enum {
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit)
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
MayLike1DVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit),
MaySlicedVectorize = MightVectorize && InnerSize==Dynamic
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MaySliceVectorize = MightVectorize && InnerSize==Dynamic
};
public:
enum {
Vectorization = MayInnerVectorize ? InnerVectorization
: MayLike1DVectorize ? Like1DVectorization
: MaySlicedVectorize ? SlicedVectorization
: MayLinearVectorize ? LinearVectorization
: MaySliceVectorize ? SliceVectorization
: NoVectorization
};
@ -86,7 +86,7 @@ public:
: MayUnrollInner ? InnerUnrolling
: NoUnrolling
)
: int(Vectorization) == int(Like1DVectorization)
: int(Vectorization) == int(LinearVectorization)
? ( MayUnrollCompletely ? CompleteUnrolling : NoUnrolling )
: NoUnrolling
};
@ -162,7 +162,7 @@ struct ei_assign_innervec_CompleteUnrolling
inline static void run(Derived1 &dst, const Derived2 &src)
{
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
}
@ -181,7 +181,7 @@ struct ei_assign_innervec_InnerUnrolling
{
const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
}
@ -267,7 +267,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
{
const int row = rowMajor ? j : i;
const int col = rowMajor ? i : j;
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
}
}
}
@ -298,11 +298,11 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
};
/***************************
*** Like1D vectorization ***
*** Linear vectorization ***
***************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
{
static void run(Derived1 &dst, const Derived2 &src)
{
@ -320,7 +320,7 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
// FIXME the following is not really efficient
const int row = rowMajor ? index/innerSize : index%innerSize;
const int col = rowMajor ? index%innerSize : index/innerSize;
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
}
// now we must do the rest without vectorization.
@ -347,7 +347,7 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, CompleteUnrolling>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@ -375,16 +375,16 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, CompleteUnrolling
}
};
/***************************
*** Sliced vectorization ***
/**************************
*** Slice vectorization ***
***************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, SlicedVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
{
static void run(Derived1 &dst, const Derived2 &src)
{
//FIXME unimplemented
//FIXME unimplemented, so for now we fall back to non-vectorized path
ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>::run(dst, src);
}
};

View File

@ -71,7 +71,11 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
|| (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic))
? ~LargeBit
: ~(unsigned int)0,
Flags = MatrixType::Flags & (HereditaryBits | VectorizableBit | DirectAccessBit) & FlagsMaskLargeBit,
FlagsLinearAccessBit = MatrixType::Flags & RowMajorBit
? (RowsAtCompileTime == 1 ? LinearAccessBit : 0)
: (ColsAtCompileTime == 1 ? LinearAccessBit : 0),
Flags = (MatrixType::Flags & (HereditaryBits | PacketAccessBit | DirectAccessBit) & FlagsMaskLargeBit)
| FlagsLinearAccessBit,
CoeffReadCost = MatrixType::CoeffReadCost
};
};
@ -146,15 +150,15 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
}
template<int LoadMode>
inline PacketScalar _packetCoeff(int row, int col) const
inline PacketScalar _packet(int row, int col) const
{
return m_matrix.template packetCoeff<UnAligned>(row + m_startRow.value(), col + m_startCol.value());
return m_matrix.template packet<UnAligned>(row + m_startRow.value(), col + m_startCol.value());
}
template<int LoadMode>
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
inline void _writePacket(int row, int col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacketCoeff<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x);
m_matrix.const_cast_derived().template writePacket<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x);
}
protected:

View File

@ -249,14 +249,14 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
template<typename Derived>
template<int LoadMode>
inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
MatrixBase<Derived>::packetCoeff(int row, int col) const
{ return derived().template _packetCoeff<LoadMode>(row,col); }
MatrixBase<Derived>::packet(int row, int col) const
{ return derived().template _packet<LoadMode>(row,col); }
template<typename Derived>
template<int StoreMode>
inline void MatrixBase<Derived>::writePacketCoeff
inline void MatrixBase<Derived>::writePacket
(int row, int col, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
{ derived().template _writePacketCoeff<StoreMode>(row,col,x); }
{ derived().template _writePacket<StoreMode>(row,col,x); }
#endif // EIGEN_COEFFS_H

View File

@ -67,9 +67,9 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
Flags = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) & Like1DArrayBit)
| (ei_functor_traits<BinaryOp>::IsVectorizable && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
? int(LhsFlags) & int(RhsFlags) & VectorizableBit : 0)),
| (int(LhsFlags) & int(RhsFlags) & LinearAccessBit)
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
? int(LhsFlags) & int(RhsFlags) & PacketAccessBit : 0)),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
};
};
@ -101,9 +101,9 @@ class CwiseBinaryOp : ei_no_assignment_operator,
}
template<int LoadMode>
inline PacketScalar _packetCoeff(int row, int col) const
inline PacketScalar _packet(int row, int col) const
{
return m_functor.packetOp(m_lhs.template packetCoeff<LoadMode>(row, col), m_rhs.template packetCoeff<LoadMode>(row, col));
return m_functor.packetOp(m_lhs.template packet<LoadMode>(row, col), m_rhs.template packet<LoadMode>(row, col));
}
protected:

View File

@ -50,7 +50,7 @@ struct ei_traits<CwiseNullaryOp<NullaryOp, MatrixType> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = (MatrixType::Flags
& (HereditaryBits | Like1DArrayBit | (ei_functor_traits<NullaryOp>::IsVectorizable ? VectorizableBit : 0)))
& (HereditaryBits | LinearAccessBit | (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (ei_functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = ei_functor_traits<NullaryOp>::Cost
};
@ -84,7 +84,7 @@ class CwiseNullaryOp : ei_no_assignment_operator,
}
template<int LoadMode>
PacketScalar _packetCoeff(int, int) const
PacketScalar _packet(int, int) const
{
return m_functor.packetOp();
}

View File

@ -55,8 +55,8 @@ struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = (MatrixTypeFlags & (
HereditaryBits | Like1DArrayBit
| (ei_functor_traits<UnaryOp>::IsVectorizable ? VectorizableBit : 0))),
HereditaryBits | LinearAccessBit
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))),
CoeffReadCost = MatrixTypeCoeffReadCost + ei_functor_traits<UnaryOp>::Cost
};
};
@ -83,9 +83,9 @@ class CwiseUnaryOp : ei_no_assignment_operator,
}
template<int LoadMode>
inline PacketScalar _packetCoeff(int row, int col) const
inline PacketScalar _packet(int row, int col) const
{
return m_functor.packetOp(m_matrix.template packetCoeff<LoadMode>(row, col));
return m_functor.packetOp(m_matrix.template packet<LoadMode>(row, col));
}
protected:

View File

@ -41,14 +41,14 @@ struct ei_traits<Product<Lhs, Rhs, DiagonalProduct> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit)
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit)
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsVectorizable) ? 0 : RowMajorBit)
_LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsPacketAccess) ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
| (_LhsVectorizable || _RhsVectorizable ? VectorizableBit : 0),
| (_LhsPacketAccess || _RhsPacketAccess ? PacketAccessBit : 0),
CoeffReadCost = NumTraits<Scalar>::MulCost + _LhsNested::CoeffReadCost + _RhsNested::CoeffReadCost
};
};
@ -86,17 +86,17 @@ template<typename Lhs, typename Rhs> class Product<Lhs, Rhs, DiagonalProduct> :
}
template<int LoadMode>
const PacketScalar _packetCoeff(int row, int col) const
const PacketScalar _packet(int row, int col) const
{
if ((Rhs::Flags&Diagonal)==Diagonal)
{
ei_assert((_LhsNested::Flags&RowMajorBit)==0);
return ei_pmul(m_lhs.template packetCoeff<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
return ei_pmul(m_lhs.template packet<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
}
else
{
ei_assert(_RhsNested::Flags&RowMajorBit);
return ei_pmul(ei_pset1(m_lhs.coeff(row, row)), m_rhs.template packetCoeff<LoadMode>(row, col));
return ei_pmul(ei_pset1(m_lhs.coeff(row, row)), m_rhs.template packet<LoadMode>(row, col));
}
}

View File

@ -53,7 +53,7 @@ struct ei_traits<Extract<MatrixType, Mode> >
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = (_MatrixTypeNested::Flags & ~(VectorizableBit | Like1DArrayBit | DirectAccessBit)) | Mode,
Flags = (_MatrixTypeNested::Flags & ~(PacketAccessBit | LinearAccessBit | DirectAccessBit)) | Mode,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};

View File

@ -85,15 +85,15 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
}
template<int LoadMode>
inline const PacketScalar _packetCoeff(int row, int col) const
inline const PacketScalar _packet(int row, int col) const
{
return m_matrix.template packetCoeff<LoadMode>(row, col);
return m_matrix.template packet<LoadMode>(row, col);
}
template<int LoadMode>
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
inline void _writePacket(int row, int col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacketCoeff<LoadMode>(row, col, x);
m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
}
protected:

View File

@ -42,7 +42,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
IsVectorizable = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::size>1
};
};
@ -61,7 +61,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::MulCost,
IsVectorizable = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::size>1
};
};
@ -80,7 +80,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
IsVectorizable = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::size>1
};
};
@ -99,7 +99,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
IsVectorizable = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::size>1
};
};
@ -121,7 +121,7 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
IsVectorizable = ei_packet_traits<Scalar>::size>1
PacketAccess = ei_packet_traits<Scalar>::size>1
};
};
@ -135,7 +135,7 @@ template<typename Scalar> struct ei_scalar_quotient_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
// unary functors:
@ -150,7 +150,7 @@ template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the absolute value of a scalar
@ -163,7 +163,7 @@ template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the squared absolute value of a scalar
@ -176,7 +176,7 @@ template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the conjugate of a complex value
@ -188,7 +188,7 @@ template<typename Scalar> struct ei_scalar_conjugate_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, PacketAccess = false }; };
/** \internal
* \brief Template functor to cast a scalar to another type
@ -202,7 +202,7 @@ struct ei_scalar_cast_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar, typename NewType>
struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
{ enum { Cost = ei_is_same_type<Scalar, NewType>::ret ? 0 : NumTraits<NewType>::AddCost, IsVectorizable = false }; };
{ enum { Cost = ei_is_same_type<Scalar, NewType>::ret ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to extract the real part of a complex
@ -216,14 +216,14 @@ struct ei_scalar_real_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_real_op<Scalar> >
{ enum { Cost = 0, IsVectorizable = false }; };
{ enum { Cost = 0, PacketAccess = false }; };
/** \internal
* \brief Template functor to multiply a scalar by a fixed other one
*
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
*/
template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
template<typename Scalar>
struct ei_scalar_multiple_op<Scalar,true> {
@ -242,7 +242,7 @@ struct ei_scalar_multiple_op<Scalar,false> {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = ei_packet_traits<Scalar>::size>1 }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
template<typename Scalar, bool HasFloatingPoint>
struct ei_scalar_quotient1_impl {
@ -252,7 +252,7 @@ struct ei_scalar_quotient1_impl {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,true> >
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
template<typename Scalar>
struct ei_scalar_quotient1_impl<Scalar,false> {
@ -263,7 +263,7 @@ struct ei_scalar_quotient1_impl<Scalar,false> {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to divide a scalar by a fixed other one
@ -281,7 +281,7 @@ struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scala
// nullary functors
template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op;
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op;
template<typename Scalar>
struct ei_scalar_constant_op<Scalar,true> {
@ -300,7 +300,7 @@ struct ei_scalar_constant_op<Scalar,false> {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
{ enum { Cost = 1, IsVectorizable = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
inline ei_scalar_identity_op(void) {}
@ -308,6 +308,6 @@ template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false, IsRepeatable = true }; };
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
#endif // EIGEN_FUNCTORS_H

View File

@ -137,9 +137,9 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
}
template<int LoadMode>
inline PacketScalar _packetCoeff(int row, int col) const
inline PacketScalar _packet(int row, int col) const
{
ei_internal_assert(Flags & VectorizableBit);
ei_internal_assert(Flags & PacketAccessBit);
if(Flags & RowMajorBit)
if (LoadMode==Aligned)
return ei_pload(&m_storage.data()[col + row * m_storage.cols()]);
@ -153,9 +153,9 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
}
template<int StoreMode>
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
inline void _writePacket(int row, int col, const PacketScalar& x)
{
ei_internal_assert(Flags & VectorizableBit);
ei_internal_assert(Flags & PacketAccessBit);
if(Flags & RowMajorBit)
if (StoreMode==Aligned)
ei_pstore(&m_storage.data()[col + row * m_storage.cols()], x);

View File

@ -229,9 +229,9 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
Scalar& operator[](int index);
template<int LoadMode>
PacketScalar packetCoeff(int row, int col) const;
PacketScalar packet(int row, int col) const;
template<int StoreMode>
void writePacketCoeff(int row, int col, const PacketScalar& x);
void writePacket(int row, int col, const PacketScalar& x);
const Scalar x() const;
const Scalar y() const;

View File

@ -77,15 +77,15 @@ template<typename ExpressionType> class NestByValue
}
template<int LoadMode>
inline const PacketScalar _packetCoeff(int row, int col) const
inline const PacketScalar _packet(int row, int col) const
{
return m_expression.template packetCoeff<LoadMode>(row, col);
return m_expression.template packet<LoadMode>(row, col);
}
template<int LoadMode>
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
inline void _writePacket(int row, int col, const PacketScalar& x)
{
m_expression.const_cast_derived().template writePacketCoeff<LoadMode>(row, col, x);
m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
}
protected:

View File

@ -76,7 +76,7 @@ struct ei_packet_product_impl<true, Index, Size, Lhs, Rhs, PacketScalar>
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
ei_packet_product_impl<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
}
};
@ -86,7 +86,7 @@ struct ei_packet_product_impl<false, Index, Size, Lhs, Rhs, PacketScalar>
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
ei_packet_product_impl<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
}
};
@ -95,7 +95,7 @@ struct ei_packet_product_impl<true, 0, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
}
};
@ -104,7 +104,7 @@ struct ei_packet_product_impl<false, 0, Size, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
}
};
@ -113,9 +113,9 @@ struct ei_packet_product_impl<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
{
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
for(int i = 1; i < lhs.cols(); i++)
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packetCoeff<Aligned>(i, col), res);
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res);
}
};
@ -124,9 +124,9 @@ struct ei_packet_product_impl<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
{
// Non-unrolled packet product for a Dynamic inner size, <false, ...> flavour: seeds `res`
// with term 0 (ei_pmul) and then accumulates terms 1..lhs.cols()-1 with ei_pmadd, each time
// combining the aligned packet of lhs at (row, i) with ei_pset1(rhs.coeff(i, col)).
// NOTE(review): rendered diff without +/- markers; every `packetCoeff` line is presumably
// the removed version of the `packet` line that follows it. Read literally, the first
// ei_pmadd would be the loop body and the second would sit outside the loop.
// NOTE(review): term 0 is read unconditionally, so lhs.cols() >= 1 appears to be assumed — confirm.
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
{
// pre-rename spelling
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
// post-rename spelling
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
for(int i = 1; i < lhs.cols(); i++)
// pre-rename spelling
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
// post-rename spelling
res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
}
};
@ -210,17 +210,17 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
// the vectorization flags are only used by the normal product,
// the other one is always vectorized !
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0,
_RowMajor = (RhsFlags & RowMajorBit)
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)),
_LostBits = ~((_RowMajor ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
| EvalBeforeAssigningBit
| EvalBeforeNestingBit
| (_Vectorizable ? VectorizableBit : 0),
| (_PacketAccess ? PacketAccessBit : 0),
CoeffReadCost
= Lhs::ColsAtCompileTime == Dynamic
? Dynamic
@ -276,7 +276,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
template<int LoadMode>
const PacketScalar _packetCoeff(int row, int col) const
const PacketScalar _packet(int row, int col) const
{
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
PacketScalar res;

View File

@ -49,7 +49,7 @@ struct ei_traits<Transpose<MatrixType> >
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
Flags = ((int(_MatrixTypeNested::Flags) ^ RowMajorBit)
& ~( Like1DArrayBit | LowerTriangularBit | UpperTriangularBit))
& ~( LinearAccessBit | LowerTriangularBit | UpperTriangularBit))
| (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0)
| (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
@ -85,15 +85,15 @@ template<typename MatrixType> class Transpose
}
// Packet read of the transposed expression: forwards to the nested matrix with the
// row and column indices swapped, using the caller-selected LoadMode.
// NOTE(review): rendered diff without +/- markers; the `_packetCoeff` signature/return pair
// is presumably the removed version of the `_packet` pair that follows each line.
template<int LoadMode>
// pre-rename signature
inline const PacketScalar _packetCoeff(int row, int col) const
// post-rename signature
inline const PacketScalar _packet(int row, int col) const
{
// pre-rename spelling
return m_matrix.template packetCoeff<LoadMode>(col, row);
// post-rename spelling
return m_matrix.template packet<LoadMode>(col, row);
}
// Packet write into the transposed expression: forwards `x` to the nested matrix with the
// row and column indices swapped. The nested expression is reached through
// const_cast_derived(), i.e. the write goes through a const-cast of m_matrix.
// NOTE(review): rendered diff without +/- markers; the `_writePacketCoeff` lines are presumably
// the removed versions of the `_writePacket` lines that follow them.
template<int LoadMode>
// pre-rename signature
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
// post-rename signature
inline void _writePacket(int row, int col, const PacketScalar& x)
{
// pre-rename spelling
m_matrix.const_cast_derived().template writePacketCoeff<LoadMode>(col, row, x);
// post-rename spelling
m_matrix.const_cast_derived().template writePacket<LoadMode>(col, row, x);
}
protected:

View File

@ -67,15 +67,15 @@ const unsigned int LargeBit = 0x8;
/** \ingroup flags
*
* means the expression might be vectorized */
const unsigned int VectorizableBit = 0x10;
const unsigned int PacketAccessBit = 0x10;
#else
const unsigned int VectorizableBit = 0x0;
const unsigned int PacketAccessBit = 0x0;
#endif
/** \ingroup flags
*
* means the expression can be seen as a 1D vector (used for explicit vectorization) */
const unsigned int Like1DArrayBit = 0x20;
const unsigned int LinearAccessBit = 0x20;
/** \ingroup flags
*

View File

@ -81,7 +81,7 @@ template<typename Scalar> struct ei_scalar_sin_op;
// Forward declarations of the scalar functor templates.
// NOTE(review): rendered diff without +/- markers; the two ei_scalar_multiple_op declarations
// differ only in the name of the bool template parameter (IsVectorizable -> PacketAccess);
// presumably the first is the removed line and the second its replacement.
template<typename Scalar> struct ei_scalar_pow_op;
template<typename Scalar> struct ei_scalar_inverse_op;
template<typename Scalar, typename NewType> struct ei_scalar_cast_op;
// pre-rename parameter name
template<typename Scalar, bool IsVectorizable> struct ei_scalar_multiple_op;
// post-rename parameter name
template<typename Scalar, bool PacketAccess> struct ei_scalar_multiple_op;
template<typename Scalar> struct ei_scalar_quotient1_op;
template<typename Scalar> struct ei_scalar_min_op;
template<typename Scalar> struct ei_scalar_max_op;

View File

@ -137,7 +137,7 @@ template<typename T> struct ei_functor_traits
// Default traits for a functor T without a dedicated specialization:
// a fixed Cost of 10 and no packet (vectorized) evaluation.
// NOTE(review): rendered diff without +/- markers; `IsVectorizable = false` is presumably
// the removed enumerator and `PacketAccess = false` its replacement.
enum
{
Cost = 10,
// pre-rename enumerator
IsVectorizable = false
// post-rename enumerator
PacketAccess = false
};
};
@ -157,18 +157,18 @@ class ei_corrected_matrix_flags
: Cols > 1 ? RowMajorBit : 0,
is_big = MaxRows == Dynamic || MaxCols == Dynamic,
inner_size = row_major_bit ? Cols : Rows,
vectorizable_bit
packet_access_bit
= ei_packet_traits<Scalar>::size > 1
&& (is_big || inner_size%ei_packet_traits<Scalar>::size==0)
? VectorizableBit : 0,
? PacketAccessBit : 0,
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit))
| Like1DArrayBit | DirectAccessBit
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
| LinearAccessBit | DirectAccessBit
};
public:
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit))
| Like1DArrayBit | DirectAccessBit | vectorizable_bit | row_major_bit
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
| LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit
};
};

View File

@ -87,9 +87,9 @@ template<typename MatrixType, bool CheckExistence> class Inverse : ei_no_assignm
}
// Packet read of the inverse expression: forwards to the stored m_inverse member at the
// same (row, col) with the caller-selected LoadMode.
// NOTE(review): rendered diff without +/- markers; the `_packetCoeff`/`packetCoeff` lines are
// presumably the removed versions of the `_packet`/`packet` lines that follow them.
template<int LoadMode>
// pre-rename signature
PacketScalar _packetCoeff(int row, int col) const
// post-rename signature
PacketScalar _packet(int row, int col) const
{
// pre-rename spelling
return m_inverse.template packetCoeff<LoadMode>(row, col);
// post-rename spelling
return m_inverse.template packet<LoadMode>(row, col);
}
enum { _Size = MatrixType::RowsAtCompileTime };

View File

@ -75,7 +75,7 @@ struct ei_packet_product_unroller<true, Index, Size, Lhs, Rhs, PacketScalar>
// One unrolled step of the packet product for the <true, Index, Size, ...> specialization:
// recurses on Index-1 first, then folds ei_pset1(lhs.coeff(row, Index)) times the aligned
// packet of rhs at (Index, col) into `res` with ei_pmadd.
// NOTE(review): rendered diff without +/- markers; the `packetCoeff` line is presumably
// the removed one and the `packet` line its replacement.
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
ei_packet_product_unroller<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
// pre-rename spelling
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
// post-rename spelling
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
}
};
@ -85,7 +85,7 @@ struct ei_packet_product_unroller<false, Index, Size, Lhs, Rhs, PacketScalar>
// One unrolled step of the packet product for the <false, Index, Size, ...> specialization:
// recurses on Index-1 first, then folds the aligned packet of lhs at (row, Index) times
// ei_pset1(rhs.coeff(Index, col)) into `res` with ei_pmadd.
// NOTE(review): rendered diff without +/- markers; the `packetCoeff` line is presumably
// the removed one and the `packet` line its replacement.
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
ei_packet_product_unroller<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
// pre-rename spelling
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
// post-rename spelling
res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
}
};
@ -94,7 +94,7 @@ struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar>
{
// Recursion terminator for the <true, 0, Size, ...> specialization: initialises `res`
// with ei_pset1(lhs.coeff(row, 0)) times the aligned packet of rhs at (0, col).
// NOTE(review): rendered diff without +/- markers; the `packetCoeff` line is presumably
// the removed one and the `packet` line its replacement.
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
// pre-rename spelling
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
// post-rename spelling
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
}
};
@ -103,7 +103,7 @@ struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar>
{
// Recursion terminator for the <false, 0, Size, ...> specialization: initialises `res`
// with the aligned packet of lhs at (row, 0) times ei_pset1(rhs.coeff(0, col)).
// NOTE(review): rendered diff without +/- markers; the `packetCoeff` line is presumably
// the removed one and the `packet` line its replacement.
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
{
// pre-rename spelling
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
// post-rename spelling
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
}
};
@ -125,14 +125,14 @@ struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
// Deliberately empty: the Dynamic-size specialization of the unroller does nothing and
// leaves `res` untouched (all parameters are unnamed and unused).
static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
};
// Dispatch helper, primary template (RowMajor defaults to true): execute() forwards to the
// product's row-major packet evaluation member.
// NOTE(review): rendered diff without +/- markers; the `ProductPacketCoeffImpl` header and the
// `_packetCoeffRowMajor` body are presumably the removed lines, each followed by its
// replacement (`ProductPacketImpl`, `_packetRowMajor`). Only one header/body pair exists
// in the real file.
// pre-rename header
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
// post-rename header
template<typename Product, bool RowMajor = true> struct ProductPacketImpl {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
// pre-rename body
{ return product._packetCoeffRowMajor(row,col); }
// post-rename body
{ return product._packetRowMajor(row,col); }
};
// Dispatch helper, specialization for RowMajor == false: execute() forwards to the
// product's column-major packet evaluation member.
// NOTE(review): rendered diff without +/- markers; the `ProductPacketCoeffImpl` header and the
// `_packetCoeffColumnMajor` body are presumably the removed lines, each followed by its
// replacement (`ProductPacketImpl`, `_packetColumnMajor`).
// pre-rename header
template<typename Product> struct ProductPacketCoeffImpl<Product, false> {
// post-rename header
template<typename Product> struct ProductPacketImpl<Product, false> {
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
// pre-rename body
{ return product._packetCoeffColumnMajor(row,col); }
// post-rename body
{ return product._packetColumnMajor(row,col); }
};
/** \class Product
@ -174,18 +174,18 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
_PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0,
_RowMajor = (RhsFlags & RowMajorBit)
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)),
_LostBits = HereditaryBits & ~(
(_RowMajor ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits)
| EvalBeforeAssigningBit
| EvalBeforeNestingBit
| (_Vectorizable ? VectorizableBit : 0),
| (_PacketAccess ? PacketAccessBit : 0),
CoeffReadCost
= Lhs::ColsAtCompileTime == Dynamic
? Dynamic
@ -201,7 +201,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
friend class ProductPacketCoeffImpl<Product,Flags&RowMajorBit>;
friend class ProductPacketImpl<Product,Flags&RowMajorBit>;
typedef typename ei_traits<Product>::LhsNested LhsNested;
typedef typename ei_traits<Product>::RhsNested RhsNested;
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
@ -247,7 +247,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
template<int LoadMode>
const PacketScalar _packetCoeff(int row, int col) const
const PacketScalar _packet(int row, int col) const
{
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
{
@ -260,33 +260,33 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
return res;
}
else
return ProductPacketCoeffImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
return ProductPacketImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
}
// Row-major packet evaluation of the product at (row, col): seeds `res` with term 0
// (ei_pset1(m_lhs.coeff(row, 0)) times the aligned packet of m_rhs at (0, col)), then
// accumulates terms 1..m_lhs.cols()-1 with ei_pmadd and returns the result.
// NOTE(review): rendered diff without +/- markers; each `packetCoeff` line is presumably the
// removed version of the `packet` line that follows it. Read literally, the first ei_pmadd
// would be the loop body and the second would sit outside the loop.
// NOTE(review): term 0 is read unconditionally, so m_lhs.cols() >= 1 appears to be assumed — confirm.
// pre-rename signature
const PacketScalar _packetCoeffRowMajor(int row, int col) const
// post-rename signature
const PacketScalar _packetRowMajor(int row, int col) const
{
PacketScalar res;
// pre-rename spelling
res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff<Aligned>(0, col));
// post-rename spelling
res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packet<Aligned>(0, col));
for(int i = 1; i < m_lhs.cols(); i++)
// pre-rename spelling
res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff<Aligned>(i, col), res);
// post-rename spelling
res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packet<Aligned>(i, col), res);
return res;
}
// Column-major packet evaluation of the product at (row, col): seeds `res` with term 0
// (the aligned packet of m_lhs at (row, 0) times ei_pset1(m_rhs.coeff(0, col))), then
// accumulates terms 1..m_lhs.cols()-1 with ei_pmadd and returns the result.
// Everything after `return res;` is commented-out experimental code and never runs.
// NOTE(review): rendered diff without +/- markers; each `packetCoeff` line (including the
// commented-out ones) is presumably the removed version of the `packet` line shown after it.
// NOTE(review): term 0 is read unconditionally, so m_lhs.cols() >= 1 appears to be assumed — confirm.
// pre-rename signature
const PacketScalar _packetCoeffColumnMajor(int row, int col) const
// post-rename signature
const PacketScalar _packetColumnMajor(int row, int col) const
{
PacketScalar res;
// pre-rename spelling
res = ei_pmul(m_lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
// post-rename spelling
res = ei_pmul(m_lhs.template packet<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
for(int i = 1; i < m_lhs.cols(); i++)
// pre-rename spelling
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
// post-rename spelling
res = ei_pmadd(m_lhs.template packet<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
return res;
// Disabled alternative below: a hard-coded 4-term variant built on ei_punpack.
// const PacketScalar tmp[4];
// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
// ei_punpack(m_rhs.packet(0,col), tmp);
//
// return
// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
// ei_pmadd(m_lhs.packet(row, 0), tmp[0],
// ei_pmadd(m_lhs.packet(row, 1), tmp[1],
// ei_pmadd(m_lhs.packet(row, 2), tmp[2]
// ei_pmul(m_lhs.packet(row, 3), tmp[3]))));
}
@ -328,7 +328,7 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
{
product.template _cacheOptimalEval<Derived, Aligned>(derived(),
#ifdef EIGEN_VECTORIZE
typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
typename ei_meta_if<Flags & PacketAccessBit, ei_meta_true, ei_meta_false>::ret()
#else
ei_meta_false()
#endif
@ -426,12 +426,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
res.template packetCoeff<AlignedMode>(k,i)))))
res.template writePacket<AlignedMode>(k,i,
ei_pmadd(tmp0, m_rhs.template packet<AlignedMode>(j+0,i),
ei_pmadd(tmp1, m_rhs.template packet<AlignedMode>(j+1,i),
ei_pmadd(tmp2, m_rhs.template packet<AlignedMode>(j+2,i),
ei_pmadd(tmp3, m_rhs.template packet<AlignedMode>(j+3,i),
res.template packet<AlignedMode>(k,i)))))
);
}
}
@ -442,8 +442,8 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
res.template writePacketCoeff<AlignedMode>(k,i,
ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
res.template writePacket<AlignedMode>(k,i,
ei_pmadd(tmp, m_rhs.template packet<AlignedMode>(j,i), res.template packet<AlignedMode>(k,i)));
}
}
}
@ -462,12 +462,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.template writePacketCoeff<AlignedMode>(i,j,
ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
res.template packetCoeff<AlignedMode>(i,j)))))
res.template writePacket<AlignedMode>(i,j,
ei_pmadd(tmp0, m_lhs.template packet<AlignedMode>(i,k),
ei_pmadd(tmp1, m_lhs.template packet<AlignedMode>(i,k+1),
ei_pmadd(tmp2, m_lhs.template packet<AlignedMode>(i,k+2),
ei_pmadd(tmp3, m_lhs.template packet<AlignedMode>(i,k+3),
res.template packet<AlignedMode>(i,j)))))
);
}
}
@ -478,8 +478,8 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
{
// Scalar m_rhs(k,j) turned into a packet with ei_pset1, reused for every packet of rows.
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
// Walk the rows one packet at a time and accumulate tmp * m_lhs(i.., k) into res(i.., j).
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
// Pre-rename line, kept verbatim: in this rendered diff (no +/- markers) it is
// presumably the removed side of the hunk.
res.template writePacketCoeff<AlignedMode>(k,j,
ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
// Post-rename line. Fix: the accumulator is loaded from res(i,j), so the result must be
// stored back to (i,j). The original stored to (k,j), i.e. it indexed the destination row
// with the inner-dimension counter k, which overwrites the wrong packet and leaves
// res(i,j) un-updated. The 4-way unrolled update of the same column-major path already
// stores to (i,j).
res.template writePacket<AlignedMode>(i,j,
ei_pmadd(tmp, m_lhs.template packet<AlignedMode>(i,k), res.template packet<AlignedMode>(i,j)));
}
}
}