mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-12 19:20:36 +08:00
* Block: row and column expressions in the inner direction now have the Like1D flag
  (i.e. LinearAccessBit, following the renaming below).
* Big renaming:
  - packetCoeff ---> packet
  - VectorizableBit ---> PacketAccessBit
  - Like1DArrayBit ---> LinearAccessBit
This commit is contained in:
parent
9857764ae7
commit
bb1f4e44f1
@ -38,7 +38,7 @@ template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -53,7 +53,7 @@ template<typename Scalar> struct ei_scalar_exp_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -68,7 +68,7 @@ template<typename Scalar> struct ei_scalar_log_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_log_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -83,7 +83,7 @@ template<typename Scalar> struct ei_scalar_cos_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -98,7 +98,7 @@ template<typename Scalar> struct ei_scalar_sin_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_sin_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -116,7 +116,7 @@ struct ei_scalar_pow_op {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_pow_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
*
|
||||
@ -132,107 +132,107 @@ struct ei_scalar_inverse_op {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_inverse_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
// default ei_functor_traits for STL functors:
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::multiplies<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::divides<T> >
|
||||
{ enum { Cost = NumTraits<T>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::plus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::minus<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::negate<T> >
|
||||
{ enum { Cost = NumTraits<T>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::logical_or<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::logical_and<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::logical_not<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::greater<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::less<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::greater_equal<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::less_equal<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::equal_to<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::not_equal_to<T> >
|
||||
{ enum { Cost = 1, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::binder2nd<T> >
|
||||
{ enum { Cost = ei_functor_traits<T>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = ei_functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::binder1st<T> >
|
||||
{ enum { Cost = ei_functor_traits<T>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = ei_functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::unary_negate<T> >
|
||||
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T>
|
||||
struct ei_functor_traits<std::binary_negate<T> >
|
||||
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 1 + ei_functor_traits<T>::Cost, PacketAccess = false }; };
|
||||
|
||||
#ifdef EIGEN_STDEXT_SUPPORT
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct ei_functor_traits<std::project1st<T0,T1> >
|
||||
{ enum { Cost = 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct ei_functor_traits<std::project2nd<T0,T1> >
|
||||
{ enum { Cost = 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct ei_functor_traits<std::select2nd<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct ei_functor_traits<std::select1st<std::pair<T0,T1> > >
|
||||
{ enum { Cost = 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1>
|
||||
struct ei_functor_traits<std::unary_compose<T0,T1> >
|
||||
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost, PacketAccess = false }; };
|
||||
|
||||
template<typename T0,typename T1,typename T2>
|
||||
struct ei_functor_traits<std::binary_compose<T0,T1,T2> >
|
||||
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost + ei_functor_traits<T2>::Cost, IsVectorizable = false }; };
|
||||
{ enum { Cost = ei_functor_traits<T0>::Cost + ei_functor_traits<T1>::Cost + ei_functor_traits<T2>::Cost, PacketAccess = false }; };
|
||||
|
||||
#endif // EIGEN_STDEXT_SUPPORT
|
||||
|
||||
|
@ -31,7 +31,7 @@ template<typename Scalar> struct ei_scalar_random_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_random_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, IsVectorizable = false, IsRepeatable = false }; };
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
|
||||
|
||||
/** \array_module
|
||||
*
|
||||
|
@ -34,8 +34,8 @@
|
||||
enum {
|
||||
NoVectorization,
|
||||
InnerVectorization,
|
||||
Like1DVectorization,
|
||||
SlicedVectorization
|
||||
LinearVectorization,
|
||||
SliceVectorization
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -56,18 +56,18 @@ private:
|
||||
};
|
||||
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
|
||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit)
|
||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||
MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
|
||||
MayLike1DVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit),
|
||||
MaySlicedVectorize = MightVectorize && InnerSize==Dynamic
|
||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MaySliceVectorize = MightVectorize && InnerSize==Dynamic
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Vectorization = MayInnerVectorize ? InnerVectorization
|
||||
: MayLike1DVectorize ? Like1DVectorization
|
||||
: MaySlicedVectorize ? SlicedVectorization
|
||||
: MayLinearVectorize ? LinearVectorization
|
||||
: MaySliceVectorize ? SliceVectorization
|
||||
: NoVectorization
|
||||
};
|
||||
|
||||
@ -86,7 +86,7 @@ public:
|
||||
: MayUnrollInner ? InnerUnrolling
|
||||
: NoUnrolling
|
||||
)
|
||||
: int(Vectorization) == int(Like1DVectorization)
|
||||
: int(Vectorization) == int(LinearVectorization)
|
||||
? ( MayUnrollCompletely ? CompleteUnrolling : NoUnrolling )
|
||||
: NoUnrolling
|
||||
};
|
||||
@ -162,7 +162,7 @@ struct ei_assign_innervec_CompleteUnrolling
|
||||
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
|
||||
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
|
||||
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
|
||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
|
||||
}
|
||||
@ -181,7 +181,7 @@ struct ei_assign_innervec_InnerUnrolling
|
||||
{
|
||||
const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
|
||||
const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
|
||||
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
|
||||
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
|
||||
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
|
||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
|
||||
}
|
||||
@ -267,7 +267,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
|
||||
{
|
||||
const int row = rowMajor ? j : i;
|
||||
const int col = rowMajor ? i : j;
|
||||
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
|
||||
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -298,11 +298,11 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
|
||||
};
|
||||
|
||||
/***************************
|
||||
*** Like1D vectorization ***
|
||||
*** Linear vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
||||
{
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -320,7 +320,7 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
|
||||
// FIXME the following is not really efficient
|
||||
const int row = rowMajor ? index/innerSize : index%innerSize;
|
||||
const int col = rowMajor ? index%innerSize : index/innerSize;
|
||||
dst.template writePacketCoeff<Aligned>(row, col, src.template packetCoeff<Aligned>(row, col));
|
||||
dst.template writePacket<Aligned>(row, col, src.template packet<Aligned>(row, col));
|
||||
}
|
||||
|
||||
// now we must do the rest without vectorization.
|
||||
@ -347,7 +347,7 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, CompleteUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -375,16 +375,16 @@ struct ei_assign_impl<Derived1, Derived2, Like1DVectorization, CompleteUnrolling
|
||||
}
|
||||
};
|
||||
|
||||
/***************************
|
||||
*** Sliced vectorization ***
|
||||
/**************************
|
||||
*** Slice vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, SlicedVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
|
||||
{
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
//FIXME unimplemented
|
||||
//FIXME unimplemented, so for now we fall back to non-vectorized path
|
||||
ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
@ -71,7 +71,11 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
|
||||
|| (ColsAtCompileTime != Dynamic && MatrixType::ColsAtCompileTime == Dynamic))
|
||||
? ~LargeBit
|
||||
: ~(unsigned int)0,
|
||||
Flags = MatrixType::Flags & (HereditaryBits | VectorizableBit | DirectAccessBit) & FlagsMaskLargeBit,
|
||||
FlagsLinearAccessBit = MatrixType::Flags & RowMajorBit
|
||||
? (RowsAtCompileTime == 1 ? LinearAccessBit : 0)
|
||||
: (ColsAtCompileTime == 1 ? LinearAccessBit : 0),
|
||||
Flags = (MatrixType::Flags & (HereditaryBits | PacketAccessBit | DirectAccessBit) & FlagsMaskLargeBit)
|
||||
| FlagsLinearAccessBit,
|
||||
CoeffReadCost = MatrixType::CoeffReadCost
|
||||
};
|
||||
};
|
||||
@ -146,15 +150,15 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar _packetCoeff(int row, int col) const
|
||||
inline PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_matrix.template packetCoeff<UnAligned>(row + m_startRow.value(), col + m_startCol.value());
|
||||
return m_matrix.template packet<UnAligned>(row + m_startRow.value(), col + m_startCol.value());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
|
||||
inline void _writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacketCoeff<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x);
|
||||
m_matrix.const_cast_derived().template writePacket<UnAligned>(row + m_startRow.value(), col + m_startCol.value(), x);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -249,14 +249,14 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
|
||||
template<typename Derived>
|
||||
template<int LoadMode>
|
||||
inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
|
||||
MatrixBase<Derived>::packetCoeff(int row, int col) const
|
||||
{ return derived().template _packetCoeff<LoadMode>(row,col); }
|
||||
MatrixBase<Derived>::packet(int row, int col) const
|
||||
{ return derived().template _packet<LoadMode>(row,col); }
|
||||
|
||||
template<typename Derived>
|
||||
template<int StoreMode>
|
||||
inline void MatrixBase<Derived>::writePacketCoeff
|
||||
inline void MatrixBase<Derived>::writePacket
|
||||
(int row, int col, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
|
||||
{ derived().template _writePacketCoeff<StoreMode>(row,col,x); }
|
||||
{ derived().template _writePacket<StoreMode>(row,col,x); }
|
||||
|
||||
|
||||
#endif // EIGEN_COEFFS_H
|
||||
|
@ -67,9 +67,9 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
||||
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
||||
HereditaryBits
|
||||
| (int(LhsFlags) & int(RhsFlags) & Like1DArrayBit)
|
||||
| (ei_functor_traits<BinaryOp>::IsVectorizable && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
||||
? int(LhsFlags) & int(RhsFlags) & VectorizableBit : 0)),
|
||||
| (int(LhsFlags) & int(RhsFlags) & LinearAccessBit)
|
||||
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
||||
? int(LhsFlags) & int(RhsFlags) & PacketAccessBit : 0)),
|
||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
||||
};
|
||||
};
|
||||
@ -101,9 +101,9 @@ class CwiseBinaryOp : ei_no_assignment_operator,
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar _packetCoeff(int row, int col) const
|
||||
inline PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhs.template packetCoeff<LoadMode>(row, col), m_rhs.template packetCoeff<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_lhs.template packet<LoadMode>(row, col), m_rhs.template packet<LoadMode>(row, col));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -50,7 +50,7 @@ struct ei_traits<CwiseNullaryOp<NullaryOp, MatrixType> >
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
Flags = (MatrixType::Flags
|
||||
& (HereditaryBits | Like1DArrayBit | (ei_functor_traits<NullaryOp>::IsVectorizable ? VectorizableBit : 0)))
|
||||
& (HereditaryBits | LinearAccessBit | (ei_functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
|
||||
| (ei_functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
|
||||
CoeffReadCost = ei_functor_traits<NullaryOp>::Cost
|
||||
};
|
||||
@ -84,7 +84,7 @@ class CwiseNullaryOp : ei_no_assignment_operator,
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar _packetCoeff(int, int) const
|
||||
PacketScalar _packet(int, int) const
|
||||
{
|
||||
return m_functor.packetOp();
|
||||
}
|
||||
|
@ -55,8 +55,8 @@ struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> >
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
Flags = (MatrixTypeFlags & (
|
||||
HereditaryBits | Like1DArrayBit
|
||||
| (ei_functor_traits<UnaryOp>::IsVectorizable ? VectorizableBit : 0))),
|
||||
HereditaryBits | LinearAccessBit
|
||||
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))),
|
||||
CoeffReadCost = MatrixTypeCoeffReadCost + ei_functor_traits<UnaryOp>::Cost
|
||||
};
|
||||
};
|
||||
@ -83,9 +83,9 @@ class CwiseUnaryOp : ei_no_assignment_operator,
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar _packetCoeff(int row, int col) const
|
||||
inline PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_functor.packetOp(m_matrix.template packetCoeff<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_matrix.template packet<LoadMode>(row, col));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -41,14 +41,14 @@ struct ei_traits<Product<Lhs, Rhs, DiagonalProduct> >
|
||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit)
|
||||
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
|
||||
&& (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit)
|
||||
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit)
|
||||
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsVectorizable) ? 0 : RowMajorBit)
|
||||
_LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsPacketAccess) ? 0 : RowMajorBit)
|
||||
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
|
||||
| (_LhsVectorizable || _RhsVectorizable ? VectorizableBit : 0),
|
||||
| (_LhsPacketAccess || _RhsPacketAccess ? PacketAccessBit : 0),
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + _LhsNested::CoeffReadCost + _RhsNested::CoeffReadCost
|
||||
};
|
||||
};
|
||||
@ -86,17 +86,17 @@ template<typename Lhs, typename Rhs> class Product<Lhs, Rhs, DiagonalProduct> :
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketScalar _packetCoeff(int row, int col) const
|
||||
const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
if ((Rhs::Flags&Diagonal)==Diagonal)
|
||||
{
|
||||
ei_assert((_LhsNested::Flags&RowMajorBit)==0);
|
||||
return ei_pmul(m_lhs.template packetCoeff<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
|
||||
return ei_pmul(m_lhs.template packet<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
|
||||
}
|
||||
else
|
||||
{
|
||||
ei_assert(_RhsNested::Flags&RowMajorBit);
|
||||
return ei_pmul(ei_pset1(m_lhs.coeff(row, row)), m_rhs.template packetCoeff<LoadMode>(row, col));
|
||||
return ei_pmul(ei_pset1(m_lhs.coeff(row, row)), m_rhs.template packet<LoadMode>(row, col));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,7 @@ struct ei_traits<Extract<MatrixType, Mode> >
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
Flags = (_MatrixTypeNested::Flags & ~(VectorizableBit | Like1DArrayBit | DirectAccessBit)) | Mode,
|
||||
Flags = (_MatrixTypeNested::Flags & ~(PacketAccessBit | LinearAccessBit | DirectAccessBit)) | Mode,
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
|
||||
};
|
||||
};
|
||||
|
@ -85,15 +85,15 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar _packetCoeff(int row, int col) const
|
||||
inline const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_matrix.template packetCoeff<LoadMode>(row, col);
|
||||
return m_matrix.template packet<LoadMode>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
|
||||
inline void _writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacketCoeff<LoadMode>(row, col, x);
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -42,7 +42,7 @@ template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
IsVectorizable = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
};
|
||||
};
|
||||
|
||||
@ -61,7 +61,7 @@ template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::MulCost,
|
||||
IsVectorizable = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
};
|
||||
};
|
||||
|
||||
@ -80,7 +80,7 @@ template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
IsVectorizable = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
};
|
||||
};
|
||||
|
||||
@ -99,7 +99,7 @@ template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
IsVectorizable = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
};
|
||||
};
|
||||
|
||||
@ -121,7 +121,7 @@ template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost,
|
||||
IsVectorizable = ei_packet_traits<Scalar>::size>1
|
||||
PacketAccess = ei_packet_traits<Scalar>::size>1
|
||||
};
|
||||
};
|
||||
|
||||
@ -135,7 +135,7 @@ template<typename Scalar> struct ei_scalar_quotient_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_quotient_op<Scalar> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
// unary functors:
|
||||
@ -150,7 +150,7 @@ template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the absolute value of a scalar
|
||||
@ -163,7 +163,7 @@ template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the squared absolute value of a scalar
|
||||
@ -176,7 +176,7 @@ template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
@ -188,7 +188,7 @@ template<typename Scalar> struct ei_scalar_conjugate_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to cast a scalar to another type
|
||||
@ -202,7 +202,7 @@ struct ei_scalar_cast_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar, typename NewType>
|
||||
struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
|
||||
{ enum { Cost = ei_is_same_type<Scalar, NewType>::ret ? 0 : NumTraits<NewType>::AddCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = ei_is_same_type<Scalar, NewType>::ret ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to extract the real part of a complex
|
||||
@ -216,14 +216,14 @@ struct ei_scalar_real_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_real_op<Scalar> >
|
||||
{ enum { Cost = 0, IsVectorizable = false }; };
|
||||
{ enum { Cost = 0, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to multiply a scalar by a fixed other one
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
|
||||
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_multiple_op<Scalar,true> {
|
||||
@ -242,7 +242,7 @@ struct ei_scalar_multiple_op<Scalar,false> {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = ei_packet_traits<Scalar>::size>1 }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
|
||||
|
||||
template<typename Scalar, bool HasFloatingPoint>
|
||||
struct ei_scalar_quotient1_impl {
|
||||
@ -252,7 +252,7 @@ struct ei_scalar_quotient1_impl {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,true> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_quotient1_impl<Scalar,false> {
|
||||
@ -263,7 +263,7 @@ struct ei_scalar_quotient1_impl<Scalar,false> {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, IsVectorizable = false }; };
|
||||
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to divide a scalar by a fixed other one
|
||||
@ -281,7 +281,7 @@ struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scala
|
||||
|
||||
// nullary functors
|
||||
|
||||
template<typename Scalar, bool IsVectorizable = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op;
|
||||
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_constant_op;
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_constant_op<Scalar,true> {
|
||||
@ -300,7 +300,7 @@ struct ei_scalar_constant_op<Scalar,false> {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
|
||||
{ enum { Cost = 1, IsVectorizable = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
||||
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
|
||||
|
||||
template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
|
||||
inline ei_scalar_identity_op(void) {}
|
||||
@ -308,6 +308,6 @@ template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, IsVectorizable = false, IsRepeatable = true }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
#endif // EIGEN_FUNCTORS_H
|
||||
|
@ -137,9 +137,9 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar _packetCoeff(int row, int col) const
|
||||
inline PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
ei_internal_assert(Flags & VectorizableBit);
|
||||
ei_internal_assert(Flags & PacketAccessBit);
|
||||
if(Flags & RowMajorBit)
|
||||
if (LoadMode==Aligned)
|
||||
return ei_pload(&m_storage.data()[col + row * m_storage.cols()]);
|
||||
@ -153,9 +153,9 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
|
||||
inline void _writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
ei_internal_assert(Flags & VectorizableBit);
|
||||
ei_internal_assert(Flags & PacketAccessBit);
|
||||
if(Flags & RowMajorBit)
|
||||
if (StoreMode==Aligned)
|
||||
ei_pstore(&m_storage.data()[col + row * m_storage.cols()], x);
|
||||
|
@ -229,9 +229,9 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
|
||||
Scalar& operator[](int index);
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packetCoeff(int row, int col) const;
|
||||
PacketScalar packet(int row, int col) const;
|
||||
template<int StoreMode>
|
||||
void writePacketCoeff(int row, int col, const PacketScalar& x);
|
||||
void writePacket(int row, int col, const PacketScalar& x);
|
||||
|
||||
const Scalar x() const;
|
||||
const Scalar y() const;
|
||||
|
@ -77,15 +77,15 @@ template<typename ExpressionType> class NestByValue
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar _packetCoeff(int row, int col) const
|
||||
inline const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_expression.template packetCoeff<LoadMode>(row, col);
|
||||
return m_expression.template packet<LoadMode>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
|
||||
inline void _writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacketCoeff<LoadMode>(row, col, x);
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -76,7 +76,7 @@ struct ei_packet_product_impl<true, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_impl<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -86,7 +86,7 @@ struct ei_packet_product_impl<false, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_impl<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
|
||||
res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -95,7 +95,7 @@ struct ei_packet_product_impl<true, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
@ -104,7 +104,7 @@ struct ei_packet_product_impl<false, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
@ -113,9 +113,9 @@ struct ei_packet_product_impl<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
|
||||
{
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
|
||||
for(int i = 1; i < lhs.cols(); i++)
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packetCoeff<Aligned>(i, col), res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<Aligned>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -124,9 +124,9 @@ struct ei_packet_product_impl<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
|
||||
{
|
||||
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
for(int i = 1; i < lhs.cols(); i++)
|
||||
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
|
||||
res = ei_pmadd(lhs.template packet<Aligned>(row, i), ei_pset1(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -210,17 +210,17 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
// the vectorization flags are only used by the normal product,
|
||||
// the other one is always vectorized !
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
|
||||
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0,
|
||||
_RowMajor = (RhsFlags & RowMajorBit)
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)),
|
||||
_LostBits = ~((_RowMajor ? 0 : RowMajorBit)
|
||||
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits)
|
||||
| EvalBeforeAssigningBit
|
||||
| EvalBeforeNestingBit
|
||||
| (_Vectorizable ? VectorizableBit : 0),
|
||||
| (_PacketAccess ? PacketAccessBit : 0),
|
||||
CoeffReadCost
|
||||
= Lhs::ColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
@ -276,7 +276,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketScalar _packetCoeff(int row, int col) const
|
||||
const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
PacketScalar res;
|
||||
|
@ -49,7 +49,7 @@ struct ei_traits<Transpose<MatrixType> >
|
||||
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
Flags = ((int(_MatrixTypeNested::Flags) ^ RowMajorBit)
|
||||
& ~( Like1DArrayBit | LowerTriangularBit | UpperTriangularBit))
|
||||
& ~( LinearAccessBit | LowerTriangularBit | UpperTriangularBit))
|
||||
| (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0)
|
||||
| (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0),
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
|
||||
@ -85,15 +85,15 @@ template<typename MatrixType> class Transpose
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar _packetCoeff(int row, int col) const
|
||||
inline const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_matrix.template packetCoeff<LoadMode>(col, row);
|
||||
return m_matrix.template packet<LoadMode>(col, row);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void _writePacketCoeff(int row, int col, const PacketScalar& x)
|
||||
inline void _writePacket(int row, int col, const PacketScalar& x)
|
||||
{
|
||||
m_matrix.const_cast_derived().template writePacketCoeff<LoadMode>(col, row, x);
|
||||
m_matrix.const_cast_derived().template writePacket<LoadMode>(col, row, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -67,15 +67,15 @@ const unsigned int LargeBit = 0x8;
|
||||
/** \ingroup flags
|
||||
*
|
||||
* means the expression might be vectorized */
|
||||
const unsigned int VectorizableBit = 0x10;
|
||||
const unsigned int PacketAccessBit = 0x10;
|
||||
#else
|
||||
const unsigned int VectorizableBit = 0x0;
|
||||
const unsigned int PacketAccessBit = 0x0;
|
||||
#endif
|
||||
|
||||
/** \ingroup flags
|
||||
*
|
||||
* means the expression can be seen as 1D vector (used for explicit vectorization) */
|
||||
const unsigned int Like1DArrayBit = 0x20;
|
||||
const unsigned int LinearAccessBit = 0x20;
|
||||
|
||||
/** \ingroup flags
|
||||
*
|
||||
|
@ -81,7 +81,7 @@ template<typename Scalar> struct ei_scalar_sin_op;
|
||||
template<typename Scalar> struct ei_scalar_pow_op;
|
||||
template<typename Scalar> struct ei_scalar_inverse_op;
|
||||
template<typename Scalar, typename NewType> struct ei_scalar_cast_op;
|
||||
template<typename Scalar, bool IsVectorizable> struct ei_scalar_multiple_op;
|
||||
template<typename Scalar, bool PacketAccess> struct ei_scalar_multiple_op;
|
||||
template<typename Scalar> struct ei_scalar_quotient1_op;
|
||||
template<typename Scalar> struct ei_scalar_min_op;
|
||||
template<typename Scalar> struct ei_scalar_max_op;
|
||||
|
@ -137,7 +137,7 @@ template<typename T> struct ei_functor_traits
|
||||
enum
|
||||
{
|
||||
Cost = 10,
|
||||
IsVectorizable = false
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
@ -157,18 +157,18 @@ class ei_corrected_matrix_flags
|
||||
: Cols > 1 ? RowMajorBit : 0,
|
||||
is_big = MaxRows == Dynamic || MaxCols == Dynamic,
|
||||
inner_size = row_major_bit ? Cols : Rows,
|
||||
vectorizable_bit
|
||||
packet_access_bit
|
||||
= ei_packet_traits<Scalar>::size > 1
|
||||
&& (is_big || inner_size%ei_packet_traits<Scalar>::size==0)
|
||||
? VectorizableBit : 0,
|
||||
? PacketAccessBit : 0,
|
||||
|
||||
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit))
|
||||
| Like1DArrayBit | DirectAccessBit
|
||||
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
|
||||
| LinearAccessBit | DirectAccessBit
|
||||
};
|
||||
|
||||
public:
|
||||
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | VectorizableBit | RowMajorBit))
|
||||
| Like1DArrayBit | DirectAccessBit | vectorizable_bit | row_major_bit
|
||||
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
|
||||
| LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -87,9 +87,9 @@ template<typename MatrixType, bool CheckExistence> class Inverse : ei_no_assignm
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar _packetCoeff(int row, int col) const
|
||||
PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
return m_inverse.template packetCoeff<LoadMode>(row, col);
|
||||
return m_inverse.template packet<LoadMode>(row, col);
|
||||
}
|
||||
|
||||
enum { _Size = MatrixType::RowsAtCompileTime };
|
||||
|
@ -75,7 +75,7 @@ struct ei_packet_product_unroller<true, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_unroller<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<Aligned>(Index, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -85,7 +85,7 @@ struct ei_packet_product_unroller<false, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_unroller<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
|
||||
res = ei_pmadd(lhs.template packet<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -94,7 +94,7 @@ struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<Aligned>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
@ -103,7 +103,7 @@ struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
res = ei_pmul(lhs.template packet<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
@ -125,14 +125,14 @@ struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
|
||||
};
|
||||
|
||||
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
|
||||
template<typename Product, bool RowMajor = true> struct ProductPacketImpl {
|
||||
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
|
||||
{ return product._packetCoeffRowMajor(row,col); }
|
||||
{ return product._packetRowMajor(row,col); }
|
||||
};
|
||||
|
||||
template<typename Product> struct ProductPacketCoeffImpl<Product, false> {
|
||||
template<typename Product> struct ProductPacketImpl<Product, false> {
|
||||
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
|
||||
{ return product._packetCoeffColumnMajor(row,col); }
|
||||
{ return product._packetColumnMajor(row,col); }
|
||||
};
|
||||
|
||||
/** \class Product
|
||||
@ -174,18 +174,18 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
|
||||
_RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0,
|
||||
_RowMajor = (RhsFlags & RowMajorBit)
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)),
|
||||
_LostBits = HereditaryBits & ~(
|
||||
(_RowMajor ? 0 : RowMajorBit)
|
||||
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits)
|
||||
| EvalBeforeAssigningBit
|
||||
| EvalBeforeNestingBit
|
||||
| (_Vectorizable ? VectorizableBit : 0),
|
||||
| (_PacketAccess ? PacketAccessBit : 0),
|
||||
CoeffReadCost
|
||||
= Lhs::ColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
@ -201,7 +201,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
public:
|
||||
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
friend class ProductPacketCoeffImpl<Product,Flags&RowMajorBit>;
|
||||
friend class ProductPacketImpl<Product,Flags&RowMajorBit>;
|
||||
typedef typename ei_traits<Product>::LhsNested LhsNested;
|
||||
typedef typename ei_traits<Product>::RhsNested RhsNested;
|
||||
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
|
||||
@ -247,7 +247,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketScalar _packetCoeff(int row, int col) const
|
||||
const PacketScalar _packet(int row, int col) const
|
||||
{
|
||||
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
{
|
||||
@ -260,33 +260,33 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return ProductPacketCoeffImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
|
||||
return ProductPacketImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
|
||||
}
|
||||
|
||||
const PacketScalar _packetCoeffRowMajor(int row, int col) const
|
||||
const PacketScalar _packetRowMajor(int row, int col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff<Aligned>(0, col));
|
||||
res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packet<Aligned>(0, col));
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff<Aligned>(i, col), res);
|
||||
res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packet<Aligned>(i, col), res);
|
||||
return res;
|
||||
}
|
||||
|
||||
const PacketScalar _packetCoeffColumnMajor(int row, int col) const
|
||||
const PacketScalar _packetColumnMajor(int row, int col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
res = ei_pmul(m_lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
|
||||
res = ei_pmul(m_lhs.template packet<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
|
||||
res = ei_pmadd(m_lhs.template packet<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
|
||||
return res;
|
||||
// const PacketScalar tmp[4];
|
||||
// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
|
||||
// ei_punpack(m_rhs.packet(0,col), tmp);
|
||||
//
|
||||
// return
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
|
||||
// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
|
||||
// ei_pmadd(m_lhs.packet(row, 0), tmp[0],
|
||||
// ei_pmadd(m_lhs.packet(row, 1), tmp[1],
|
||||
// ei_pmadd(m_lhs.packet(row, 2), tmp[2]
|
||||
// ei_pmul(m_lhs.packet(row, 3), tmp[3]))));
|
||||
}
|
||||
|
||||
|
||||
@ -328,7 +328,7 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
|
||||
{
|
||||
product.template _cacheOptimalEval<Derived, Aligned>(derived(),
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
|
||||
typename ei_meta_if<Flags & PacketAccessBit, ei_meta_true, ei_meta_false>::ret()
|
||||
#else
|
||||
ei_meta_false()
|
||||
#endif
|
||||
@ -426,12 +426,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
|
||||
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.template writePacketCoeff<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
|
||||
ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
|
||||
ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
|
||||
ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
|
||||
res.template packetCoeff<AlignedMode>(k,i)))))
|
||||
res.template writePacket<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp0, m_rhs.template packet<AlignedMode>(j+0,i),
|
||||
ei_pmadd(tmp1, m_rhs.template packet<AlignedMode>(j+1,i),
|
||||
ei_pmadd(tmp2, m_rhs.template packet<AlignedMode>(j+2,i),
|
||||
ei_pmadd(tmp3, m_rhs.template packet<AlignedMode>(j+3,i),
|
||||
res.template packet<AlignedMode>(k,i)))))
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -442,8 +442,8 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
|
||||
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
|
||||
res.template writePacketCoeff<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
|
||||
res.template writePacket<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp, m_rhs.template packet<AlignedMode>(j,i), res.template packet<AlignedMode>(k,i)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -462,12 +462,12 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
|
||||
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.template writePacketCoeff<AlignedMode>(i,j,
|
||||
ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
|
||||
ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
|
||||
ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
|
||||
ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
|
||||
res.template packetCoeff<AlignedMode>(i,j)))))
|
||||
res.template writePacket<AlignedMode>(i,j,
|
||||
ei_pmadd(tmp0, m_lhs.template packet<AlignedMode>(i,k),
|
||||
ei_pmadd(tmp1, m_lhs.template packet<AlignedMode>(i,k+1),
|
||||
ei_pmadd(tmp2, m_lhs.template packet<AlignedMode>(i,k+2),
|
||||
ei_pmadd(tmp3, m_lhs.template packet<AlignedMode>(i,k+3),
|
||||
res.template packet<AlignedMode>(i,j)))))
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -478,8 +478,8 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
res.template writePacketCoeff<AlignedMode>(k,j,
|
||||
ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
|
||||
res.template writePacket<AlignedMode>(k,j,
|
||||
ei_pmadd(tmp, m_lhs.template packet<AlignedMode>(i,k), res.template packet<AlignedMode>(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user