Assign.h: add LinearTraversal (non-vectorized index-based traversal)

Rename some constants to make names match more closely what they mean.
This commit is contained in:
Benoit Jacob 2009-11-18 11:57:07 -05:00
parent 9f21e2aab7
commit 94c706d04f
6 changed files with 176 additions and 115 deletions

View File

@ -57,40 +57,46 @@ private:
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit)
&& (DstIsAligned || InnerMaxSize == Dynamic),/* If the destination isn't aligned,
we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. See remark below
about InnerMaxSize. */
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only
want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
of a dynamic block in a fixed-size matrix */
MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize
&& (DstIsAligned || InnerMaxSize == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. See remark below about InnerMaxSize. */
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */
};
public:
enum {
Vectorization = int(MayInnerVectorize) ? int(InnerVectorization)
: int(MayLinearVectorize) ? int(LinearVectorization)
: int(MaySliceVectorize) ? int(SliceVectorization)
: int(NoVectorization)
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal),
Vectorized = int(Traversal) != LinearTraversal && int(Traversal) == DefaultTraversal
};
private:
enum {
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)),
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 1 : int(PacketSize)),
MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
};
public:
enum {
Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization))
? (
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Vectorization) == int(LinearVectorization)
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling)
};
@ -106,7 +112,7 @@ public:
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
EIGEN_DEBUG_VAR(Vectorization)
EIGEN_DEBUG_VAR(Traversal)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
@ -118,12 +124,12 @@ public:
* Part 2 : meta-unrollers
***************************************************************************/
/***********************
*** No vectorization ***
***********************/
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_novec_CompleteUnrolling
struct ei_assign_DefaultTraversal_CompleteUnrolling
{
enum {
row = int(Derived1::Flags)&RowMajorBit
@ -137,18 +143,18 @@ struct ei_assign_novec_CompleteUnrolling
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(row, col, src);
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_novec_InnerUnrolling
struct ei_assign_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
{
@ -156,16 +162,36 @@ struct ei_assign_novec_InnerUnrolling
const int row = rowMajor ? row_or_col : Index;
const int col = rowMajor ? Index : row_or_col;
dst.copyCoeff(row, col, src);
ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
/**************************
*** Inner vectorization ***
**************************/
@ -221,16 +247,16 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
***************************************************************************/
template<typename Derived1, typename Derived2,
int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization,
int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
struct ei_assign_impl;
/***********************
*** No vectorization ***
***********************/
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@ -248,17 +274,17 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@ -266,17 +292,42 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
for(int j = 0; j < outerSize; ++j)
ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
::run(dst, src, j);
}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
const int size = dst.size();
for(int i = 0; i < size; ++i)
dst.copyCoeff(i, src);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@ -295,7 +346,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@ -305,7 +356,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@ -323,7 +374,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
***************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@ -347,7 +398,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
@ -356,7 +407,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
const int alignedSize = (size/packetSize)*packetSize;
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
}
};
@ -365,7 +416,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
***************************/
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{

View File

@ -34,10 +34,10 @@ struct ei_dot_traits
{
public:
enum {
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
? LinearVectorizedTraversal
: DefaultTraversal
};
private:
@ -46,7 +46,7 @@ private:
PacketSize = ei_packet_traits<Scalar>::size,
Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
+ (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
public:
@ -142,13 +142,13 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
***************************************************************************/
template<typename Derived1, typename Derived2,
int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization,
int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
>
struct ei_dot_impl;
template<typename Derived1, typename Derived2>
struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
typedef typename Derived1::Scalar Scalar;
static Scalar run(const Derived1& v1, const Derived2& v2)
@ -163,12 +163,12 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
: public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
{};
template<typename Derived1, typename Derived2>
struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived1::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@ -221,20 +221,20 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
};
template<typename Derived1, typename Derived2>
struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived1::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived1::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
VectorizedSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived1& v1, const Derived2& v2)
{
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2));
if (VectorizationSize != Size)
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size-VectorizationSize>::run(v1, v2);
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
if (VectorizedSize != Size)
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
return res;
}
};

View File

@ -54,16 +54,16 @@ private:
public:
enum {
Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
: int(MaySliceVectorize) ? int(SliceVectorization)
: int(NoVectorization)
Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(DefaultTraversal)
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
};
public:
@ -171,13 +171,13 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
***************************************************************************/
template<typename Func, typename Derived,
int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
int Traversal = ei_redux_traits<Func, Derived>::Traversal,
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
>
struct ei_redux_impl;
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat, const Func& func)
@ -195,12 +195,12 @@ struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@ -246,7 +246,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@ -277,7 +277,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
}
return res;
@ -285,20 +285,20 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
VectorizedSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizationSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
if (VectorizedSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
return res;
}
};

View File

@ -36,7 +36,7 @@
* Note that here the inner-loops should always be unrolled.
*/
template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar>
template<int Traversal, int Index, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl;
template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
@ -115,7 +115,7 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
CanVectorizeInner = ei_traits<GeneralProduct>::CanVectorizeInner
};
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization,
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
Unroll ? InnerSize-1 : Dynamic,
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
@ -182,17 +182,17 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
**************************************/
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, Index, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
ei_product_coeff_impl<DefaultTraversal, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
{
@ -201,7 +201,7 @@ struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
{
@ -214,7 +214,7 @@ struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
// prevent buggy user code from causing an infinite recursion
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<DefaultTraversal, -1, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {}
};
@ -244,7 +244,7 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
};
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, Index, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::PacketScalar PacketScalar;
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
@ -252,7 +252,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
{
PacketScalar pres;
ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
ei_product_coeff_impl<DefaultTraversal,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
res = ei_predux(pres);
}
};
@ -265,7 +265,7 @@ struct ei_product_coeff_vectorized_dyn_selector
res = ei_dot_impl<
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col));
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
}
};
@ -279,7 +279,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
res = ei_dot_impl<
Lhs,
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col));
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
}
};
@ -291,7 +291,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
res = ei_dot_impl<
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
Rhs,
LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs);
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
}
};
@ -303,12 +303,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
res = ei_dot_impl<
Lhs,
Rhs,
LinearVectorization, NoUnrolling>::run(lhs, rhs);
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
}
};
template<typename Lhs, typename Rhs, typename RetScalar>
struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar>
struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
{

View File

@ -202,16 +202,19 @@ enum DirectionType { Vertical, Horizontal, BothDirections };
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct };
enum {
/** \internal Default traversal, no vectorization, no index-based access */
DefaultTraversal,
/** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
LinearTraversal,
/** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
* and good size */
InnerVectorization,
InnerVectorizedTraversal,
/** \internal Vectorization path using a single loop plus scalar loops for the
* unaligned boundaries */
LinearVectorization,
LinearVectorizedTraversal,
/** \internal Generic vectorization path using one vectorized loop per row/column with some
* scalar loops to handle the unaligned boundaries */
SliceVectorization,
NoVectorization
SliceVectorizedTraversal
};
enum {

View File

@ -26,17 +26,18 @@
#include <typeinfo>
template<typename Dst, typename Src>
bool test_assign(const Dst&, const Src&, int vectorization, int unrolling)
bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
{
return ei_assign_traits<Dst,Src>::Vectorization==vectorization
ei_assign_traits<Dst,Src>::debug();
return ei_assign_traits<Dst,Src>::Traversal==traversal
&& ei_assign_traits<Dst,Src>::Unrolling==unrolling;
}
template<typename Xpr>
bool test_redux(const Xpr&, int vectorization, int unrolling)
bool test_redux(const Xpr&, int traversal, int unrolling)
{
typedef ei_redux_traits<ei_scalar_sum_op<typename Xpr::Scalar>,Xpr> traits;
return traits::Vectorization==vectorization && traits::Unrolling==unrolling;
return traits::Traversal==traversal && traits::Unrolling==unrolling;
}
void test_vectorization_logic()
@ -45,61 +46,67 @@ void test_vectorization_logic()
#ifdef EIGEN_VECTORIZE
VERIFY(test_assign(Vector4f(),Vector4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Vector4f(),Vector4f().cast<float>(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f()+Matrix4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix4f(),Matrix4f().cwise() * Matrix4f(),
InnerVectorization,CompleteUnrolling));
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,16,16>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
InnerVectorization,InnerUnrolling));
InnerVectorizedTraversal,InnerUnrolling));
VERIFY(test_assign(Matrix<float,16,16,DontAlign>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
NoVectorization,InnerUnrolling));
LinearTraversal,NoUnrolling));
VERIFY(test_assign(Matrix<float,2,2,DontAlign>(),Matrix<float,2,2>()+Matrix<float,2,2>(),
LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,6,2>(),Matrix<float,6,2>().cwise() / Matrix<float,6,2>(),
LinearVectorization,CompleteUnrolling));
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,17,17>(),Matrix<float,17,17>()+Matrix<float,17,17>(),
NoVectorization,InnerUnrolling));
LinearTraversal,NoUnrolling));
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix<float,4,4>(),Matrix<float,17,17>().block<4,4>(2,3)+Matrix<float,17,17>().block<4,4>(10,4),
NoVectorization,CompleteUnrolling));
DefaultTraversal,CompleteUnrolling));
VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3),
SliceVectorization,NoUnrolling));
SliceVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(VectorXf(10),
LinearVectorization,NoUnrolling));
LinearVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(Matrix<float,5,2>(),
NoVectorization,CompleteUnrolling));
DefaultTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,6,2>(),
LinearVectorization,CompleteUnrolling));
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,16,16>(),
LinearVectorization,NoUnrolling));
LinearVectorizedTraversal,NoUnrolling));
VERIFY(test_redux(Matrix<float,16,16>().block<4,4>(1,2),
NoVectorization,CompleteUnrolling));
DefaultTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<float,16,16>().block<8,1>(1,2),
LinearVectorization,CompleteUnrolling));
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<double,7,3>(),
NoVectorization,CompleteUnrolling));
DefaultTraversal,CompleteUnrolling));
#endif // EIGEN_VECTORIZE