mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
Assign.h: add LinearTraversal (non-vectorized index-based traversal)
Rename some constants to make names match more closely what they mean.
This commit is contained in:
parent
9f21e2aab7
commit
94c706d04f
@ -57,40 +57,46 @@ private:
|
||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit)
|
||||
&& (DstIsAligned || InnerMaxSize == Dynamic),/* If the destination isn't aligned,
|
||||
we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. See remark below
|
||||
about InnerMaxSize. */
|
||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only
|
||||
want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
|
||||
of a dynamic block in a fixed-size matrix */
|
||||
MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize
|
||||
&& (DstIsAligned || InnerMaxSize == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. See remark below about InnerMaxSize. */
|
||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix */
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Vectorization = int(MayInnerVectorize) ? int(InnerVectorization)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorization)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorization)
|
||||
: int(NoVectorization)
|
||||
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(DefaultTraversal),
|
||||
Vectorized = int(Traversal) != LinearTraversal && int(Traversal) == DefaultTraversal
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)),
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 1 : int(PacketSize)),
|
||||
MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
|
||||
MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization))
|
||||
? (
|
||||
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Vectorization) == int(LinearVectorization)
|
||||
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
|
||||
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
|
||||
? (
|
||||
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
@ -106,7 +112,7 @@ public:
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
EIGEN_DEBUG_VAR(Vectorization)
|
||||
EIGEN_DEBUG_VAR(Traversal)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
@ -118,12 +124,12 @@ public:
|
||||
* Part 2 : meta-unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/***********************
|
||||
*** No vectorization ***
|
||||
***********************/
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_novec_CompleteUnrolling
|
||||
struct ei_assign_DefaultTraversal_CompleteUnrolling
|
||||
{
|
||||
enum {
|
||||
row = int(Derived1::Flags)&RowMajorBit
|
||||
@ -137,18 +143,18 @@ struct ei_assign_novec_CompleteUnrolling
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(row, col, src);
|
||||
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_novec_InnerUnrolling
|
||||
struct ei_assign_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
|
||||
{
|
||||
@ -156,16 +162,36 @@ struct ei_assign_novec_InnerUnrolling
|
||||
const int row = rowMajor ? row_or_col : Index;
|
||||
const int col = rowMajor ? Index : row_or_col;
|
||||
dst.copyCoeff(row, col, src);
|
||||
ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index, int Stop>
|
||||
struct ei_assign_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.copyCoeff(Index, src);
|
||||
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, int Stop>
|
||||
struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
@ -221,16 +247,16 @@ struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived1, typename Derived2,
|
||||
int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization,
|
||||
int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal,
|
||||
int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
|
||||
struct ei_assign_impl;
|
||||
|
||||
/***********************
|
||||
*** No vectorization ***
|
||||
***********************/
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -248,17 +274,17 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -266,17 +292,42 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
|
||||
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
|
||||
const int outerSize = dst.outerSize();
|
||||
for(int j = 0; j < outerSize; ++j)
|
||||
ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
|
||||
ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
|
||||
::run(dst, src, j);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const int size = dst.size();
|
||||
for(int i = 0; i < size; ++i)
|
||||
dst.copyCoeff(i, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -295,7 +346,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -305,7 +356,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -323,7 +374,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
|
||||
***************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -347,7 +398,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -356,7 +407,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
|
||||
const int alignedSize = (size/packetSize)*packetSize;
|
||||
|
||||
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
|
||||
ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
|
||||
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
@ -365,7 +416,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
|
||||
***************************/
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
|
||||
struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
|
@ -34,10 +34,10 @@ struct ei_dot_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
|
||||
Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
|
||||
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
|
||||
? LinearVectorization
|
||||
: NoVectorization
|
||||
? LinearVectorizedTraversal
|
||||
: DefaultTraversal
|
||||
};
|
||||
|
||||
private:
|
||||
@ -46,7 +46,7 @@ private:
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits<Scalar>::MulCost)
|
||||
+ (Derived1::SizeAtCompileTime-1) * NumTraits<Scalar>::AddCost,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
||||
};
|
||||
|
||||
public:
|
||||
@ -142,13 +142,13 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived1, typename Derived2,
|
||||
int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization,
|
||||
int Traversal = ei_dot_traits<Derived1, Derived2>::Traversal,
|
||||
int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
|
||||
>
|
||||
struct ei_dot_impl;
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
|
||||
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
@ -163,12 +163,12 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
|
||||
struct ei_dot_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
|
||||
: public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
|
||||
{};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
@ -221,20 +221,20 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
|
||||
struct ei_dot_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived1::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
Size = Derived1::SizeAtCompileTime,
|
||||
VectorizationSize = (Size / PacketSize) * PacketSize
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
static Scalar run(const Derived1& v1, const Derived2& v2)
|
||||
{
|
||||
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizationSize>::run(v1, v2));
|
||||
if (VectorizationSize != Size)
|
||||
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizationSize, Size-VectorizationSize>::run(v1, v2);
|
||||
Scalar res = ei_predux(ei_dot_vec_unroller<Derived1, Derived2, 0, VectorizedSize>::run(v1, v2));
|
||||
if (VectorizedSize != Size)
|
||||
res += ei_dot_novec_unroller<Derived1, Derived2, VectorizedSize, Size-VectorizedSize>::run(v1, v2);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
@ -54,16 +54,16 @@ private:
|
||||
|
||||
public:
|
||||
enum {
|
||||
Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorization)
|
||||
: int(NoVectorization)
|
||||
Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(DefaultTraversal)
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
||||
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
||||
};
|
||||
|
||||
public:
|
||||
@ -171,13 +171,13 @@ struct ei_redux_vec_unroller<Func, Derived, Start, 1>
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Func, typename Derived,
|
||||
int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
|
||||
int Traversal = ei_redux_traits<Func, Derived>::Traversal,
|
||||
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
|
||||
>
|
||||
struct ei_redux_impl;
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
|
||||
struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
static Scalar run(const Derived& mat, const Func& func)
|
||||
@ -195,12 +195,12 @@ struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
|
||||
struct ei_redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
|
||||
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
|
||||
{};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
|
||||
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
@ -246,7 +246,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
|
||||
struct ei_redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
@ -277,7 +277,7 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
{
|
||||
res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
|
||||
res = ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -285,20 +285,20 @@ struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
|
||||
struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size,
|
||||
Size = Derived::SizeAtCompileTime,
|
||||
VectorizationSize = (Size / PacketSize) * PacketSize
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizationSize != Size)
|
||||
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
@ -36,7 +36,7 @@
|
||||
* Note that here the inner-loops should always be unrolled.
|
||||
*/
|
||||
|
||||
template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar>
|
||||
template<int Traversal, int Index, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl;
|
||||
|
||||
template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
|
||||
@ -115,7 +115,7 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
|
||||
CanVectorizeInner = ei_traits<GeneralProduct>::CanVectorizeInner
|
||||
};
|
||||
|
||||
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization,
|
||||
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
||||
Unroll ? InnerSize-1 : Dynamic,
|
||||
_LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
|
||||
|
||||
@ -182,17 +182,17 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
|
||||
**************************************/
|
||||
|
||||
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<DefaultTraversal, Index, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
||||
ei_product_coeff_impl<DefaultTraversal, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
||||
{
|
||||
@ -201,7 +201,7 @@ struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
||||
{
|
||||
@ -214,7 +214,7 @@ struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<DefaultTraversal, -1, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {}
|
||||
};
|
||||
@ -244,7 +244,7 @@ struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
|
||||
};
|
||||
|
||||
template<int Index, typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<InnerVectorizedTraversal, Index, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
typedef typename Lhs::PacketScalar PacketScalar;
|
||||
enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
|
||||
@ -252,7 +252,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
PacketScalar pres;
|
||||
ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
|
||||
ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
|
||||
ei_product_coeff_impl<DefaultTraversal,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_predux(pres);
|
||||
}
|
||||
};
|
||||
@ -265,7 +265,7 @@ struct ei_product_coeff_vectorized_dyn_selector
|
||||
res = ei_dot_impl<
|
||||
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
|
||||
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
|
||||
LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col));
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col));
|
||||
}
|
||||
};
|
||||
|
||||
@ -279,7 +279,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||
res = ei_dot_impl<
|
||||
Lhs,
|
||||
Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>,
|
||||
LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col));
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col));
|
||||
}
|
||||
};
|
||||
|
||||
@ -291,7 +291,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||
res = ei_dot_impl<
|
||||
Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
|
||||
Rhs,
|
||||
LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs);
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs);
|
||||
}
|
||||
};
|
||||
|
||||
@ -303,12 +303,12 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||
res = ei_dot_impl<
|
||||
Lhs,
|
||||
Rhs,
|
||||
LinearVectorization, NoUnrolling>::run(lhs, rhs);
|
||||
LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||
struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar>
|
||||
struct ei_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
||||
{
|
||||
|
@ -202,16 +202,19 @@ enum DirectionType { Vertical, Horizontal, BothDirections };
|
||||
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct };
|
||||
|
||||
enum {
|
||||
/** \internal Default traversal, no vectorization, no index-based access */
|
||||
DefaultTraversal,
|
||||
/** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
|
||||
LinearTraversal,
|
||||
/** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
|
||||
* and good size */
|
||||
InnerVectorization,
|
||||
InnerVectorizedTraversal,
|
||||
/** \internal Vectorization path using a single loop plus scalar loops for the
|
||||
* unaligned boundaries */
|
||||
LinearVectorization,
|
||||
LinearVectorizedTraversal,
|
||||
/** \internal Generic vectorization path using one vectorized loop per row/column with some
|
||||
* scalar loops to handle the unaligned boundaries */
|
||||
SliceVectorization,
|
||||
NoVectorization
|
||||
SliceVectorizedTraversal
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -26,17 +26,18 @@
|
||||
#include <typeinfo>
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
bool test_assign(const Dst&, const Src&, int vectorization, int unrolling)
|
||||
bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
|
||||
{
|
||||
return ei_assign_traits<Dst,Src>::Vectorization==vectorization
|
||||
ei_assign_traits<Dst,Src>::debug();
|
||||
return ei_assign_traits<Dst,Src>::Traversal==traversal
|
||||
&& ei_assign_traits<Dst,Src>::Unrolling==unrolling;
|
||||
}
|
||||
|
||||
template<typename Xpr>
|
||||
bool test_redux(const Xpr&, int vectorization, int unrolling)
|
||||
bool test_redux(const Xpr&, int traversal, int unrolling)
|
||||
{
|
||||
typedef ei_redux_traits<ei_scalar_sum_op<typename Xpr::Scalar>,Xpr> traits;
|
||||
return traits::Vectorization==vectorization && traits::Unrolling==unrolling;
|
||||
return traits::Traversal==traversal && traits::Unrolling==unrolling;
|
||||
}
|
||||
|
||||
void test_vectorization_logic()
|
||||
@ -45,61 +46,67 @@ void test_vectorization_logic()
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
|
||||
VERIFY(test_assign(Vector4f(),Vector4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Vector4f(),Vector4f().cast<float>(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
|
||||
VERIFY(test_assign(Matrix4f(),Matrix4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Matrix4f(),Matrix4f()+Matrix4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
VERIFY(test_assign(Matrix4f(),Matrix4f().cwise() * Matrix4f(),
|
||||
InnerVectorization,CompleteUnrolling));
|
||||
InnerVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,16,16>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
|
||||
InnerVectorization,InnerUnrolling));
|
||||
InnerVectorizedTraversal,InnerUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,16,16,DontAlign>(),Matrix<float,16,16>()+Matrix<float,16,16>(),
|
||||
NoVectorization,InnerUnrolling));
|
||||
LinearTraversal,NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,2,2,DontAlign>(),Matrix<float,2,2>()+Matrix<float,2,2>(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,6,2>(),Matrix<float,6,2>().cwise() / Matrix<float,6,2>(),
|
||||
LinearVectorization,CompleteUnrolling));
|
||||
LinearVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,17,17>(),Matrix<float,17,17>()+Matrix<float,17,17>(),
|
||||
NoVectorization,InnerUnrolling));
|
||||
LinearTraversal,NoUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
||||
LinearTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(Matrix<float,4,4>(),Matrix<float,17,17>().block<4,4>(2,3)+Matrix<float,17,17>().block<4,4>(10,4),
|
||||
NoVectorization,CompleteUnrolling));
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3),
|
||||
SliceVectorization,NoUnrolling));
|
||||
SliceVectorizedTraversal,NoUnrolling));
|
||||
|
||||
|
||||
VERIFY(test_redux(VectorXf(10),
|
||||
LinearVectorization,NoUnrolling));
|
||||
LinearVectorizedTraversal,NoUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,5,2>(),
|
||||
NoVectorization,CompleteUnrolling));
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,6,2>(),
|
||||
LinearVectorization,CompleteUnrolling));
|
||||
LinearVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,16,16>(),
|
||||
LinearVectorization,NoUnrolling));
|
||||
LinearVectorizedTraversal,NoUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,16,16>().block<4,4>(1,2),
|
||||
NoVectorization,CompleteUnrolling));
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<float,16,16>().block<8,1>(1,2),
|
||||
LinearVectorization,CompleteUnrolling));
|
||||
LinearVectorizedTraversal,CompleteUnrolling));
|
||||
|
||||
VERIFY(test_redux(Matrix<double,7,3>(),
|
||||
NoVectorization,CompleteUnrolling));
|
||||
DefaultTraversal,CompleteUnrolling));
|
||||
|
||||
#endif // EIGEN_VECTORIZE
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user