mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Rewrite the vectorized meta-unroller of sum to reduce instruction
dependencies => significant speedup
This commit is contained in:
parent
e556e647f4
commit
1eec38dc36
@ -101,17 +101,12 @@ struct ei_sum_novec_unroller<Derived, Start, 1>
|
|||||||
|
|
||||||
/*** vectorization ***/
|
/*** vectorization ***/
|
||||||
|
|
||||||
template<typename Derived, int Index, int Stop,
|
template<typename Derived, int Start, int Length>
|
||||||
bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived::Scalar>::size)>
|
|
||||||
struct ei_sum_vec_unroller
|
struct ei_sum_vec_unroller
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
row = int(Derived::Flags)&RowMajorBit
|
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||||
? Index / int(Derived::ColsAtCompileTime)
|
HalfLength = Length/2
|
||||||
: Index % Derived::RowsAtCompileTime,
|
|
||||||
col = int(Derived::Flags)&RowMajorBit
|
|
||||||
? Index % int(Derived::ColsAtCompileTime)
|
|
||||||
: Index / Derived::RowsAtCompileTime
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename Derived::Scalar Scalar;
|
typedef typename Derived::Scalar Scalar;
|
||||||
@ -120,22 +115,22 @@ struct ei_sum_vec_unroller
|
|||||||
inline static PacketScalar run(const Derived &mat)
|
inline static PacketScalar run(const Derived &mat)
|
||||||
{
|
{
|
||||||
return ei_padd(
|
return ei_padd(
|
||||||
mat.template packet<Aligned>(row, col),
|
ei_sum_vec_unroller<Derived, Start, HalfLength>::run(mat),
|
||||||
ei_sum_vec_unroller<Derived, Index+ei_packet_traits<typename Derived::Scalar>::size, Stop>::run(mat)
|
ei_sum_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
|
||||||
);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, int Index, int Stop>
|
template<typename Derived, int Start>
|
||||||
struct ei_sum_vec_unroller<Derived, Index, Stop, true>
|
struct ei_sum_vec_unroller<Derived, Start, 1>
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
|
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
|
||||||
row = int(Derived::Flags)&RowMajorBit
|
row = int(Derived::Flags)&RowMajorBit
|
||||||
? Index / int(Derived::ColsAtCompileTime)
|
? index / int(Derived::ColsAtCompileTime)
|
||||||
: Index % Derived::RowsAtCompileTime,
|
: index % Derived::RowsAtCompileTime,
|
||||||
col = int(Derived::Flags)&RowMajorBit
|
col = int(Derived::Flags)&RowMajorBit
|
||||||
? Index % int(Derived::ColsAtCompileTime)
|
? index % int(Derived::ColsAtCompileTime)
|
||||||
: Index / Derived::RowsAtCompileTime,
|
: index / Derived::RowsAtCompileTime,
|
||||||
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
|
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -238,7 +233,7 @@ struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
|
|||||||
};
|
};
|
||||||
static Scalar run(const Derived& mat)
|
static Scalar run(const Derived& mat)
|
||||||
{
|
{
|
||||||
Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, VectorizationSize>::run(mat));
|
Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
|
||||||
if (VectorizationSize != Size)
|
if (VectorizationSize != Size)
|
||||||
res += ei_sum_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
|
res += ei_sum_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
|
||||||
return res;
|
return res;
|
||||||
|
Loading…
Reference in New Issue
Block a user