mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
add "slice vectorization" of redux (eg. m.block().minCoeff() is now
vectorized)
This commit is contained in:
parent
c087373968
commit
8aa5aa269a
@ -39,18 +39,26 @@ struct ei_redux_traits
|
||||
{
|
||||
private:
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
|
||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||
InnerMaxSize = int(Derived::Flags)&RowMajorBit
|
||||
? Derived::MaxColsAtCompileTime
|
||||
: Derived::MaxRowsAtCompileTime
|
||||
};
|
||||
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||
&& (ei_functor_traits<Func>::PacketAccess),
|
||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
|
||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||
&& (int(Derived::Flags)&LinearAccessBit)
|
||||
&& (ei_functor_traits<Func>::PacketAccess)
|
||||
? LinearVectorization
|
||||
: NoVectorization
|
||||
Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorization)
|
||||
: int(NoVectorization)
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
enum {
|
||||
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
||||
@ -227,6 +235,45 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
static Scalar run(const Derived& mat, const Func& func)
|
||||
{
|
||||
const int innerSize = mat.innerSize();
|
||||
const int outerSize = mat.outerSize();
|
||||
enum {
|
||||
packetSize = ei_packet_traits<Scalar>::size,
|
||||
isRowMajor = Derived::Flags&RowMajorBit?1:0
|
||||
};
|
||||
const int packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||
Scalar res;
|
||||
if(packetedInnerSize)
|
||||
{
|
||||
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
|
||||
for(int j=0; j<outerSize; ++j)
|
||||
for(int i=0; i<packetedInnerSize; i+=int(packetSize))
|
||||
packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
|
||||
(isRowMajor?j:i, isRowMajor?i:j));
|
||||
|
||||
res = func.predux(packet_res);
|
||||
for(int j=0; j<outerSize; ++j)
|
||||
for(int i=packetedInnerSize; i<innerSize; ++i)
|
||||
res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
{
|
||||
res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user