Implement unrolling in copy_using_evaluator() .

This commit is contained in:
Jitse Niesen 2011-04-13 11:49:48 +01:00
parent 7e86324898
commit e654405900
3 changed files with 300 additions and 7 deletions

View File

@ -29,10 +29,15 @@
// This implementation is based on Assign.h
// copy_using_evaluator_traits is based on assign_traits
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
// copy_using_evaluator_traits is based on assign_traits
// (actually, it's identical)
template <typename Derived, typename OtherDerived>
struct copy_using_evaluator_traits
{
@ -101,15 +106,15 @@ public:
enum {
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
int(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling)
: int(MayUnrollInner) ? int(NoUnrolling) // int(InnerUnrolling)
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(NoUnrolling) // int(CompleteUnrolling)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(NoUnrolling)
};
@ -138,6 +143,175 @@ public:
#endif
};
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/
// TODO: Ideally, we want to use only the evaluator objects here, not the expression objects.
// However, we need to access .rowIndexByOuterInner(), which is in the expression object.
/************************
*** Default traversal ***
************************/
// Compile-time recursion over the flat coefficient positions Index, Index+1, ...
// Each step copies one coefficient from the source evaluator to the
// destination evaluator and then hands over to the step for Index+1.
// The flat position is split into (outer, inner) using the destination's
// compile-time inner size.
template<typename DstXprType, typename SrcXprType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      const DstXprType &dst)
  {
    // Within this struct `Index` names the int template parameter, so the
    // expression's index type gets its own alias.
    typedef typename DstXprType::Index CoeffIndex;
    typedef copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, Index+1, Stop> Next;

    // TODO: Use copyCoeffByOuterInner ?
    const CoeffIndex r = dst.rowIndexByOuterInner(outer, inner);
    const CoeffIndex c = dst.colIndexByOuterInner(outer, inner);
    dstEvaluator.coeffRef(r, c) = srcEvaluator.coeff(r, c);

    Next::run(dstEvaluator, srcEvaluator, dst);
  }
};
// Terminal case (Index == Stop): there is nothing left to copy, so run()
// does nothing.
template<typename DstXprType, typename SrcXprType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop>
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType & /*dstEvaluator*/,
                                      SrcEvaluatorType & /*srcEvaluator*/,
                                      const DstXprType & /*dst*/)
  {
  }
};
// Unrolls the coefficient-wise copy of a single inner vector: for the given
// `outer`, inner positions Index, Index+1, ... are copied one per recursion
// step until Index reaches Stop.
//
// `outer` is typed as DstXprType::Index (it used to be int). The loop that
// drives this unroller counts in DstXprType::Index, so an int parameter
// silently narrowed the value whenever Index is wider than int.
// Callers that still pass an int keep working through implicit conversion.
template<typename DstXprType, typename SrcXprType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  // dstEvaluator / srcEvaluator: evaluators written to / read from.
  // dst: destination expression, used only to map (outer, inner) to (row, col).
  // outer: outer index of the inner vector being copied.
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      const DstXprType &dst,
                                      typename DstXprType::Index outer)
  {
    // TODO: Use copyCoeffByOuterInner ?
    // Here `Index` is the int template parameter (the inner position), hence
    // the explicit DstXprType::Index for the coordinate type.
    typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index);
    typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index);
    dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstXprType, SrcXprType, Index+1, Stop>
      ::run(dstEvaluator, srcEvaluator, dst, outer);
  }
};

// Terminal case (Index == Stop): the inner vector has been fully copied.
template<typename DstXprType, typename SrcXprType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstXprType, SrcXprType, Stop, Stop>
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  // Same parameter list as the primary template so the recursive call
  // forwards `outer` without any conversion.
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&,
                                      typename DstXprType::Index) { }
};
/***********************
*** Linear traversal ***
***********************/
// Compile-time recursion that copies coefficients through the evaluators'
// single-index accessors: step Index copies coefficient Index and then
// delegates to the step for Index+1.
template<typename DstXprType, typename SrcXprType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      const DstXprType &dst)
  {
    typedef copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstXprType, SrcXprType, Index+1, Stop> Next;

    // use copyCoeff ?
    dstEvaluator.coeffRef(Index) = srcEvaluator.coeff(Index);

    // dst is not read by this step; it is only passed along the chain.
    Next::run(dstEvaluator, srcEvaluator, dst);
  }
};
// Terminal case (Index == Stop): every coefficient has been handled, so
// run() does nothing.
template<typename DstXprType, typename SrcXprType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop>
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType & /*dstEvaluator*/,
                                      SrcEvaluatorType & /*srcEvaluator*/,
                                      const DstXprType & /*dst*/)
  {
  }
};
/**************************
*** Inner vectorization ***
**************************/
template<typename DstXprType, typename SrcXprType, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator,
const DstXprType &dst)
{
// TODO: Use copyPacketByOuterInner ?
typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, inner);
typename DstXprType::Index col = dst.colIndexByOuterInner(outer, inner);
dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<JointAlignment>(row, col));
copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType,
Index+packet_traits<typename DstXprType::Scalar>::size, Stop>::run(dstEvaluator, srcEvaluator, dst);
}
};
// Terminal case (Index == Stop): no packet remains to be copied, so run()
// does nothing.
template<typename DstXprType, typename SrcXprType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstXprType, SrcXprType, Stop, Stop>
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  EIGEN_STRONG_INLINE static void run(DstEvaluatorType & /*dstEvaluator*/,
                                      SrcEvaluatorType & /*srcEvaluator*/,
                                      const DstXprType & /*dst*/)
  {
  }
};
// Unrolls the packet-wise copy of a single inner vector: for the given
// `outer`, one packet is transferred per recursion step, starting at inner
// position Index and advancing by packet_traits<Scalar>::size until Index
// reaches Stop. Both the read and the write use the Aligned mode.
//
// `outer` is typed as DstXprType::Index (it used to be int). The loop that
// drives this unroller counts in DstXprType::Index, so an int parameter
// silently narrowed the value whenever Index is wider than int.
// Callers that still pass an int keep working through implicit conversion.
template<typename DstXprType, typename SrcXprType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  // dstEvaluator / srcEvaluator: evaluators written to / read from.
  // dst: destination expression, used only to map (outer, inner) to (row, col).
  // outer: outer index of the inner vector being copied.
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
                                      SrcEvaluatorType &srcEvaluator,
                                      const DstXprType &dst,
                                      typename DstXprType::Index outer)
  {
    // TODO: Use copyPacketByOuterInner ?
    // Here `Index` is the int template parameter (the inner position), hence
    // the explicit DstXprType::Index for the coordinate type.
    typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index);
    typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index);
    dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<Aligned>(row, col));
    copy_using_evaluator_innervec_InnerUnrolling<DstXprType, SrcXprType,
      Index+packet_traits<typename DstXprType::Scalar>::size, Stop>::run(dstEvaluator, srcEvaluator, dst, outer);
  }
};

// Terminal case (Index == Stop): the inner vector has been fully copied.
template<typename DstXprType, typename SrcXprType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstXprType, SrcXprType, Stop, Stop>
{
  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;

  // Same parameter list as the primary template so the recursive call
  // forwards `outer` without any conversion.
  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&,
                                      typename DstXprType::Index) { }
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
// copy_using_evaluator_impl is based on assign_impl
template<typename DstXprType, typename SrcXprType,
@ -171,6 +345,41 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
}
};
// Default traversal, fully unrolled: builds one evaluator for each side and
// lets the meta-unroller copy flat positions 0 .. SizeAtCompileTime-1.
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEval;
    typedef typename evaluator<SrcXprType>::type SrcEval;
    typedef copy_using_evaluator_DefaultTraversal_CompleteUnrolling<
              DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> Unroller;

    // dst arrives as const; const_cast_derived() yields the object handed to
    // the destination evaluator.
    DstEval dstEval(dst.const_cast_derived());
    SrcEval srcEval(src);

    Unroller::run(dstEval, srcEval, dst);
  }
};
// Default traversal with inner unrolling: a run-time loop over the outer
// index, with the copy of each inner vector (inner positions
// 0 .. InnerSizeAtCompileTime-1) unrolled at compile time.
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
  typedef typename DstXprType::Index Index;

  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEval;
    typedef typename evaluator<SrcXprType>::type SrcEval;
    typedef copy_using_evaluator_DefaultTraversal_InnerUnrolling<
              DstXprType, SrcXprType, 0, DstXprType::InnerSizeAtCompileTime> InnerUnroller;

    DstEval dstEval(dst.const_cast_derived());
    SrcEval srcEval(src);

    // The outer size is queried once, before the loop.
    const Index numOuter = dst.outerSize();
    for(Index j = 0; j < numOuter; ++j)
    {
      InnerUnroller::run(dstEval, srcEval, dst, j);
    }
  }
};
/***************************
*** Linear vectorization ***
***************************/
@ -239,6 +448,29 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
}
};
// Linear vectorized traversal, fully unrolled. The copy is done in two
// unrolled stages: packets for positions [0, packetEnd), then single
// coefficients for the remaining positions [packetEnd, totalCoeffs).
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename DstXprType::Index Index;

  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    enum {
      totalCoeffs = DstXprType::SizeAtCompileTime,
      coeffsPerPacket = packet_traits<typename DstXprType::Scalar>::size,
      // largest multiple of coeffsPerPacket that does not exceed totalCoeffs
      packetEnd = (totalCoeffs/coeffsPerPacket)*coeffsPerPacket
    };

    typedef copy_using_evaluator_innervec_CompleteUnrolling<
              DstXprType, SrcXprType, 0, packetEnd> PacketStage;
    typedef copy_using_evaluator_DefaultTraversal_CompleteUnrolling<
              DstXprType, SrcXprType, packetEnd, totalCoeffs> ScalarStage;

    typename evaluator<DstXprType>::type dstEval(dst.const_cast_derived());
    typename evaluator<SrcXprType>::type srcEval(src);

    PacketStage::run(dstEval, srcEval, dst);
    ScalarStage::run(dstEval, srcEval, dst);
  }
};
/**************************
*** Inner vectorization ***
**************************/
@ -260,6 +492,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize) {
// TODO: Use copyPacketByOuterInner ?
Index row = dst.rowIndexByOuterInner(outer, inner);
Index col = dst.colIndexByOuterInner(outer, inner);
dstEvaluator.template writePacket<Aligned>(row, col, srcEvaluator.template packet<Aligned>(row, col));
@ -267,6 +500,41 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
}
};
// Inner vectorized traversal, fully unrolled: builds one evaluator for each
// side and lets the packet meta-unroller cover flat positions
// 0 .. SizeAtCompileTime-1.
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEval;
    typedef typename evaluator<SrcXprType>::type SrcEval;
    typedef copy_using_evaluator_innervec_CompleteUnrolling<
              DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> Unroller;

    DstEval dstEval(dst.const_cast_derived());
    SrcEval srcEval(src);

    Unroller::run(dstEval, srcEval, dst);
  }
};
// Inner vectorized traversal with inner unrolling: a run-time loop over the
// outer index, with the packet-wise copy of each inner vector (inner
// positions 0 .. InnerSizeAtCompileTime-1) unrolled at compile time.
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
  typedef typename DstXprType::Index Index;

  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEval;
    typedef typename evaluator<SrcXprType>::type SrcEval;
    typedef copy_using_evaluator_innervec_InnerUnrolling<
              DstXprType, SrcXprType, 0, DstXprType::InnerSizeAtCompileTime> InnerUnroller;

    DstEval dstEval(dst.const_cast_derived());
    SrcEval srcEval(src);

    // The outer size is queried once, before the loop.
    const Index numOuter = dst.outerSize();
    for(Index j = 0; j < numOuter; ++j)
    {
      InnerUnroller::run(dstEval, srcEval, dst, j);
    }
  }
};
/***********************
*** Linear traversal ***
***********************/
@ -289,6 +557,22 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
}
};
// Linear traversal, fully unrolled: builds one evaluator for each side and
// lets the linear meta-unroller copy coefficients 0 .. SizeAtCompileTime-1
// by single index.
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src)
  {
    typedef typename evaluator<DstXprType>::type DstEval;
    typedef typename evaluator<SrcXprType>::type SrcEval;
    typedef copy_using_evaluator_LinearTraversal_CompleteUnrolling<
              DstXprType, SrcXprType, 0, DstXprType::SizeAtCompileTime> Unroller;

    DstEval dstEval(dst.const_cast_derived());
    SrcEval srcEval(src);

    Unroller::run(dstEval, srcEval, dst);
  }
};
/**************************
*** Slice vectorization ***
***************************/
@ -348,6 +632,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
}
};
/***************************************************************************
* Part 4 : Entry points
***************************************************************************/
// Based on DenseBase::LazyAssign()
template<typename DstXprType, typename SrcXprType>

View File

@ -219,6 +219,12 @@ struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> >
return m_functor(index);
}
// Packet access by (row, col): forwards both coordinates to the stored
// functor's packetOp() and returns its result.
// The LoadMode template argument is not used in this body.
template<int LoadMode>
PacketScalar packet(Index row, Index col) const
{
return m_functor.packetOp(row, col);
}
template<int LoadMode>
PacketScalar packet(Index index) const
{

View File

@ -1,4 +1,3 @@
#define EIGEN_ENABLE_EVALUATORS
#include "main.h"