Evaluators: Implement inner vectorization.

The implementation is minimal (I only wrote the functions called by
the unit test) and ugly (lots of copy and pasting).
This commit is contained in:
Jitse Niesen 2011-03-27 13:49:15 +01:00
parent 5c204d1ff7
commit 1b17a674dd
2 changed files with 115 additions and 1 deletions

View File

@ -76,7 +76,7 @@ private:
public:
enum {
Traversal = int(MayInnerVectorize) ? int(DefaultTraversal) // int(InnerVectorizedTraversal)
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(DefaultTraversal) // int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(DefaultTraversal) // int(LinearTraversal)
@ -145,6 +145,10 @@ template<typename DstXprType, typename SrcXprType,
int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
struct copy_using_evaluator_impl;
/************************
*** Default traversal ***
************************/
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
{
@ -167,6 +171,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
}
};
/***************************
*** Linear vectorization ***
***************************/
template <bool IsAligned = false>
struct unaligned_copy_using_evaluator_impl
{
@ -231,6 +239,32 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling>
{
inline static void run(const DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
typedef typename DstXprType::Index Index;
DstEvaluatorType dstEvaluator(dst.const_cast_derived());
SrcEvaluatorType srcEvaluator(src);
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
dstEvaluator.template writePacketByOuterInner<Aligned>(outer, inner, srcEvaluator.template packetByOuterInner<Aligned>(outer, inner));
}
};
// Based on DenseBase::LazyAssign()
template<typename DstXprType, typename SrcXprType>

View File

@ -71,6 +71,27 @@ struct evaluator_impl<Transpose<ExpressionType> >
return m_argImpl.template packet<LoadMode>(index);
}
// TODO: Difference between PacketScalar and PacketReturnType?
// TODO: Get this function by inheriting from DenseCoeffBase?
// Loads the packet at (outer, inner) from the evaluator of the nested
// expression; both indices are passed through unchanged.
template<int LoadMode>
const typename ExpressionType::PacketScalar packetByOuterInner(Index outer, Index inner) const
{
  typename ExpressionType::PacketScalar loaded =
      m_argImpl.template packetByOuterInner<LoadMode>(outer, inner);
  return loaded;
}
// TODO: Is this function needed?
// template<int StoreMode>
// void writePacket(Index index, const typename ExpressionType::PacketScalar& x)
// {
// m_argImpl.template writePacket<StoreMode>(index, x);
// }
// Stores packet x at (outer, inner) by forwarding to the evaluator of the
// nested expression, with the same indices and the same StoreMode.
template<int StoreMode>
void writePacketByOuterInner(Index outer, Index inner, const typename ExpressionType::PacketScalar& x)
{
m_argImpl.template writePacketByOuterInner<StoreMode>(outer, inner, x);
}
protected:
typename evaluator<ExpressionType>::type m_argImpl;
};
@ -86,6 +107,16 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
typedef typename MatrixType::Index Index;
// Returns the column index that the wrapped matrix reports for the given
// (outer, inner) pair.
Index colIndexByOuterInner(Index outer, Index inner) const
{
  const Index col = m_matrix.colIndexByOuterInner(outer, inner);
  return col;
}
// Returns the row index that the wrapped matrix reports for the given
// (outer, inner) pair.
Index rowIndexByOuterInner(Index outer, Index inner) const
{
  const Index row = m_matrix.rowIndexByOuterInner(outer, inner);
  return row;
}
typename MatrixType::CoeffReturnType coeff(Index i, Index j) const
{
return m_matrix.coeff(i, j);
@ -103,6 +134,18 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
return m_matrix.template packet<LoadMode>(index);
}
// Loads the packet at (row, col) from the wrapped matrix using LoadMode.
template<int LoadMode>
typename MatrixType::PacketReturnType packet(Index row, Index col) const
{
  typename MatrixType::PacketReturnType loaded =
      m_matrix.template packet<LoadMode>(row, col);
  return loaded;
}
// Loads the packet at (outer, inner) from the wrapped matrix using LoadMode.
template<int LoadMode>
typename MatrixType::PacketReturnType packetByOuterInner(Index outer, Index inner) const
{
  typename MatrixType::PacketReturnType loaded =
      m_matrix.template packetByOuterInner<LoadMode>(outer, inner);
  return loaded;
}
template<int StoreMode>
void writePacket(Index index, const typename MatrixType::PacketScalar& x)
{
@ -110,6 +153,12 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
m_matrix.const_cast_derived().template writePacket<StoreMode>(index, x);
}
// Stores packet x at (outer, inner) in the wrapped matrix using StoreMode.
// m_matrix is held as a const reference, so the write goes through
// const_cast_derived().
template<int StoreMode>
void writePacketByOuterInner(Index outer, Index inner, const typename MatrixType::PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacketByOuterInner<StoreMode>(outer, inner, x);
}
protected:
const MatrixType &m_matrix;
};
@ -149,6 +198,18 @@ struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
return m_array.template packet<LoadMode>(index);
}
// Loads the packet at (row, col) from the wrapped array using LoadMode.
template<int LoadMode>
typename ArrayType::PacketReturnType packet(Index row, Index col) const
{
  typename ArrayType::PacketReturnType loaded =
      m_array.template packet<LoadMode>(row, col);
  return loaded;
}
// Loads the packet at (outer, inner) from the wrapped array using LoadMode.
template<int LoadMode>
typename ArrayType::PacketReturnType packetByOuterInner(Index outer, Index inner) const
{
  typename ArrayType::PacketReturnType loaded =
      m_array.template packetByOuterInner<LoadMode>(outer, inner);
  return loaded;
}
template<int StoreMode>
void writePacket(Index index, const typename ArrayType::PacketScalar& x)
{
@ -156,6 +217,12 @@ struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
m_array.const_cast_derived().template writePacket<StoreMode>(index, x);
}
// Stores packet x at (outer, inner) in the wrapped array using StoreMode.
// m_array is held as a const reference, so the write goes through
// const_cast_derived().
template<int StoreMode>
void writePacketByOuterInner(Index outer, Index inner, const typename ArrayType::PacketScalar& x)
{
m_array.const_cast_derived().template writePacketByOuterInner<StoreMode>(outer, inner, x);
}
protected:
const ArrayType &m_array;
};
@ -208,6 +275,19 @@ struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> >
return m_unaryOp.functor().packetOp(m_argImpl.template packet<LoadMode>(index));
}
// Loads the packet at (row, col) from the argument's evaluator and returns
// the result of applying the unary functor's packetOp to it.
template<int LoadMode>
typename UnaryOpType::PacketScalar packet(Index row, Index col) const
{
  typename UnaryOpType::PacketScalar transformed =
      m_unaryOp.functor().packetOp(m_argImpl.template packet<LoadMode>(row, col));
  return transformed;
}
// Translates (outer, inner) into (row, col) using the argument's evaluator,
// then delegates to the (row, col) overload of packet().
template<int LoadMode>
typename UnaryOpType::PacketScalar packetByOuterInner(Index outer, Index inner) const
{
  const Index row = m_argImpl.rowIndexByOuterInner(outer, inner);
  const Index col = m_argImpl.colIndexByOuterInner(outer, inner);
  return packet<LoadMode>(row, col);
}
protected:
const UnaryOpType& m_unaryOp;
typename evaluator<ArgType>::type m_argImpl;