mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
Allow the use of arbitrary packet types during evaluation.
This is implemented by adding a PacketType template parameter to the packet and writePacket members of evaluator<>.
This commit is contained in:
parent
3602926ed5
commit
65bfa5fce7
@ -30,12 +30,12 @@ struct copy_using_evaluator_traits
|
||||
typedef typename DstEvaluator::XprType Dst;
|
||||
typedef typename Dst::Scalar DstScalar;
|
||||
// TODO recursively find best packet size
|
||||
typedef typename packet_traits<DstScalar>::type DstPacket;
|
||||
typedef typename packet_traits<DstScalar>::type PacketType;
|
||||
|
||||
enum {
|
||||
DstFlags = DstEvaluator::Flags,
|
||||
SrcFlags = SrcEvaluator::Flags,
|
||||
RequiredAlignment = unpacket_traits<DstPacket>::alignment
|
||||
RequiredAlignment = unpacket_traits<PacketType>::alignment
|
||||
};
|
||||
|
||||
public:
|
||||
@ -230,6 +230,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
@ -239,8 +240,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner);
|
||||
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
|
||||
kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
|
||||
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
@ -254,10 +255,11 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
template<typename Kernel, int Index_, int Stop>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index_);
|
||||
enum { NextIndex = Index_ + packet_traits<typename Kernel::Scalar>::size };
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
|
||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
@ -366,13 +368,13 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
const Index size = kernel.size();
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef packet_traits<Scalar> PacketTraits;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
|
||||
packetSize = PacketTraits::size,
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = PacketTraits::AlignedOnScalar ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
|
||||
@ -381,7 +383,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment>(index);
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
@ -411,14 +413,15 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
{
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index packetSize = packet_traits<typename Kernel::Scalar>::size;
|
||||
const Index packetSize = unpacket_traits<PacketType>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@ -480,11 +483,11 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef packet_traits<Scalar> PacketTraits;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
packetSize = PacketTraits::size,
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
|
||||
alignable = PacketTraits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
|
||||
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
@ -510,7 +513,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned>(outer, inner);
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
@ -544,6 +547,7 @@ public:
|
||||
typedef typename DstEvaluatorType::Scalar Scalar;
|
||||
typedef typename DstEvaluatorType::StorageIndex StorageIndex;
|
||||
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
|
||||
typedef typename AssignmentTraits::PacketType PacketType;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
@ -588,24 +592,24 @@ public:
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col));
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index));
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
|
||||
|
@ -131,8 +131,6 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
typedef PlainObjectBase<Derived> PlainObjectType;
|
||||
typedef typename PlainObjectType::Scalar Scalar;
|
||||
typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename PlainObjectType::PacketScalar PacketScalar;
|
||||
typedef typename PlainObjectType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
IsRowMajor = PlainObjectType::IsRowMajor,
|
||||
@ -182,36 +180,36 @@ struct evaluator<PlainObjectBase<Derived> >
|
||||
return const_cast<Scalar*>(m_data)[index];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + row * m_outerStride.value() + col);
|
||||
return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
|
||||
else
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + row + col * m_outerStride.value());
|
||||
return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return ploadt<PacketScalar, LoadMode>(m_data + index);
|
||||
return ploadt<PacketType, LoadMode>(m_data + index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode,typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
if (IsRowMajor)
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>
|
||||
return pstoret<Scalar, PacketType, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
|
||||
else
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>
|
||||
return pstoret<Scalar, PacketType, StoreMode>
|
||||
(const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
|
||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -267,8 +265,6 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@ -290,28 +286,28 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(col, row);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(col, row);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(index);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(col, row, x);
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -345,7 +341,6 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@ -357,16 +352,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
|
||||
return m_functor(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(row, col);
|
||||
return m_functor.template packetOp<Index,PacketType>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(index);
|
||||
return m_functor.template packetOp<Index,PacketType>(index);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -395,7 +390,6 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@ -407,16 +401,16 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
|
||||
return m_functor(m_argImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -469,7 +463,6 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
{ }
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@ -481,18 +474,18 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
||||
return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
|
||||
m_rhsImpl.template packet<LoadMode>(row, col));
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
|
||||
m_rhsImpl.template packet<LoadMode>(index));
|
||||
return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
|
||||
m_rhsImpl.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -564,8 +557,6 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
typedef typename XprType::PointerType PointerType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
IsRowMajor = XprType::RowsAtCompileTime,
|
||||
@ -601,30 +592,30 @@ struct mapbase_evaluator : evaluator_base<Derived>
|
||||
return m_data[index * m_xpr.innerStride()];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
return internal::ploadt<PacketScalar, LoadMode>(ptr);
|
||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
|
||||
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
|
||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -770,8 +761,6 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime
|
||||
@ -797,31 +786,31 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
||||
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
return m_argImpl.template writePacket<StoreMode>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||
RowsAtCompileTime == 1 ? index : 0,
|
||||
x);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -908,7 +897,6 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
{
|
||||
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
enum {
|
||||
Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
|
||||
};
|
||||
@ -953,8 +941,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
return m_argImpl.coeff(actual_index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
|
||||
: RowFactor==1 ? row
|
||||
@ -963,17 +951,17 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
|
||||
: ColFactor==1 ? col
|
||||
: col % m_cols.value();
|
||||
|
||||
return m_argImpl.template packet<LoadMode>(actual_row, actual_col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
|
||||
? (ColFactor==1 ? index : index%m_cols.value())
|
||||
: (RowFactor==1 ? index : index%m_rows.value());
|
||||
|
||||
return m_argImpl.template packet<LoadMode>(actual_index);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -1050,8 +1038,6 @@ struct evaluator_wrapper_base
|
||||
|
||||
typedef typename ArgType::Scalar Scalar;
|
||||
typedef typename ArgType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename ArgType::PacketScalar PacketScalar;
|
||||
typedef typename ArgType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
@ -1073,26 +1059,26 @@ struct evaluator_wrapper_base
|
||||
return m_argImpl.coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(row, col);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return m_argImpl.template packet<LoadMode>(index);
|
||||
return m_argImpl.template packet<LoadMode,PacketType>(index);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(row, col, x);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int StoreMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||
}
|
||||
@ -1127,7 +1113,7 @@ struct unary_evaluator<ArrayWrapper<TArgType> >
|
||||
// -------------------- Reverse --------------------
|
||||
|
||||
// defined in Reverse.h:
|
||||
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
|
||||
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
|
||||
|
||||
template<typename ArgType, int Direction>
|
||||
struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
@ -1136,17 +1122,12 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
typedef Reverse<ArgType, Direction> XprType;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
PacketSize = internal::packet_traits<Scalar>::size,
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
IsColMajor = !IsRowMajor,
|
||||
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
|
||||
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
|
||||
ReversePacket = (Direction == BothDirections)
|
||||
|| ((Direction == Vertical) && IsColMajor)
|
||||
|| ((Direction == Horizontal) && IsRowMajor),
|
||||
@ -1163,7 +1144,6 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
|
||||
Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
|
||||
: m_argImpl(reverse.nestedExpression()),
|
||||
@ -1193,32 +1173,47 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
|
||||
return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return reverse_packet::run(m_argImpl.template packet<LoadMode>(
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
|
||||
return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
|
||||
ReverseRow ? m_rows.value() - row - OffsetRow : row,
|
||||
ReverseCol ? m_cols.value() - col - OffsetCol : col));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
PacketScalar packet(Index index) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{
|
||||
return preverse(m_argImpl.template packet<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize));
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
template<int LoadMode, typename PacketType>
|
||||
void writePacket(Index row, Index col, const PacketType& x)
|
||||
{
|
||||
// FIXME we could factorize some code with packet(i,j)
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
|
||||
m_argImpl.template writePacket<LoadMode>(
|
||||
ReverseRow ? m_rows.value() - row - OffsetRow : row,
|
||||
ReverseCol ? m_cols.value() - col - OffsetCol : col,
|
||||
reverse_packet::run(x));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
void writePacket(Index index, const PacketScalar& x)
|
||||
template<int LoadMode, typename PacketType>
|
||||
void writePacket(Index index, const PacketType& x)
|
||||
{
|
||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
||||
m_argImpl.template writePacket<LoadMode>
|
||||
(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
|
||||
}
|
||||
|
@ -216,8 +216,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
|
||||
{
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col);
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
|
||||
}
|
||||
|
||||
|
||||
@ -242,8 +243,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index);
|
||||
return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode,DefaultPacketType>(index);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -170,7 +170,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// trigger a compile time error is someone try to call packet
|
||||
// trigger a compile-time error if someone tries to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
||||
};
|
||||
|
@ -491,13 +491,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
||||
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketReturnType packet(Index row, Index col) const
|
||||
template<int LoadMode, typename PacketType>
|
||||
const PacketType packet(Index row, Index col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
PacketType res;
|
||||
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
Unroll ? InnerSize : Dynamic,
|
||||
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
|
||||
LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
|
||||
|
||||
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||
return res;
|
||||
@ -539,7 +539,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode,Packet>(UnrollingIndex-1, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -549,7 +549,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||
{
|
||||
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -558,7 +558,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode,Packet>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
@ -567,7 +567,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||
{
|
||||
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
res = pmul(lhs.template packet<LoadMode,Packet>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
@ -596,7 +596,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -607,7 +607,7 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||
{
|
||||
res = pset1<Packet>(0);
|
||||
for(Index i = 0; i < innerDim; ++i)
|
||||
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
@ -691,7 +691,6 @@ struct diagonal_product_evaluator_base
|
||||
: evaluator_base<Derived>
|
||||
{
|
||||
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
public:
|
||||
enum {
|
||||
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
|
||||
@ -721,22 +720,22 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
|
||||
{
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
|
||||
internal::pset1<PacketScalar>(m_diagImpl.coeff(id)));
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
|
||||
{
|
||||
enum {
|
||||
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
||||
DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
|
||||
};
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
|
||||
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
|
||||
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
|
||||
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
|
||||
}
|
||||
|
||||
typename evaluator<DiagonalType>::nestedType m_diagImpl;
|
||||
@ -753,7 +752,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
||||
using Base::m_matImpl;
|
||||
using Base::coeff;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
|
||||
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
@ -773,19 +771,19 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
||||
{
|
||||
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
||||
// See also similar calls below.
|
||||
return this->template packet_impl<LoadMode>(row,col, row,
|
||||
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
|
||||
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
||||
{
|
||||
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
@ -800,7 +798,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
||||
using Base::m_matImpl;
|
||||
using Base::coeff;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
|
||||
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
@ -818,17 +815,17 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
||||
}
|
||||
|
||||
#ifndef __CUDACC__
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
|
||||
{
|
||||
return this->template packet_impl<LoadMode>(row,col, col,
|
||||
return this->template packet_impl<LoadMode,PacketType>(row,col, col,
|
||||
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
|
||||
template<int LoadMode,typename PacketType>
|
||||
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
||||
{
|
||||
return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
@ -173,7 +173,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.template packetByOuterInner<alignment>(outer, inner);
|
||||
return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
@ -235,19 +235,19 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
Scalar res;
|
||||
if(alignedSize)
|
||||
{
|
||||
PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
|
||||
PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
|
||||
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
||||
{
|
||||
PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
|
||||
PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
|
||||
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
||||
{
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
|
||||
}
|
||||
|
||||
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
||||
if(alignedEnd>alignedEnd2)
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
|
||||
}
|
||||
res = func.predux(packet_res0);
|
||||
|
||||
@ -273,7 +273,7 @@ template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename packet_traits<Scalar>::type PacketType;
|
||||
|
||||
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
@ -287,10 +287,10 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
|
||||
Scalar res;
|
||||
if(packetedInnerSize)
|
||||
{
|
||||
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
|
||||
PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
|
||||
|
||||
res = func.predux(packet_res);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
@ -371,21 +371,21 @@ public:
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{ return m_evaluator.coeff(index); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index row, Index col) const
|
||||
{ return m_evaluator.template packet<LoadMode>(row, col); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packet(Index index) const
|
||||
{ return m_evaluator.template packet<LoadMode>(index); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
template<int LoadMode>
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketReturnType packetByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
const XprType & nestedExpression() const { return m_xpr; }
|
||||
|
||||
|
@ -48,14 +48,14 @@ struct traits<Reverse<MatrixType, Direction> >
|
||||
};
|
||||
};
|
||||
|
||||
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
|
||||
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
|
||||
{
|
||||
static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
|
||||
static inline PacketType run(const PacketType& x) { return preverse(x); }
|
||||
};
|
||||
|
||||
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
|
||||
template<typename PacketType> struct reverse_packet_cond<PacketType,false>
|
||||
{
|
||||
static inline PacketScalar run(const PacketScalar& x) { return x; }
|
||||
static inline PacketType run(const PacketType& x) { return x; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -21,7 +21,6 @@ class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap
|
||||
{
|
||||
protected:
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
|
||||
typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
@ -35,29 +34,29 @@ public:
|
||||
: Base(dst, src, func, dstExpr)
|
||||
{}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
|
||||
m_dst.template writePacket<StoreMode>(row,col,tmp);
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
PacketScalar tmp = m_src.template packet<LoadMode>(index);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
|
||||
m_dst.template writePacket<StoreMode>(index,tmp);
|
||||
}
|
||||
|
||||
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
|
||||
template<int StoreMode, int LoadMode>
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode>(row, col);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -36,10 +36,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret)
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
|
||||
|
||||
};
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
|
@ -16,13 +16,12 @@ namespace internal {
|
||||
|
||||
template<typename Scalar>
|
||||
struct scalar_constant_op {
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
|
||||
template<typename Index>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
|
||||
template<typename Index>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
|
||||
template<typename Index, typename PacketType>
|
||||
EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
|
||||
const Scalar m_other;
|
||||
};
|
||||
template<typename Scalar>
|
||||
@ -39,7 +38,7 @@ template<typename Scalar>
|
||||
struct functor_traits<scalar_identity_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
|
||||
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl;
|
||||
|
||||
// linear access for packet ops:
|
||||
// 1) initialization
|
||||
@ -49,11 +48,9 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
|
||||
//
|
||||
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
|
||||
// in order to avoid the padd() in operator() ?
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,false>
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,false>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
|
||||
@ -78,11 +75,9 @@ struct linspaced_op_impl<Scalar,false>
|
||||
// random access for packet ops:
|
||||
// 1) each step
|
||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||
template <typename Scalar>
|
||||
struct linspaced_op_impl<Scalar,true>
|
||||
template <typename Scalar, typename Packet>
|
||||
struct linspaced_op_impl<Scalar,Packet,true>
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
|
||||
linspaced_op_impl(const Scalar& low, const Scalar& step) :
|
||||
m_low(low), m_step(step),
|
||||
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
|
||||
@ -111,7 +106,6 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
|
||||
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
|
||||
template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
{
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
|
||||
|
||||
template<typename Index>
|
||||
@ -126,12 +120,12 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
return impl(col + row);
|
||||
}
|
||||
|
||||
template<typename Index>
|
||||
template<typename Index, typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
|
||||
|
||||
// We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
|
||||
// there row==0 and col is used for the actual iteration.
|
||||
template<typename Index>
|
||||
template<typename Index, typename Packet>
|
||||
EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(col==0 || row==0);
|
||||
@ -141,7 +135,8 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
|
||||
// This proxy object handles the actual required temporaries, the different
|
||||
// implementations (random vs. sequential access) as well as the
|
||||
// correct piping to size 2/4 packet operations.
|
||||
const linspaced_op_impl<Scalar,RandomAccess> impl;
|
||||
// TODO find a way to make the packet type configurable
|
||||
const linspaced_op_impl<Scalar,typename packet_traits<Scalar>::type,RandomAccess> impl;
|
||||
};
|
||||
|
||||
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
|
||||
|
Loading…
Reference in New Issue
Block a user