Mirror of https://gitlab.com/libeigen/eigen.git (synced 2024-12-21 07:19:46 +08:00)
Enabled the vectorized evaluation of several tensor expressions that was previously disabled by mistake
commit 925d0d375a (parent 44eedd8915)
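The change itself is mechanical: instead of every call site spelling out the third template argument of internal::TensorExecutor (often hard-coded to false, which silently forced the scalar code path), that argument now defaults to a trait computed from the evaluator's PacketAccess flag. The following is a minimal, self-contained sketch of that dispatch pattern; the names mirror Eigen's but the bodies are simplified stand-ins, not the library's actual implementation.

    // Sketch of the dispatch pattern this commit relies on (stand-in types;
    // the real classes live in unsupported/Eigen/CXX11 Tensor headers).
    #include <cstdio>

    struct DefaultDevice {};

    // Toy "evaluator" advertising whether it supports packet (SIMD) access.
    template <typename Expression, typename Device>
    struct TensorEvaluator {
      static const bool PacketAccess = Expression::PacketAccess;
    };

    // Trait consulted by the executor's defaulted template parameter.
    template <typename Device, typename Expression>
    struct IsVectorizable {
      static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
    };

    // The executor takes the scalar path unless Vectorizable is true.
    template <typename Expression, typename Device = DefaultDevice,
              bool Vectorizable = IsVectorizable<Device, Expression>::value>
    struct TensorExecutor {
      static void run(const Expression&, const Device&) { std::puts("scalar loop"); }
    };

    template <typename Expression, typename Device>
    struct TensorExecutor<Expression, Device, true> {
      static void run(const Expression&, const Device&) { std::puts("packet loop"); }
    };

    struct VectorizableExpr { static const bool PacketAccess = true; };

    int main() {
      VectorizableExpr e;
      // Old call sites spelled the flag out (often as `false`), silently
      // forcing the scalar loop; relying on the default picks the packet loop.
      TensorExecutor<VectorizableExpr, DefaultDevice, false>::run(e, DefaultDevice());
      TensorExecutor<VectorizableExpr>::run(e, DefaultDevice());
    }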
@@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
     {
       typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
       Assign assign(*this, other);
-      static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
-      internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
     {
       typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
-      internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
     {
       typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
     {
       typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -334,7 +334,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
     eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());

     EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    PacketReturnType rslt = internal::pstore<PacketReturnType>(values, x);
+    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
     for (int i = 0; i < packetSize; ++i) {
       coeffRef(index+i) = values[i];
     }
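The old line also tried to capture a return value from internal::pstore, but a packet store only writes the packet into memory and returns nothing; the rewritten call spells out both the scalar and the packet type and discards no result. Below is a rough, non-Eigen sketch of the same write pattern (store the packet into an aligned scratch buffer, then copy coefficient by coefficient), with std::array standing in for a real SIMD packet.

    // Sketch of the writePacket pattern fixed above: store the packet into an
    // aligned scratch buffer, then scatter each coefficient to its destination.
    #include <array>
    #include <cstddef>
    #include <vector>

    using Packet4f = std::array<float, 4>;  // stand-in for a 4-wide SIMD packet

    // Stand-in for internal::pstore<Scalar, Packet>(to, from): returns void.
    inline void pstore(float* to, const Packet4f& from) {
      for (std::size_t i = 0; i < from.size(); ++i) to[i] = from[i];
    }

    // Scatter a packet into a destination that cannot take it contiguously,
    // as the concatenation evaluator does in writePacket.
    void writePacket(std::vector<float>& dst, std::size_t index, const Packet4f& x) {
      alignas(16) float values[4];   // aligned scratch buffer
      pstore(values, x);             // no return value to capture
      for (std::size_t i = 0; i < 4; ++i) dst[index + i] = values[i];
    }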
@@ -510,7 +510,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<KernelArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
+      internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);

       m_kernel = local;
       m_local_kernel = true;
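For the convolution, the kernel expression is first materialized into a freshly allocated buffer through a TensorEvalToOp, and with this commit the executor performing that copy derives its vectorization setting from the defaulted template parameter rather than from an explicit PacketAccess argument at the call site. A simplified, non-Eigen sketch of the "evaluate into a preallocated buffer" pattern, using hypothetical stand-in types:

    // Sketch of materializing an expression into a preallocated buffer
    // (simplified stand-ins for TensorEvalToOp and TensorExecutor).
    #include <cstddef>
    #include <vector>

    template <typename Expr>
    struct EvalTo {                      // stand-in for TensorEvalToOp
      float* buffer;
      const Expr& expr;
    };

    template <typename Op>
    struct Executor {                    // stand-in for internal::TensorExecutor
      static void run(const Op& op, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) op.buffer[i] = op.expr(i);
      }
    };

    struct KernelExpr {
      float operator()(std::size_t i) const { return static_cast<float>(i) * 0.5f; }
    };

    int main() {
      std::vector<float> local(8);       // plays the role of m_device.allocate(kernel_sz)
      KernelExpr kernel;
      EvalTo<KernelExpr> evalToTmp{local.data(), kernel};
      Executor<EvalTo<KernelExpr> >::run(evalToTmp, local.size());
    }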
@@ -815,7 +815,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, GpuDevice, TensorEvaluator<KernelArgType, GpuDevice>::PacketAccess>::run(evalToTmp, m_device);
+      internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device);

       m_kernel = local;
       m_local_kernel = true;
@@ -22,13 +22,8 @@ namespace Eigen {
  */
 namespace internal {

-template <typename Device, typename Expression>
-struct IsVectorizable {
-  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
-};
-
 // Default strategy: the expression is evaluated with a single cpu thread.
-template<typename Expression, typename Device = DefaultDevice, bool Vectorizable = IsVectorizable<Device, Expression>::value>
+template<typename Expression, typename Device, bool Vectorizable>
 class TensorExecutor
 {
  public:
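The defaulted template parameters disappear from the definition here because they move to the forward-declaration header (the @@ -51,8 +51,27 hunk further down); C++ allows a default template argument to be specified on only one declaration, and putting it on the early forward declaration is presumably what lets every tensor header rely on it. A tiny sketch of that rule, with made-up names:

    // A default template argument may appear on only one declaration; placing
    // it on the forward declaration makes it visible to everything that has
    // seen only that declaration.
    template <typename T, bool Vectorizable = true>   // declaration carries the default
    struct Executor;

    template <typename T, bool Vectorizable>          // the definition must not repeat it
    struct Executor {
      static const bool vectorized = Vectorizable;
    };

    static_assert(Executor<int>::vectorized, "default supplied by the forward declaration");

    int main() {}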
@@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
   }
 }

-template <typename Expression>
-struct IsVectorizable<GpuDevice, Expression> {
-  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && TensorEvaluator<Expression, GpuDevice>::IsAligned;
-};

 template<typename Expression>
 class TensorExecutor<Expression, GpuDevice, false>
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     }
     typedef TensorEvalToOp<const ArgType> EvalTo;
     EvalTo evalToTmp(m_buffer, m_op);
-    internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<ArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
+    internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
     m_impl.cleanup();
     return true;
   }
@@ -51,8 +51,27 @@ template<typename XprType> class TensorForcedEvalOp;
 template<typename ExpressionType, typename DeviceType> class TensorDevice;
 template<typename Derived, typename Device> struct TensorEvaluator;

+class DefaultDevice;
+class ThreadPoolDevice;
+class GpuDevice;
+
 namespace internal {
-template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor;
+
+template <typename Device, typename Expression>
+struct IsVectorizable {
+  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
+};
+
+template <typename Expression>
+struct IsVectorizable<GpuDevice, Expression> {
+  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
+                            TensorEvaluator<Expression, GpuDevice>::IsAligned;
+};
+
+template <typename Expression, typename Device,
+          bool Vectorizable = IsVectorizable<Device, Expression>::value>
+class TensorExecutor;
+
 } // end namespace internal

 } // end namespace Eigen
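Note the GPU-specific tightening: on GpuDevice the trait only reports an expression as vectorizable when its evaluator also guarantees IsAligned, so unaligned expressions fall back to the scalar kernel. A small stand-alone sketch of that device-specific specialization (stand-in types, not Eigen's actual evaluators):

    struct DefaultDevice {};
    struct GpuDevice {};

    // Toy evaluator: supports packet access but cannot guarantee alignment,
    // e.g. an expression rooted at an arbitrarily offset slice.
    template <typename Expr> struct Evaluator {
      static const bool PacketAccess = true;
      static const bool IsAligned    = false;
    };

    template <typename Device, typename Expr>
    struct IsVectorizable {
      static const bool value = Evaluator<Expr>::PacketAccess;
    };

    // On the GPU, packets are only used when the data is also aligned.
    template <typename Expr>
    struct IsVectorizable<GpuDevice, Expr> {
      static const bool value = Evaluator<Expr>::PacketAccess && Evaluator<Expr>::IsAligned;
    };

    struct SomeExpr {};
    static_assert(IsVectorizable<DefaultDevice, SomeExpr>::value, "CPU path may vectorize");
    static_assert(!IsVectorizable<GpuDevice, SomeExpr>::value, "GPU path falls back to scalar");

    int main() {}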
@@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
     {
       typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
     {
       typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
     {
       typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
     {
       typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
     {
       typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
     {
       typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -411,7 +411,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   {
     const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
     EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+    eigen_assert(index+packetSize-1 < array_prod(dimensions()));

     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
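array_prod(dimensions()) multiplies all dimension sizes, i.e. it yields the total number of coefficients in the slice; unlike TotalSize() it does not require the dimensions type to provide that member, which is presumably why the assert was rewritten. A one-function sketch of what it computes (Eigen's real array_prod is an internal function template; this stand-in uses std::array):

    // Sketch: product of all dimension sizes = total coefficient count.
    #include <array>
    #include <cstddef>

    template <typename T, std::size_t N>
    T array_prod(const std::array<T, N>& dims) {
      T result = T(1);
      for (std::size_t i = 0; i < N; ++i) result *= dims[i];
      return result;
    }

    int main() {
      std::array<long, 3> dims = {4, 5, 6};
      return array_prod(dims) == 120 ? 0 : 1;   // 4 * 5 * 6 coefficients
    }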
@@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
     {
       typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
     {
       typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
     {
       typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
     {
       typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
     {
       typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
     {
       typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }
