diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 3b99ef069..2ef5ff205 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > { typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign; Assign assign(*this, other); - static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess; - internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > { typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign; Assign assign(*this, other); - static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess; - internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 6979fb4ec..759e8208f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX { typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX { typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -334,7 +334,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - PacketReturnType rslt = internal::pstore<PacketReturnType>(values, x); + internal::pstore<CoeffReturnType, PacketReturnType>(values, x); for (int i = 0; i < packetSize; ++i) { coeffRef(index+i) = values[i]; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 6b8f71b96..07cba649b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -510,7 +510,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<KernelArgType, Device>::PacketAccess>::run(evalToTmp, m_device); + internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; @@ -815,7 +815,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor<const EvalTo, GpuDevice, TensorEvaluator<KernelArgType, GpuDevice>::PacketAccess>::run(evalToTmp, m_device); + internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 6ea588e4b..24606b0c8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -22,13 +22,8 @@ namespace Eigen { */ namespace internal { -template <typename Device, typename Expression> -struct IsVectorizable { - static const bool value = TensorEvaluator<Expression, Device>::PacketAccess; -}; - // Default strategy: the expression is evaluated with a single cpu thread. -template<typename Expression, typename Device = DefaultDevice, bool Vectorizable = IsVectorizable<Device, Expression>::value> +template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor { public: @@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { } } -template <typename Expression> -struct IsVectorizable<GpuDevice, Expression> { - static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && TensorEvaluator<Expression, GpuDevice>::IsAligned; -}; template<typename Expression> class TensorExecutor<Expression, GpuDevice, false> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 41a36cb75..bd32249b6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> } typedef TensorEvalToOp<const ArgType> EvalTo; EvalTo evalToTmp(m_buffer, m_op); - internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<ArgType, Device>::PacketAccess>::run(evalToTmp, m_device); + internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device); m_impl.cleanup(); return true; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 7df8d1453..b3bc16bc4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -51,8 +51,27 @@ template<typename XprType> class TensorForcedEvalOp; template<typename ExpressionType, typename DeviceType> class TensorDevice; template<typename Derived, typename Device> struct TensorEvaluator; +class DefaultDevice; +class ThreadPoolDevice; +class GpuDevice; + namespace internal { -template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor; + +template <typename Device, typename Expression> +struct IsVectorizable { + static const bool value = TensorEvaluator<Expression, Device>::PacketAccess; +}; + +template <typename Expression> +struct IsVectorizable<GpuDevice, Expression> { + static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && + TensorEvaluator<Expression, GpuDevice>::IsAligned; +}; + +template <typename Expression, typename Device, + bool Vectorizable = IsVectorizable<Device, Expression>::value> +class TensorExecutor; + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index 054ecf7b5..ee66ae192 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA { typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA { typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index fa1e6931c..15e004ee9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr { typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr { typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X { typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X { typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -411,7 +411,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi { const int packetSize = internal::unpacket_traits<PacketReturnType>::size; EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + eigen_assert(index+packetSize-1 < array_prod(dimensions())); Index inputIndices[] = {0, 0}; Index indices[] = {index, index + packetSize - 1}; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index 16bef2ad3..52f95b2a2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, { typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, { typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 1012ecd69..02f73dd37 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> { typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> { typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 00cb8e373..dd913fbae 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> > { typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> > { typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign; Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return *this; }