mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
Only enable optimized reductions of fp16 if the reduction functor supports them
This commit is contained in:
parent
b6e306f189
commit
5aeb3687c4
@ -291,7 +291,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
#ifdef EIGEN_HAS_CUDA_FP16
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value);
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
@ -475,12 +475,6 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input,
|
||||
|
||||
template <typename Self, typename Op>
|
||||
struct InnerReductionLauncher {
|
||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||
// so reduce the scope of the optimized version of the code to the simple case
|
||||
// of floats.
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
|
||||
template <typename OutputType>
|
||||
static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
|
||||
assert(false && "Should only be called to reduce floats and half floats on a gpu device");
|
||||
@ -561,7 +555,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
||||
#ifdef EIGEN_HAS_CUDA_FP16
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value);
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
|
Loading…
Reference in New Issue
Block a user