From 9ff269a1d3ab3ff39df6b8f9444a3ec672b32649 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sat, 20 Feb 2016 07:47:23 +0000 Subject: [PATCH] Moved some of the fp16 operators outside the Eigen namespace to work around some nvcc limitations. --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index d0106f4f1..7af0bdc60 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -10,10 +10,6 @@ #ifndef EIGEN_PACKET_MATH_HALF_CUDA_H #define EIGEN_PACKET_MATH_HALF_CUDA_H -namespace Eigen { - -namespace internal { - #if defined(EIGEN_HAS_CUDA_FP16) // Make sure this is only available when targeting a GPU: we don't want to @@ -41,22 +37,28 @@ __device__ half operator - (const half& a) { return __hneg(a); } __device__ half operator += (half& a, const half& b) { - a = __hadd(a, b); + a = a + b; return a; } __device__ half operator *= (half& a, const half& b) { - a = __hmul(a, b); + a = a * b; return a; } __device__ half operator -= (half& a, const half& b) { - a = __hsub(a, b); + a = a - b; return a; } __device__ half operator /= (half& a, const half& b) { a = a / b; return a; } +__device__ half __shfl_xor(half a, int) { + assert(false && "tbd"); + return a; +} +namespace Eigen { +namespace internal { template<> struct is_arithmetic { enum { value = true }; }; @@ -225,13 +227,11 @@ ptranspose(PacketBlock& kernel) { // kernel.packet[1].x = tmp; } -#endif -#endif -#endif - } // end namespace internal } // end namespace Eigen - +#endif +#endif +#endif #endif // EIGEN_PACKET_MATH_HALF_CUDA_H