diff --git a/Eigen/Core b/Eigen/Core
index 3edbe6585..834ff9415 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -200,7 +200,10 @@
 #if defined __CUDACC__
   #define EIGEN_VECTORIZE_CUDA
   #include <vector_types.h>
-  #include <cuda_fp16.h>
+  #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+    #define EIGEN_HAS_CUDA_FP16
+    #include <cuda_fp16.h>
+  #endif
 #endif
 
 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index a8c06ff48..279fd4fd0 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -14,6 +14,8 @@ namespace Eigen {
 
 namespace internal {
 
+#if defined(EIGEN_HAS_CUDA_FP16)
+
 template<>
 struct scalar_cast_op<float, half> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
@@ -92,6 +94,7 @@ template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
 #endif
 }
 
+#endif
 
 } // end namespace internal
 
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index e9f5dd968..aee222a14 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -19,6 +19,7 @@
 
 using Eigen::Tensor;
 
+#ifdef EIGEN_HAS_CUDA_FP16
 void test_cuda_conversion() {
   Eigen::CudaStreamDevice stream;
   Eigen::GpuDevice gpu_device(&stream);
@@ -52,9 +53,11 @@ void test_cuda_conversion() {
   gpu_device.deallocate(d_half);
   gpu_device.deallocate(d_conv);
 }
-
+#endif
 
 
 void test_cxx11_tensor_of_float16_cuda() {
+#ifdef EIGEN_HAS_CUDA_FP16
   CALL_SUBTEST_1(test_cuda_conversion());
+#endif
 }
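
Not part of the patch above: a minimal sketch of how downstream .cu code might key its own fp16 usage off the EIGEN_HAS_CUDA_FP16 macro introduced here. The kernel name, launch configuration, and buffer size are illustrative assumptions, not Eigen APIs; only __half and __float2half from <cuda_fp16.h> (pulled in by Eigen/Core when the macro is defined) are relied on.

// Compile with nvcc as a .cu file; EIGEN_HAS_CUDA_FP16 is only defined
// when __CUDACC_VER__ >= 70500, per the Eigen/Core change above.
#include <Eigen/Core>
#include <cstdio>

#if defined(EIGEN_HAS_CUDA_FP16)
// Converts floats to half precision on the device. __half and __float2half
// come from <cuda_fp16.h>, included by Eigen/Core when the macro is set.
__global__ void float_to_half_kernel(const float* in, __half* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = __float2half(in[i]);
}
#endif

int main() {
#if defined(EIGEN_HAS_CUDA_FP16)
  const int n = 256;
  float* d_in = NULL;
  __half* d_out = NULL;
  cudaMalloc(&d_in, n * sizeof(float));
  cudaMalloc(&d_out, n * sizeof(__half));
  cudaMemset(d_in, 0, n * sizeof(float));
  float_to_half_kernel<<<1, n>>>(d_in, d_out, n);
  cudaDeviceSynchronize();
  cudaFree(d_in);
  cudaFree(d_out);
  std::printf("fp16 path enabled (nvcc >= 7.5)\n");
#else
  std::printf("fp16 path disabled: EIGEN_HAS_CUDA_FP16 not defined\n");
#endif
  return 0;
}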