Do not rely on the compiler generating __device__ functions for constexpr in Cuda (via EIGEN_CONSTEXPR_ARE_DEVICE_FUNC. This breaks several target in the TensorFlow Cuda build, e.g.,

INFO: From Compiling tensorflow/core/kernels/maxpooling_op_gpu.cu.cc: /b/f/w/run/external/eigen_archive/Eigen/src/Core/arch/GPU/Half.h(197): error: calling a __host__ function("std::equal_to<float> ::operator () const") from a __global__ function("tensorflow::_NV_ANON_NAMESPACE::MaxPoolGradBackwardNoMaskNHWC< ::Eigen::half> ") is not allowed /b/f/w/run/external/eigen_archive/Eigen/src/Core/arch/GPU/Half.h(197): error: identifier "std::equal_to<float> ::operator () const" is undefined in device code" /b/f/w/run/external/eigen_archive/Eigen/src/Core/arch/GPU/Half.h(197): error: calling a __host__ function("std::equal_to<float> ::operator () const") from a __global__ function("tensorflow::_NV_ANON_NAMESPACE::MaxPoolGradBackwardNoMaskNCHW< ::Eigen::half> ") is not allowed /b/f/w/run/external/eigen_archive/Eigen/src/Core/arch/GPU/Half.h(197): error: identifier "std::equal_to<float> ::operator () const" is undefined in device code 4 errors detected in the compilation of "/tmp/tmpxft_00000011_00000000-6_maxpooling_op_gpu.cu.cpp1.ii". ERROR: /tmpfs/tensor_flow/tensorflow/core/kernels/BUILD:3753:1: output 'tensorflow/core/kernels/_objs/pooling_ops_gpu/maxpooling_op_gpu.cu.pic.o' was not created ERROR: /tmpfs/tensor_flow/tensorflow/core/kernels/BUILD:3753:1: Couldn't build file tensorflow/core/kernels/_objs/pooling_ops_gpu/maxpooling_op_gpu.cu.pic.o: not all outputs were created or valid
2025-01-06 14:14:46 +08:00 · 2018-10-22 16:18:24 -07:00 · 2018-10-22 16:18:24 -07:00 · 14054e217f
commit 14054e217f
parent 954b4ca9d0
1 changed files with 2 additions and 2 deletions
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@ -617,7 +617,7 @@ T div_ceil(const T &a, const T &b)
 template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool equal_strict(const X& x,const Y& y) { return x == y; }

-#if !defined(EIGEN_GPU_COMPILE_PHASE) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }

@ -628,7 +628,7 @@ bool equal_strict(const double& x,const double& y) { return std::equal_to<double
 template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const X& x,const Y& y) { return x != y; }

-#if !defined(EIGEN_GPU_COMPILE_PHASE) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }