Clean up CUDA/NVCC version macros and their use in Eigen, and fix a few other CUDA build failures.

2024-12-21 07:19:46 +08:00 · 2019-05-31 15:26:06 -07:00 · 2019-05-31 15:26:06 -07:00 · b08527b0c1
commit b08527b0c1
parent 5614400581
11 changed files with 77 additions and 50 deletions
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@ -138,6 +138,7 @@ template <typename Derived>
 struct coeff_visitor
 {
  // default initialization to avoid countless invalid maybe-uninitialized warnings by gcc
+  EIGEN_DEVICE_FUNC
  coeff_visitor() : row(-1), col(-1), res(0) {}
  typedef typename Derived::Scalar Scalar;
  Index row, col;
--- a/Eigen/src/Core/arch/GPU/Half.h
+++ b/Eigen/src/Core/arch/GPU/Half.h
@ -60,7 +60,7 @@ struct __half_raw {
  // Nothing to do here
  // HIP fp16 header file has a definition for __half_raw
 #elif defined(EIGEN_HAS_CUDA_FP16)
- #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+ #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
 // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
 typedef __half __half_raw;
 #endif // defined(EIGEN_HAS_CUDA_FP16)
@ -83,7 +83,7 @@ struct half_base : public __half_raw {
 #if defined(EIGEN_HAS_HIP_FP16)
  EIGEN_DEVICE_FUNC half_base(const __half& h) { x = __half_as_ushort(h); }
 #elif defined(EIGEN_HAS_CUDA_FP16)
-  #if (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000)
+  #if (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000)
  EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
  #endif
 #endif    
@ -103,11 +103,12 @@ struct half : public half_impl::half_base {
  // Nothing to do here
  // HIP fp16 header file has a definition for __half_raw
 #elif defined(EIGEN_HAS_CUDA_FP16)
-  // Note that EIGEN_CUDACC_VER is set to 0 even when compiling with HIP, so (EIGEN_CUDACC_VER < 90000) is true even for HIP!
-  // So keeping this within #if defined(EIGEN_HAS_CUDA_FP16) is needed
- #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000  
-  typedef half_impl::__half_raw __half_raw;
- #endif
+  // Note that EIGEN_CUDA_SDK_VER is set to 0 even when compiling with HIP, so
+  // (EIGEN_CUDA_SDK_VER < 90000) is true even for HIP!  So keeping this within
+  // #if defined(EIGEN_HAS_CUDA_FP16) is needed
+  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
+    typedef half_impl::__half_raw __half_raw;
+  #endif
 #endif

  EIGEN_DEVICE_FUNC half() {}
@ -119,7 +120,7 @@ struct half : public half_impl::half_base {
 #if defined(EIGEN_HAS_HIP_FP16)
  EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
 #elif defined(EIGEN_HAS_CUDA_FP16)
-  #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+  #if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
  EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
  #endif
 #endif
@ -238,9 +239,9 @@ namespace Eigen {

 namespace half_impl {

-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
-  (defined(EIGEN_HAS_HIP_FP16) && defined(HIP_DEVICE_COMPILE)) || \
-  (defined(EIGEN_HAS_CUDA_FP16) && defined(__clang__) && defined(__CUDA__))
+#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && \
+     EIGEN_CUDA_ARCH >= 530) ||                                  \
+    (defined(EIGEN_HAS_HIP_FP16) && defined(HIP_DEVICE_COMPILE))
 #define EIGEN_HAS_NATIVE_FP16
 #endif

@ -251,7 +252,7 @@ namespace half_impl {

 #if defined(EIGEN_HAS_NATIVE_FP16)
 EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
-#if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
  return __hadd(::__half(a), ::__half(b));
 #else
  return __hadd(a, b);
@ -264,7 +265,7 @@ EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
  return __hsub(a, b);
 }
 EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
-#if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
  return __hdiv(a, b);
 #else
  float num = __half2float(a);
@ -312,13 +313,13 @@ EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {

 #endif

-#if !defined(EIGEN_HAS_NATIVE_FP16) || defined(__clang__) // Emulate support for half floats
+#if !defined(EIGEN_HAS_NATIVE_FP16) || EIGEN_COMP_CLANG // Emulate support for half floats

-#if defined(__clang__) && defined(__CUDA__)
+#if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
 // We need to provide emulated *host-side* FP16 operators for clang.
 #pragma push_macro("EIGEN_DEVICE_FUNC")
 #undef EIGEN_DEVICE_FUNC
-#if defined(EIGEN_HAS_GPU_FP16)
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_HAS_NATIVE_FP16)
 #define EIGEN_DEVICE_FUNC __host__
 #else // both host and device need emulated ops.
 #define EIGEN_DEVICE_FUNC __host__ __device__
@ -517,7 +518,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
  return result;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
-#if (EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)
  return half(hexp(a));
 #else
@ -528,7 +529,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
  return half(numext::expm1(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
+#if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
  return half(::hlog(a));
 #else
@ -542,7 +543,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
  return half(::log10f(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
-#if (EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)
  return half(hsqrt(a));
 #else
@ -565,7 +566,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  return half(::tanhf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
-#if (EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)
  return half(hfloor(a));
 #else
@ -573,7 +574,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
-#if (EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)
  return half(hceil(a));
 #else
@ -673,7 +674,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
  return Eigen::half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if (EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)
  return Eigen::half(::hlog(a));
 #else
@ -712,7 +713,7 @@ struct hash<Eigen::half> {
  defined(EIGEN_HIP_DEVICE_COMPILE)

 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
-  #if (EIGEN_CUDACC_VER < 90000) || \
+  #if (EIGEN_CUDA_SDK_VER < 90000) || \
    defined(EIGEN_HAS_HIP_FP16)
  return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
  #else
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@ -102,6 +102,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
  return make_double2(from, from);
 }

+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG)
 namespace {

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
@ -213,6 +214,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
 pcmp_eq<double2>(const double2& a, const double2& b) {
  return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
 }
+#endif  // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG)

 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
  return make_float4(a, a+1, a+2, a+3);
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@ -73,8 +73,13 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen:
 }

 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
+#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIP_DEVICE_COMPILE)
+  to[0] = from.x;
+  to[1] = from.y;
+#else
  to[0] = __low2half(from);
  to[1] = __high2half(from);
+#endif
 }

 template<>
@ -477,7 +482,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2
  return __floats2half2_rn(r1, r2);
 }

-#if (EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
  defined(EIGEN_HIP_DEVICE_COMPILE)

 template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@ -380,7 +380,7 @@
 #if defined EIGEN_CUDACC
  #define EIGEN_VECTORIZE_GPU
  #include <vector_types.h>
-  #if EIGEN_CUDACC_VER >= 70500
+  #if EIGEN_CUDA_SDK_VER >= 70500
    #define EIGEN_HAS_CUDA_FP16
  #endif
 #endif
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -108,6 +108,18 @@
  #define EIGEN_COMP_MSVC 0
 #endif

+#if defined(__NVCC__)
+#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
+  #define EIGEN_COMP_NVCC  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
+#elif defined(__CUDACC_VER__)
+  #define EIGEN_COMP_NVCC __CUDACC_VER__
+#else
+  #error "NVCC did not define compiler version."
+#endif
+#else
+  #define EIGEN_COMP_NVCC 0
+#endif
+
 // For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:
 //  name        ver   MSC_VER
 //  2008         9      1500
@ -408,10 +420,11 @@
  #define EIGEN_CUDA_ARCH __CUDA_ARCH__
 #endif

-#if defined(CUDA_VERSION)
-  #define EIGEN_CUDACC_VER (CUDA_VERSION*10)
+#if defined(EIGEN_CUDACC)
+#include <cuda.h>
+  #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)
 #else
-  #define EIGEN_CUDACC_VER 0
+  #define EIGEN_CUDA_SDK_VER 0
 #endif

 #if defined(__HIPCC__) && !defined(EIGEN_NO_HIP)
@ -622,7 +635,7 @@
 // Does the compiler support variadic templates?
 #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
 #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
-  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
+  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) )
    // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
    //    this prevents nvcc from crashing when compiling Eigen on Tegra X1
 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
@ -637,7 +650,7 @@
 #ifndef EIGEN_HAS_CONSTEXPR
  #if defined(EIGEN_CUDACC)
  // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-    #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
+    #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))
      #define EIGEN_HAS_CONSTEXPR 1
    #endif
  #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@ -981,7 +994,7 @@ namespace Eigen {
 #endif


-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_COMP_NVCC)
  // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
    using Base::operator =;
--- a/test/gpu_common.h
+++ b/test/gpu_common.h
@ -1,4 +1,3 @@
-
 #ifndef EIGEN_TEST_GPU_COMMON_H
 #define EIGEN_TEST_GPU_COMMON_H

@ -130,10 +129,14 @@ void ei_test_init_gpu()
  std::cout << "  EIGEN_CUDACC:                 " << int(EIGEN_CUDACC) << "\n";
  #endif
  
-  #ifdef EIGEN_CUDACC_VER
-  std::cout << "  EIGEN_CUDACC_VER:             " << int(EIGEN_CUDACC_VER) << "\n";
+  #ifdef EIGEN_CUDA_SDK_VER
+  std::cout << "  EIGEN_CUDA_SDK_VER:             " << int(EIGEN_CUDA_SDK_VER) << "\n";
  #endif

+  #ifdef EIGEN_COMP_NVCC
+  std::cout << "  EIGEN_COMP_NVCC:             " << int(EIGEN_COMP_NVCC) << "\n";
+  #endif
+  
  #ifdef EIGEN_HIPCC
  std::cout << "  EIGEN_HIPCC:                 " << int(EIGEN_HIPCC) << "\n";
  #endif
--- a/test/main.h
+++ b/test/main.h
@ -52,15 +52,17 @@
 #endif

 // Same for cuda_fp16.h
-#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
-#define EIGEN_TEST_CUDACC_VER  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
-#elif defined(__CUDACC_VER__)
-#define EIGEN_TEST_CUDACC_VER __CUDACC_VER__
-#else
-#define EIGEN_TEST_CUDACC_VER 0
+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+  // Means the compiler is either nvcc or clang with CUDA enabled
+  #define EIGEN_CUDACC __CUDACC__
 #endif
-
-#if EIGEN_TEST_CUDACC_VER >= 70500
+#if defined(EIGEN_CUDACC)
+#include <cuda.h>
+  #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)
+#else
+  #define EIGEN_CUDA_SDK_VER 0
+#endif
+#if EIGEN_CUDA_SDK_VER >= 70500
 #include <cuda_fp16.h>
 #endif

--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
@ -388,7 +388,7 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
  // the sum across all big k blocks of the product of little k block of index (x, y)
  // with block of index (y, z). To compute the final output, we need to reduce
  // the 8 threads over y by summation.
-#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+#if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000)
 #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask)
 #else
 #define shuffleInc(i, j, mask) res(i, j) += __shfl_xor_sync(0xFFFFFFFF, res(i, j), mask)
@ -621,7 +621,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
      x1 = rhs_pf0.x;
      x2 = rhs_pf0.z;
    }
-    #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+    #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000)
    x1 = __shfl_xor(x1, 4);
    x2 = __shfl_xor(x2, 4);
    #else
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
@ -81,8 +81,8 @@
 // gpu_assert can be overridden
 #ifndef gpu_assert

-#if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && (EIGEN_CUDACC_VER==0))
-// clang-cuda and HIPCC do not support the use of assert on the GPU side.
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+// HIPCC does not support the use of assert on the GPU side.
 #define gpu_assert(COND)
 #else
 #define gpu_assert(COND) assert(COND)
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@ -177,7 +177,7 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
    } else {
      reducer.reduce(__shfl_down(static_cast<int>(accum), offset, warpSize), &accum);
    }
-  #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+  #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
    reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
  #else
    reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum);
@ -269,7 +269,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
    wka_in.h = accum;
    wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
    reducer.reducePacket(wka_out.h, &accum);
-  #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+  #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
    reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum);
  #else
    int temp = __shfl_down_sync(0xFFFFFFFF, *(int*)(&accum), (unsigned)offset, warpSize);
@ -466,7 +466,7 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
        } else {
          reducer.reduce(__shfl_down(static_cast<int>(reduced_val), offset), &reduced_val);
        }
-      #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+      #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
        reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
      #else
        reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val);
@ -571,7 +571,7 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input,
 	wka_in.h = reduced_val2;
 	wka_out.i = __shfl_down(wka_in.i, offset, warpSize);
        reducer.reducePacket(wka_out.h, &reduced_val2);
-      #elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+      #elif defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000
        reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1);
        reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2);
      #else