Made it possible to compile reductions for an old CUDA architecture and run them on a recent GPU.

2024-12-15 07:10:37 +08:00 · 2016-06-29 15:42:01 -07:00 · 2016-06-29 15:42:01 -07:00 · cb2d8b8fa6
commit cb2d8b8fa6
parent b2a47641ce
1 changed files with 10 additions and 2 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@ -336,9 +336,11 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
      (internal::is_same<typename Self::CoeffReturnType, float>::value ||
       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
+#elif __CUDA_ARCH__ >= 300
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                 internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+  static const bool HasOptimizedImplementation = false;
 #endif

  template <typename OutputType>
@ -617,9 +619,11 @@ struct InnerReducer<Self, Op, GpuDevice> {
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
      (internal::is_same<typename Self::CoeffReturnType, float>::value ||
       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
+#elif __CUDA_ARCH__ >= 300
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                 internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+  static const bool HasOptimizedImplementation = false;
 #endif

  template <typename OutputType>
@ -674,8 +678,12 @@ struct OuterReducer<Self, Op, GpuDevice> {
  // Unfortunately nvidia doesn't support well exotic types such as complex,
  // so reduce the scope of the optimized version of the code to the simple case
  // of floats.
+#if __CUDA_ARCH__ >= 300
  static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                 internal::is_same<typename Self::CoeffReturnType, float>::value;
+#else
+  static const bool HasOptimizedImplementation = false;
+#endif

  template <typename Device, typename OutputType>
  static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {