Only attempt to use the texture path on GPUs when it's supported by CUDA

Benoit Steiner 2015-07-06 13:32:38 -07:00
parent 864318e508
commit 81f9e968fd


@@ -109,6 +109,24 @@ struct TensorEvaluator
   const Device& m_device;
 };
+namespace {
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T loadConstant(const T* address) {
+  return *address;
+}
+// Use the texture cache on CUDA devices whenever possible
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float loadConstant(const float* address) {
+  return __ldg(address);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double loadConstant(const double* address) {
+  return __ldg(address);
+}
+#endif
+}
 // Default evaluator for rvalues
 template<typename Derived, typename Device>
@@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
     eigen_assert(m_data);
-#ifdef __CUDA_ARCH__
-    return __ldg(m_data+index);
-#else
-    return m_data[index];
-#endif
+    return loadConstant(m_data+index);
   }
   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
     eigen_assert(m_data);
     const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                   : m_dims.IndexOfRowMajor(coords);
-#ifdef __CUDA_ARCH__
-    return __ldg(m_data+index);
-#else
-    return m_data[index];
-#endif
+    return loadConstant(m_data+index);
   }
   EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
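
To illustrate the pattern this commit introduces, here is a minimal standalone CUDA sketch (not part of the commit; the load_constant helper and the scale kernel are made-up names for illustration). As in the Eigen change, a generic template falls back to an ordinary load, while a specialization that only exists when __CUDA_ARCH__ >= 350 routes the load through the read-only (texture) data cache via __ldg(); in the diff above, EIGEN_DEVICE_FUNC and EIGEN_ALWAYS_INLINE play the role of the raw __device__ / __forceinline__ qualifiers used here.

template <typename T>
__device__ __forceinline__ T load_constant(const T* address) {
  // Generic path: a plain global-memory load.
  return *address;
}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
// Only devices of compute capability 3.5+ expose __ldg(), which loads
// through the read-only (texture) data cache.
template <>
__device__ __forceinline__ float load_constant(const float* address) {
  return __ldg(address);
}
#endif

__global__ void scale(const float* in, float* out, float factor, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = factor * load_constant(in + i);
}

Compiled with nvcc for sm_35 or newer, the specialization is selected and the loads go through the texture path; for older architectures the #if removes it and the generic load is used, which is the same guard the commit places around __ldg.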