Only attempt to use the texture path on GPUs when it's supported by CUDA

Benoit Steiner 2015-07-06 13:32:38 -07:00
parent 864318e508
commit 81f9e968fd


@@ -109,6 +109,24 @@ struct TensorEvaluator
   const Device& m_device;
 };
+namespace {
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T loadConstant(const T* address) {
+  return *address;
+}
+// Use the texture cache on CUDA devices whenever possible
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float loadConstant(const float* address) {
+  return __ldg(address);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double loadConstant(const double* address) {
+  return __ldg(address);
+}
+#endif
+}
 // Default evaluator for rvalues
 template<typename Derived, typename Device>
@@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
     eigen_assert(m_data);
-#ifdef __CUDA_ARCH__
-    return __ldg(m_data+index);
-#else
-    return m_data[index];
-#endif
+    return loadConstant(m_data+index);
   }
   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
     eigen_assert(m_data);
     const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                   : m_dims.IndexOfRowMajor(coords);
-#ifdef __CUDA_ARCH__
-    return __ldg(m_data+index);
-#else
-    return m_data[index];
-#endif
+    return loadConstant(m_data+index);
   }
   EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
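
To illustrate the pattern this commit introduces, here is a minimal standalone CUDA sketch (not part of the commit; the load_constant helper and the scale kernel are made-up names for illustration). As in the Eigen change, a generic template falls back to an ordinary load, while a specialization that only exists when __CUDA_ARCH__ >= 350 routes the load through the read-only (texture) data cache via __ldg(); in the diff above, EIGEN_DEVICE_FUNC and EIGEN_ALWAYS_INLINE play the role of the raw __device__ / __forceinline__ qualifiers used here.

template <typename T>
__device__ __forceinline__ T load_constant(const T* address) {
  // Generic path: a plain global-memory load.
  return *address;
}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
// Only devices of compute capability 3.5+ expose __ldg(), which loads
// through the read-only (texture) data cache.
template <>
__device__ __forceinline__ float load_constant(const float* address) {
  return __ldg(address);
}
#endif

__global__ void scale(const float* in, float* out, float factor, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = factor * load_constant(in + i);
}

Compiled with nvcc for sm_35 or newer, the specialization is selected and the loads go through the texture path; for older architectures the #if removes it and the generic load is used, which is the same guard the commit places around __ldg.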