mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
Only attempt to use the texture path on GPUs when it's supported by CUDA
This commit is contained in:
parent
864318e508
commit
81f9e968fd
@ -109,6 +109,24 @@ struct TensorEvaluator
|
||||
const Device& m_device;
|
||||
};
|
||||
|
||||
namespace {
|
||||
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T loadConstant(const T* address) {
|
||||
return *address;
|
||||
}
|
||||
// Use the texture cache on CUDA devices whenever possible
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float loadConstant(const float* address) {
|
||||
return __ldg(address);
|
||||
}
|
||||
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double loadConstant(const double* address) {
|
||||
return __ldg(address);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Default evaluator for rvalues
|
||||
template<typename Derived, typename Device>
|
||||
@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||
eigen_assert(m_data);
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __ldg(m_data+index);
|
||||
#else
|
||||
return m_data[index];
|
||||
#endif
|
||||
return loadConstant(m_data+index);
|
||||
}
|
||||
|
||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
|
||||
eigen_assert(m_data);
|
||||
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
||||
: m_dims.IndexOfRowMajor(coords);
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __ldg(m_data+index);
|
||||
#else
|
||||
return m_data[index];
|
||||
#endif
|
||||
return loadConstant(m_data+index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
|
||||
|
Loading…
Reference in New Issue
Block a user