mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
Added support for CUDA architectures that don's support for 3.5 capabilities
This commit is contained in:
parent
6d777e1bc7
commit
51be91f15e
@ -183,25 +183,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
|
||||
to[1] = from.y;
|
||||
}
|
||||
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const float4*)from);
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const double2*)from);
|
||||
#else
|
||||
return make_float2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_double2(__ldg(from+0), __ldg(from+1));
|
||||
}
|
||||
#else
|
||||
return make_float2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
||||
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
||||
|
Loading…
Reference in New Issue
Block a user