mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-11-27 06:30:28 +08:00
Fix CUDA constexpr issues for numeric_limits.
Some CUDA/HIP constants fail on device with `constexpr` since they internally rely on non-constexpr functions, e.g. `#define CUDART_INF_F __int_as_float(0x7f800000)`. This fails for cuda-clang (though it passes with nvcc). These constants are currently used by `device::numeric_limits`. For portability, we need to remove `constexpr` from the affected functions. For C++11 or higher, we should be able to rely on the `std::numeric_limits` versions anyway, since the methods themselves are now `constexpr` and so should be supported on device (clang/hipcc natively, nvcc with `--expt-relaxed-constexpr`).
This commit is contained in:
parent
af1247fbc1
commit
78ee3d6261
@ -763,8 +763,6 @@
|
||||
#if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))
|
||||
#define EIGEN_HAS_CONSTEXPR 1
|
||||
#endif
|
||||
#elif defined(EIGEN_HIPCC)
|
||||
// Skip constexpr on the HIP platform
|
||||
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \
|
||||
(EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \
|
||||
(EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11)))
|
||||
|
@ -275,7 +275,7 @@ template<bool Condition, typename T=void> struct enable_if;
|
||||
template<typename T> struct enable_if<true,T>
|
||||
{ typedef T type; };
|
||||
|
||||
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
|
||||
#if !defined(__FLT_EPSILON__)
|
||||
#define __FLT_EPSILON__ FLT_EPSILON
|
||||
#define __DBL_EPSILON__ DBL_EPSILON
|
||||
@ -296,7 +296,7 @@ template<> struct numeric_limits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static float epsilon() { return __FLT_EPSILON__; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float (max)() {
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
return CUDART_MAX_NORMAL_F;
|
||||
@ -306,7 +306,7 @@ template<> struct numeric_limits<float>
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static float (min)() { return FLT_MIN; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float infinity() {
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
return CUDART_INF_F;
|
||||
@ -314,7 +314,7 @@ template<> struct numeric_limits<float>
|
||||
return HIPRT_INF_F;
|
||||
#endif
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
EIGEN_DEVICE_FUNC
|
||||
static float quiet_NaN() {
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
return CUDART_NAN_F;
|
||||
@ -331,7 +331,7 @@ template<> struct numeric_limits<double>
|
||||
static double (max)() { return DBL_MAX; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static double (min)() { return DBL_MIN; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
EIGEN_DEVICE_FUNC
|
||||
static double infinity() {
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
return CUDART_INF;
|
||||
@ -339,7 +339,7 @@ template<> struct numeric_limits<double>
|
||||
return HIPRT_INF;
|
||||
#endif
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
EIGEN_DEVICE_FUNC
|
||||
static double quiet_NaN() {
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
return CUDART_NAN;
|
||||
@ -414,7 +414,7 @@ template<> struct numeric_limits<bool>
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
|
||||
|
||||
/** \internal
|
||||
* A base class to disable default copy ctor and copy assignment operator.
|
||||
@ -761,7 +761,7 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b =
|
||||
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
|
||||
using internal::device::numeric_limits;
|
||||
#else
|
||||
using std::numeric_limits;
|
||||
|
@ -343,6 +343,21 @@ struct matrix_inverse {
|
||||
}
|
||||
};
|
||||
|
||||
// Test functor that records the values reported by numext::numeric_limits<float>.
// For invocation index i it writes five consecutive entries of `out`, starting at
// out[i * 5], in this order: epsilon, max, min, infinity, quiet_NaN.
// The `in` buffer is accepted only to match the functor call signature and is not read.
template<typename T>
|
||||
struct numeric_limits_test {
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
|
||||
{
|
||||
// `in` is intentionally unused; the macro marks it as such.
EIGEN_UNUSED_VARIABLE(in)
|
||||
// Five values are written per index, so each index owns a block of five slots.
int out_idx = i * 5;
|
||||
out[out_idx++] = numext::numeric_limits<float>::epsilon();
|
||||
// (max) and (min) are parenthesized, presumably to avoid expansion of min/max macros.
out[out_idx++] = (numext::numeric_limits<float>::max)();
|
||||
out[out_idx++] = (numext::numeric_limits<float>::min)();
|
||||
out[out_idx++] = numext::numeric_limits<float>::infinity();
|
||||
out[out_idx++] = numext::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Type1, typename Type2>
|
||||
bool verifyIsApproxWithInfsNans(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only
|
||||
{
|
||||
@ -434,6 +449,9 @@ EIGEN_DECLARE_TEST(gpu_basic)
|
||||
CALL_SUBTEST( run_and_compare_to_gpu(complex_operators<Vector3cf>(), nthreads, cfin, cfout) );
|
||||
CALL_SUBTEST( test_with_infs_nans(complex_sqrt<Vector3cf>(), nthreads, cfin, cfout) );
|
||||
|
||||
// numeric_limits
|
||||
CALL_SUBTEST( test_with_infs_nans(numeric_limits_test<Vector3f>(), 1, in, out) );
|
||||
|
||||
#if defined(__NVCC__)
|
||||
// FIXME
|
||||
// These subtests compile only with nvcc and fail with HIPCC and clang-cuda
|
||||
|
Loading…
Reference in New Issue
Block a user