Merged eigen/eigen into default

2025-03-07 18:27:40 +08:00 · 2016-04-01 17:19:36 -07:00 · 2016-04-01 17:19:36 -07:00 · 30242b7565
commit 30242b7565
parent 9d51f7c457 3da495e6b9
7 changed files with 67 additions and 12 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -42,7 +42,10 @@

 #endif

-#if defined(__CUDA_ARCH__)
+// When compiling CUDA device code with NVCC, pull in math functions from the
+// global namespace.  In host mode, and when device doee with clang, use the
+// std versions.
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
 #else
  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@ -153,7 +153,9 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
  };

+  EIGEN_DEVICE_FUNC
  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+  EIGEN_DEVICE_FUNC
  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
 };

@ -166,7 +168,7 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
  typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
  typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
  typedef ArrayType & Nested;
-  
+
  enum {
    IsComplex = NumTraits<Scalar>::IsComplex,
    IsInteger = NumTraits<Scalar>::IsInteger,
@ -176,8 +178,10 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
  };
-  
+
+  EIGEN_DEVICE_FUNC
  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+  EIGEN_DEVICE_FUNC
  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
 };

--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@ -341,6 +341,18 @@ template<> struct is_arithmetic<half> { enum { value = true }; };

 } // end namespace internal

+template<> struct NumTraits<Eigen::half>
+    : GenericNumTraits<Eigen::half>
+{
+  EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-3f; }
+  EIGEN_DEVICE_FUNC static inline Eigen::half highest() {
+    return internal::raw_uint16_to_half(0x7bff);
+  }
+  EIGEN_DEVICE_FUNC static inline Eigen::half lowest() {
+    return internal::raw_uint16_to_half(0xfbff);
+  }
+};
+
 // Infinity/NaN checks.

 namespace numext {
@ -348,7 +360,7 @@ namespace numext {
 static inline EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
  return (a.x & 0x7fff) == 0x7c00;
 }
-static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) {
+static inline EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) {
 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hisnan(a);
 #else
@ -403,6 +415,15 @@ using ::sqrt;
 using ::floor;
 using ::ceil;

+#if __cplusplus > 199711L
+template <>
+struct hash<Eigen::half> {
+  size_t operator()(const Eigen::half& a) const {
+    return std::hash<unsigned short>()(a.x);
+  }
+};
+#endif
+
 } // end namespace std


@ -411,7 +432,14 @@ using ::ceil;
 __device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
  return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
 }
+#endif

+// ldg() has an overload for __half, but we also need one for Eigen::half.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+  return Eigen::internal::raw_uint16_to_half(
+      __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
 #endif


--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -370,8 +370,8 @@

 // Does the compiler support const expressions?
 #ifdef __CUDACC__
-// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above
-#if defined(__CUDACC_VER__) &&  __CUDACC_VER__ >= 70500 && __cplusplus > 199711L
+// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
+#if __cplusplus > 199711L && defined(__CUDACC_VER__) && (defined(__clang__) || __CUDACC_VER__ >= 70500)
  #define EIGEN_HAS_CONSTEXPR 1
 #endif
 #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@ -10,8 +10,9 @@
 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
 #define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

-// NVCC fails to compile this code
-#if !defined(__CUDACC__)
+// This code requires the ability to initialize arrays of constant
+// values directly inside a class.
+#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900

 namespace Eigen {

@ -564,7 +565,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D

  // This will support a maximum FFT size of 2^32 for each dimension
  // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
-  RealScalar m_sin_PI_div_n_LUT[32] = {
+  const RealScalar m_sin_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(-2),
    RealScalar(-0.999999999999999),
@ -600,7 +601,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
  };

  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
-  RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
+  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(0.0),
    RealScalar(-1.00000000000000e+00),
@ -638,7 +639,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D

 }  // end namespace Eigen

-#endif  // __CUDACC__
+#endif  // EIGEN_HAS_CONSTEXPR


 #endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
@ -45,6 +45,23 @@ struct type2index {
  }
 };

+template<DenseIndex n> struct NumTraits<type2index<n> >
+{
+  typedef DenseIndex Real;
+  enum {
+    IsComplex = 0,
+    RequireInitialization = false,
+    ReadCost = 1,
+    AddCost = 1,
+    MulCost = 1
+  };
+
+  EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; }
+  EIGEN_DEVICE_FUNC static inline Real highest() { return n; }
+  EIGEN_DEVICE_FUNC static inline Real lowest() { return n; }
+};
+
 namespace internal {
 template <typename T>
 EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) {
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@ -53,7 +53,9 @@ struct TensorUInt128
  template<typename T>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  explicit TensorUInt128(const T& x) : high(0), low(x) {
-    eigen_assert(x < NumTraits<LOW>::highest());
+    typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type UnsignedT;
+    typedef typename conditional<sizeof(LOW) == 8, uint64_t, uint32_t>::type UnsignedLow;
+    eigen_assert(static_cast<UnsignedT>(x) <= static_cast<UnsignedLow>(NumTraits<LOW>::highest()));
    eigen_assert(x >= 0);
  }