NVCC: fix closed-form eigenvalue decomposition, workaround gcc4.7/nvcc5.5 issue

2025-01-30 17:40:05 +08:00 · 2014-01-24 12:50:29 +01:00 · 2014-01-24 12:50:29 +01:00 · deab937d45
commit deab937d45
parent 66f1c56aab
3 changed files with 36 additions and 8 deletions
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -77,7 +77,12 @@ template<typename MatrixType, int _DiagIndex> class Diagonal

    EIGEN_DEVICE_FUNC
    inline Index rows() const
-    { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    {
+      EIGEN_USING_STD_MATH(min);
+      return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
+                               : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
+      
+    }

    EIGEN_DEVICE_FUNC
    inline Index cols() const { return 1; }
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -756,7 +756,9 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
  EIGEN_DEVICE_FUNC
  static inline void run(SolverType& solver, const MatrixType& mat, int options)
  {
-    using std::sqrt;
+    EIGEN_USING_STD_MATH(max)
+    EIGEN_USING_STD_MATH(sqrt);
+    
    eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
    eigen_assert((options&~(EigVecMask|GenEigMask))==0
            && (options&EigVecMask)!=EigVecMask
@@ -768,7 +770,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
  
    // map the matrix coefficients to [-1:1] to avoid over- and underflow.
    Scalar scale = mat.cwiseAbs().maxCoeff();
-    scale = (std::max)(scale,Scalar(1));
+    scale = (max)(scale,Scalar(1));
    MatrixType scaledMat = mat / scale;
    
    // Compute the eigenvalues
--- a/test/cuda_basic.cu
+++ b/test/cuda_basic.cu
@@ -1,8 +1,16 @@


+// Work around an issue between gcc >= 4.7 and CUDA 5.5
+#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
+  #undef _GLIBCXX_ATOMIC_BUILTINS
+  #undef _GLIBCXX_USE_INT128
+#endif
+
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cuda_basic
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+
 #include "main.h"
 #include "cuda_common.h"

@@ -70,6 +78,17 @@ struct prod {
  }
 };

+template<typename T1, typename T2>
+struct diagonal {
+  EIGEN_DEVICE_FUNC
+  void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
+  {
+    using namespace Eigen;
+    T1 x1(in+i);
+    Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
+    res += x1.diagonal();
+  }
+};

 template<typename T>
 struct eigenvalues {
@@ -82,12 +101,11 @@ struct eigenvalues {
    Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
    T A = M*M.adjoint();
    SelfAdjointEigenSolver<T> eig;
-    eig.computeDirect(A);
-    res = A.eigenvalues();
+    eig.computeDirect(M);
+    res = eig.eigenvalues();
  }
 };

-
 void test_cuda_basic()
 {
  ei_test_init_cuda();
@@ -110,7 +128,10 @@ void test_cuda_basic()
  CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix3f,Matrix3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix4f,Vector4f>(), nthreads, in, out) );
  
-//   CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
-//   CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );
+  
+  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );

 }