add Cholesky and eigensolver benchmark

2025-02-17 18:09:55 +08:00 · 2008-07-08 17:20:17 +00:00 · 2008-07-08 17:20:17 +00:00 · 77a622f2bb
commit 77a622f2bb
parent 6f09d3a67d
8 changed files with 384 additions and 104 deletions
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@ -3,8 +3,6 @@
 #include <Eigen/Sparse>
 #include <bench/BenchTimer.h>

-
-
 using namespace std;
 using namespace Eigen;
 USING_PART_OF_NAMESPACE_EIGEN
--- a/bench/BenchUtil.h
+++ b/bench/BenchUtil.h
@ -26,3 +26,43 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
 {
  mat.setIdentity();
 }
+
+#ifndef __INTEL_COMPILER
+#define DISABLE_SSE_EXCEPTIONS()  { \
+  int aux; \
+  asm( \
+  "stmxcsr   %[aux]           \n\t" \
+  "orl       $32832, %[aux]   \n\t" \
+  "ldmxcsr   %[aux]           \n\t" \
+  : : [aux] "m" (aux)); \
+}
+#else
+#define DISABLE_SSE_EXCEPTIONS()  
+#endif
+
+#ifdef BENCH_GMM
+#include <gmm/gmm.h>
+template <typename EigenMatrixType, typename GmmMatrixType>
+void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
+{
+  dst.resize(src.rows(),src.cols());
+  for (int j=0; j<src.cols(); ++j)
+    for (int i=0; i<src.rows(); ++i)
+      dst(i,j) = src.coeff(i,j);
+}
+#endif
+
+
+#ifdef BENCH_GSL
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_linalg.h>
+#include <gsl/gsl_eigen.h>
+template <typename EigenMatrixType>
+void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
+{
+  for (int j=0; j<src.cols(); ++j)
+    for (int i=0; i<src.rows(); ++i)
+      gsl_matrix_set(*dst, i, j, src.coeff(i,j));
+}
+#endif
+
--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@ -4,19 +4,7 @@

 int main(int argc, char *argv[])
 {
-  // disable floating point exceptions
-  // this leads to more stable bench results
-  // (this is done by default by ICC)
-  #ifndef __INTEL_COMPILER
-  {
-    int aux;
-    asm(
-    "stmxcsr   %[aux]           \n\t"
-    "orl       $32832, %[aux]   \n\t"
-    "ldmxcsr   %[aux]           \n\t"
-    : : [aux] "m" (aux));
-  }
-  #endif
+  DISABLE_SSE_EXCEPTIONS();

  // this is the list of matrix type and size we want to bench:
  // ((suffix) (matrix size) (number of iterations))
--- a/bench/benchCholesky.cpp
+++ b/bench/benchCholesky.cpp
@ -0,0 +1,132 @@
+
+// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp  -o benchCholesky && ./benchCholesky
+// options:
+//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
+//  -DEIGEN_DONT_VECTORIZE
+//  -msse2
+//  -DREPEAT=100
+//  -DTRIES=10
+//  -DSCALAR=double
+
+#include <Eigen/Array>
+#include <Eigen/Cholesky>
+#include <bench/BenchUtil.h>
+using namespace Eigen;
+
+#ifndef REPEAT
+#define REPEAT 10000
+#endif
+
+#ifndef TRIES
+#define TRIES 4
+#endif
+
+typedef float Scalar;
+
+template <typename MatrixType>
+__attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
+{
+  int rows = m.rows();
+  int cols = m.cols();
+
+  int repeats = (REPEAT*1000)/(rows*rows);
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+
+  MatrixType a = MatrixType::random(rows,cols);
+  SquareMatrixType covMat =  a * a.adjoint();
+
+  BenchTimer timerNoSqrt, timerSqrt;
+
+  Scalar acc = 0;
+  int r = ei_random<int>(0,covMat.rows()-1);
+  int c = ei_random<int>(0,covMat.cols()-1);
+  for (int t=0; t<TRIES; ++t)
+  {
+    timerNoSqrt.start();
+    for (int k=0; k<repeats; ++k)
+    {
+      CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat);
+      acc += cholnosqrt.matrixL().coeff(r,c);
+    }
+    timerNoSqrt.stop();
+  }
+
+  for (int t=0; t<TRIES; ++t)
+  {
+    timerSqrt.start();
+    for (int k=0; k<repeats; ++k)
+    {
+      Cholesky<SquareMatrixType> chol(covMat);
+      acc += chol.matrixL().coeff(r,c);
+    }
+    timerSqrt.stop();
+  }
+
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+    std::cout << "dyn   ";
+  else
+    std::cout << "fixed ";
+  std::cout << covMat.rows() << " \t"
+            << (timerNoSqrt.value() * REPEAT) / repeats << "s \t"
+            << (timerSqrt.value() * REPEAT) / repeats << "s";
+
+
+  #ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSqrt.reset();
+
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    
+    eiToGsl(covMat, &gslCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSqrt.start();
+      for (int k=0; k<repeats; ++k)
+      {
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_linalg_cholesky_decomp(gslCopy);
+        acc += gsl_matrix_get(gslCopy,r,c);
+      }
+      timerSqrt.stop();
+    }
+
+    std::cout << " | \t"
+              << timerSqrt.value() * REPEAT / repeats << "s";
+
+    gsl_matrix_free(gslCovMat);
+  }
+  #endif
+  std::cout << "\n";
+  // make sure the compiler does not optimize too much
+  if (acc==123)
+    std::cout << acc;
+}
+
+int main(int argc, char* argv[])
+{
+  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
+  std::cout << "size            no sqrt         standard";
+  #ifdef BENCH_GSL
+  std::cout << "       GSL (standard + double + ATLAS)  ";
+  #endif
+  std::cout << "\n";
+
+  for (uint i=0; dynsizes[i]>0; ++i)
+    benchCholesky(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+
+  benchCholesky(Matrix<Scalar,2,2>());
+  benchCholesky(Matrix<Scalar,3,3>());
+  benchCholesky(Matrix<Scalar,4,4>());
+  benchCholesky(Matrix<Scalar,5,5>());
+  benchCholesky(Matrix<Scalar,6,6>());
+  benchCholesky(Matrix<Scalar,7,7>());
+  benchCholesky(Matrix<Scalar,8,8>());
+  benchCholesky(Matrix<Scalar,12,12>());
+  benchCholesky(Matrix<Scalar,16,16>());
+  return 0;
+}
+
--- a/bench/benchEigenSolver.cpp
+++ b/bench/benchEigenSolver.cpp
@ -0,0 +1,210 @@
+
+// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp  -o benchEigenSolver && ./benchEigenSolver
+// options:
+//  -DBENCH_GMM
+//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
+//  -DEIGEN_DONT_VECTORIZE
+//  -msse2
+//  -DREPEAT=100
+//  -DTRIES=10
+//  -DSCALAR=double
+
+#include <Eigen/Array>
+#include <Eigen/QR>
+#include <bench/BenchUtil.h>
+using namespace Eigen;
+
+#ifndef REPEAT
+#define REPEAT 1000
+#endif
+
+#ifndef TRIES
+#define TRIES 4
+#endif
+
+#ifndef SCALAR
+#define SCALAR float
+#endif
+
+typedef SCALAR Scalar;
+
+template <typename MatrixType>
+__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
+{
+  int rows = m.rows();
+  int cols = m.cols();
+
+  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
+  int saRepeats = stdRepeats * 4;
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
+
+  MatrixType a = MatrixType::random(rows,cols);
+  SquareMatrixType covMat =  a * a.adjoint();
+
+  BenchTimer timerSa, timerStd;
+
+  Scalar acc = 0;
+  int r = ei_random<int>(0,covMat.rows()-1);
+  int c = ei_random<int>(0,covMat.cols()-1);
+  {
+    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        ei.compute(covMat);
+        acc += ei.eigenvectors().coeff(r,c);
+      }
+      timerSa.stop();
+    }
+  }
+
+  {
+    EigenSolver<SquareMatrixType> ei(covMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerStd.start();
+      for (int k=0; k<stdRepeats; ++k)
+      {
+        ei.compute(covMat);
+        acc += ei.eigenvectors().coeff(r,c);
+      }
+      timerStd.stop();
+    }
+  }
+
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+    std::cout << "dyn   ";
+  else
+    std::cout << "fixed ";
+  std::cout << covMat.rows() << " \t"
+            << timerSa.value() * REPEAT / saRepeats << "s \t"
+            << timerStd.value() * REPEAT / stdRepeats << "s";
+
+  #ifdef BENCH_GMM
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSa.reset();
+    timerStd.reset();
+
+    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
+    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
+    std::vector<Scalar> eigval(covMat.rows());
+    eiToGmm(covMat, gmmCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
+        acc += eigvect(r,c);
+      }
+      timerSa.stop();
+    }
+    // the non-selfadjoint solver does not compute the eigen vectors
+//     for (int t=0; t<TRIES; ++t)
+//     {
+//       timerStd.start();
+//       for (int k=0; k<stdRepeats; ++k)
+//       {
+//         gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
+//         acc += eigvect(r,c);
+//       }
+//       timerStd.stop();
+//     }
+
+    std::cout << " | \t"
+              << timerSa.value() * REPEAT / saRepeats << "s"
+              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ "   na   ";
+  }
+  #endif
+
+  #ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime==Dynamic)
+  {
+    timerSa.reset();
+    timerStd.reset();
+
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_vector* eigval  = gsl_vector_alloc(covMat.rows());
+    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
+    
+    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
+    gsl_vector_complex* eigvalz  = gsl_vector_complex_alloc(covMat.rows());
+    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
+    
+    eiToGsl(covMat, &gslCovMat);
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerSa.start();
+      for (int k=0; k<saRepeats; ++k)
+      {
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
+        acc += gsl_matrix_get(eigvect,r,c);
+      }
+      timerSa.stop();
+    }
+    for (int t=0; t<TRIES; ++t)
+    {
+      timerStd.start();
+      for (int k=0; k<stdRepeats; ++k)
+      {
+        gsl_matrix_memcpy(gslCopy,gslCovMat);
+        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
+        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
+      }
+      timerStd.stop();
+    }
+
+    std::cout << " | \t"
+              << timerSa.value() * REPEAT / saRepeats << "s \t"
+              << timerStd.value() * REPEAT / stdRepeats << "s";
+
+    gsl_matrix_free(gslCovMat);
+    gsl_vector_free(gslCopy);
+    gsl_matrix_free(eigvect);
+    gsl_vector_free(eigval);
+    gsl_matrix_complex_free(eigvectz);
+    gsl_vector_complex_free(eigvalz);
+    gsl_eigen_symmv_free(eisymm);
+    gsl_eigen_nonsymmv_free(einonsymm);
+  }
+  #endif
+
+  std::cout << "\n";
+  
+  // make sure the compiler does not optimize too much
+  if (acc==123)
+    std::cout << acc;
+}
+
+int main(int argc, char* argv[])
+{
+  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
+  std::cout << "size            selfadjoint       generic";
+  #ifdef BENCH_GMM
+  std::cout << "        GMM++          ";
+  #endif
+  #ifdef BENCH_GSL
+  std::cout << "       GSL (double + ATLAS)  ";
+  #endif
+  std::cout << "\n";
+  for (uint i=0; dynsizes[i]>0; ++i)
+    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+
+  benchEigenSolver(Matrix<Scalar,2,2>());
+  benchEigenSolver(Matrix<Scalar,3,3>());
+  benchEigenSolver(Matrix<Scalar,4,4>());
+  benchEigenSolver(Matrix<Scalar,6,6>());
+  benchEigenSolver(Matrix<Scalar,8,8>());
+  benchEigenSolver(Matrix<Scalar,12,12>());
+  benchEigenSolver(Matrix<Scalar,16,16>());
+  return 0;
+}
+
--- a/bench/bench_unrolling
+++ b/bench/bench_unrolling
@ -5,7 +5,7 @@

 for ((i=1; i<16; ++i)); do
    echo "Matrix size: $i x $i :"
-    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=25 -o benchmark && time ./benchmark >/dev/null
+    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
    $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
    echo " "
 done
--- a/bench/ompbench.cxxlist
+++ b/bench/ompbench.cxxlist
@ -1,7 +0,0 @@
-#!/bin/bash
-
-CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
-
-# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
-
-CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
--- a/bench/ompbenchmark.cpp
+++ b/bench/ompbenchmark.cpp
@ -1,81 +0,0 @@
-// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
-// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp  benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
-
-#include <omp.h>
-#include "BenchUtil.h"
-#include "basicbenchmark.h"
-
-// #include <Eigen/Core>
-// #include "BenchTimer.h"
-//
-// using namespace std;
-// USING_PART_OF_NAMESPACE_EIGEN
-//
-// enum {LazyEval, EarlyEval, OmpEval};
-//
-// template<int Mode, typename MatrixType>
-// double benchSingleProc(const MatrixType& mat, int iterations, int tries)  __attribute__((noinline));
-//
-// template<int Mode, typename MatrixType>
-// double benchBasic(const MatrixType& mat, int iterations, int tries)
-// {
-//   const int rows = mat.rows();
-//   const int cols = mat.cols();
-//
-//   Eigen::BenchTimer timer;
-//   for(uint t=0; t<tries; ++t)
-//   {
-//     MatrixType I = MatrixType::identity(rows, cols);
-//     MatrixType m = MatrixType::random(rows, cols);
-//
-//     timer.start();
-//     for(int a = 0; a < iterations; a++)
-//     {
-//       if(Mode==LazyEval)
-//         m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-//       else if(Mode==OmpEval)
-//         m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
-//       else
-//         m = I + 0.00005 * (m + m * m);
-//     }
-//     timer.stop();
-//     cerr << m;
-//   }
-//   return timer.value();
-// };
-
-int main(int argc, char *argv[])
-{
-  // disbale floating point exceptions
-  // this leads to more stable bench results
-  {
-    int aux;
-    asm(
-    "stmxcsr   %[aux]           \n\t"
-    "orl       $32832, %[aux]   \n\t"
-    "ldmxcsr   %[aux]           \n\t"
-    : : [aux] "m" (aux));
-  }
-
-  // commented since the default setting is use as many threads as processors
-  //omp_set_num_threads(omp_get_num_procs());
-
-  std::cout << "double, fixed-size 4x4: "
-    << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s  "
-    << benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s  \n";
-
-  #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
-      double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
-      double omp    = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
-      std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
-        << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
-    }
-
-  BENCH_MATRIX(double,   32, 1000, 10);
-  BENCH_MATRIX(double,  128,   10, 10);
-  BENCH_MATRIX(double,  512,    1,  6);
-  BENCH_MATRIX(double, 1024,    1,  4);
-
-  return 0;
-}
-