add Cholesky and eigensolver benchmark

This commit is contained in:
Gael Guennebaud 2008-07-08 17:20:17 +00:00
parent 6f09d3a67d
commit 77a622f2bb
8 changed files with 384 additions and 104 deletions

View File

@ -3,8 +3,6 @@
#include <Eigen/Sparse>
#include <bench/BenchTimer.h>
using namespace std;
using namespace Eigen;
USING_PART_OF_NAMESPACE_EIGEN

View File

@ -26,3 +26,43 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
{
mat.setIdentity();
}
#ifndef __INTEL_COMPILER
#define DISABLE_SSE_EXCEPTIONS() { \
int aux; \
asm( \
"stmxcsr %[aux] \n\t" \
"orl $32832, %[aux] \n\t" \
"ldmxcsr %[aux] \n\t" \
: : [aux] "m" (aux)); \
}
#else
#define DISABLE_SSE_EXCEPTIONS()
#endif
#ifdef BENCH_GMM
#include <gmm/gmm.h>
template <typename EigenMatrixType, typename GmmMatrixType>
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
{
dst.resize(src.rows(),src.cols());
for (int j=0; j<src.cols(); ++j)
for (int i=0; i<src.rows(); ++i)
dst(i,j) = src.coeff(i,j);
}
#endif
#ifdef BENCH_GSL
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_eigen.h>
template <typename EigenMatrixType>
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
{
for (int j=0; j<src.cols(); ++j)
for (int i=0; i<src.rows(); ++i)
gsl_matrix_set(*dst, i, j, src.coeff(i,j));
}
#endif

View File

@ -4,19 +4,7 @@
int main(int argc, char *argv[])
{
// disable floating point exceptions
// this leads to more stable bench results
// (this is done by default by ICC)
#ifndef __INTEL_COMPILER
{
int aux;
asm(
"stmxcsr %[aux] \n\t"
"orl $32832, %[aux] \n\t"
"ldmxcsr %[aux] \n\t"
: : [aux] "m" (aux));
}
#endif
DISABLE_SSE_EXCEPTIONS();
// this is the list of matrix type and size we want to bench:
// ((suffix) (matrix size) (number of iterations))

132
bench/benchCholesky.cpp Normal file
View File

@ -0,0 +1,132 @@
// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky
// options:
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
// -DEIGEN_DONT_VECTORIZE
// -msse2
// -DREPEAT=100
// -DTRIES=10
// -DSCALAR=double
#include <Eigen/Array>
#include <Eigen/Cholesky>
#include <bench/BenchUtil.h>
using namespace Eigen;
#ifndef REPEAT
#define REPEAT 10000
#endif
#ifndef TRIES
#define TRIES 4
#endif
typedef float Scalar;
template <typename MatrixType>
__attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
{
int rows = m.rows();
int cols = m.cols();
int repeats = (REPEAT*1000)/(rows*rows);
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
MatrixType a = MatrixType::random(rows,cols);
SquareMatrixType covMat = a * a.adjoint();
BenchTimer timerNoSqrt, timerSqrt;
Scalar acc = 0;
int r = ei_random<int>(0,covMat.rows()-1);
int c = ei_random<int>(0,covMat.cols()-1);
for (int t=0; t<TRIES; ++t)
{
timerNoSqrt.start();
for (int k=0; k<repeats; ++k)
{
CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat);
acc += cholnosqrt.matrixL().coeff(r,c);
}
timerNoSqrt.stop();
}
for (int t=0; t<TRIES; ++t)
{
timerSqrt.start();
for (int k=0; k<repeats; ++k)
{
Cholesky<SquareMatrixType> chol(covMat);
acc += chol.matrixL().coeff(r,c);
}
timerSqrt.stop();
}
if (MatrixType::RowsAtCompileTime==Dynamic)
std::cout << "dyn ";
else
std::cout << "fixed ";
std::cout << covMat.rows() << " \t"
<< (timerNoSqrt.value() * REPEAT) / repeats << "s \t"
<< (timerSqrt.value() * REPEAT) / repeats << "s";
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime==Dynamic)
{
timerSqrt.reset();
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
eiToGsl(covMat, &gslCovMat);
for (int t=0; t<TRIES; ++t)
{
timerSqrt.start();
for (int k=0; k<repeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
gsl_linalg_cholesky_decomp(gslCopy);
acc += gsl_matrix_get(gslCopy,r,c);
}
timerSqrt.stop();
}
std::cout << " | \t"
<< timerSqrt.value() * REPEAT / repeats << "s";
gsl_matrix_free(gslCovMat);
}
#endif
std::cout << "\n";
// make sure the compiler does not optimize too much
if (acc==123)
std::cout << acc;
}
int main(int argc, char* argv[])
{
const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
std::cout << "size no sqrt standard";
#ifdef BENCH_GSL
std::cout << " GSL (standard + double + ATLAS) ";
#endif
std::cout << "\n";
for (uint i=0; dynsizes[i]>0; ++i)
benchCholesky(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
benchCholesky(Matrix<Scalar,2,2>());
benchCholesky(Matrix<Scalar,3,3>());
benchCholesky(Matrix<Scalar,4,4>());
benchCholesky(Matrix<Scalar,5,5>());
benchCholesky(Matrix<Scalar,6,6>());
benchCholesky(Matrix<Scalar,7,7>());
benchCholesky(Matrix<Scalar,8,8>());
benchCholesky(Matrix<Scalar,12,12>());
benchCholesky(Matrix<Scalar,16,16>());
return 0;
}

210
bench/benchEigenSolver.cpp Normal file
View File

@ -0,0 +1,210 @@
// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver
// options:
// -DBENCH_GMM
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
// -DEIGEN_DONT_VECTORIZE
// -msse2
// -DREPEAT=100
// -DTRIES=10
// -DSCALAR=double
#include <Eigen/Array>
#include <Eigen/QR>
#include <bench/BenchUtil.h>
using namespace Eigen;
#ifndef REPEAT
#define REPEAT 1000
#endif
#ifndef TRIES
#define TRIES 4
#endif
#ifndef SCALAR
#define SCALAR float
#endif
typedef SCALAR Scalar;
template <typename MatrixType>
__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
{
int rows = m.rows();
int cols = m.cols();
int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
int saRepeats = stdRepeats * 4;
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
MatrixType a = MatrixType::random(rows,cols);
SquareMatrixType covMat = a * a.adjoint();
BenchTimer timerSa, timerStd;
Scalar acc = 0;
int r = ei_random<int>(0,covMat.rows()-1);
int c = ei_random<int>(0,covMat.cols()-1);
{
SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
for (int t=0; t<TRIES; ++t)
{
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
ei.compute(covMat);
acc += ei.eigenvectors().coeff(r,c);
}
timerSa.stop();
}
}
{
EigenSolver<SquareMatrixType> ei(covMat);
for (int t=0; t<TRIES; ++t)
{
timerStd.start();
for (int k=0; k<stdRepeats; ++k)
{
ei.compute(covMat);
acc += ei.eigenvectors().coeff(r,c);
}
timerStd.stop();
}
}
if (MatrixType::RowsAtCompileTime==Dynamic)
std::cout << "dyn ";
else
std::cout << "fixed ";
std::cout << covMat.rows() << " \t"
<< timerSa.value() * REPEAT / saRepeats << "s \t"
<< timerStd.value() * REPEAT / stdRepeats << "s";
#ifdef BENCH_GMM
if (MatrixType::RowsAtCompileTime==Dynamic)
{
timerSa.reset();
timerStd.reset();
gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
std::vector<Scalar> eigval(covMat.rows());
eiToGmm(covMat, gmmCovMat);
for (int t=0; t<TRIES; ++t)
{
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
acc += eigvect(r,c);
}
timerSa.stop();
}
// the non-selfadjoint solver does not compute the eigen vectors
// for (int t=0; t<TRIES; ++t)
// {
// timerStd.start();
// for (int k=0; k<stdRepeats; ++k)
// {
// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
// acc += eigvect(r,c);
// }
// timerStd.stop();
// }
std::cout << " | \t"
<< timerSa.value() * REPEAT / saRepeats << "s"
<< /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
}
#endif
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime==Dynamic)
{
timerSa.reset();
timerStd.reset();
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
eiToGsl(covMat, &gslCovMat);
for (int t=0; t<TRIES; ++t)
{
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
acc += gsl_matrix_get(eigvect,r,c);
}
timerSa.stop();
}
for (int t=0; t<TRIES; ++t)
{
timerStd.start();
for (int k=0; k<stdRepeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
}
timerStd.stop();
}
std::cout << " | \t"
<< timerSa.value() * REPEAT / saRepeats << "s \t"
<< timerStd.value() * REPEAT / stdRepeats << "s";
gsl_matrix_free(gslCovMat);
gsl_vector_free(gslCopy);
gsl_matrix_free(eigvect);
gsl_vector_free(eigval);
gsl_matrix_complex_free(eigvectz);
gsl_vector_complex_free(eigvalz);
gsl_eigen_symmv_free(eisymm);
gsl_eigen_nonsymmv_free(einonsymm);
}
#endif
std::cout << "\n";
// make sure the compiler does not optimize too much
if (acc==123)
std::cout << acc;
}
int main(int argc, char* argv[])
{
const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
std::cout << "size selfadjoint generic";
#ifdef BENCH_GMM
std::cout << " GMM++ ";
#endif
#ifdef BENCH_GSL
std::cout << " GSL (double + ATLAS) ";
#endif
std::cout << "\n";
for (uint i=0; dynsizes[i]>0; ++i)
benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
benchEigenSolver(Matrix<Scalar,2,2>());
benchEigenSolver(Matrix<Scalar,3,3>());
benchEigenSolver(Matrix<Scalar,4,4>());
benchEigenSolver(Matrix<Scalar,6,6>());
benchEigenSolver(Matrix<Scalar,8,8>());
benchEigenSolver(Matrix<Scalar,12,12>());
benchEigenSolver(Matrix<Scalar,16,16>());
return 0;
}

View File

@ -5,7 +5,7 @@
for ((i=1; i<16; ++i)); do
echo "Matrix size: $i x $i :"
$CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=25 -o benchmark && time ./benchmark >/dev/null
$CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
$CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
echo " "
done

View File

@ -1,7 +0,0 @@
#!/bin/bash
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"

View File

@ -1,81 +0,0 @@
// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
#include <omp.h>
#include "BenchUtil.h"
#include "basicbenchmark.h"
// #include <Eigen/Core>
// #include "BenchTimer.h"
//
// using namespace std;
// USING_PART_OF_NAMESPACE_EIGEN
//
// enum {LazyEval, EarlyEval, OmpEval};
//
// template<int Mode, typename MatrixType>
// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline));
//
// template<int Mode, typename MatrixType>
// double benchBasic(const MatrixType& mat, int iterations, int tries)
// {
// const int rows = mat.rows();
// const int cols = mat.cols();
//
// Eigen::BenchTimer timer;
// for(uint t=0; t<tries; ++t)
// {
// MatrixType I = MatrixType::identity(rows, cols);
// MatrixType m = MatrixType::random(rows, cols);
//
// timer.start();
// for(int a = 0; a < iterations; a++)
// {
// if(Mode==LazyEval)
// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
// else if(Mode==OmpEval)
// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
// else
// m = I + 0.00005 * (m + m * m);
// }
// timer.stop();
// cerr << m;
// }
// return timer.value();
// };
int main(int argc, char *argv[])
{
// disbale floating point exceptions
// this leads to more stable bench results
{
int aux;
asm(
"stmxcsr %[aux] \n\t"
"orl $32832, %[aux] \n\t"
"ldmxcsr %[aux] \n\t"
: : [aux] "m" (aux));
}
// commented since the default setting is use as many threads as processors
//omp_set_num_threads(omp_get_num_procs());
std::cout << "double, fixed-size 4x4: "
<< benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s "
<< benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n";
#define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
<< " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
}
BENCH_MATRIX(double, 32, 1000, 10);
BENCH_MATRIX(double, 128, 10, 10);
BENCH_MATRIX(double, 512, 1, 6);
BENCH_MATRIX(double, 1024, 1, 4);
return 0;
}