Make our gemm bench a little more powerful.

This commit is contained in:
Gael Guennebaud 2014-04-17 21:03:26 +02:00
parent 9777a5ca60
commit c354bd47f7

View File

@ -2,6 +2,14 @@
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
// Compilation options:
//
// -DSCALAR=std::complex<double>
// -DSCALARA=double or -DSCALARB=double
// -DHAVE_BLAS
// -DDECOUPLED
//
#include <iostream>
#include <Eigen/Core>
#include <bench/BenchTimer.h>
@ -14,10 +22,18 @@ using namespace Eigen;
#define SCALAR float
#endif
// SCALARA and SCALARB choose the element types of the matrix typedefs A and B.
// Each one falls back to SCALAR when it was not supplied (e.g. via -DSCALARA=...).
#ifndef SCALARA
#define SCALARA SCALAR
#endif
#ifndef SCALARB
#define SCALARB SCALAR
#endif
// Scalar is the configured SCALAR type; RealScalar is its NumTraits<>::Real type.
typedef SCALAR Scalar;
typedef NumTraits<Scalar>::Real RealScalar;
// NOTE(review): A and B are each typedef'd twice below, first in terms of
// RealScalar / Scalar and then in terms of SCALARA / SCALARB. This looks like
// removed and added diff lines shown side by side - confirm which pair is
// current before relying on either.
typedef Matrix<RealScalar,Dynamic,Dynamic> A;
typedef Matrix</*Real*/Scalar,Dynamic,Dynamic> B;
typedef Matrix<SCALARA,Dynamic,Dynamic> A;
typedef Matrix<SCALARB,Dynamic,Dynamic> B;
// C uses Scalar elements; M uses RealScalar elements. All four are fully dynamic-sized.
typedef Matrix<Scalar,Dynamic,Dynamic> C;
typedef Matrix<RealScalar,Dynamic,Dynamic> M;
@ -135,32 +151,49 @@ int main(int argc, char ** argv)
// Command-line parsing for the benchmark.
// NOTE(review): this span appears to contain both an older single-token syntax
// (s<N>, c<N>, t<N>, p<N>) and a newer dash-option syntax (-s, -c, -t, -p)
// interleaved, as in a diff whose +/- markers were stripped. Confirm which
// lines are live before editing any of them.
int cache_size = -1;
bool need_help = false;
for (int i=1; i<argc; ++i)
// This second loop header has no increment expression: the dash-option
// branches below advance i themselves.
for (int i=1; i<argc;)
{
if(argv[i][0]=='s')
if(argv[i][0]=='-')
{
// Single-token form: the number follows the letter inside the same argument
// (argv[i]+1 skips the leading character). One value sets m, n and p.
s = atoi(argv[i]+1);
m = n = p = s;
if(argv[i][1]=='s')
{
// "-s": step past the option token, then read the size value(s).
++i;
// NOTE(review): i is not compared with argc before this read. If "-s" is the
// last argument, argv[i] is argv[argc], which is a null pointer.
s = atoi(argv[i++]);
m = n = p = s;
// A single value sets all three sizes. If the next token does not start with
// '-', two further values are read into n and p.
// NOTE(review): when the size value was the last argument, i == argc here and
// argv[i][0] dereferences the terminating null pointer - confirm and guard.
if(argv[i][0]!='-')
{
// NOTE(review): the second read is not bounds-checked either.
n = atoi(argv[i++]);
p = atoi(argv[i++]);
}
}
else if(argv[i][1]=='c')
{
// "-c <value>": value is stored in cache_size (initialised to -1 above).
++i;
cache_size = atoi(argv[i++]);
}
else if(argv[i][1]=='t')
{
// "-t <value>": value is stored in tries.
++i;
tries = atoi(argv[i++]);
}
else if(argv[i][1]=='p')
{
// "-p <value>": value is stored in rep.
++i;
rep = atoi(argv[i++]);
}
// NOTE(review): an option letter other than s/c/t/p passes through this chain
// without advancing i. With the increment-free loop header that would revisit
// the same argv[i] forever - confirm, and consider setting need_help instead.
}
else if(argv[i][0]=='m')
{
// "m" form: the following three arguments are read into m, n and p using
// pre-increment; none of the reads is checked against argc.
m = atoi(argv[++i]);
n = atoi(argv[++i]);
p = atoi(argv[++i]);
}
else if(argv[i][0]=='c')
cache_size = atoi(argv[i]+1);
else if(argv[i][0]=='t')
tries = atoi(argv[i]+1);
else if(argv[i][0]=='p')
rep = atoi(argv[i]+1);
else
{
// Anything unrecognised stops parsing and requests the usage text.
need_help = true;
break;
}
}
// All numeric values above go through atoi, which gives no error indication
// for non-numeric text.
if(need_help)
{
// Print the usage text to stdout and return 1.
std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n";
std::cout << argv[0] << " -s <matrix sizes> -c <cache size> -t <nb tries> -p <nb repeats>\n";
std::cout << " <matrix sizes> : size\n";
std::cout << " <matrix sizes> : rows columns depth\n";
return 1;
}
@ -182,6 +215,7 @@ int main(int argc, char ** argv)
// check the parallel product is correct
#if defined EIGEN_HAS_OPENMP
Eigen::initParallel();
int procs = omp_get_max_threads();
if(procs>1)
{
@ -198,11 +232,20 @@ int main(int argc, char ** argv)
// Sanity check of the computed product against a reference result.
// NOTE(review): the opening #if of this conditional is outside this span, and
// old/new versions of the check appear interleaved - confirm which are live.
#elif defined HAVE_BLAS
// BLAS build: blas_gemm(a,b,r) is compared with a * b accumulated into c
// (blas_gemm presumably writes its result into r - verify against its definition).
blas_gemm(a,b,r);
c.noalias() += a * b;
if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
// On mismatch, the difference r - c is printed to stdout and the warning to stderr.
if(!r.isApprox(c)) {
std::cout << r - c << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
#else
gemm(a,b,c);
r.noalias() += a.cast<Scalar>() * b.cast<Scalar>();
if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
// The check runs only while m*n*p is below 2000^3. The leading "1." makes the
// left-hand product a double computation rather than an int one.
// Presumably the limit keeps the lazy reference product affordable - confirm.
if(1.*m*n*p<2000.*2000*2000)
{
gemm(a,b,c);
// The reference result is built with lazyProduct on operands cast to Scalar,
// instead of operator*.
r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
if(!r.isApprox(c)) {
std::cout << r - c << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
}
#endif
#ifdef HAVE_BLAS
@ -224,7 +267,7 @@ int main(int argc, char ** argv)
{
BenchTimer tmono;
omp_set_num_threads(1);
Eigen::internal::setNbThreads(1);
Eigen::setNbThreads(1);
c = rc;
BENCH(tmono, tries, rep, gemm(a,b,c));
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n";
@ -233,6 +276,15 @@ int main(int argc, char ** argv)
}
#endif
// Benchmark the lazyProduct path, but only for small problems: m*n*p must be
// below 30*30*30 (27000). The leading "1." makes the left-hand product a double.
if(1.*m*n*p<30*30*30)
{
BenchTimer tmt;
// c is reassigned from rc before timing (rc is defined outside this span;
// presumably a saved copy of the initial c - verify).
c = rc;
BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
// Reported per timer: best time divided by rep, a GFLOPS figure computed as
// 2*m*n*p*rep / best time * 1e-9, and the timer's total in parentheses.
std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
}
#ifdef DECOUPLED
if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
{