mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
Make our gemm bench a little more powerful.
This commit is contained in:
parent
9777a5ca60
commit
c354bd47f7
@ -2,6 +2,14 @@
|
||||
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out
|
||||
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
|
||||
|
||||
// Compilation options:
|
||||
//
|
||||
// -DSCALAR=std::complex<double>   (sets the Scalar type; defaults to float)
|
||||
// -DSCALARA=double or -DSCALARB=double   (override the scalar type of matrix type A or B; each defaults to SCALAR)
|
||||
// -DHAVE_BLAS
|
||||
// -DDECOUPLED
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
#include <bench/BenchTimer.h>
|
||||
@ -14,10 +22,18 @@ using namespace Eigen;
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
#ifndef SCALARA
|
||||
#define SCALARA SCALAR
|
||||
#endif
|
||||
|
||||
#ifndef SCALARB
|
||||
#define SCALARB SCALAR
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Matrix<RealScalar,Dynamic,Dynamic> A;
|
||||
typedef Matrix</*Real*/Scalar,Dynamic,Dynamic> B;
|
||||
typedef Matrix<SCALARA,Dynamic,Dynamic> A;
|
||||
typedef Matrix<SCALARB,Dynamic,Dynamic> B;
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> C;
|
||||
typedef Matrix<RealScalar,Dynamic,Dynamic> M;
|
||||
|
||||
@ -135,32 +151,49 @@ int main(int argc, char ** argv)
|
||||
int cache_size = -1;
|
||||
|
||||
bool need_help = false;
|
||||
for (int i=1; i<argc; ++i)
|
||||
for (int i=1; i<argc;)
|
||||
{
|
||||
if(argv[i][0]=='s')
|
||||
if(argv[i][0]=='-')
|
||||
{
|
||||
s = atoi(argv[i]+1);
|
||||
m = n = p = s;
|
||||
if(argv[i][1]=='s')
|
||||
{
|
||||
++i;
|
||||
s = atoi(argv[i++]);
|
||||
m = n = p = s;
|
||||
if(argv[i][0]!='-')
|
||||
{
|
||||
n = atoi(argv[i++]);
|
||||
p = atoi(argv[i++]);
|
||||
}
|
||||
}
|
||||
else if(argv[i][1]=='c')
|
||||
{
|
||||
++i;
|
||||
cache_size = atoi(argv[i++]);
|
||||
}
|
||||
else if(argv[i][1]=='t')
|
||||
{
|
||||
++i;
|
||||
tries = atoi(argv[i++]);
|
||||
}
|
||||
else if(argv[i][1]=='p')
|
||||
{
|
||||
++i;
|
||||
rep = atoi(argv[i++]);
|
||||
}
|
||||
}
|
||||
else if(argv[i][0]=='m')
|
||||
{
|
||||
m = atoi(argv[++i]);
|
||||
n = atoi(argv[++i]);
|
||||
p = atoi(argv[++i]);
|
||||
}
|
||||
else if(argv[i][0]=='c')
|
||||
cache_size = atoi(argv[i]+1);
|
||||
else if(argv[i][0]=='t')
|
||||
tries = atoi(argv[i]+1);
|
||||
else if(argv[i][0]=='p')
|
||||
rep = atoi(argv[i]+1);
|
||||
else
|
||||
{
|
||||
need_help = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(need_help)
|
||||
{
|
||||
std::cout << argv[0] << " s<matrix size> c<cache size> t<nb tries> p<nb repeats>\n";
|
||||
std::cout << argv[0] << " -s <matrix sizes> -c <cache size> -t <nb tries> -p <nb repeats>\n";
|
||||
std::cout << " <matrix sizes> : size\n";
|
||||
std::cout << " <matrix sizes> : rows columns depth\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -182,6 +215,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
// check the parallel product is correct
|
||||
#if defined EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
int procs = omp_get_max_threads();
|
||||
if(procs>1)
|
||||
{
|
||||
@ -198,11 +232,20 @@ int main(int argc, char ** argv)
|
||||
#elif defined HAVE_BLAS
|
||||
blas_gemm(a,b,r);
|
||||
c.noalias() += a * b;
|
||||
if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
|
||||
if(!r.isApprox(c)) {
|
||||
std::cout << r - c << "\n";
|
||||
std::cerr << "Warning, your product is crap!\n\n";
|
||||
}
|
||||
#else
|
||||
gemm(a,b,c);
|
||||
r.noalias() += a.cast<Scalar>() * b.cast<Scalar>();
|
||||
if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n";
|
||||
if(1.*m*n*p<2000.*2000*2000)
|
||||
{
|
||||
gemm(a,b,c);
|
||||
r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
|
||||
if(!r.isApprox(c)) {
|
||||
std::cout << r - c << "\n";
|
||||
std::cerr << "Warning, your product is crap!\n\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BLAS
|
||||
@ -224,7 +267,7 @@ int main(int argc, char ** argv)
|
||||
{
|
||||
BenchTimer tmono;
|
||||
omp_set_num_threads(1);
|
||||
Eigen::internal::setNbThreads(1);
|
||||
Eigen::setNbThreads(1);
|
||||
c = rc;
|
||||
BENCH(tmono, tries, rep, gemm(a,b,c));
|
||||
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n";
|
||||
@ -233,6 +276,15 @@ int main(int argc, char ** argv)
|
||||
}
|
||||
#endif
|
||||
|
||||
if(1.*m*n*p<30*30*30)
|
||||
{
|
||||
BenchTimer tmt;
|
||||
c = rc;
|
||||
BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
|
||||
std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
|
||||
}
|
||||
|
||||
#ifdef DECOUPLED
|
||||
if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user