mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-19 18:40:38 +08:00
* basic support for multicore CPU via a .evalOMP() which
internaly uses OpenMP if enabled at compile time. * added a bench/ folder with a couple benchmarks and benchmark tools.
This commit is contained in:
parent
f64311e07d
commit
9d9d81ad71
@ -40,6 +40,7 @@ namespace Eigen {
|
||||
#include "src/Core/IO.h"
|
||||
#include "src/Core/Swap.h"
|
||||
#include "src/Core/CommaInitializer.h"
|
||||
#include "src/Core/EvalOMP.h"
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
|
118
Eigen/src/Core/EvalOMP.h
Normal file
118
Eigen/src/Core/EvalOMP.h
Normal file
@ -0,0 +1,118 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra. Eigen itself is part of the KDE project.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_EVAL_OMP_H
|
||||
#define EIGEN_EVAL_OMP_H
|
||||
|
||||
/** \class EvalOMP
|
||||
*
|
||||
* \brief Parallel evaluation of an expression using OpenMP
|
||||
*
|
||||
* The template parameter Expression is the type of the expression that we are evaluating.
|
||||
*
|
||||
* This class is the return type of MatrixBase::evalOMP() and most of the time this is the
|
||||
* only way it is used.
|
||||
*
|
||||
* Note that if OpenMP is not enabled, then this class is equivalent to Eval.
|
||||
*
|
||||
* \sa MatrixBase::evalOMP(), class Eval, MatrixBase::eval()
|
||||
*/
|
||||
template<typename ExpressionType> class EvalOMP : NoOperatorEquals,
|
||||
public Matrix< typename ExpressionType::Scalar,
|
||||
ExpressionType::Traits::RowsAtCompileTime,
|
||||
ExpressionType::Traits::ColsAtCompileTime,
|
||||
EIGEN_DEFAULT_MATRIX_STORAGE_ORDER,
|
||||
ExpressionType::Traits::MaxRowsAtCompileTime,
|
||||
ExpressionType::Traits::MaxColsAtCompileTime>
|
||||
{
|
||||
public:
|
||||
typedef typename ExpressionType::Scalar Scalar;
|
||||
|
||||
/** The actual matrix type to evaluate to. This type can be used independently
|
||||
* of the rest of this class to get the actual matrix type to evaluate and store
|
||||
* the value of an expression.
|
||||
*/
|
||||
typedef Matrix<Scalar,
|
||||
ExpressionType::Traits::RowsAtCompileTime,
|
||||
ExpressionType::Traits::ColsAtCompileTime,
|
||||
EIGEN_DEFAULT_MATRIX_STORAGE_ORDER,
|
||||
ExpressionType::Traits::MaxRowsAtCompileTime,
|
||||
ExpressionType::Traits::MaxColsAtCompileTime> MatrixType;
|
||||
|
||||
#ifdef _OPENMP
|
||||
explicit EvalOMP(const ExpressionType& other)
|
||||
: MatrixType(other.rows(), other.cols())
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma omp parallel default(none) shared(other)
|
||||
#else
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
if (this->cols()>this->rows())
|
||||
{
|
||||
#pragma omp for
|
||||
for(int j = 0; j < this->cols(); j++)
|
||||
for(int i = 0; i < this->rows(); i++)
|
||||
this->coeffRef(i, j) = other.coeff(i, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp for
|
||||
for(int i = 0; i < this->rows(); i++)
|
||||
for(int j = 0; j < this->cols(); j++)
|
||||
this->coeffRef(i, j) = other.coeff(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
explicit EvalOMP(const ExpressionType& other) : MatrixType(other) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
/** Evaluates *this in a parallel fashion using OpenMP and returns the obtained matrix.
|
||||
*
|
||||
* Of course, it only makes sense to call this function for complex expressions, and/or
|
||||
* large matrices (>32x32), \b and if there is no outer loop which can be parallelized.
|
||||
*
|
||||
* It is the responsibility of the user manage the OpenMP parameters, for instance:
|
||||
* \code
|
||||
* #include <omp.h>
|
||||
* // ...
|
||||
* omp_set_num_threads(omp_get_num_procs());
|
||||
* \endcode
|
||||
* You also need to enable OpenMP on your compiler (e.g., -fopenmp) during both compilation and linking.
|
||||
*
|
||||
* Note that if OpenMP is not enabled, then evalOMP() is equivalent to eval().
|
||||
*
|
||||
* \sa class EvalOMP, eval()
|
||||
*/
|
||||
template<typename Scalar, typename Derived>
|
||||
const EvalOMP<Derived> MatrixBase<Scalar, Derived>::evalOMP() const
|
||||
{
|
||||
return EvalOMP<Derived>(*static_cast<const Derived*>(this));
|
||||
}
|
||||
|
||||
#endif // EIGEN_EVAL_OMP_H
|
@ -44,6 +44,7 @@ template<typename MatrixType> class DiagonalCoeffs;
|
||||
template<typename MatrixType> class Identity;
|
||||
template<typename MatrixType> class Map;
|
||||
template<typename Derived> class Eval;
|
||||
template<typename Derived> class EvalOMP;
|
||||
|
||||
struct ScalarProductOp;
|
||||
struct ScalarQuotientOp;
|
||||
|
@ -359,6 +359,7 @@ template<typename Scalar, typename Derived> class MatrixBase
|
||||
/// \name special functions
|
||||
//@{
|
||||
const Eval<Derived> eval() const EIGEN_ALWAYS_INLINE;
|
||||
const EvalOMP<Derived> evalOMP() const EIGEN_ALWAYS_INLINE;
|
||||
|
||||
template<typename CustomUnaryOp>
|
||||
const CwiseUnaryOp<CustomUnaryOp, Derived> cwise(const CustomUnaryOp& func = CustomUnaryOp()) const;
|
||||
|
75
bench/BenchTimer.h
Normal file
75
bench/BenchTimer.h
Normal file
@ -0,0 +1,75 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra. Eigen itself is part of the KDE project.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_BENCH_TIMER_H
|
||||
#define EIGEN_BENCH_TIMER_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
|
||||
/** Elapsed time timer keeping the best try.
|
||||
*/
|
||||
class BenchTimer
|
||||
{
|
||||
public:
|
||||
|
||||
BenchTimer() : m_best(1e99) {}
|
||||
|
||||
~BenchTimer() {}
|
||||
|
||||
inline void start(void) {m_start = getTime();}
|
||||
inline void stop(void)
|
||||
{
|
||||
m_best = std::min(m_best, getTime() - m_start);
|
||||
}
|
||||
|
||||
/** Return the best elapsed time.
|
||||
*/
|
||||
inline double value(void)
|
||||
{
|
||||
return m_best;
|
||||
}
|
||||
|
||||
static inline double getTime(void)
|
||||
{
|
||||
struct timeval tv;
|
||||
struct timezone tz;
|
||||
gettimeofday(&tv, &tz);
|
||||
return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
double m_best, m_start;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // EIGEN_BENCH_TIMER_H
|
28
bench/BenchUtil.h
Normal file
28
bench/BenchUtil.h
Normal file
@ -0,0 +1,28 @@
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include "BenchTimer.h"
|
||||
|
||||
using namespace std;
|
||||
USING_PART_OF_NAMESPACE_EIGEN
|
||||
|
||||
#include <boost/preprocessor/repetition/enum_params.hpp>
|
||||
#include <boost/preprocessor/repetition.hpp>
|
||||
#include <boost/preprocessor/seq.hpp>
|
||||
#include <boost/preprocessor/array.hpp>
|
||||
#include <boost/preprocessor/arithmetic.hpp>
|
||||
#include <boost/preprocessor/comparison.hpp>
|
||||
#include <boost/preprocessor/punctuation.hpp>
|
||||
#include <boost/preprocessor/punctuation/comma.hpp>
|
||||
#include <boost/preprocessor/stringize.hpp>
|
||||
|
||||
template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline));
|
||||
template<typename MatrixType> void initMatrix_random(MatrixType& mat)
|
||||
{
|
||||
mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols());
|
||||
}
|
||||
|
||||
template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
|
||||
template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
|
||||
{
|
||||
mat.setIdentity();
|
||||
}
|
55
bench/README.txt
Normal file
55
bench/README.txt
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
This folder contains a couple of benchmark utities and Eigen benchmarks.
|
||||
|
||||
****************************
|
||||
* bench_multi_compilers.sh *
|
||||
****************************
|
||||
|
||||
This script allows to run a benchmark on a set of different compilers/compiler options.
|
||||
It takes two arguments:
|
||||
- a file defining the list of the compilers with their options
|
||||
- the .cpp file of the benchmark
|
||||
|
||||
Examples:
|
||||
|
||||
$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
|
||||
|
||||
g++-4.1 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.271102 0.131416 0.422322 0.198633
|
||||
0.201658 0.102436 0.397566 0.207282
|
||||
|
||||
g++-4.2 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.107805 0.0890579 0.30265 0.161843
|
||||
0.127157 0.0712581 0.278341 0.191029
|
||||
|
||||
g++-4.3 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.134318 0.105291 0.3704 0.180966
|
||||
0.137703 0.0732472 0.31225 0.202204
|
||||
|
||||
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.226145 0.0941319 0.371873 0.159433
|
||||
0.109302 0.0837538 0.328102 0.173891
|
||||
|
||||
|
||||
$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
|
||||
|
||||
g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
|
||||
double, fixed-size 4x4: 0.00165105s 0.0778739s
|
||||
double, 32x32: 0.0654769s 0.075289s => x0.869674 (2)
|
||||
double, 128x128: 0.054148s 0.0419669s => x1.29025 (2)
|
||||
double, 512x512: 0.913799s 0.428533s => x2.13239 (2)
|
||||
double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2)
|
||||
|
||||
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
|
||||
double, fixed-size 4x4: 0.000589848s 0.019949s
|
||||
double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2)
|
||||
double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2)
|
||||
double, 512x512: 0.829436s 0.424438s => x1.9542 (2)
|
||||
double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2)
|
||||
|
||||
|
||||
|
12
bench/basicbench.cxxlist
Normal file
12
bench/basicbench.cxxlist
Normal file
@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
|
||||
CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
|
||||
CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
|
||||
|
||||
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
|
||||
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
|
||||
|
||||
CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
|
||||
CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
|
||||
|
||||
CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size"
|
46
bench/basicbenchmark.cpp
Normal file
46
bench/basicbenchmark.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
#include "BenchUtil.h"
|
||||
#include "basicbenchmark.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// disbale floating point exceptions
|
||||
// this leads to more stable bench results
|
||||
// (this is done by default by ICC)
|
||||
#ifndef __INTEL_COMPILER
|
||||
{
|
||||
int aux;
|
||||
asm(
|
||||
"stmxcsr %[aux] \n\t"
|
||||
"orl $32832, %[aux] \n\t"
|
||||
"ldmxcsr %[aux] \n\t"
|
||||
: : [aux] "m" (aux));
|
||||
}
|
||||
#endif
|
||||
|
||||
// this is the list of matrix type and size we want to bench:
|
||||
// ((suffix) (matrix size) (number of iterations))
|
||||
#define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
|
||||
// #define MODES ((Xd)(20)(10000))
|
||||
|
||||
#define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
|
||||
|
||||
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
|
||||
|
||||
const int tries = 10;
|
||||
|
||||
#define _RUN_BENCH(R,ARG,EL) \
|
||||
std::cout << ARG( \
|
||||
BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
|
||||
BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
|
||||
<< " ";
|
||||
|
||||
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
|
||||
std::cout << endl;
|
||||
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
|
||||
std::cout << endl;
|
||||
|
||||
return 0;
|
||||
}
|
59
bench/basicbenchmark.h
Normal file
59
bench/basicbenchmark.h
Normal file
@ -0,0 +1,59 @@
|
||||
|
||||
enum {LazyEval, EarlyEval, OmpEval};
|
||||
|
||||
template<int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
|
||||
|
||||
template<int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
|
||||
{
|
||||
for(int a = 0; a < iterations; a++)
|
||||
{
|
||||
if (Mode==LazyEval)
|
||||
{
|
||||
asm("#begin_bench_loop LazyEval");
|
||||
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
}
|
||||
else if (Mode==OmpEval)
|
||||
{
|
||||
asm("#begin_bench_loop OmpEval");
|
||||
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
|
||||
}
|
||||
else
|
||||
{
|
||||
asm("#begin_bench_loop EarlyEval");
|
||||
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||
m = I + 0.00005 * (m + m * m);
|
||||
}
|
||||
asm("#end_bench_loop");
|
||||
}
|
||||
}
|
||||
|
||||
template<int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
|
||||
|
||||
template<int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||
{
|
||||
const int rows = mat.rows();
|
||||
const int cols = mat.cols();
|
||||
|
||||
MatrixType I(rows,cols);
|
||||
MatrixType m(rows,cols);
|
||||
|
||||
initMatrix_identity(I);
|
||||
|
||||
Eigen::BenchTimer timer;
|
||||
for(uint t=0; t<tries; ++t)
|
||||
{
|
||||
initMatrix_random(m);
|
||||
timer.start();
|
||||
benchBasic_loop<Mode>(I, m, iterations);
|
||||
timer.stop();
|
||||
cerr << m;
|
||||
}
|
||||
return timer.value();
|
||||
};
|
||||
|
28
bench/bench_multi_compilers.sh
Executable file
28
bench/bench_multi_compilers.sh
Executable file
@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
|
||||
if (($# < 2)); then
|
||||
echo "Usage: $0 compilerlist.txt benchfile.cpp"
|
||||
else
|
||||
|
||||
compilerlist=$1
|
||||
benchfile=$2
|
||||
|
||||
g=0
|
||||
source $compilerlist
|
||||
|
||||
# for each compiler, compile benchfile and run the benchmark
|
||||
for (( i=0 ; i<g ; ++i )) ; do
|
||||
# check the compiler exists
|
||||
compiler=`echo ${CLIST[$i]} | cut -d " " -f 1`
|
||||
if [ -e `which $compiler` ]; then
|
||||
echo "${CLIST[$i]}"
|
||||
# echo "${CLIST[$i]} $benchfile -I.. -o bench~"
|
||||
if [ -e ./.bench ] ; then rm .bench; fi
|
||||
${CLIST[$i]} $benchfile -I.. -o .bench && ./.bench 2> /dev/null
|
||||
echo ""
|
||||
else
|
||||
echo "compiler not found: $compiler"
|
||||
fi
|
||||
done
|
||||
|
||||
fi
|
@ -13,7 +13,7 @@ int main(int argc, char *argv[])
|
||||
{
|
||||
m(i,j) = 0.1 * (i+20*j);
|
||||
}
|
||||
for(int a = 0; a < 1000000; a++)
|
||||
for(int a = 0; a < 100000; a++)
|
||||
{
|
||||
m = I + 0.00005 * (m + m*m);
|
||||
}
|
7
bench/ompbench.cxxlist
Normal file
7
bench/ompbench.cxxlist
Normal file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
|
||||
|
||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
|
||||
|
||||
CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
|
81
bench/ompbenchmark.cpp
Normal file
81
bench/ompbenchmark.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
|
||||
// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
|
||||
|
||||
#include <omp.h>
|
||||
#include "BenchUtil.h"
|
||||
#include "basicbenchmark.h"
|
||||
|
||||
// #include <Eigen/Core>
|
||||
// #include "BenchTimer.h"
|
||||
//
|
||||
// using namespace std;
|
||||
// USING_PART_OF_NAMESPACE_EIGEN
|
||||
//
|
||||
// enum {LazyEval, EarlyEval, OmpEval};
|
||||
//
|
||||
// template<int Mode, typename MatrixType>
|
||||
// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline));
|
||||
//
|
||||
// template<int Mode, typename MatrixType>
|
||||
// double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||
// {
|
||||
// const int rows = mat.rows();
|
||||
// const int cols = mat.cols();
|
||||
//
|
||||
// Eigen::BenchTimer timer;
|
||||
// for(uint t=0; t<tries; ++t)
|
||||
// {
|
||||
// MatrixType I = MatrixType::identity(rows, cols);
|
||||
// MatrixType m = MatrixType::random(rows, cols);
|
||||
//
|
||||
// timer.start();
|
||||
// for(int a = 0; a < iterations; a++)
|
||||
// {
|
||||
// if(Mode==LazyEval)
|
||||
// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
// else if(Mode==OmpEval)
|
||||
// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
|
||||
// else
|
||||
// m = I + 0.00005 * (m + m * m);
|
||||
// }
|
||||
// timer.stop();
|
||||
// cerr << m;
|
||||
// }
|
||||
// return timer.value();
|
||||
// };
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// disbale floating point exceptions
|
||||
// this leads to more stable bench results
|
||||
{
|
||||
int aux;
|
||||
asm(
|
||||
"stmxcsr %[aux] \n\t"
|
||||
"orl $32832, %[aux] \n\t"
|
||||
"ldmxcsr %[aux] \n\t"
|
||||
: : [aux] "m" (aux));
|
||||
}
|
||||
|
||||
// commented since the default setting is use as many threads as processors
|
||||
//omp_set_num_threads(omp_get_num_procs());
|
||||
|
||||
std::cout << "double, fixed-size 4x4: "
|
||||
<< benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s "
|
||||
<< benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n";
|
||||
|
||||
#define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
|
||||
double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
|
||||
double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
|
||||
std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
|
||||
<< " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
|
||||
}
|
||||
|
||||
BENCH_MATRIX(double, 32, 1000, 10);
|
||||
BENCH_MATRIX(double, 128, 10, 10);
|
||||
BENCH_MATRIX(double, 512, 1, 6);
|
||||
BENCH_MATRIX(double, 1024, 1, 4);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ Eigen is well tested with recent versions of GCC and ICC. Both GCC 4.2 and ICC g
|
||||
|
||||
For best performance, we recommend the following compilation flags:
|
||||
<ul>
|
||||
<li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 \c -falign-functions=64 </li>
|
||||
<li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 </li>
|
||||
<li>\b ICC: \c -O3 \c -DNDEBUG \c -xT \c -ipo \c -no-prec-div \c -no-inline-max-size </li>
|
||||
</ul>
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user