- cleaner use of OpenMP (no code duplication anymore)

using a macro and _Pragma.
- use OpenMP also in cacheOptimalProduct and in the
  vectorized paths as well
- kill the vector assignment unroller. implement in
  operator= the logic for assigning a row-vector in
  a col-vector.
- CMakeLists support for building tests/examples
  with -fopenmp and/or -msse2
- updates in bench/, especially replace identity()
  by ones() which prevents underflows from perturbing
  bench results.
This commit is contained in:
Benoit Jacob 2008-04-11 14:28:42 +00:00
parent 7bee90a62a
commit dcebc46cdc
8 changed files with 179 additions and 231 deletions

View File

@ -8,6 +8,14 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON)
IF(CMAKE_COMPILER_IS_GNUCXX)
IF(CMAKE_SYSTEM_NAME MATCHES Linux)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
IF(TEST_OPENMP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
MESSAGE("Enabling OpenMP in tests/examples")
ENDIF(TEST_OPENMP)
IF(TEST_SSE2)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
MESSAGE("Enabling SSE2 in tests/examples")
ENDIF(TEST_SSE2)
ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)

View File

@ -28,7 +28,7 @@
#define EIGEN_ASSIGN_H
template<typename Derived1, typename Derived2, int UnrollCount>
struct ei_matrix_operator_equals_unroller
struct ei_matrix_assignment_unroller
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@ -37,13 +37,13 @@ struct ei_matrix_operator_equals_unroller
static void run(Derived1 &dst, const Derived2 &src)
{
ei_matrix_operator_equals_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
ei_matrix_assignment_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
dst.coeffRef(row, col) = src.coeff(row, col);
}
};
template<typename Derived1, typename Derived2>
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 1>
struct ei_matrix_assignment_unroller<Derived1, Derived2, 1>
{
static void run(Derived1 &dst, const Derived2 &src)
{
@ -53,13 +53,13 @@ struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 1>
// prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2>
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 0>
struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
{
static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2>
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, Dynamic>
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
{
static void run(Derived1 &, const Derived2 &) {}
};
@ -67,7 +67,7 @@ struct ei_matrix_operator_equals_unroller<Derived1, Derived2, Dynamic>
//----
template<typename Derived1, typename Derived2, int Index>
struct ei_matrix_operator_equals_packet_unroller
struct ei_matrix_assignment_packet_unroller
{
enum {
row = Derived1::Flags&RowMajorBit ? Index / Derived1::ColsAtCompileTime : Index % Derived1::RowsAtCompileTime,
@ -76,14 +76,14 @@ struct ei_matrix_operator_equals_packet_unroller
static void run(Derived1 &dst, const Derived2 &src)
{
ei_matrix_operator_equals_packet_unroller<Derived1, Derived2,
ei_matrix_assignment_packet_unroller<Derived1, Derived2,
Index-ei_packet_traits<typename Derived1::Scalar>::size>::run(dst, src);
dst.writePacketCoeff(row, col, src.packetCoeff(row, col));
}
};
template<typename Derived1, typename Derived2>
struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 0 >
struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, 0 >
{
static void run(Derived1 &dst, const Derived2 &src)
{
@ -92,58 +92,22 @@ struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 0 >
};
template<typename Derived1, typename Derived2>
struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, Dynamic>
struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
{
static void run(Derived1 &, const Derived2 &) { ei_internal_assert(false && "ei_matrix_operator_equals_packet_unroller"); }
};
//----
template<typename Derived1, typename Derived2, int UnrollCount>
struct ei_vector_operator_equals_unroller
{
enum { index = UnrollCount - 1 };
static void run(Derived1 &dst, const Derived2 &src)
{
ei_vector_operator_equals_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
dst.coeffRef(index) = src.coeff(index);
}
};
// prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2>
struct ei_vector_operator_equals_unroller<Derived1, Derived2, 0>
{
static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2>
struct ei_vector_operator_equals_unroller<Derived1, Derived2, 1>
{
static void run(Derived1 &dst, const Derived2 &src)
{
dst.coeffRef(0) = src.coeff(0);
}
};
template<typename Derived1, typename Derived2>
struct ei_vector_operator_equals_unroller<Derived1, Derived2, Dynamic>
{
static void run(Derived1 &, const Derived2 &) {}
static void run(Derived1 &, const Derived2 &) { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); }
};
template <typename Derived, typename OtherDerived,
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))>
struct ei_operator_equals_impl;
struct ei_assignment_impl;
template<typename Derived>
template<typename OtherDerived>
Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other)
{
ei_operator_equals_impl<Derived,OtherDerived>::execute(derived(),other.derived());
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
return derived();
}
@ -152,125 +116,27 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::operator=(const MatrixBase<OtherDerived>& other)
{
const bool need_to_transpose = Derived::IsVectorAtCompileTime
&& OtherDerived::IsVectorAtCompileTime
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime;
if(OtherDerived::Flags & EvalBeforeAssigningBit)
{
return lazyAssign(other.derived().eval());
if(need_to_transpose)
return lazyAssign(other.transpose().eval());
else
return lazyAssign(other.eval());
}
else
return lazyAssign(other.derived());
{
if(need_to_transpose)
return lazyAssign(other.transpose());
else
return lazyAssign(other.derived());
}
}
template <typename Derived, typename OtherDerived>
struct ei_operator_equals_impl<Derived, OtherDerived, false>
{
static void execute(Derived & dst, const OtherDerived & src)
{
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
// copying a vector expression into a vector
{
ei_assert(dst.size() == src.size());
if(unroll)
ei_vector_operator_equals_unroller
<Derived, OtherDerived,
unroll ? Derived::SizeAtCompileTime : Dynamic
>::run(dst.derived(), src.derived());
else
{
#ifdef EIGEN_USE_OPENMPf
if(Derived::Flags & OtherDerived::Flags & LargeBit)
{
#ifdef __INTEL_COMPILER
#pragma omp parallel default(none) shared(other)
#else
#pragma omp parallel default(none)
#endif
{
#pragma omp for
for(int i = 0; i < dst.size(); i++)
dst.coeffRef(i) = src.coeff(i);
}
}
else
#endif // EIGEN_USE_OPENMP
{
for(int i = 0; i < dst.size(); i++)
dst.coeffRef(i) = src.coeff(i);
}
}
}
else // copying a matrix expression into a matrix
{
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll)
{
ei_matrix_operator_equals_unroller
<Derived, OtherDerived,
unroll ? Derived::SizeAtCompileTime : Dynamic
>::run(dst.derived(), src.derived());
}
else
{
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
{
#ifdef EIGEN_USE_OPENMP
if(Derived::Flags & OtherDerived::Flags & LargeBit)
{
#ifdef __INTEL_COMPILER
#pragma omp parallel default(none) shared(other)
#else
#pragma omp parallel default(none)
#endif
{
#pragma omp for
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
else
#endif // EIGEN_USE_OPENMP
{
// traverse in column-major order
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
else
{
#ifdef EIGEN_USE_OPENMP
if(Derived::Flags & OtherDerived::Flags & LargeBit)
{
#ifdef __INTEL_COMPILER
#pragma omp parallel default(none) shared(other)
#else
#pragma omp parallel default(none)
#endif
{
#pragma omp for
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
else
#endif // EIGEN_USE_OPENMP
{
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
}
}
}
};
template <typename Derived, typename OtherDerived>
struct ei_operator_equals_impl<Derived, OtherDerived, true>
struct ei_assignment_impl<Derived, OtherDerived, false>
{
static void execute(Derived & dst, const OtherDerived & src)
{
@ -278,7 +144,47 @@ struct ei_operator_equals_impl<Derived, OtherDerived, true>
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll)
{
ei_matrix_operator_equals_packet_unroller
ei_matrix_assignment_unroller
<Derived, OtherDerived,
unroll ? Derived::SizeAtCompileTime : Dynamic
>::run(dst.derived(), src.derived());
}
else
{
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
{
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int j = 0; j < dst.cols(); j++) \
for(int i = 0; i < dst.rows(); i++) \
dst.coeffRef(i, j) = src.coeff(i, j);
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
{
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int i = 0; i < dst.rows(); i++) \
for(int j = 0; j < dst.cols(); j++) \
dst.coeffRef(i, j) = src.coeff(i, j);
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}
}
};
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, true>
{
static void execute(Derived & dst, const OtherDerived & src)
{
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll)
{
ei_matrix_assignment_packet_unroller
<Derived, OtherDerived,
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
? Derived::SizeAtCompileTime-ei_packet_traits<typename Derived::Scalar>::size
@ -288,15 +194,21 @@ struct ei_operator_equals_impl<Derived, OtherDerived, true>
{
if(OtherDerived::Flags&RowMajorBit)
{
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int i = 0; i < dst.rows(); i++) \
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int j = 0; j < dst.cols(); j++) \
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}
}

View File

@ -283,64 +283,70 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
#ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
{
for(int k=0; k<m_rhs.cols(); k++)
{
int j=0;
for(; j<cols4; j+=4)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
for (int i=0; i<m_lhs.rows(); i+=ei_packet_traits<Scalar>::size)
{
res.writePacketCoeff(i,k,
ei_padd(
res.packetCoeff(i,k),
ei_padd(
ei_padd(
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
ei_padd(
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
)
)
)
);
}
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int k=0; k<this->cols(); k++) \
{ \
int j=0; \
for(; j<cols4; j+=4) \
{ \
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) \
{ \
res.writePacketCoeff(i,k,\
ei_padd( \
res.packetCoeff(i,k), \
ei_padd( \
ei_padd( \
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \
ei_padd( \
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \
) \
) \
) \
); \
} \
} \
for(; j<m_lhs.cols(); ++j) \
{ \
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k)); \
for (int i=0; i<this->rows(); ++i) \
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
} \
}
for(; j<m_lhs.cols(); ++j)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
for (int i=0; i<m_lhs.rows(); ++i)
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
}
}
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
else
#endif
#endif // EIGEN_VECTORIZE
{
for(int k=0; k<m_rhs.cols(); ++k)
{
int j=0;
for(; j<cols4; j+=4)
{
const Scalar tmp0 = m_rhs.coeff(j ,k);
const Scalar tmp1 = m_rhs.coeff(j+1,k);
const Scalar tmp2 = m_rhs.coeff(j+2,k);
const Scalar tmp3 = m_rhs.coeff(j+3,k);
for (int i=0; i<m_lhs.rows(); ++i)
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1)
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
#define EIGEN_THE_PARALLELIZABLE_LOOP \
for(int k=0; k<this->cols(); ++k) \
{ \
int j=0; \
for(; j<cols4; j+=4) \
{ \
const Scalar tmp0 = m_rhs.coeff(j ,k); \
const Scalar tmp1 = m_rhs.coeff(j+1,k); \
const Scalar tmp2 = m_rhs.coeff(j+2,k); \
const Scalar tmp3 = m_rhs.coeff(j+3,k); \
for (int i=0; i<this->rows(); ++i) \
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \
} \
for(; j<m_lhs.cols(); ++j) \
{ \
const Scalar tmp = m_rhs.coeff(j,k); \
for (int i=0; i<this->rows(); ++i) \
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
} \
}
for(; j<m_lhs.cols(); ++j)
{
const Scalar tmp = m_rhs.coeff(j,k);
for (int i=0; i<m_lhs.rows(); ++i)
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
}
}
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
#undef EIGEN_THE_PARALLELIZABLE_LOOP
}
}

View File

@ -74,6 +74,30 @@ using Eigen::MatrixBase;
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
#endif
#ifdef EIGEN_USE_OPENMP
# ifdef __INTEL_COMPILER
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)")
# else
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)")
# endif
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \
if(condition) \
{ \
EIGEN_PRAGMA_OMP_PARALLEL \
{ \
_Pragma("omp for") \
EIGEN_THE_PARALLELIZABLE_LOOP \
} \
} \
else \
{ \
EIGEN_THE_PARALLELIZABLE_LOOP \
}
#else // EIGEN_USE_OPENMP
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP
#endif
// FIXME with the always_inline attribute,
// gcc 3.4.x reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'

View File

@ -18,12 +18,11 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[])
{
Matrix<SCALAR,MATSIZE,MATSIZE> I;
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones();
Matrix<SCALAR,MATSIZE,MATSIZE> m;
for(int i = 0; i < MATSIZE; i++)
for(int j = 0; j < MATSIZE; j++)
{
I(i,j) = (i==j);
m(i,j) = (i+MATSIZE*j);
}
asm("#begin");

View File

@ -1,33 +1,32 @@
// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b
#include <Eigen/Core>
using namespace std;
USING_PART_OF_NAMESPACE_EIGEN
#ifndef MATTYPE
#define MATTYPE MatrixXd
#define MATTYPE MatrixXLd
#endif
#ifndef MATSIZE
#define MATSIZE 20
#define MATSIZE 400
#endif
#ifndef REPEAT
#define REPEAT 100000
#define REPEAT 100
#endif
int main(int argc, char *argv[])
{
MATTYPE I = MATTYPE::identity(MATSIZE,MATSIZE);
MATTYPE I = MATTYPE::ones(MATSIZE,MATSIZE);
MATTYPE m(MATSIZE,MATSIZE);
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
{
m(i,j) = 0.1 * (i+MATSIZE*j)/MATSIZE;
m(i,j) = (i+j+1)/(MATSIZE*MATSIZE);
}
for(int a = 0; a < REPEAT; a++)
{
m = I + 0.00005 * (m + m*m);
m = I + 0.0001 * (m + m*m);
}
cout << m(0,0) << endl;
return 0;

View File

@ -19,11 +19,11 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[])
{
MATTYPE I = MATTYPE::identity(MATSIZE,MATSIZE);
MATTYPE I = MATTYPE::ones(MATSIZE,MATSIZE);
MATTYPE m(MATSIZE,MATSIZE);
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
{
m(i,j) = 0.1 * (i+MATSIZE*j)/MATSIZE;
m(i,j) = 0.1 * (i+j+1)/(MATSIZE*MATSIZE);
}
for(int a = 0; a < REPEAT; a++)
{

View File

@ -100,7 +100,7 @@ void EigenTest::testProduct()
}
// test a large matrix only once
product(Matrix<float, 100, 100>());
product(MatrixXf(100,100));
}
} // namespace Eigen