play with inlining to get better performance when the compiler is not asked to optimize

2025-04-12 19:20:36 +08:00 · 2007-12-11 13:14:14 +00:00 · 2007-12-11 13:14:14 +00:00 · 936b0de9cc
commit 936b0de9cc
parent 8117c9aa83
16 changed files with 71 additions and 63 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,6 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)

 if(CMAKE_COMPILER_IS_GNUCXX)
   if (CMAKE_SYSTEM_NAME MATCHES Linux)
-     set ( CMAKE_C_FLAGS     "${CMAKE_C_FLAGS} -Wno-long-long -ansi -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common -fstrict-aliasing")
     set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
   endif (CMAKE_SYSTEM_NAME MATCHES Linux)
 endif (CMAKE_COMPILER_IS_GNUCXX)
--- a/src/Core/Column.h
+++ b/src/Core/Column.h
@@ -50,16 +50,16 @@ template<typename MatrixType> class Column
    
  private:
    const Column& _ref() const { return *this; }
-    int _rows() const { return m_matrix.rows(); }
-    int _cols() const { return 1; }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return 1; }
    
-    Scalar& _write(int row, int col)
+    Scalar& _write(int row, int col) EIGEN_ALWAYS_INLINE
    {
      EIGEN_UNUSED(col);
      return m_matrix.write(row, m_col);
    }
    
-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      EIGEN_UNUSED(col);
      return m_matrix.read(row, m_col);
--- a/src/Core/Conjugate.h
+++ b/src/Core/Conjugate.h
@@ -47,10 +47,10 @@ template<typename MatrixType> class Conjugate
    
  private:
    const Conjugate& _ref() const { return *this; }
-    int _rows() const { return m_matrix.rows(); }
-    int _cols() const { return m_matrix.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }
    
-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return conj(m_matrix.read(row, col));
    }
--- a/src/Core/Difference.h
+++ b/src/Core/Difference.h
@@ -52,10 +52,10 @@ template<typename Lhs, typename Rhs> class Difference

  private:
    const Difference& _ref() const { return *this; }
-    int _rows() const { return m_lhs.rows(); }
-    int _cols() const { return m_lhs.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_lhs.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_lhs.cols(); }

-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return m_lhs.read(row, col) - m_rhs.read(row, col);
    }
--- a/src/Core/Matrix.h
+++ b/src/Core/Matrix.h
@@ -40,21 +40,21 @@ class Matrix : public MatrixBase<_Scalar, Matrix<_Scalar, _Rows, _Cols> >,
    
    static const int RowsAtCompileTime = _Rows, ColsAtCompileTime = _Cols;
    
-    const Scalar* array() const
+    const Scalar* array() const EIGEN_ALWAYS_INLINE
    { return Storage::m_array; }
    
-    Scalar* array()
+    Scalar* array() EIGEN_ALWAYS_INLINE
    { return Storage::m_array; }
    
  private:
    Ref _ref() const { return Ref(*this); }
    
-    const Scalar& _read(int row, int col) const
+    const Scalar& _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return array()[row + col * Storage::_rows()];
    }
    
-    Scalar& _write(int row, int col)
+    Scalar& _write(int row, int col) EIGEN_ALWAYS_INLINE
    {
      return array()[row + col * Storage::_rows()];
    }
--- a/src/Core/MatrixBase.h
+++ b/src/Core/MatrixBase.h
@@ -40,9 +40,12 @@ template<typename Scalar, typename Derived> class MatrixBase
    typedef typename ForwardDecl<Derived>::Ref Ref;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    
-    int rows() const { return static_cast<const Derived *>(this)->_rows(); }
-    int cols() const { return static_cast<const Derived *>(this)->_cols(); }
-    int size() const { return rows() * cols(); }
+    int rows() const EIGEN_ALWAYS_INLINE
+    { return static_cast<const Derived *>(this)->_rows(); }
+    int cols() const EIGEN_ALWAYS_INLINE
+    { return static_cast<const Derived *>(this)->_cols(); }
+    int size() const EIGEN_ALWAYS_INLINE
+    { return rows() * cols(); }
    
    Ref ref() const
    { return static_cast<const Derived *>(this)->_ref(); }
@@ -129,6 +132,7 @@ template<typename Scalar, typename Derived> class MatrixBase
    Derived& operator/=(const std::complex<double>& other);

    Scalar read(int row, int col, AssertLevel assertLevel = InternalDebugging) const
+    EIGEN_ALWAYS_INLINE
    {
      eigen_assert(assertLevel, row >= 0 && row < rows()
                                && col >= 0 && col < cols());
@@ -137,6 +141,7 @@ template<typename Scalar, typename Derived> class MatrixBase
    Scalar operator()(int row, int col) const { return read(row, col, UserDebugging); }
    
    Scalar& write(int row, int col, AssertLevel assertLevel = InternalDebugging)
+    EIGEN_ALWAYS_INLINE
    {
      eigen_assert(assertLevel, row >= 0 && row < rows()
                                && col >= 0 && col < cols());
@@ -145,6 +150,7 @@ template<typename Scalar, typename Derived> class MatrixBase
    Scalar& operator()(int row, int col) { return write(row, col, UserDebugging); }
    
    Scalar read(int index, AssertLevel assertLevel = InternalDebugging) const
+    EIGEN_ALWAYS_INLINE
    {
      eigen_assert(assertLevel, IsVector);
      if(RowsAtCompileTime == 1)
@@ -161,6 +167,7 @@ template<typename Scalar, typename Derived> class MatrixBase
    Scalar operator[](int index) const { return read(index, UserDebugging); }
    
    Scalar& write(int index, AssertLevel assertLevel = InternalDebugging)
+    EIGEN_ALWAYS_INLINE
    {
      eigen_assert(assertLevel, IsVector);
      if(RowsAtCompileTime == 1)
--- a/src/Core/MatrixRef.h
+++ b/src/Core/MatrixRef.h
@@ -40,15 +40,15 @@ template<typename MatrixType> class MatrixRef
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixRef)

  private:
-    int _rows() const { return m_matrix.rows(); }
-    int _cols() const { return m_matrix.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }

-    const Scalar& _read(int row, int col) const
+    const Scalar& _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return m_matrix._read(row, col);
    }
    
-    Scalar& _write(int row, int col)
+    Scalar& _write(int row, int col) EIGEN_ALWAYS_INLINE
    {
      return m_matrix.write(row, col);
    }
--- a/src/Core/MatrixStorage.h
+++ b/src/Core/MatrixStorage.h
@@ -41,10 +41,10 @@ class MatrixStorage
      assert(rows == RowsAtCompileTime && cols == ColsAtCompileTime);
    }
    
-    int _rows() const
+    int _rows() const EIGEN_ALWAYS_INLINE
    { return RowsAtCompileTime; }
    
-    int _cols() const
+    int _cols() const EIGEN_ALWAYS_INLINE
    { return ColsAtCompileTime; }

  public:
@@ -80,10 +80,10 @@ class MatrixStorage<Scalar, Dynamic, ColsAtCompileTime>
      m_rows = rows;
    }
    
-    int _rows() const
+    int _rows() const EIGEN_ALWAYS_INLINE
    { return m_rows; }
    
-    int _cols() const
+    int _cols() const EIGEN_ALWAYS_INLINE
    { return ColsAtCompileTime; }
    
  public:
@@ -124,10 +124,10 @@ class MatrixStorage<Scalar, RowsAtCompileTime, Dynamic>
      m_cols = cols;
    }
    
-    int _rows() const
+    int _rows() const EIGEN_ALWAYS_INLINE
    { return RowsAtCompileTime; }
    
-    int _cols() const
+    int _cols() const EIGEN_ALWAYS_INLINE
    { return m_cols; }
    
  public:
@@ -168,10 +168,10 @@ class MatrixStorage<Scalar, Dynamic, Dynamic>
      m_cols = cols;
    }
    
-    int _rows() const
+    int _rows() const EIGEN_ALWAYS_INLINE
    { return m_rows; }
    
-    int _cols() const
+    int _cols() const EIGEN_ALWAYS_INLINE
    { return m_cols; }
    
  public:
--- a/src/Core/OperatorEquals.h
+++ b/src/Core/OperatorEquals.h
@@ -27,23 +27,23 @@
 #ifndef EIGEN_OPERATOREQUALS_H
 #define EIGEN_OPERATOREQUALS_H

-template<int UnrollCount, int Rows> struct OperatorEqualsUnroller
+template<typename Derived1, typename Derived2, int UnrollCount, int Rows>
+struct OperatorEqualsUnroller
 {
  static const int col = (UnrollCount-1) / Rows;
  static const int row = (UnrollCount-1) % Rows;

-  template <typename Derived1, typename Derived2>
  static void run(Derived1 &dst, const Derived2 &src)
  {
-    OperatorEqualsUnroller<UnrollCount-1, Rows>::run(dst, src);
+    OperatorEqualsUnroller<Derived1, Derived2, UnrollCount-1, Rows>::run(dst, src);
    dst.write(row, col) = src.read(row, col);
  }
 };

 // prevent buggy user code from causing an infinite recursion
-template<int UnrollCount> struct OperatorEqualsUnroller<UnrollCount, 0>
+template<typename Derived1, typename Derived2, int UnrollCount>
+struct OperatorEqualsUnroller<Derived1, Derived2, UnrollCount, 0>
 {
-  template <typename Derived1, typename Derived2>
  static void run(Derived1 &dst, const Derived2 &src)
  {
    EIGEN_UNUSED(dst);
@@ -51,18 +51,18 @@ template<int UnrollCount> struct OperatorEqualsUnroller<UnrollCount, 0>
  }
 };

-template<int Rows> struct OperatorEqualsUnroller<1, Rows>
+template<typename Derived1, typename Derived2, int Rows>
+struct OperatorEqualsUnroller<Derived1, Derived2, 1, Rows>
 {
-  template <typename Derived1, typename Derived2>
  static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.write(0, 0) = src.read(0, 0);
  }
 };

-template<int Rows> struct OperatorEqualsUnroller<Dynamic, Rows>
+template<typename Derived1, typename Derived2, int Rows>
+struct OperatorEqualsUnroller<Derived1, Derived2, Dynamic, Rows>
 {
-  template <typename Derived1, typename Derived2>
  static void run(Derived1 &dst, const Derived2 &src)
  {
    EIGEN_UNUSED(dst);
@@ -77,7 +77,9 @@ Derived& MatrixBase<Scalar, Derived>
 {
  assert(rows() == other.rows() && cols() == other.cols());
  if(EIGEN_UNROLLED_LOOPS && SizeAtCompileTime != Dynamic && SizeAtCompileTime <= 25)
-    OperatorEqualsUnroller<SizeAtCompileTime, RowsAtCompileTime>::run(*this, other);
+    OperatorEqualsUnroller
+      <Derived, OtherDerived, SizeAtCompileTime, RowsAtCompileTime>::run
+        (*static_cast<Derived*>(this), *static_cast<const OtherDerived*>(&other));
  else
    for(int j = 0; j < cols(); j++) //traverse in column-dominant order
      for(int i = 0; i < rows(); i++)
--- a/src/Core/Opposite.h
+++ b/src/Core/Opposite.h
@@ -47,10 +47,10 @@ template<typename MatrixType> class Opposite
    
  private:
    const Opposite& _ref() const { return *this; }
-    int _rows() const { return m_matrix.rows(); }
-    int _cols() const { return m_matrix.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }
    
-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return -(m_matrix.read(row, col));
    }
--- a/src/Core/Product.h
+++ b/src/Core/Product.h
@@ -30,7 +30,7 @@ template<int Index, int Size, typename Lhs, typename Rhs>
 struct ProductUnroller
 {
  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
-                  typename Lhs::Scalar &res)
+                         typename Lhs::Scalar &res)
  {
    ProductUnroller<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res);
    res += lhs.read(row, Index) * rhs.read(Index, col);
@@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
 struct ProductUnroller<0, Size, Lhs, Rhs>
 {
  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
-                  typename Lhs::Scalar &res)
+                         typename Lhs::Scalar &res)
  {
    res = lhs.read(row, 0) * rhs.read(0, col);
  }
@@ -51,7 +51,7 @@ template<int Index, typename Lhs, typename Rhs>
 struct ProductUnroller<Index, Dynamic, Lhs, Rhs>
 {
  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
-                  typename Lhs::Scalar &res)
+                         typename Lhs::Scalar &res)
  {
    EIGEN_UNUSED(row);
    EIGEN_UNUSED(col);
@@ -66,7 +66,7 @@ template<int Index, typename Lhs, typename Rhs>
 struct ProductUnroller<Index, 0, Lhs, Rhs>
 {
  static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
-                  typename Lhs::Scalar &res)
+                         typename Lhs::Scalar &res)
  {
    EIGEN_UNUSED(row);
    EIGEN_UNUSED(col);
--- a/src/Core/Row.h
+++ b/src/Core/Row.h
@@ -57,16 +57,16 @@ template<typename MatrixType> class Row
  private:
    const Row& _ref() const { return *this; }
    
-    int _rows() const { return 1; }
-    int _cols() const { return m_matrix.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return 1; }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }
    
-    Scalar& _write(int row, int col)
+    Scalar& _write(int row, int col) EIGEN_ALWAYS_INLINE
    {
      EIGEN_UNUSED(row);
      return m_matrix.write(m_row, col);
    }
    
-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      EIGEN_UNUSED(row);
      return m_matrix.read(m_row, col);
--- a/src/Core/ScalarMultiple.h
+++ b/src/Core/ScalarMultiple.h
@@ -48,10 +48,10 @@ template<typename MatrixType> class ScalarMultiple

  private:
    const ScalarMultiple& _ref() const { return *this; }
-    int _rows() const { return m_matrix.rows(); }
-    int _cols() const { return m_matrix.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }

-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return m_matrix.read(row, col) * m_scalar;
    }
--- a/src/Core/Sum.h
+++ b/src/Core/Sum.h
@@ -52,10 +52,10 @@ template<typename Lhs, typename Rhs> class Sum

  private:
    const Sum& _ref() const { return *this; }
-    int _rows() const { return m_lhs.rows(); }
-    int _cols() const { return m_lhs.cols(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_lhs.rows(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_lhs.cols(); }

-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return m_lhs.read(row, col) + m_rhs.read(row, col);
    }
--- a/src/Core/Transpose.h
+++ b/src/Core/Transpose.h
@@ -46,15 +46,15 @@ template<typename MatrixType> class Transpose
    
  private:
    const Transpose& _ref() const { return *this; }
-    int _rows() const { return m_matrix.cols(); }
-    int _cols() const { return m_matrix.rows(); }
+    int _rows() const EIGEN_ALWAYS_INLINE { return m_matrix.cols(); }
+    int _cols() const EIGEN_ALWAYS_INLINE { return m_matrix.rows(); }
    
-    Scalar& _write(int row, int col)
+    Scalar& _write(int row, int col) EIGEN_ALWAYS_INLINE
    {
      return m_matrix.write(col, row);
    }
    
-    Scalar _read(int row, int col) const
+    Scalar _read(int row, int col) const EIGEN_ALWAYS_INLINE
    {
      return m_matrix.read(col, row);
    }
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -32,8 +32,8 @@ int main(int argc, char *argv[])
    bool has_set_repeat = false;
    bool has_set_seed = false;
    bool need_help = false;
-    unsigned int seed;
-    int repeat;
+    unsigned int seed = 0;
+    int repeat = 0;
    
    QStringList args = QCoreApplication::instance()->arguments();
    args.takeFirst(); // throw away the first argument (path to executable)