* added an in-place version of inverseProduct which

might be twice faster fot small fixed size matrix * added a sparse triangular solver (sparse version of inverseProduct) * various other improvements in the Sparse module
2025-03-19 18:40:38 +08:00 · 2008-06-29 21:29:12 +00:00 · 2008-06-29 21:29:12 +00:00 · 37a50fa526
commit 37a50fa526
parent fbdecf09e1
12 changed files with 582 additions and 209 deletions
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@ -18,6 +18,7 @@ namespace Eigen {
 #include "src/Sparse/CoreIterators.h"
 #include "src/Sparse/SparseSetter.h"
 #include "src/Sparse/SparseProduct.h"
+#include "src/Sparse/TriangularSolver.h"

 } // namespace Eigen

--- a/Eigen/src/Core/InverseProduct.h
+++ b/Eigen/src/Core/InverseProduct.h
@ -25,6 +25,53 @@
 #ifndef EIGEN_INVERSEPRODUCT_H
 #define EIGEN_INVERSEPRODUCT_H

+
+/** "in-place" version of MatrixBase::inverseProduct() where the result is written in \a other
+  *
+  * \sa inverseProduct()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+void MatrixBase<Derived>::inverseProductInPlace(MatrixBase<OtherDerived>& other) const
+{
+  ei_assert(cols() == other.rows());
+  ei_assert(!(Flags & ZeroDiagBit));
+  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
+
+  for(int c=0 ; c<other.cols() ; ++c)
+  {
+    if(Flags & LowerTriangularBit)
+    {
+      // forward substitution
+      if(!(Flags & UnitDiagBit))
+        other.coeffRef(0,c) = other.coeff(0,c)/coeff(0, 0);
+      for(int i=1; i<rows(); ++i)
+      {
+        Scalar tmp = other.coeff(i,c) - ((this->row(i).start(i)) * other.col(c).start(i)).coeff(0,0);
+        if (Flags & UnitDiagBit)
+          other.coeffRef(i,c) = tmp;
+        else
+          other.coeffRef(i,c) = tmp/coeff(i,i);
+      }
+    }
+    else
+    {
+      // backward substitution
+      if(!(Flags & UnitDiagBit))
+        other.coeffRef(cols()-1,c) = other.coeff(cols()-1, c)/coeff(rows()-1, cols()-1);
+      for(int i=rows()-2 ; i>=0 ; --i)
+      {
+        Scalar tmp = other.coeff(i,c)
+                   - ((this->row(i).end(cols()-i-1)) * other.col(c).end(cols()-i-1)).coeff(0,0);
+        if (Flags & UnitDiagBit)
+          other.coeffRef(i,c) = tmp;
+        else
+          other.coeffRef(i,c) = tmp/coeff(i,i);
+      }
+    }
+  }
+}
+
 /** \returns the product of the inverse of \c *this with \a other.
  *
  * This function computes the inverse-matrix matrix product inverse(\c*this) * \a other
@ -43,48 +90,8 @@ template<typename Derived>
 template<typename OtherDerived>
 typename OtherDerived::Eval MatrixBase<Derived>::inverseProduct(const MatrixBase<OtherDerived>& other) const
 {
-  assert(cols() == other.rows());
-  assert(!(Flags & ZeroDiagBit));
-  assert(Flags & (UpperTriangularBit|LowerTriangularBit));
-
-  typename OtherDerived::Eval res(other.rows(), other.cols());
-
-  for(int c=0 ; c<other.cols() ; ++c)
-  {
-    if(Flags & LowerTriangularBit)
-    {
-      // forward substitution
-      if(Flags & UnitDiagBit)
-        res.coeffRef(0,c) = other.coeff(0,c);
-      else
-        res.coeffRef(0,c) = other.coeff(0,c)/coeff(0, 0);
-      for(int i=1; i<rows(); ++i)
-      {
-        Scalar tmp = other.coeff(i,c) - ((this->row(i).start(i)) * res.col(c).start(i)).coeff(0,0);
-        if (Flags & UnitDiagBit)
-          res.coeffRef(i,c) = tmp;
-        else
-          res.coeffRef(i,c) = tmp/coeff(i,i);
-      }
-    }
-    else
-    {
-      // backward substitution
-      if(Flags & UnitDiagBit)
-        res.coeffRef(cols()-1,c) = other.coeff(cols()-1,c);
-      else
-        res.coeffRef(cols()-1,c) = other.coeff(cols()-1, c)/coeff(rows()-1, cols()-1);
-      for(int i=rows()-2 ; i>=0 ; --i)
-      {
-        Scalar tmp = other.coeff(i,c)
-                   - ((this->row(i).end(cols()-i-1)) * res.col(c).end(cols()-i-1)).coeff(0,0);
-        if (Flags & UnitDiagBit)
-          res.coeffRef(i,c) = tmp;
-        else
-          res.coeffRef(i,c) = tmp/coeff(i,i);
-      }
-    }
-  }
+  typename OtherDerived::Eval res(other);
+  inverseProductInPlace(res);
  return res;
 }

--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@ -296,6 +296,9 @@ template<typename Derived> class MatrixBase
    template<typename OtherDerived>
    typename OtherDerived::Eval inverseProduct(const MatrixBase<OtherDerived>& other) const;

+    template<typename OtherDerived>
+    void inverseProductInPlace(MatrixBase<OtherDerived>& other) const;
+

    template<typename OtherDerived>
    Scalar dot(const MatrixBase<OtherDerived>& other) const;
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@ -191,6 +191,11 @@ enum {
  Sparse  = SparseBit
 };

+enum {
+  ColMajor = 0,
+  RowMajor = RowMajorBit
+};
+
 const int FullyCoherentAccessPattern  = 0x1;
 const int InnerCoherentAccessPattern  = 0x2 | FullyCoherentAccessPattern;
 const int OuterCoherentAccessPattern  = 0x4 | InnerCoherentAccessPattern;
--- a/Eigen/src/Sparse/LinkedVectorMatrix.h
+++ b/Eigen/src/Sparse/LinkedVectorMatrix.h
@ -40,15 +40,15 @@ struct ei_traits<LinkedVectorMatrix<_Scalar,_Flags> >
  };
 };

-template<typename Element, int BlockSize = 8>
-struct LinkedVector
+template<typename Element, int ChunkSize = 8>
+struct LinkedVectorChunk
 {
-  LinkedVector() : size(0), next(0), prev(0) {}
-  Element data[BlockSize];
-  LinkedVector* next;
-  LinkedVector* prev;
+  LinkedVectorChunk() : size(0), next(0), prev(0) {}
+  Element data[ChunkSize];
+  LinkedVectorChunk* next;
+  LinkedVectorChunk* prev;
  int size;
-  bool isFull() const { return size==BlockSize; }
+  bool isFull() const { return size==ChunkSize; }
 };

 template<typename _Scalar, int _Flags>
@ -70,11 +70,11 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      Scalar value;
      int index;
    };
-    typedef LinkedVector<ValueIndex,8> LinkedVectorBlock;
+    typedef LinkedVectorChunk<ValueIndex,8> VectorChunk;

-    inline int find(LinkedVectorBlock** _el, int id)
+    inline int find(VectorChunk** _el, int id)
    {
-      LinkedVectorBlock* el = *_el;
+      VectorChunk* el = *_el;
      while (el && el->data[el->size-1].index<id)
        el = el->next;
      *_el = el;
@ -115,7 +115,7 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      const int outer = RowMajor ? row : col;
      const int inner = RowMajor ? col : row;

-      LinkedVectorBlock* el = m_data[outer];
+      VectorChunk* el = m_data[outer];
      int id = find(&el, inner);
      if (id<0)
        return Scalar(0);
@ -127,7 +127,7 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      const int outer = RowMajor ? row : col;
      const int inner = RowMajor ? col : row;

-      LinkedVectorBlock* el = m_data[outer];
+      VectorChunk* el = m_data[outer];
      int id = find(&el, inner);
      ei_assert(id>=0);
 //       if (id<0)
@ -150,7 +150,7 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      const int inner = RowMajor ? col : row;
      if (m_ends[outer]==0)
      {
-        m_data[outer] = m_ends[outer] = new LinkedVectorBlock();
+        m_data[outer] = m_ends[outer] = new VectorChunk();
      }
      else
      {
@ -158,7 +158,7 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
        if (m_ends[outer]->isFull())
        {

-          LinkedVectorBlock* el = new LinkedVectorBlock();
+          VectorChunk* el = new VectorChunk();
          m_ends[outer]->next = el;
          el->prev = m_ends[outer];
          m_ends[outer] = el;
@ -168,24 +168,21 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      return m_ends[outer]->data[m_ends[outer]->size++].value;
    }

-    inline void endFill()
-    {
-    }
+    inline void endFill() { }

    ~LinkedVectorMatrix()
    {
-      if (this->isNotShared())
-        clear();
+      clear();
    }

    void clear()
    {
      for (int i=0; i<m_data.size(); ++i)
      {
-        LinkedVectorBlock* el = m_data[i];
+        VectorChunk* el = m_data[i];
        while (el)
        {
-          LinkedVectorBlock* tmp = el;
+          VectorChunk* tmp = el;
          el = el->next;
          delete tmp;
        }
@ -221,30 +218,26 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F
      *this = other.derived();
    }

-    inline void shallowCopy(const LinkedVectorMatrix& other)
+    inline void swap(LinkedVectorMatrix& other)
    {
-      EIGEN_DBG_SPARSE(std::cout << "LinkedVectorMatrix:: shallowCopy\n");
+      EIGEN_DBG_SPARSE(std::cout << "LinkedVectorMatrix:: swap\n");
      resize(other.rows(), other.cols());
-      for (int j=0; j<this->outerSize(); ++j)
-      {
-        m_data[j] = other.m_data[j];
-        m_ends[j] = other.m_ends[j];
-      }
-      other.markAsCopied();
+      m_data.swap(other.m_data);
+      m_ends.swap(other.m_ends);
    }

    inline LinkedVectorMatrix& operator=(const LinkedVectorMatrix& other)
    {
      if (other.isRValue())
      {
-        shallowCopy(other);
-        return *this;
+        swap(other.const_cast_derived());
      }
      else
      {
        // TODO implement a specialized deep copy here
        return operator=<LinkedVectorMatrix>(other);
      }
+      return *this;
    }

    template<typename OtherDerived>
@ -255,8 +248,10 @@ class LinkedVectorMatrix : public SparseMatrixBase<LinkedVectorMatrix<_Scalar,_F

  protected:

-    std::vector<LinkedVectorBlock*> m_data;
-    std::vector<LinkedVectorBlock*> m_ends;
+    // outer vector of inner linked vector chunks
+    std::vector<VectorChunk*> m_data;
+    // stores a reference to the last vector chunk for efficient filling
+    std::vector<VectorChunk*> m_ends;
    int m_innerSize;

 };
@ -281,7 +276,7 @@ class LinkedVectorMatrix<Scalar,_Flags>::InnerIterator

  protected:
    const LinkedVectorMatrix& m_matrix;
-    LinkedVectorBlock* m_el;
+    VectorChunk* m_el;
    int m_it;
 };

--- a/Eigen/src/Sparse/SparseArray.h
+++ b/Eigen/src/Sparse/SparseArray.h
@ -32,11 +32,11 @@ template<typename Scalar> class SparseArray
 {
  public:
    SparseArray()
-      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0), m_isShared(0)
+      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)
    {}

    SparseArray(int size)
-      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0), m_isShared(0)
+      : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0)
    {
      resize(size);
    }
@ -51,66 +51,39 @@ template<typename Scalar> class SparseArray
      resize(other.size());
      memcpy(m_values, other.m_values, m_size * sizeof(Scalar));
      memcpy(m_indices, other.m_indices, m_size * sizeof(int));
-      m_isShared = 0;
    }

-    void shallowCopy(const SparseArray& other)
+    void swap(SparseArray& other)
    {
-      delete[] m_values;
-      delete[] m_indices;
-      m_values = other.m_values;
-      m_indices = other.m_indices;
-      m_size = other.m_size;
-      m_allocatedSize = other.m_allocatedSize;
-      m_isShared = false;
-      other.m_isShared = true;
+      std::swap(m_values, other.m_values);
+      std::swap(m_indices, other.m_indices);
+      std::swap(m_size, other.m_size);
+      std::swap(m_allocatedSize, other.m_allocatedSize);
    }

    ~SparseArray()
    {
-      if (!m_isShared)
-      {
-        delete[] m_values;
-        delete[] m_indices;
-      }
+      delete[] m_values;
+      delete[] m_indices;
    }

    void reserve(int size)
    {
      int newAllocatedSize = m_size + size;
      if (newAllocatedSize > m_allocatedSize)
-      {
-        Scalar* newValues  = new Scalar[newAllocatedSize];
-        int* newIndices = new int[newAllocatedSize];
-        // copy
-        memcpy(newValues,  m_values,  m_size * sizeof(Scalar));
-        memcpy(newIndices, m_indices, m_size * sizeof(int));
-        // delete old stuff
-        delete[] m_values;
-        delete[] m_indices;
-        m_values = newValues;
-        m_indices = newIndices;
-        m_allocatedSize = newAllocatedSize;
-      }
+        reallocate(newAllocatedSize);
+    }
+
+    void squeeze()
+    {
+      if (m_allocatedSize>m_size)
+        reallocate(m_size);
    }

    void resize(int size, int reserveSizeFactor = 0)
    {
      if (m_allocatedSize<size)
-      {
-        int newAllocatedSize = size + reserveSizeFactor*size;
-        Scalar* newValues  = new Scalar[newAllocatedSize];
-        int* newIndices = new int[newAllocatedSize];
-        // copy
-        memcpy(newValues,  m_values,  m_size * sizeof(Scalar));
-        memcpy(newIndices, m_indices, m_size * sizeof(int));
-        // delete old stuff
-        delete[] m_values;
-        delete[] m_indices;
-        m_values = newValues;
-        m_indices = newIndices;
-        m_allocatedSize = newAllocatedSize;
-      }
+        reallocate(size + reserveSizeFactor*size);
      m_size = size;
    }

@ -123,26 +96,37 @@ template<typename Scalar> class SparseArray
    }

    int size() const { return m_size; }
-
-    void clear()
-    {
-      m_size = 0;
-    }
+    void clear() { m_size = 0; }

    Scalar& value(int i) { return m_values[i]; }
-    Scalar value(int i) const { return m_values[i]; }
+    const Scalar& value(int i) const { return m_values[i]; }

    int& index(int i) { return m_indices[i]; }
-    int index(int i) const { return m_indices[i]; }
+    const int& index(int i) const { return m_indices[i]; }
+
+  protected:
+
+    void reallocate(int size)
+    {
+      Scalar* newValues  = new Scalar[size];
+      int* newIndices = new int[size];
+      int copySize = std::min(size, m_size);
+      // copy
+      memcpy(newValues,  m_values,  copySize * sizeof(Scalar));
+      memcpy(newIndices, m_indices, copySize * sizeof(int));
+      // delete old stuff
+      delete[] m_values;
+      delete[] m_indices;
+      m_values = newValues;
+      m_indices = newIndices;
+      m_allocatedSize = size;
+    }

  protected:
    Scalar* m_values;
    int* m_indices;
    int m_size;
-    struct {
-      int m_allocatedSize:31;
-      mutable int m_isShared:1;
-    };
+    int m_allocatedSize;

 };

--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@ -58,41 +58,50 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
    EIGEN_GENERIC_PUBLIC_INTERFACE(SparseMatrix)

  protected:
+  public:

-    int* m_colPtrs;
+    typedef SparseMatrixBase<SparseMatrix> SparseBase;
+    enum {
+      RowMajor = SparseBase::RowMajor
+    };
+
+    int m_outerSize;
+    int m_innerSize;
+    int* m_outerIndex;
    SparseArray<Scalar> m_data;
-    int m_rows;
-    int m_cols;
+

  public:

-    inline int rows() const { return m_rows; }
-    inline int cols() const { return m_cols; }
-    inline int innerNonZeros(int j) const { return m_colPtrs[j+1]-m_colPtrs[j]; }
+    inline int rows() const { return RowMajor ? m_outerSize : m_innerSize; }
+    inline int cols() const { return RowMajor ? m_innerSize : m_outerSize; }
+    inline int innerSize() const { return m_innerSize; }
+    inline int outerSize() const { return m_outerSize; }
+    inline int innerNonZeros(int j) const { return m_outerIndex[j+1]-m_outerIndex[j]; }

    inline Scalar coeff(int row, int col) const
    {
-      int id = m_colPtrs[col];
-      int end = m_colPtrs[col+1];
-      while (id<end && m_data.index(id)!=row)
-      {
-        ++id;
-      }
-      if (id==end)
-        return 0;
-      return m_data.value(id);
+      const int outer = RowMajor ? row : col;
+      const int inner = RowMajor ? col : row;
+
+      int id = m_outerIndex[outer];
+      int end = m_outerIndex[outer+1]-1;
+      if (m_data.index(end)==inner)
+        return m_data.value(end);
+      const int* r = std::lower_bound(&m_data.index(id),&m_data.index(end),inner);
+      return (*r==inner) ? m_data.value(*r) : Scalar(0);
    }

    inline Scalar& coeffRef(int row, int col)
    {
-      int id = m_colPtrs[col];
-      int end = m_colPtrs[col+1];
-      while (id<end && m_data.index(id)!=row)
-      {
-        ++id;
-      }
-      ei_assert(id!=end);
-      return m_data.value(id);
+      const int outer = RowMajor ? row : col;
+      const int inner = RowMajor ? col : row;
+
+      int id = m_outerIndex[outer];
+      int end = m_outerIndex[outer+1];
+      int* r = std::lower_bound(&m_data.index(id),&m_data.index(end),inner);
+      ei_assert(*r==inner);
+      return m_data.value(*r);
    }

  public:
@ -106,92 +115,94 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
    {
      m_data.clear();
      m_data.reserve(reserveSize);
-      for (int i=0; i<=m_cols; ++i)
-        m_colPtrs[i] = 0;
+      for (int i=0; i<=m_outerSize; ++i)
+        m_outerIndex[i] = 0;
    }

    inline Scalar& fill(int row, int col)
    {
-      if (m_colPtrs[col+1]==0)
+      const int outer = RowMajor ? row : col;
+      const int inner = RowMajor ? col : row;
+
+      if (m_outerIndex[outer+1]==0)
      {
        int i=col;
-        while (i>=0 && m_colPtrs[i]==0)
+        while (i>=0 && m_outerIndex[i]==0)
        {
-          m_colPtrs[i] = m_data.size();
+          m_outerIndex[i] = m_data.size();
          --i;
        }
-        m_colPtrs[col+1] = m_colPtrs[col];
+        m_outerIndex[outer+1] = m_outerIndex[outer];
      }
-      assert(m_colPtrs[col+1] == m_data.size());
-      int id = m_colPtrs[col+1];
-      m_colPtrs[col+1]++;
+      assert(m_outerIndex[outer+1] == m_data.size());
+      int id = m_outerIndex[outer+1];
+      m_outerIndex[outer+1]++;

-      m_data.append(0, row);
+      m_data.append(0, inner);
      return m_data.value(id);
    }

    inline void endFill()
    {
      int size = m_data.size();
-      int i = m_cols;
+      int i = m_outerSize;
      // find the last filled column
-      while (i>=0 && m_colPtrs[i]==0)
+      while (i>=0 && m_outerIndex[i]==0)
        --i;
      i++;
-      while (i<=m_cols)
+      while (i<=m_outerSize)
      {
-        m_colPtrs[i] = size;
+        m_outerIndex[i] = size;
        ++i;
      }
    }

    void resize(int rows, int cols)
    {
-      if (m_cols != cols)
+      const int outerSize = RowMajor ? rows : cols;
+      m_innerSize = RowMajor ? cols : rows;
+      m_data.clear();
+      if (m_outerSize != outerSize)
      {
-        delete[] m_colPtrs;
-        m_colPtrs = new int [cols+1];
-        m_rows = rows;
-        m_cols = cols;
+        delete[] m_outerIndex;
+        m_outerIndex = new int [outerSize+1];
+        m_outerSize = outerSize;
      }
    }

    inline SparseMatrix(int rows, int cols)
-      : m_rows(0), m_cols(0), m_colPtrs(0)
+      : m_outerSize(0), m_innerSize(0), m_outerIndex(0)
    {
      resize(rows, cols);
    }

    template<typename OtherDerived>
    inline SparseMatrix(const MatrixBase<OtherDerived>& other)
-      : m_rows(0), m_cols(0), m_colPtrs(0)
+      : m_outerSize(0), m_innerSize(0), m_outerIndex(0)
    {
      *this = other.derived();
    }

-    inline void shallowCopy(const SparseMatrix& other)
+    inline void swap(SparseMatrix& other)
    {
-      EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: shallowCopy\n");
-      delete[] m_colPtrs;
-      m_colPtrs = 0;
-      m_rows = other.rows();
-      m_cols = other.cols();
-      m_colPtrs = other.m_colPtrs;
-      m_data.shallowCopy(other.m_data);
-      other.markAsCopied();
+      EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: swap\n");
+      std::swap(m_outerIndex, other.m_outerIndex);
+      std::swap(m_innerSize, other.m_innerSize);
+      std::swap(m_outerSize, other.m_outerSize);
+      m_data.swap(other.m_data);
    }

    inline SparseMatrix& operator=(const SparseMatrix& other)
    {
      if (other.isRValue())
      {
-        shallowCopy(other);
+        swap(other.const_cast_derived());
      }
      else
      {
        resize(other.rows(), other.cols());
-        for (int col=0; col<=cols(); ++col)
-          m_colPtrs[col] = other.m_colPtrs[col];
+        for (int j=0; j<=m_outerSize; ++j)
+          m_outerIndex[j] = other.m_outerIndex[j];
        m_data = other.m_data;
        return *this;
      }
@ -216,7 +227,7 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
        s << "Column pointers:\n";
        for (uint i=0; i<m.cols(); ++i)
        {
-          s << m.m_colPtrs[i] << " ";
+          s << m.m_outerIndex[i] << " ";
        }
        s << std::endl;
        s << std::endl;
@ -228,8 +239,7 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
    /** Destructor */
    inline ~SparseMatrix()
    {
-      if (this->isNotShared())
-        delete[] m_colPtrs;
+      delete[] m_outerIndex;
    }
 };

@ -237,8 +247,8 @@ template<typename Scalar, int _Flags>
 class SparseMatrix<Scalar,_Flags>::InnerIterator
 {
  public:
-    InnerIterator(const SparseMatrix& mat, int col)
-      : m_matrix(mat), m_id(mat.m_colPtrs[col]), m_start(m_id), m_end(mat.m_colPtrs[col+1])
+    InnerIterator(const SparseMatrix& mat, int outer)
+      : m_matrix(mat), m_id(mat.m_outerIndex[outer]), m_start(m_id), m_end(mat.m_outerIndex[outer+1])
    {}

    InnerIterator& operator++() { m_id++; return *this; }
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@ -43,19 +43,16 @@ class SparseMatrixBase : public MatrixBase<Derived>
    { return *static_cast<Derived*>(const_cast<SparseMatrixBase*>(this)); }

    SparseMatrixBase()
-      : m_isRValue(false), m_hasBeenCopied(false)
+      : m_isRValue(false)
    {}

    bool isRValue() const { return m_isRValue; }
-    Derived& temporary() { m_isRValue = true; return derived(); }
+    Derived& markAsRValue() { m_isRValue = true; return derived(); }

    inline Derived& operator=(const Derived& other)
    {
      if (other.isRValue())
-      {
-        m_hasBeenCopied = true;
-        derived().shallowCopy(other);
-      }
+        derived().swap(other.const_cast_derived());
      else
        this->operator=<Derived>(other);
      return derived();
@ -82,7 +79,7 @@ class SparseMatrixBase : public MatrixBase<Derived>
      }
      temp.endFill();

-      derived() = temp.temporary();
+      derived() = temp.markAsRValue();
      return derived();
    }

@ -119,7 +116,6 @@ class SparseMatrixBase : public MatrixBase<Derived>

    friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
    {
-
      if (Flags&RowMajorBit)
      {
        for (int row=0; row<m.outerSize(); ++row)
@ -130,6 +126,7 @@ class SparseMatrixBase : public MatrixBase<Derived>
            for ( ; col<it.index(); ++col)
              s << "0 ";
            std::cout << it.value() << " ";
+            ++col;
          }
          for ( ; col<m.cols(); ++col)
            s << "0 ";
@ -144,15 +141,12 @@ class SparseMatrixBase : public MatrixBase<Derived>
      return s;
    }

-  protected:
-
-    bool isNotShared() { return !m_hasBeenCopied; }
-    void markAsCopied() const { m_hasBeenCopied = true; }
+    template<typename OtherDerived>
+    OtherDerived inverseProduct(const MatrixBase<OtherDerived>& other) const;

  protected:

    bool m_isRValue;
-    mutable bool m_hasBeenCopied;
 };

 #endif // EIGEN_SPARSEMATRIXBASE_H
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@ -123,9 +123,6 @@ template<typename LhsNested, typename RhsNested> class Product<LhsNested,RhsNest
    const RhsNested m_rhs;
 };

-const int RowMajor = RowMajorBit;
-const int ColMajor = 0;
-
 template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = ei_traits<Lhs>::Flags&RowMajorBit,
  int RhsStorageOrder = ei_traits<Rhs>::Flags&RowMajorBit,
--- a/Eigen/src/Sparse/TriangularSolver.h
+++ b/Eigen/src/Sparse/TriangularSolver.h
@ -0,0 +1,170 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_SPARSETRIANGULARSOLVER_H
+#define EIGEN_SPARSETRIANGULARSOLVER_H
+
+template<typename Lhs, typename Rhs,
+  int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
+                     ? Lower
+                     : (int(Lhs::Flags) & UpperTriangularBit)
+                     ? Upper
+                     : -1,
+  int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+  >
+struct ei_inverse_product_selector;
+
+// forward substitution, row-major
+template<typename Lhs, typename Rhs>
+struct ei_inverse_product_selector<Lhs,Rhs,Lower,RowMajor>
+{
+  typedef typename Rhs::Scalar Scalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  {
+    for(int col=0 ; col<rhs.cols() ; ++col)
+    {
+      for(int i=0; i<lhs.rows(); ++i)
+      {
+        Scalar tmp = rhs.coeff(i,col);
+        Scalar lastVal = 0;
+        int lastIndex = 0;
+        for(typename Lhs::InnerIterator it(lhs, i); it; ++it)
+        {
+          lastVal = it.value();
+          lastIndex = it.index();
+          tmp -= lastVal * res.coeff(lastIndex,col);
+        }
+        if (Lhs::Flags & UnitDiagBit)
+          res.coeffRef(i,col) = tmp;
+        else
+        {
+          ei_assert(lastIndex==i);
+          res.coeffRef(i,col) = tmp/lastVal;
+        }
+      }
+    }
+  }
+};
+
+// backward substitution, row-major
+template<typename Lhs, typename Rhs>
+struct ei_inverse_product_selector<Lhs,Rhs,Upper,RowMajor>
+{
+  typedef typename Rhs::Scalar Scalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  {
+    for(int col=0 ; col<rhs.cols() ; ++col)
+    {
+      for(int i=lhs.rows()-1 ; i>=0 ; --i)
+      {
+        Scalar tmp = rhs.coeff(i,col);
+        typename Lhs::InnerIterator it(lhs, i);
+        for(++it; it; ++it)
+        {
+          tmp -= it.value() * res.coeff(it.index(),col);
+        }
+
+        if (Lhs::Flags & UnitDiagBit)
+          res.coeffRef(i,col) = tmp;
+        else
+        {
+          typename Lhs::InnerIterator it(lhs, i);
+          ei_assert(it.index() == i);
+          res.coeffRef(i,col) = tmp/it.value();
+        }
+      }
+    }
+  }
+};
+
+// forward substitution, col-major
+template<typename Lhs, typename Rhs>
+struct ei_inverse_product_selector<Lhs,Rhs,Lower,ColMajor>
+{
+  typedef typename Rhs::Scalar Scalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  {
+    // NOTE we could avoid this copy using an in-place API
+    res = rhs;
+    for(int col=0 ; col<rhs.cols() ; ++col)
+    {
+      for(int i=0; i<lhs.cols(); ++i)
+      {
+        typename Lhs::InnerIterator it(lhs, i);
+        if(!(Lhs::Flags & UnitDiagBit))
+        {
+          ei_assert(it.index()==i);
+          res.coeffRef(i,col) /= it.value();
+        }
+        Scalar tmp = res.coeffRef(i,col);
+        for(++it; it; ++it)
+          res.coeffRef(it.index(), col) -= tmp * it.value();
+      }
+    }
+  }
+};
+
+// backward substitution, col-major
+template<typename Lhs, typename Rhs>
+struct ei_inverse_product_selector<Lhs,Rhs,Upper,ColMajor>
+{
+  typedef typename Rhs::Scalar Scalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  {
+    // NOTE we could avoid this copy using an in-place API
+    res = rhs;
+    for(int col=0 ; col<rhs.cols() ; ++col)
+    {
+      for(int i=lhs.cols()-1; i>=0; --i)
+      {
+        if(!(Lhs::Flags & UnitDiagBit))
+        {
+          // FIXME lhs.coeff(i,i) might not be always efficient while it must simply be the
+          // last element of the column !
+          res.coeffRef(i,col) /= lhs.coeff(i,i);
+        }
+        Scalar tmp = res.coeffRef(i,col);
+        typename Lhs::InnerIterator it(lhs, i);
+        for(; it && it.index()<i; ++it)
+          res.coeffRef(it.index(), col) -= tmp * it.value();
+      }
+    }
+  }
+};
+
+template<typename Derived>
+template<typename OtherDerived>
+OtherDerived
+SparseMatrixBase<Derived>::inverseProduct(const MatrixBase<OtherDerived>& other) const
+{
+  ei_assert(derived().cols() == other.rows());
+  ei_assert(!(Flags & ZeroDiagBit));
+  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
+
+  OtherDerived res(other.rows(), other.cols());
+  ei_inverse_product_selector<Derived, OtherDerived>::run(derived(), other.derived(), res);
+  return res;
+}
+
+#endif // EIGEN_SPARSETRIANGULARSOLVER_H
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@ -64,7 +64,8 @@ void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)

 #ifndef NOMTL
 #include <boost/numeric/mtl/mtl.hpp>
-typedef mtl::compressed2D<Scalar> MtlSparse;
+typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
+typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
 void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
 {
  mtl::matrix::inserter<MtlSparse> ins(dst);
--- a/bench/sparse_trisolver.cpp
+++ b/bench/sparse_trisolver.cpp
@ -0,0 +1,206 @@
+
+//g++ -O3 -g0 -DNDEBUG  sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out
+//g++ -O3 -g0 -DNDEBUG  sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out
+// -DNOGMM -DNOMTL
+
+#ifndef SIZE
+#define SIZE 10000
+#endif
+
+#ifndef DENSITY
+#define DENSITY 0.01
+#endif
+
+#ifndef REPEAT
+#define REPEAT 1
+#endif
+
+#include "BenchSparseUtil.h"
+
+#ifndef MINDENSITY
+#define MINDENSITY 0.0004
+#endif
+
+#ifndef NBTRIES
+#define NBTRIES 10
+#endif
+
+#define BENCH(X) \
+  timer.reset(); \
+  for (int _j=0; _j<NBTRIES; ++_j) { \
+    timer.start(); \
+    for (int _k=0; _k<REPEAT; ++_k) { \
+        X  \
+  } timer.stop(); }
+
+typedef SparseMatrix<Scalar,Upper> EigenSparseTriMatrix;
+typedef SparseMatrix<Scalar,RowMajorBit|Upper> EigenSparseTriMatrixRow;
+
+void fillMatrix(float density, int rows, int cols,  EigenSparseTriMatrix& dst)
+{
+  dst.startFill(rows*cols*density);
+  for(int j = 0; j < cols; j++)
+  {
+    for(int i = 0; i < j; i++)
+    {
+      Scalar v = (ei_random<float>(0,1) < density) ? ei_random<Scalar>() : 0;
+      if (v!=0)
+        dst.fill(i,j) = v;
+    }
+    dst.fill(j,j) = ei_random<Scalar>();
+  }
+  dst.endFill();
+}
+
+int main(int argc, char *argv[])
+{
+  int rows = SIZE;
+  int cols = SIZE;
+  float density = DENSITY;
+  BenchTimer timer;
+  #if 1
+  EigenSparseTriMatrix sm1(rows,cols);
+  VectorXf b = VectorXf::random(cols);
+  VectorXf x = VectorXf::random(cols);
+
+  bool densedone = false;
+
+  for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
+  {
+    EigenSparseTriMatrix sm1(rows, cols);
+    fillMatrix(density, rows, cols, sm1);
+
+    // dense matrices
+    #ifdef DENSEMATRIX
+    if (!densedone)
+    {
+      densedone = true;
+      std::cout << "Eigen Dense\t" << density*100 << "%\n";
+      DenseMatrix m1(rows,cols);
+      Matrix<Scalar,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit> m2(rows,cols);
+      eiToDense(sm1, m1);
+      m2 = m1;
+
+      BENCH(x = m1.marked<Upper>().inverseProduct(b);)
+      std::cout << "   colmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << x.transpose() << "\n";
+
+      BENCH(x = m2.marked<Upper>().inverseProduct(b);)
+      std::cout << "   rowmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << x.transpose() << "\n";
+    }
+    #endif
+
+    // eigen sparse matrices
+    {
+      std::cout << "Eigen sparse\t" << density*100 << "%\n";
+      EigenSparseTriMatrixRow sm2 = sm1;
+
+      BENCH(x = sm1.inverseProduct(b);)
+      std::cout << "   colmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << x.transpose() << "\n";
+
+      BENCH(x = sm2.inverseProduct(b);)
+      std::cout << "   rowmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << x.transpose() << "\n";
+
+//       x = b;
+//       BENCH(sm1.inverseProductInPlace(x);)
+//       std::cout << "   colmajor^-1 * b:\t" << timer.value() << " (inplace)" << endl;
+//       std::cerr << x.transpose() << "\n";
+//
+//       x = b;
+//       BENCH(sm2.inverseProductInPlace(x);)
+//       std::cout << "   rowmajor^-1 * b:\t" << timer.value() << " (inplace)" << endl;
+//       std::cerr << x.transpose() << "\n";
+    }
+
+    // GMM++
+    #ifndef NOGMM
+    {
+      std::cout << "GMM++ sparse\t" << density*100 << "%\n";
+      GmmSparse m1(rows,cols);
+      gmm::csr_matrix<Scalar> m2;
+      eiToGmm(sm1, m1);
+      gmm::copy(m1,m2);
+      std::vector<Scalar> gmmX(cols), gmmB(cols);
+      Map<Matrix<Scalar,Dynamic,1> >(&gmmX[0], cols) = x;
+      Map<Matrix<Scalar,Dynamic,1> >(&gmmB[0], cols) = b;
+
+      gmmX = gmmB;
+      BENCH(gmm::upper_tri_solve(m1, gmmX, false);)
+      std::cout << "   colmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << Map<Matrix<Scalar,Dynamic,1> >(&gmmX[0], cols).transpose() << "\n";
+
+      gmmX = gmmB;
+      BENCH(gmm::upper_tri_solve(m2, gmmX, false);)
+      timer.stop();
+      std::cout << "   rowmajor^-1 * b:\t" << timer.value() << endl;
+      std::cerr << Map<Matrix<Scalar,Dynamic,1> >(&gmmX[0], cols).transpose() << "\n";
+    }
+    #endif
+
+    // MTL4
+    #ifndef NOMTL
+    {
+      std::cout << "MTL4\t" << density*100 << "%\n";
+      MtlSparse m1(rows,cols);
+      MtlSparseRowMajor m2(rows,cols);
+      eiToMtl(sm1, m1);
+      m2 = m1;
+      mtl::dense_vector<Scalar> x(rows, 1.0);
+      mtl::dense_vector<Scalar> b(rows, 1.0);
+
+      BENCH(x = mtl::upper_trisolve(m1,b);)
+      std::cout << "   colmajor^-1 * b:\t" << timer.value() << endl;
+//       std::cerr << x << "\n";
+
+      BENCH(x = mtl::upper_trisolve(m2,b);)
+      std::cout << "   rowmajor^-1 * b:\t" << timer.value() << endl;
+//       std::cerr << x << "\n";
+    }
+    #endif
+
+
+
+  }
+  #endif
+
+  #if 0
+    // bench small matrices (in-place versus return bye value)
+    {
+      timer.reset();
+      for (int _j=0; _j<10; ++_j) {
+        Matrix4f m = Matrix4f::random();
+        Vector4f b = Vector4f::random();
+        Vector4f x = Vector4f::random();
+        timer.start();
+        for (int _k=0; _k<1000000; ++_k) {
+          b = m.inverseProduct(b);
+        }
+        timer.stop();
+      }
+      std::cout << "4x4 :\t" << timer.value() << endl;
+    }
+
+    {
+      timer.reset();
+      for (int _j=0; _j<10; ++_j) {
+        Matrix4f m = Matrix4f::random();
+        Vector4f b = Vector4f::random();
+        Vector4f x = Vector4f::random();
+        timer.start();
+        for (int _k=0; _k<1000000; ++_k) {
+          m.inverseProductInPlace(x);
+        }
+        timer.stop();
+      }
+      std::cout << "4x4 IP :\t" << timer.value() << endl;
+    }
+  #endif
+
+    std::cout << "\n\n";
+
+  return 0;
+}
+