* make PartialLU avoid to generate inf/nan when given a singular matrix

(result undefined, but at least it won't take forever on intel 387) * add lots of comments, especially to LU.h * fix stuff I had broken in Inverse.h * split inverse test
2025-04-06 19:10:36 +08:00 · 2009-10-20 00:36:07 -04:00 · 2009-10-20 00:36:07 -04:00 · 13f31b8daf
commit 13f31b8daf
parent d1db1352f5
5 changed files with 82 additions and 27 deletions
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@ -200,7 +200,7 @@ struct ei_compute_inverse
 {
  static inline void run(const MatrixType& matrix, ResultType* result)
  {
-    result = matrix.partialLu().inverse();
+    *result = matrix.partialLu().inverse();
  }
 };

@ -282,7 +282,9 @@ inline void MatrixBase<Derived>::computeInverse(ResultType *result) const
 template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainMatrixType MatrixBase<Derived>::inverse() const
 {
-  return inverse(*this);
+  typename MatrixBase<Derived>::PlainMatrixType result;
+  computeInverse(&result);
+  return result;
 }


--- a/Eigen/src/LU/LU.h
+++ b/Eigen/src/LU/LU.h
@ -410,28 +410,32 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)
  const int rows = matrix.rows();
  const int cols = matrix.cols();

+  // will store the transpositions, before we accumulate them at the end.
+  // can't accumulate on-the-fly because that will be done in reverse order for the rows.
  IntColVectorType rows_transpositions(matrix.rows());
  IntRowVectorType cols_transpositions(matrix.cols());
-  int number_of_transpositions = 0;
+  int number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. rows_transpositions[i]!=i

-  RealScalar biggest = RealScalar(0);
-  m_nonzero_pivots = size;
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
  m_maxpivot = RealScalar(0);
  for(int k = 0; k < size; ++k)
  {
+    // First, we need to find the pivot.
+
+    // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
    int row_of_biggest_in_corner, col_of_biggest_in_corner;
    RealScalar biggest_in_corner;
-
    biggest_in_corner = m_lu.corner(Eigen::BottomRight, rows-k, cols-k)
                        .cwise().abs()
                        .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
-    row_of_biggest_in_corner += k;
-    col_of_biggest_in_corner += k;
-    if(k==0) biggest = biggest_in_corner;
+    row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
+    col_of_biggest_in_corner += k; // need to add k to them.

-    // if the corner is exactly zero, terminate to avoid generating nan/inf values
+    // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
    if(biggest_in_corner == RealScalar(0))
    {
+      // before exiting, make sure to initialize the still uninitialized row_transpositions
+      // in a sane state without destroying what we already have.
      m_nonzero_pivots = k;
      for(int i = k; i < size; i++)
      {
@ -443,6 +447,9 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)

    if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner;

+    // Now that we've found the pivot, we need to apply the row/col swaps to
+    // bring it to the location (k,k).
+
    rows_transpositions.coeffRef(k) = row_of_biggest_in_corner;
    cols_transpositions.coeffRef(k) = col_of_biggest_in_corner;
    if(k != row_of_biggest_in_corner) {
@ -453,12 +460,19 @@ LU<MatrixType>& LU<MatrixType>::compute(const MatrixType& matrix)
      m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));
      ++number_of_transpositions;
    }
+
+    // Now that the pivot is at the right location, we update the remaining
+    // bottom-right corner by Gaussian elimination.
+    
    if(k<rows-1)
      m_lu.col(k).end(rows-k-1) /= m_lu.coeff(k,k);
    if(k<size-1)
      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).end(rows-k-1) * m_lu.row(k).end(cols-k-1);
  }

+  // the main loop is over, we still have to accumulate the transpositions to find the
+  // permutations P and Q
+
  for(int k = 0; k < matrix.rows(); ++k) m_p.coeffRef(k) = k;
  for(int k = size-1; k >= 0; --k)
    std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k)));
@ -549,7 +563,10 @@ struct ei_lu_kernel_impl : public ReturnByValue<ei_lu_kernel_impl<MatrixType> >
      if(ei_abs(m_lu.matrixLU().coeff(i,i)) > premultiplied_threshold)
        pivots.coeffRef(p++) = i;
    ei_assert(p == m_rank && "You hit a bug in Eigen! Please report (backtrace and matrix)!");
-    
+
+    // we construct a temporaty trapezoid matrix m, by taking the U matrix and
+    // permuting the rows and cols to bring the nonnegligible pivots to the top of
+    // the main diagonal. We need that to be able to apply our triangular solvers.
    // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
           LUType::MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
@ -560,18 +577,22 @@ struct ei_lu_kernel_impl : public ReturnByValue<ei_lu_kernel_impl<MatrixType> >
      m.row(i).end(cols-i) = m_lu.matrixLU().row(pivots.coeff(i)).end(cols-i);
    }
    m.block(0, 0, m_rank, m_rank).template triangularView<StrictlyLowerTriangular>().setZero();
-    
    for(int i = 0; i < m_rank; ++i)
      m.col(i).swap(m.col(pivots.coeff(i)));

+    // ok, we have our trapezoid matrix, we can apply the triangular solver.
+    // notice that the math behind this suggests that we should apply this to the
+    // negative of the RHS, but for performance we just put the negative sign elsewhere, see below.
    m.corner(TopLeft, m_rank, m_rank)
     .template triangularView<UpperTriangular>().solveInPlace(
        m.corner(TopRight, m_rank, dimker)
      );

+    // now we must undo the column permutation that we had applied!
    for(int i = m_rank-1; i >= 0; --i)
      m.col(i).swap(m.col(pivots.coeff(i)));

+    // see the negative sign in the next line, that's what we were talking about above.
    for(int i = 0; i < m_rank; ++i) dst.row(m_lu.permutationQ().coeff(i)) = -m.row(i).end(dimker);
    for(int i = m_rank; i < cols; ++i) dst.row(m_lu.permutationQ().coeff(i)).setZero();
    for(int k = 0; k < dimker; ++k) dst.coeffRef(m_lu.permutationQ().coeff(m_rank+k), k) = Scalar(1);
--- a/Eigen/src/LU/PartialLU.h
+++ b/Eigen/src/LU/PartialLU.h
@ -216,6 +216,7 @@ struct ei_partial_lu_impl
  typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
  typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+  typedef typename MatrixType::RealScalar RealScalar;

  /** \internal performs the LU decomposition in-place of the matrix \a lu
    * using an unblocked algorithm.
@ -224,8 +225,12 @@ struct ei_partial_lu_impl
    * vector \a row_transpositions which must have a size equal to the number
    * of columns of the matrix \a lu, and an integer \a nb_transpositions
    * which returns the actual number of transpositions.
+    *
+    * \returns false if some pivot is exactly zero, in which case the matrix is left with
+    *          undefined coefficients (to avoid generating inf/nan values). Returns true
+    *          otherwise.
    */
-  static void unblocked_lu(MatrixType& lu, int* row_transpositions, int& nb_transpositions)
+  static bool unblocked_lu(MatrixType& lu, int* row_transpositions, int& nb_transpositions)
  {
    const int rows = lu.rows();
    const int size = std::min(lu.rows(),lu.cols());
@ -233,9 +238,22 @@ struct ei_partial_lu_impl
    for(int k = 0; k < size; ++k)
    {
      int row_of_biggest_in_col;
-      lu.col(k).end(rows-k).cwise().abs().maxCoeff(&row_of_biggest_in_col);
+      RealScalar biggest_in_corner
+        = lu.col(k).end(rows-k).cwise().abs().maxCoeff(&row_of_biggest_in_col);
      row_of_biggest_in_col += k;

+      if(biggest_in_corner == 0) // the pivot is exactly zero: the matrix is singular
+      {
+        // end quickly, avoid generating inf/nan values. Although in this unblocked_lu case
+        // the result is still valid, there's no need to boast about it because
+        // the blocked_lu code can't guarantee the same.
+        // before exiting, make sure to initialize the still uninitialized row_transpositions
+        // in a sane state without destroying what we already have.
+        for(int i = k; i < size; i++)
+          row_transpositions[i] = i;
+        return false;
+      }
+
      row_transpositions[k] = row_of_biggest_in_col;

      if(k != row_of_biggest_in_col)
@ -252,6 +270,7 @@ struct ei_partial_lu_impl
        lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).end(rrows) * lu.row(k).end(rsize);
      }
    }
+    return true;
  }

  /** \internal performs the LU decomposition in-place of the matrix represented
@ -263,11 +282,15 @@ struct ei_partial_lu_impl
    * of columns of the matrix \a lu, and an integer \a nb_transpositions
    * which returns the actual number of transpositions.
    *
+    * \returns false if some pivot is exactly zero, in which case the matrix is left with
+    *          undefined coefficients (to avoid generating inf/nan values). Returns true
+    *          otherwise.
+    *
    * \note This very low level interface using pointers, etc. is to:
    *   1 - reduce the number of instanciations to the strict minimum
    *   2 - avoid infinite recursion of the instanciations with Block<Block<Block<...> > >
    */
-  static void blocked_lu(int rows, int cols, Scalar* lu_data, int luStride, int* row_transpositions, int& nb_transpositions, int maxBlockSize=256)
+  static bool blocked_lu(int rows, int cols, Scalar* lu_data, int luStride, int* row_transpositions, int& nb_transpositions, int maxBlockSize=256)
  {
    MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
    MatrixType lu(lu1,0,0,rows,cols);
@ -277,8 +300,7 @@ struct ei_partial_lu_impl
    // if the matrix is too small, no blocking:
    if(size<=16)
    {
-      unblocked_lu(lu, row_transpositions, nb_transpositions);
-      return;
+      return unblocked_lu(lu, row_transpositions, nb_transpositions);
    }

    // automatically adjust the number of subdivisions to the size
@ -311,12 +333,20 @@ struct ei_partial_lu_impl
      int nb_transpositions_in_panel;
      // recursively calls the blocked LU algorithm with a very small
      // blocking size:
-      blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
-                 row_transpositions+k, nb_transpositions_in_panel, 16);
+      if(!blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
+                   row_transpositions+k, nb_transpositions_in_panel, 16))
+      {
+        // end quickly with undefined coefficients, just avoid generating inf/nan values.
+        // before exiting, make sure to initialize the still uninitialized row_transpositions
+        // in a sane state without destroying what we already have.
+        for(int i=k; i<size; ++i)
+          row_transpositions[i] = i;
+        return false;
+      }
      nb_transpositions += nb_transpositions_in_panel;

      // update permutations and apply them to A10
-      for(int i=k;i<k+bs; ++i)
+      for(int i=k; i<k+bs; ++i)
      {
        int piv = (row_transpositions[i] += k);
        A_0.row(i).swap(A_0.row(piv));
@ -334,6 +364,7 @@ struct ei_partial_lu_impl
        A22 -= A21 * A12;
      }
    }
+    return true;
  }
 };

--- a/doc/snippets/LU_solve.cpp
+++ b/doc/snippets/LU_solve.cpp
@ -9,4 +9,3 @@ if((m*x).isApprox(y))
 }
 else
  cout << "The equation mx=y does not have any solution." << endl;
-
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@ -83,19 +83,21 @@ void test_inverse()
 {
  int s;
  for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST( inverse(Matrix<double,1,1>()) );
-    CALL_SUBTEST( inverse(Matrix2d()) );
-    CALL_SUBTEST( inverse(Matrix3f()) );
-    CALL_SUBTEST( inverse(Matrix4f()) );
+    CALL_SUBTEST1( inverse(Matrix<double,1,1>()) );
+    CALL_SUBTEST2( inverse(Matrix2d()) );
+    CALL_SUBTEST3( inverse(Matrix3f()) );
+    CALL_SUBTEST4( inverse(Matrix4f()) );
    s = ei_random<int>(50,320);
-    CALL_SUBTEST( inverse(MatrixXf(s,s)) );
+    CALL_SUBTEST5( inverse(MatrixXf(s,s)) );
    s = ei_random<int>(25,100);
-    CALL_SUBTEST( inverse(MatrixXcd(s,s)) );
+    CALL_SUBTEST6( inverse(MatrixXcd(s,s)) );
  }

+#ifdef EIGEN_TEST_PART_4
  // test some tricky cases for 4x4 matrices
  VERIFY_IS_APPROX((Matrix4f() << 0,0,1,0, 1,0,0,0, 0,1,0,0, 0,0,0,1).finished().inverse(),
                   (Matrix4f() << 0,1,0,0, 0,0,1,0, 1,0,0,0, 0,0,0,1).finished());
  VERIFY_IS_APPROX((Matrix4f() << 1,0,0,0, 0,0,1,0, 0,0,0,1, 0,1,0,0).finished().inverse(),
                   (Matrix4f() << 1,0,0,0, 0,0,0,1, 0,1,0,0, 0,0,1,0).finished());
+#endif
 }