Fix two performance issues in the product code.

Fix the positive-definite test in Cholesky. Remove #include <cstring> from CoreDeclarations.
2025-01-30 17:40:05 +08:00 · 2008-08-03 20:23:06 +00:00 · 2008-08-03 20:23:06 +00:00 · f81dfcf00b
commit f81dfcf00b
parent 49ae3fca89
5 changed files with 19 additions and 17 deletions
--- a/Eigen/CoreDeclarations
+++ b/Eigen/CoreDeclarations
@ -30,7 +30,6 @@

 #include <cstdlib>
 #include <cmath>
-#include <cstring>
 #include <complex>
 #include <cassert>
 #include <functional>
--- a/Eigen/src/Cholesky/Cholesky.h
+++ b/Eigen/src/Cholesky/Cholesky.h
@ -96,7 +96,7 @@ void Cholesky<MatrixType>::compute(const MatrixType& a)

  RealScalar x;
  x = ei_real(a.coeff(0,0));
-  m_isPositiveDefinite = x > precision<Scalar>() && ei_isMuchSmallerThan(ei_imag(m_matrix.coeff(0,0)), RealScalar(1));
+  m_isPositiveDefinite = x > precision<Scalar>() && ei_isMuchSmallerThan(ei_imag(a.coeff(0,0)), RealScalar(1));
  m_matrix.coeffRef(0,0) = ei_sqrt(x);
  m_matrix.col(0).end(size-1) = a.row(0).end(size-1).adjoint() / ei_real(m_matrix.coeff(0,0));
  for (int j = 1; j < size; ++j)
@ -105,7 +105,7 @@ void Cholesky<MatrixType>::compute(const MatrixType& a)
    x = ei_real(tmp);
    if (x < precision<Scalar>() || (!ei_isMuchSmallerThan(ei_imag(tmp), RealScalar(1))))
    {
-      m_isPositiveDefinite = m_isPositiveDefinite;
+      m_isPositiveDefinite = false;
      return;
    }
    m_matrix.coeffRef(j,j) = x = ei_sqrt(x);
@ -117,6 +117,7 @@ void Cholesky<MatrixType>::compute(const MatrixType& a)
      m_matrix.col(j).end(endSize) =
        (m_matrix.block(j+1, 0, endSize, j) * m_matrix.row(j).start(j).adjoint()).lazy();

+      // FIXME could use a.col instead of a.row
      m_matrix.col(j).end(endSize) = (a.row(j).end(endSize).adjoint()
        - m_matrix.col(j).end(endSize) ) / x;
    }
--- a/Eigen/src/Core/CacheFriendlyProduct.h
+++ b/Eigen/src/Core/CacheFriendlyProduct.h
@ -402,8 +402,9 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
  {
    ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
    
-    for (; skipColumns<PacketSize && alignedStart != lhsAlignmentOffset + alignmentStep*skipColumns; ++skipColumns)
-    {}
+    while (skipColumns<PacketSize &&
+           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
+      ++skipColumns;
    if (skipColumns==PacketSize)
    {
      // nothing can be aligned, no need to skip any column
@ -568,7 +569,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
  typedef typename ei_packet_traits<Scalar>::type Packet;
  const int PacketSize = sizeof(Packet)/sizeof(Scalar);

-  enum { AllAligned, EvenAligned, FirstAligned, NoneAligned };
+  enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
  const int rowsAtOnce = 4;
  const int peels = 2;
  const int PacketAlignedMask = PacketSize-1;
@ -595,8 +596,9 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
  {
    ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0  || size<PacketSize);
    
-    for (; skipRows<PacketSize && alignedStart != lhsAlignmentOffset + alignmentStep*skipRows; ++skipRows)
-    {}
+    while (skipRows<PacketSize &&
+           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize))
+      ++skipRows;
    if (skipRows==PacketSize)
    {
      // nothing can be aligned, no need to skip any column
@ -611,7 +613,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
    ei_internal_assert((alignmentPattern==NoneAligned) || PacketSize==1
      || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
  }
-
+  
  int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
  for (int i=skipRows; i<rowBound; i+=rowsAtOnce)
  {
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -637,7 +637,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
  typedef typename ei_traits<ProductType>::_LhsNested Lhs;
  enum {
      UseLhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (Lhs::Flags&ActualPacketAccessBit))
-                     && (!(Lhs::Flags & RowMajorBit)) };
+                     && (Lhs::Flags & RowMajorBit) };

  template<typename DestDerived>
  inline static void run(DestDerived& res, const ProductType& product)
--- a/Eigen/src/QR/HessenbergDecomposition.h
+++ b/Eigen/src/QR/HessenbergDecomposition.h
@ -49,8 +49,9 @@ template<typename _MatrixType> class HessenbergDecomposition
    enum {
      Size = MatrixType::RowsAtCompileTime,
      SizeMinusOne = MatrixType::RowsAtCompileTime==Dynamic
-                        ? Dynamic
-                        : MatrixType::RowsAtCompileTime-1};
+                   ? Dynamic
+                   : MatrixType::RowsAtCompileTime-1
+    };

    typedef Matrix<Scalar, SizeMinusOne, 1> CoeffVectorType;
    typedef Matrix<RealScalar, Size, 1> DiagonalType;
@ -59,8 +60,7 @@ template<typename _MatrixType> class HessenbergDecomposition
    typedef typename NestByValue<DiagonalCoeffs<MatrixType> >::RealReturnType DiagonalReturnType;

    typedef typename NestByValue<DiagonalCoeffs<
-        NestByValue<Block<
-          MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;
+        NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;

    /** This constructor initializes a HessenbergDecomposition object for
      * further use with HessenbergDecomposition::compute()
@ -171,11 +171,11 @@ void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVector

      // first let's do A = H A
      matA.corner(BottomRight,n-i-1,n-i-1) -= ((ei_conj(h) * matA.col(i).end(n-i-1)) *
-        (matA.col(i).end(n-i-1).adjoint() * matA.corner(BottomRight,n-i-1,n-i-1)).lazy()).lazy();
+        (matA.col(i).end(n-i-1).adjoint() * matA.corner(BottomRight,n-i-1,n-i-1))).lazy();

      // now let's do A = A H
-      matA.corner(BottomRight,n,n-i-1) -= ((matA.corner(BottomRight,n,n-i-1) * matA.col(i).end(n-i-1)).lazy() *
-        (h * matA.col(i).end(n-i-1).adjoint())).lazy();
+      matA.corner(BottomRight,n,n-i-1) -= ((matA.corner(BottomRight,n,n-i-1) * matA.col(i).end(n-i-1))
+                                        * (h * matA.col(i).end(n-i-1).adjoint())).lazy();

      matA.col(i).coeffRef(i+1) = beta;
      hCoeffs.coeffRef(i) = h;