Fix a couple of issues introduced in the previous commit:

* removed DirectAccessBit from Part
* use a template specialization in inverseProduct() to transform a Part xpr to a Flagged xpr
2025-01-24 14:45:14 +08:00 · 2008-07-26 23:05:44 +00:00 · 2008-07-26 23:05:44 +00:00 · e9e5261664
commit e9e5261664
parent e77ccf2928
5 changed files with 47 additions and 15 deletions
--- a/Eigen/src/Core/InverseProduct.h
+++ b/Eigen/src/Core/InverseProduct.h
@ -25,8 +25,12 @@
 #ifndef EIGEN_INVERSEPRODUCT_H
 #define EIGEN_INVERSEPRODUCT_H

+template<typename XprType> struct ei_is_part { enum {value=false}; };  // primary template: value is false for every type that is not a Part<...>
+template<typename XprType, unsigned int Mode> struct ei_is_part<Part<XprType,Mode> > { enum {value=true}; };  // specialization: value is true for Part<XprType,Mode>
+
 template<typename Lhs, typename Rhs,
-  int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
+  int TriangularPart = ei_is_part<Lhs>::value ? -1  // this is to solve ambiguous specializations
+                     : (int(Lhs::Flags) & LowerTriangularBit)
                     ? Lower
                     : (int(Lhs::Flags) & UpperTriangularBit)
                     ? Upper
@ -35,6 +39,16 @@ template<typename Lhs, typename Rhs,
  >
 struct ei_trisolve_selector;

+/* Transform a Part xpr to a Flagged xpr.
+ *
+ * This specialization matches any Part<Lhs,LhsMode> left-hand side, whatever the
+ * values of TriangularPart and StorageOrder. Its run() forwards to the selector
+ * instantiated for Flagged<Lhs,LhsMode,0>, handing it the matrix returned by
+ * Part::_expression() together with the unchanged right-hand side. The forwarded
+ * selector is named with only two template arguments, so its TriangularPart and
+ * StorageOrder take their default values.
+ */
+template<typename Lhs, unsigned int LhsMode, typename Rhs, int TriangularPart, int StorageOrder>
+struct ei_trisolve_selector<Part<Lhs,LhsMode>,Rhs,TriangularPart,StorageOrder>
+{
+  static void run(const Part<Lhs,LhsMode>& lhs, Rhs& other)
+  {
+    ei_trisolve_selector<Flagged<Lhs,LhsMode,0>,Rhs>::run(lhs._expression(), other);
+  }
+};
+
 // forward substitution, row-major
 template<typename Lhs, typename Rhs>
 struct ei_trisolve_selector<Lhs,Rhs,Lower,RowMajor>
@ -102,12 +116,12 @@ struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
      int blockyEnd = (std::max(size-5,0)/4)*4;
      for(int i=0; i<blockyEnd;)
      {
-        int startBlock = i;
-        int endBlock = startBlock+4;
-        Matrix<Scalar,4,1> btmp;
        /* Let's process the 4x4 sub-matrix as usual.
         * btmp stores the diagonal coefficients used to update the remaining part of the result.
         */
+        int startBlock = i;
+        int endBlock = startBlock+4;
+        Matrix<Scalar,4,1> btmp;
        for (;i<endBlock;++i)
        {
          if(!(Lhs::Flags & UnitDiagBit))
@ -135,8 +149,10 @@ struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
      {
        if(!(Lhs::Flags & UnitDiagBit))
          other.coeffRef(i,c) /= lhs.coeff(i,i);
-        // NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to
-        // get the address of the start of the row
+
+        /* NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to
+         * get the address of the start of the row
+         */
        other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1);
      }
      if(!(Lhs::Flags & UnitDiagBit))
@ -146,6 +162,7 @@ struct ei_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
 };

 // backward substitution, col-major
+// see the previous specialization for details on the algorithm
 template<typename Lhs, typename Rhs>
 struct ei_trisolve_selector<Lhs,Rhs,Upper,ColMajor>
 {
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@ -32,7 +32,7 @@
  * This class is the base that is inherited by all matrix, vector, and expression
  * types. Most of the Eigen API is contained in this class. Other important classes for
  * the Eigen API are Matrix, Cwise, and PartialRedux.
-  * 
+  *
  * Note that some methods are defined in the \ref Array module.
  *
  * \param Derived is the derived type, e.g. a matrix type, or an expression, etc.
@ -550,7 +550,7 @@ template<typename Derived> class MatrixBase
    template<typename OtherDerived>
    typename ei_eval<Derived>::type
    cross(const MatrixBase<OtherDerived>& other) const;
-    typename ei_eval<Derived>::type perpendicular(void) const;
+    typename ei_eval<Derived>::type someOrthogonal(void) const;
 };

 #endif // EIGEN_MATRIXBASE_H
--- a/Eigen/src/Core/Part.h
+++ b/Eigen/src/Core/Part.h
@ -53,7 +53,7 @@ struct ei_traits<Part<MatrixType, Mode> >
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = (_MatrixTypeNested::Flags & (HereditaryBits | DirectAccessBit) & (~(PacketAccessBit | LinearAccessBit))) | Mode,
+    Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
  };
 };
@ -109,6 +109,9 @@ template<typename MatrixType, unsigned int Mode> class Part
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }

+    /** \internal \returns a const reference to the m_matrix member wrapped by this Part */
+    const MatrixType& _expression() const { return m_matrix; }
+
    /** discard any writes to a row */
    const Block<Part, 1, ColsAtCompileTime> row(int i) { return Base::row(i); }
    const Block<Part, 1, ColsAtCompileTime> row(int i) const { return Base::row(i); }
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -378,7 +378,7 @@ struct ei_product_coeff_vectorized_dyn_selector
  }
 };

-// NOTE the 2 following specializations are because taking .col(0) on a vector is a bit slower
+// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
 template<typename Lhs, typename Rhs, int RhsCols>
 struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
 {
@ -403,6 +403,18 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
  }
 };

+// 1x1 case: both the <Lhs,Rhs,1,RhsCols> and the <Lhs,Rhs,LhsRows,1> specializations
+// match when LhsRows and RhsCols are both 1, and neither is more specialized than
+// the other, so this full match is needed to resolve the ambiguity.
+// The single result coefficient is the dot product of lhs and rhs; the row and col
+// indices carry no information here, hence both parameter names are commented out.
+template<typename Lhs, typename Rhs>
+struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+{
+  inline static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  {
+    res = ei_dot_impl<
+      Lhs,
+      Rhs,
+      LinearVectorization, NoUnrolling>::run(lhs, rhs);
+  }
+};
+
 template<typename Lhs, typename Rhs>
 struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs>
 {
@ -518,7 +530,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,NoDirectA
 };

 // optimized cache friendly colmajor * vector path for matrix with direct access flag
-// NOTE this path coul also be enabled for expressions if we add runtime align queries
+// NOTE this path could also be enabled for expressions if we add runtime align queries
 template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
 struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirectAccess,1,RhsOrder,RhsAccess>
 {
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@ -23,8 +23,8 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

-#ifndef EIGEN_CROSS_H
-#define EIGEN_CROSS_H
+#ifndef EIGEN_ORTHOMETHODS_H
+#define EIGEN_ORTHOMETHODS_H

 /** \geometry_module
  * \returns the cross product of \c *this and \a other */
@ -34,7 +34,7 @@ inline typename ei_eval<Derived>::type
 MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3);
-  
+
  // Note that there is no need for an expression here since the compiler
  // optimize such a small temporary very well (even within a complex expression)
  const typename ei_nested<Derived,2>::type lhs(derived());
@ -107,4 +107,4 @@ MatrixBase<Derived>::someOrthogonal() const
  return ei_perpendicular_selector<Derived>::run(derived());
 }

-#endif // EIGEN_CROSS_H
+#endif // EIGEN_ORTHOMETHODS_H