make colmaj * vector use pointers only

2025-01-24 14:45:14 +08:00 · 2010-07-11 16:01:48 +02:00 · 2010-07-11 16:01:48 +02:00 · 8e3c4283f5
commit 8e3c4283f5
parent ff96c94043
5 changed files with 19 additions and 16 deletions
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
    {
      ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
      ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
-                       bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)
-                       /*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha);
+                       bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
    }
 };

@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
        &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
-        actualRhs, actualRhs.innerStride(),
+        actualRhs.data(), actualRhs.innerStride(),
        actualDest, 1,
        actualAlpha);

--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
        ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
            r, actualPanelWidth,
            &(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
-            other.segment(startBlock, actualPanelWidth), other.innerStride(),
+            &other.coeff(startBlock), other.innerStride(),
            &(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
      }
    }
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
 }

 // FIXME
-// #ifdef EIGEN_HAS_FUSE_CJMADD
+#ifndef EIGEN_HAS_FUSE_CJMADD
+#define EIGEN_HAS_FUSE_CJMADD
+#endif 
+#ifdef EIGEN_HAS_FUSE_CJMADD
  #define MADD(CJ,A,B,C,T)  C = CJ.pmadd(A,B,C);
-// #else
-  //#define MADD(CJ,A,B,C,T)  T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
-//   #define MADD(CJ,A,B,C,T)  T = B; T = CJ.pmul(A,T); 
-// #endif
+#else
+  #define MADD(CJ,A,B,C,T)  T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
+#endif

 // optimized GEneral packed Block * packed Panel product kernel
 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend");
        const RhsScalar* blB = unpackedB;
        for(Index k=0; k<depth; k++)
        {
+          #ifndef EIGEN_HAS_FUSE_CJMADD
          RhsPacket T0;
+          #endif
          MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
          blB += RhsPacketSize;
          blA += LhsPacketSize;
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
 typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
 typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;

-template<typename RhsType>
 EIGEN_DONT_INLINE static void run(
  Index rows, Index cols,
  const LhsScalar* lhs, Index lhsStride,
-  const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr,
+  const RhsScalar* rhs, Index rhsIncr,
  ResScalar* res, Index resIncr,
  ResScalar alpha)
 {
-  EIGEN_UNUSED_VARIABLE(rhsIncr);
  ei_internal_assert(resIncr==1);
  #ifdef _EIGEN_ACCUMULATE_PACKETS
  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run(
  Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
  for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
  {
-    RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]),   ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]),
-              ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]);
+    RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
+              ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
+              ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
+              ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);

    // this helps a lot generating better binary code
    const LhsScalar *lhs0 = lhs + i*lhsStride,     *lhs1 = lhs + (i+offset1)*lhsStride,
@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run(
  {
    for (Index i=start; i<end; ++i)
    {
-      RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]);
+      RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
      const LhsScalar* lhs0 = lhs + i*lhsStride;

      if (Vectorizable)
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
        ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
            r, actualPanelWidth,
            &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
-            rhs.segment(pi, actualPanelWidth), rhs.innerStride(),
+            &rhs.coeff(pi), rhs.innerStride(),
            &res.coeffRef(s), res.innerStride(), alpha);
      }
    }