Evaluators: add Block evaluator as dumb wrapper, add slice vectorization.

2025-03-07 18:27:40 +08:00 · 2011-03-31 13:50:52 +01:00 · 2011-03-31 13:50:52 +01:00 · d90a8ee8bd
commit d90a8ee8bd
parent b471161f28
3 changed files with 143 additions and 1 deletions
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@ -289,6 +289,64 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
  }
 };

+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
+{
+  inline static void run(const DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst.const_cast_derived());
+    SrcEvaluatorType srcEvaluator(src);
+
+    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      alignable = PacketTraits::AlignedOnScalar,
+      dstAlignment = alignable ? Aligned : int(assign_traits<DstXprType,SrcXprType>::DstIsAligned) ,
+      srcAlignment = assign_traits<DstXprType,SrcXprType>::JointAlignment
+    };
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || assign_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
+                       : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner<alignedStart ; ++inner) {
+	Index row = dst.rowIndexByOuterInner(outer, inner);
+	Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+      }
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
+	Index row = dst.rowIndexByOuterInner(outer, inner);
+	Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.template writePacket<dstAlignment>(row, col, srcEvaluator.template packet<srcAlignment>(row, col));
+      }
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
+	Index row = dst.rowIndexByOuterInner(outer, inner);
+	Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+      }
+
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};

 // Based on DenseBase::LazyAssign()

--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@ -368,6 +368,73 @@ protected:
  PlainObject m_result;
 };

+// -------------------- Block --------------------
+//
+// This evaluator is implemented as a dumb wrapper around Block expression class.
+// TODO: Make this a real evaluator
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> 
+struct evaluator_impl<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >
+{
+  typedef Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> BlockType;
+  evaluator_impl(const BlockType& block) : m_block(block) { }
+ 
+  typedef typename BlockType::Index Index;
+  typedef typename BlockType::Scalar Scalar;
+  typedef typename BlockType::CoeffReturnType CoeffReturnType;
+  typedef typename BlockType::PacketScalar PacketScalar;
+  typedef typename BlockType::PacketReturnType PacketReturnType;
+ 
+ 
+  CoeffReturnType coeff(Index i, Index j) const 
+  { 
+    return m_block.coeff(i,j); 
+  }
+  
+  CoeffReturnType coeff(Index index) const 
+  { 
+    return m_block.coeff(index); 
+  }
+
+  Scalar& coeffRef(Index i, Index j) 
+  { 
+    return m_block.const_cast_derived().coeffRef(i,j); 
+  }
+  
+  Scalar& coeffRef(Index index) 
+  { 
+    return m_block.const_cast_derived().coeffRef(index); 
+  }
+ 
+  template<int LoadMode> 
+  PacketReturnType packet(Index row, Index col) const 
+  { 
+    return m_block.template packet<LoadMode>(row, col); 
+  }
+
+  template<int LoadMode> 
+  PacketReturnType packet(Index index) const 
+  { 
+    return m_block.template packet<LoadMode>(index); 
+  }
+  
+  template<int StoreMode> 
+  void writePacket(Index row, Index col, const PacketScalar& x) 
+  { 
+    m_block.const_cast_derived().template writePacket<StoreMode>(row, col, x); 
+  }
+  
+  template<int StoreMode> 
+  void writePacket(Index index, const PacketScalar& x) 
+  { 
+    m_block.const_cast_derived().template writePacket<StoreMode>(index, x); 
+  }
+ 
+protected:
+  const BlockType& m_block;
+};
+
+
 } // namespace internal

 #endif // EIGEN_COREEVALUATORS_H
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@ -98,6 +98,7 @@ void test_evaluators()
  VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose());  // transpose
  VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity());  // unary
  VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + m3);  // binary
+  VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2));  // block

  // test linear traversal
  VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero());  // matrix, nullary
@ -118,11 +119,27 @@ void test_evaluators()

  // test linear vectorization
  MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6);
-  ArrayXXf aX(6,6), aXsrc = MatrixXf::Random(6,6);
+  ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6);
  VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc);  // matrix
  VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc);  // array
  VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose());  // transpose
  VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6));  // nullary
  VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc);  // unary
  VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc);  // binary
+
+  // test blocks and slice vectorization
+  VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0)));
+  VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6));
+
+  Matrix4f m4ref = m4;
+  copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2));
+  m4ref.block(1, 1, 2, 3) = m3.bottomRows(2);
+  VERIFY_IS_APPROX(m4, m4ref);
+
+  mX.setIdentity(20,20);
+  MatrixXf mXref = MatrixXf::Identity(20,20);
+  mXsrc = MatrixXf::Random(9,12);
+  copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc);
+  mXref.block(4, 4, 9, 12) = mXsrc;
+  VERIFY_IS_APPROX(mX, mXref);
 }