* introduce ei_alignmentOffset(MatrixBase&,Integer)

  (it couldn't be put in Memory.h, as it needs the definition of MatrixBase)
* make Redux use it
2024-12-15 07:10:37 +08:00 · 2009-12-16 08:53:14 -05:00 · 2009-12-16 08:53:14 -05:00 · 5cb779e5e1
commit 5cb779e5e1
parent e0aa29121f
3 changed files with 38 additions and 5 deletions
--- a/Eigen/src/Core/Coeffs.h
+++ b/Eigen/src/Core/Coeffs.h
@ -379,6 +379,38 @@ EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyPacket(int index, const Matrix
    other.derived().template packet<LoadMode>(index));
 }

+
+template<typename Derived, typename Integer, bool JustReturnZero> // primary template: the one selected when JustReturnZero is true
+struct ei_alignmentOffset_impl // \internal helper behind ei_alignmentOffset(const MatrixBase&, Integer)
+{
+  inline static Integer run(const MatrixBase<Derived>&, Integer) // both arguments are unnamed and ignored
+  { return 0; } // the offset is unconditionally 0 in this variant
+};
+
+template<typename Derived, typename Integer>
+struct ei_alignmentOffset_impl<Derived, Integer, false> // partial specialization for JustReturnZero == false
+{
+  inline static Integer run(const MatrixBase<Derived>& m, Integer maxOffset) // m: expression to inspect; maxOffset: forwarded unchanged
+  {
+    return ei_alignmentOffset(&m.const_cast_derived().coeffRef(0,0), maxOffset); // takes the address of coefficient (0,0); presumably resolves to the const Scalar* variant from Memory.h
+  }
+};
+
+/** \internal \returns the number of elements which have to be skipped, starting
+  * from the address of coeffRef(0,0), to find the first 16-byte aligned element.
+  *
+  * \note If the expression has the AlignedBit or lacks the DirectAccessBit, this function returns 0.
+  *
+  * There is also the variant ei_alignmentOffset(const Scalar*, Integer) defined in Memory.h.
+  */
+template<typename Derived, typename Integer>
+inline static Integer ei_alignmentOffset(const MatrixBase<Derived>& m, Integer maxOffset)
+{
+  return ei_alignmentOffset_impl<Derived, Integer, // dispatch on Derived::Flags at compile time
+                                 (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)> // true selects the variant whose run() just returns 0
+                                 ::run(m, maxOffset); // false selects the variant that inspects the address of coeffRef(0,0)
+}
+
 #endif

 #endif // EIGEN_COEFFS_H
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@ -209,10 +209,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
  {
    const int size = mat.size();
    const int packetSize = ei_packet_traits<Scalar>::size;
-    const int alignedStart =  (Derived::Flags & AlignedBit)
-                           || !(Derived::Flags & DirectAccessBit)
-                           ? 0
-                           : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
+    const int alignedStart = ei_alignmentOffset(mat,size);
    enum {
      alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
                ? Aligned : Unaligned
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@ -209,7 +209,11 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
  ei_conditional_aligned_free<Align>(ptr);
 }

-/** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */
+/** \internal \returns the number of elements which have to be skipped to
+  * find the first 16-byte aligned element
+  *
+  * There is also the variant ei_alignmentOffset(const MatrixBase&, Integer) defined in Coeffs.h.
+  */
 template<typename Scalar, typename Integer>
 inline static Integer ei_alignmentOffset(const Scalar* ptr, Integer maxOffset)
 {