Optimized the tensor padding code.

2025-03-13 18:37:27 +08:00 · 2014-08-26 09:47:18 -07:00 · 2014-08-26 09:47:18 -07:00 · 2959045f2f
commit 2959045f2f
parent 36fffe48f7
1 changed files with 81 additions and 14 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device

  enum {
    IsAligned = false,
-    PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
+    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    for (int i = 0; i < NumDims; ++i) {
-      if (i > 0) {
-        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
-        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
-      } else {
-        m_inputStrides[0] = 1;
-        m_outputStrides[0] = 1;
-      }
+    m_inputStrides[0] = 1;
+    m_outputStrides[0] = 1;
+    for (int i = 1; i < NumDims; ++i) {
+      m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
+      m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
    }
+    m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
  }

  typedef typename XprType::Scalar Scalar;
@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
-    for (int i = NumDims - 1; i >= 0; --i) {
+    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
        return Scalar(0);
@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
      inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
      index -= idx * m_outputStrides[i];
    }
+    if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
+      return Scalar(0);
+    }
+    inputIndex += (index - m_padding[0].first);
    return m_impl.coeff(inputIndex);
  }

-  /*  template<int LoadMode>
+  // Returns a packet made of the packetSize consecutive coefficients of the
+  // padded expression that start at flat output index `index`.
+  //
+  // Dimensions are visited from the last one down to dimension 0. At each
+  // level the [first, last] range covered by the packet is compared with the
+  // padding bounds of that dimension:
+  //   - entirely inside the leading or trailing padding: a packet of zeros;
+  //   - entirely between the two padding zones: the index is translated into
+  //     the input index space and the next dimension is examined;
+  //   - anything else: packetWithPossibleZero() builds the packet one
+  //     coefficient at a time.
+  //
+  // Every bound is held in an Index, the same type as `index` and the stride
+  // entries it is computed from, so it is never narrowed to int.
+  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
-    return m_impl.template packet<LoadMode>(index);
-    }*/
+    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+
+    // Keep the untouched index: the coefficient-wise fallback restarts from it.
+    const Index initialIndex = index;
+    Index inputIndex = 0;
+    for (int i = NumDims - 1; i > 0; --i) {
+      // Range covered by the packet; `index` has already had the contribution
+      // of the dimensions above i subtracted.
+      const Index first = index;
+      const Index last = index + packetSize - 1;
+      // Exclusive end of the leading padding, start of the trailing padding,
+      // and exclusive end of the trailing padding (m_outputStrides[i+1] is
+      // m_outputStrides[i] * m_dimensions[i]).
+      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
+      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
+      const Index lastPaddedRight = m_outputStrides[i+1];
+
+      if (last < lastPaddedLeft) {
+        // All the coefficients are in the leading padding zone.
+        return internal::pset1<PacketReturnType>(Scalar(0));
+      }
+      else if (first >= firstPaddedRight && last < lastPaddedRight) {
+        // All the coefficients are in the trailing padding zone.
+        return internal::pset1<PacketReturnType>(Scalar(0));
+      }
+      else if (first >= lastPaddedLeft && last < firstPaddedRight) {
+        // All the coefficients are between the 2 padding zones: accumulate
+        // this dimension's input offset and descend to the next dimension.
+        const Index idx = index / m_outputStrides[i];
+        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
+        index -= idx * m_outputStrides[i];
+      }
+      else {
+        // Every other case: the packet straddles a padding boundary.
+        return packetWithPossibleZero(initialIndex);
+      }
+    }
+
+    // Dimension 0: both strides are 1, so the bounds need no scaling.
+    const Index last = index + packetSize - 1;
+    const Index first = index;
+    const Index lastPaddedLeft = m_padding[0].first;
+    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
+    const Index lastPaddedRight = m_outputStrides[1];
+
+    if (last < lastPaddedLeft) {
+      // All the coefficients are in the leading padding zone.
+      return internal::pset1<PacketReturnType>(Scalar(0));
+    }
+    else if (first >= firstPaddedRight && last < lastPaddedRight) {
+      // All the coefficients are in the trailing padding zone.
+      return internal::pset1<PacketReturnType>(Scalar(0));
+    }
+    else if (first >= lastPaddedLeft && last < firstPaddedRight) {
+      // All the coefficients are between the 2 padding zones: load them
+      // straight from the wrapped evaluator.
+      inputIndex += (index - m_padding[0].first);
+      return m_impl.template packet<Unaligned>(inputIndex);
+    }
+    // Every other case: the packet straddles a padding boundary.
+    return packetWithPossibleZero(initialIndex);
+  }

  Scalar* data() const { return NULL; }

 protected:
+
+  // Coefficient-wise packet construction: evaluates coeff() at index,
+  // index+1, ... (one call per packet lane), stores the results in a scratch
+  // array declared with EIGEN_ALIGN_DEFAULT, and loads that array as a packet.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
+  {
+    typedef typename internal::remove_const<CoeffReturnType>::type LaneType;
+    static const int laneCount = internal::unpacket_traits<PacketReturnType>::size;
+    EIGEN_ALIGN_DEFAULT LaneType lanes[laneCount];
+    int lane = 0;
+    while (lane < laneCount) {
+      lanes[lane] = coeff(index + lane);
+      ++lane;
+    }
+    return internal::pload<PacketReturnType>(lanes);
+  }
+
  PaddingDimensions m_padding;
  Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
+  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
 };