mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-13 18:37:27 +08:00
Optimized the tensor padding code.
This commit is contained in:
parent
36fffe48f7
commit
2959045f2f
@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
}
|
||||
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (i > 0) {
|
||||
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
||||
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
||||
} else {
|
||||
m_inputStrides[0] = 1;
|
||||
m_outputStrides[0] = 1;
|
||||
}
|
||||
m_inputStrides[0] = 1;
|
||||
m_outputStrides[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
||||
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
||||
}
|
||||
m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
|
||||
}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
Index inputIndex = 0;
|
||||
for (int i = NumDims - 1; i >= 0; --i) {
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx = index / m_outputStrides[i];
|
||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||
return Scalar(0);
|
||||
@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
index -= idx * m_outputStrides[i];
|
||||
}
|
||||
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
|
||||
return Scalar(0);
|
||||
}
|
||||
inputIndex += (index - m_padding[0].first);
|
||||
return m_impl.coeff(inputIndex);
|
||||
}
|
||||
|
||||
/* template<int LoadMode>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
return m_impl.template packet<LoadMode>(index);
|
||||
}*/
|
||||
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
const Index initialIndex = index;
|
||||
Index inputIndex = 0;
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const int first = index;
|
||||
const int last = index + packetSize - 1;
|
||||
const int lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
|
||||
const int firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
|
||||
const int lastPaddedRight = m_outputStrides[i+1];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
const Index idx = index / m_outputStrides[i];
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
index -= idx * m_outputStrides[i];
|
||||
}
|
||||
else {
|
||||
// Every other case
|
||||
return packetWithPossibleZero(initialIndex);
|
||||
}
|
||||
}
|
||||
|
||||
const Index last = index + packetSize - 1;
|
||||
const Index first = index;
|
||||
const int lastPaddedLeft = m_padding[0].first;
|
||||
const int firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
|
||||
const int lastPaddedRight = m_outputStrides[1];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
inputIndex += (index - m_padding[0].first);
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
}
|
||||
// Every other case
|
||||
return packetWithPossibleZero(initialIndex);
|
||||
}
|
||||
|
||||
// Raw-buffer accessor: this evaluator always reports NULL.
Scalar* data() const
{
  return NULL;
}
|
||||
|
||||
protected:
|
||||
|
||||
// Coefficient-wise packet construction: evaluates coeff() for each of the
// `packetSize` consecutive output offsets starting at `index`, stores the
// results in an aligned scratch buffer and loads that buffer as one packet.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
  static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
  // Scratch storage is declared with EIGEN_ALIGN_DEFAULT so that the aligned
  // pload below can be used on it.
  EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type lanes[packetSize];
  int lane = 0;
  while (lane < packetSize) {
    lanes[lane] = coeff(index + lane);
    ++lane;
  }
  return internal::pload<PacketReturnType>(lanes);
}
|
||||
|
||||
PaddingDimensions m_padding;
|
||||
Dimensions m_dimensions;
|
||||
array<Index, NumDims> m_outputStrides;
|
||||
array<Index, NumDims+1> m_outputStrides;
|
||||
array<Index, NumDims> m_inputStrides;
|
||||
TensorEvaluator<ArgType, Device> m_impl;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user