Optimized the tensor padding code.

This commit is contained in:
Benoit Steiner 2014-08-26 09:47:18 -07:00
parent 36fffe48f7
commit 2959045f2f

View File

@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
enum {
IsAligned = false,
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
}
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
for (int i = 0; i < NumDims; ++i) {
if (i > 0) {
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
} else {
m_inputStrides[0] = 1;
m_outputStrides[0] = 1;
}
m_inputStrides[0] = 1;
m_outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
}
m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
}
typedef typename XprType::Scalar Scalar;
@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
Index inputIndex = 0;
for (int i = NumDims - 1; i >= 0; --i) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
return Scalar(0);
@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
return Scalar(0);
}
inputIndex += (index - m_padding[0].first);
return m_impl.coeff(inputIndex);
}
/* template<int LoadMode>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
return m_impl.template packet<LoadMode>(index);
}*/
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
const Index initialIndex = index;
Index inputIndex = 0;
for (int i = NumDims - 1; i > 0; --i) {
const int first = index;
const int last = index + packetSize - 1;
const int lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
const int firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
const int lastPaddedRight = m_outputStrides[i+1];
if (last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(Scalar(0));
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(Scalar(0));
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
// all the coefficient are between the 2 padding zones.
const Index idx = index / m_outputStrides[i];
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
else {
// Every other case
return packetWithPossibleZero(initialIndex);
}
}
const Index last = index + packetSize - 1;
const Index first = index;
const int lastPaddedLeft = m_padding[0].first;
const int firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
const int lastPaddedRight = m_outputStrides[1];
if (last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(Scalar(0));
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(Scalar(0));
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
// all the coefficient are between the 2 padding zones.
inputIndex += (index - m_padding[0].first);
return m_impl.template packet<Unaligned>(inputIndex);
}
// Every other case
return packetWithPossibleZero(initialIndex);
}
Scalar* data() const { return NULL; }
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = coeff(index+i);
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
}
PaddingDimensions m_padding;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims+1> m_outputStrides;
array<Index, NumDims> m_inputStrides;
TensorEvaluator<ArgType, Device> m_impl;
};