Improved the performance of tensor padding

This commit is contained in:
Benoit Steiner 2016-05-25 11:43:08 -07:00
parent 58026905ae
commit 034aa3b2c0
2 changed files with 53 additions and 54 deletions

View File

@ -178,9 +178,14 @@ template <typename Device, typename T> class BenchmarkSuite {
size_b[1] = m_;
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
#if defined(EIGEN_HAS_INDEX_LIST)
Eigen::IndexPairList<Eigen::type2indexpair<0, 0>,
Eigen::type2indexpair<2, 1> > paddings;
#else
Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
paddings[0] = Eigen::IndexPair<TensorIndex>(0, 0);
paddings[1] = Eigen::IndexPair<TensorIndex>(2, 1);
#endif
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {

View File

@ -150,27 +150,26 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
if (isPaddingAtIndexForDim(idx, i)) {
return m_paddingValue;
}
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
if (isPaddingAtIndexForDim(index, 0)) {
return m_paddingValue;
}
inputIndex += (index - m_padding[0].first);
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_outputStrides[i+1];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
if (isPaddingAtIndexForDim(idx, i)) {
return m_paddingValue;
}
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
index -= idx * m_outputStrides[i+1];
}
if (index < m_padding[NumDims-1].first ||
index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
if (isPaddingAtIndexForDim(index, NumDims-1)) {
return m_paddingValue;
}
inputIndex += (index - m_padding[NumDims-1].first);
@ -187,43 +186,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
return packetRowMajor(index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
{
Index inputIndex;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
{
const Index idx = coords[0];
if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) {
return m_paddingValue;
}
inputIndex = idx - m_padding[0].first;
}
for (int i = 1; i < NumDims; ++i) {
const Index idx = coords[i];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
return m_paddingValue;
}
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
}
} else {
{
const Index idx = coords[NumDims-1];
if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
return m_paddingValue;
}
inputIndex = idx - m_padding[NumDims-1].first;
}
for (int i = NumDims - 2; i >= 0; --i) {
const Index idx = coords[i];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
return m_paddingValue;
}
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
}
}
return m_impl.coeff(inputIndex);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
TensorOpCost cost = m_impl.costPerCoeff(vectorized);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@ -239,6 +201,38 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
private:
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
index < m_padding[dim_index].first) ||
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
return (index < m_padding[dim_index].first) ||
(index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
return false;
#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
return false;
#endif
}
void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
const double in = static_cast<double>(m_impl.dimensions()[i]);
const double out = in + m_padding[i].first + m_padding[i].second;
@ -273,15 +267,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
const Index lastPaddedRight = m_outputStrides[i+1];
if (last < lastPaddedLeft) {
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
// all the coefficient are between the 2 padding zones.
const Index idx = index / m_outputStrides[i];
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
@ -299,15 +293,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
const Index lastPaddedRight = m_outputStrides[1];
if (last < lastPaddedLeft) {
if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
// all the coefficient are between the 2 padding zones.
inputIndex += (index - m_padding[0].first);
return m_impl.template packet<Unaligned>(inputIndex);
@ -331,15 +325,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
const Index lastPaddedRight = m_outputStrides[i];
if (last < lastPaddedLeft) {
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
// all the coefficient are between the 2 padding zones.
const Index idx = index / m_outputStrides[i+1];
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
@ -357,15 +351,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
const Index lastPaddedRight = m_outputStrides[NumDims-1];
if (last < lastPaddedLeft) {
if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= firstPaddedRight && last < lastPaddedRight) {
else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) {
// all the coefficient are in the padding zone.
return internal::pset1<PacketReturnType>(m_paddingValue);
}
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
// all the coefficient are between the 2 padding zones.
inputIndex += (index - m_padding[NumDims-1].first);
return m_impl.template packet<Unaligned>(inputIndex);