mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-15 07:10:37 +08:00
Improved the performance of tensor padding
This commit is contained in:
parent
58026905ae
commit
034aa3b2c0
@ -178,9 +178,14 @@ template <typename Device, typename T> class BenchmarkSuite {
|
||||
size_b[1] = m_;
|
||||
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
|
||||
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
Eigen::IndexPairList<Eigen::type2indexpair<0, 0>,
|
||||
Eigen::type2indexpair<2, 1> > paddings;
|
||||
#else
|
||||
Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
|
||||
paddings[0] = Eigen::IndexPair<TensorIndex>(0, 0);
|
||||
paddings[1] = Eigen::IndexPair<TensorIndex>(2, 1);
|
||||
#endif
|
||||
|
||||
StartBenchmarkTiming();
|
||||
for (int iter = 0; iter < num_iters; ++iter) {
|
||||
|
@ -150,27 +150,26 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx = index / m_outputStrides[i];
|
||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||
if (isPaddingAtIndexForDim(idx, i)) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
index -= idx * m_outputStrides[i];
|
||||
}
|
||||
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
|
||||
if (isPaddingAtIndexForDim(index, 0)) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (index - m_padding[0].first);
|
||||
} else {
|
||||
for (int i = 0; i < NumDims - 1; ++i) {
|
||||
const Index idx = index / m_outputStrides[i+1];
|
||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||
if (isPaddingAtIndexForDim(idx, i)) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
index -= idx * m_outputStrides[i+1];
|
||||
}
|
||||
if (index < m_padding[NumDims-1].first ||
|
||||
index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
|
||||
if (isPaddingAtIndexForDim(index, NumDims-1)) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (index - m_padding[NumDims-1].first);
|
||||
@ -187,43 +186,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
return packetRowMajor(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
|
||||
{
|
||||
Index inputIndex;
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
{
|
||||
const Index idx = coords[0];
|
||||
if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex = idx - m_padding[0].first;
|
||||
}
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
const Index idx = coords[i];
|
||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
}
|
||||
} else {
|
||||
{
|
||||
const Index idx = coords[NumDims-1];
|
||||
if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex = idx - m_padding[NumDims-1].first;
|
||||
}
|
||||
for (int i = NumDims - 2; i >= 0; --i) {
|
||||
const Index idx = coords[i];
|
||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||
return m_paddingValue;
|
||||
}
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
}
|
||||
}
|
||||
return m_impl.coeff(inputIndex);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
||||
TensorOpCost cost = m_impl.costPerCoeff(vectorized);
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
@ -239,6 +201,38 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
|
||||
Index index, int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||
index < m_padding[dim_index].first) ||
|
||||
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||
index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||
#else
|
||||
return (index < m_padding[dim_index].first) ||
|
||||
(index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
|
||||
int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
|
||||
int dim_index) const {
|
||||
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||
return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
|
||||
const double in = static_cast<double>(m_impl.dimensions()[i]);
|
||||
const double out = in + m_padding[i].first + m_padding[i].second;
|
||||
@ -273,15 +267,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
|
||||
const Index lastPaddedRight = m_outputStrides[i+1];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
const Index idx = index / m_outputStrides[i];
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
@ -299,15 +293,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
|
||||
const Index lastPaddedRight = m_outputStrides[1];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
inputIndex += (index - m_padding[0].first);
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
@ -331,15 +325,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
|
||||
const Index lastPaddedRight = m_outputStrides[i];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
const Index idx = index / m_outputStrides[i+1];
|
||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||
@ -357,15 +351,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
|
||||
const Index lastPaddedRight = m_outputStrides[NumDims-1];
|
||||
|
||||
if (last < lastPaddedLeft) {
|
||||
if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||
// all the coefficient are in the padding zone.
|
||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||
}
|
||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||
else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||
// all the coefficient are between the 2 padding zones.
|
||||
inputIndex += (index - m_padding[NumDims-1].first);
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
|
Loading…
Reference in New Issue
Block a user