Fix typo + get rid of redundant member variables for block sizes
commit 64abdf1d7e
parent 385b3ff12f
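Across every evaluator touched below, the change follows one pattern: the cached m_block_total_size_max member (computed once in the constructor from the device cache size) is removed, and the cap is computed locally inside getResourceRequirements() from a stored device reference. The following is a minimal standalone sketch of that pattern; DeviceStub, ResourceRequirement and EvaluatorSketch are made-up stand-ins, not Eigen types, and the real code uses numext::maxi, Eigen::Index and internal::TensorOpResourceRequirements as shown in the hunks.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for the device interface queried by the evaluators (hypothetical).
struct DeviceStub {
  std::size_t lastLevelCacheSize() const { return 8u * 1024u * 1024u; }  // assumed 8 MB LLC
};

// Stand-in for internal::TensorOpResourceRequirements (hypothetical).
struct ResourceRequirement {
  long block_total_size_max;
};

class EvaluatorSketch {
 public:
  // Before this commit the constructor also computed and stored a
  // m_block_total_size_max member; now only the device reference is kept.
  explicit EvaluatorSketch(const DeviceStub& device) : m_device(device) {}

  void getResourceRequirements(std::vector<ResourceRequirement>* resources) const {
    // Same clamp as numext::maxi<Eigen::Index>(1, cacheSize / sizeof(Scalar)):
    // at least one scalar per block, otherwise bounded by the cache size.
    const long block_total_size_max = std::max<long>(
        1, static_cast<long>(m_device.lastLevelCacheSize() / sizeof(float)));
    resources->push_back({block_total_size_max});
  }

 private:
  const DeviceStub& m_device;  // replaces the removed m_block_total_size_max member
};

int main() {
  DeviceStub device;
  EvaluatorSketch eval(device);
  std::vector<ResourceRequirement> resources;
  eval.getResourceRequirements(&resources);
  std::cout << resources[0].block_total_size_max << " scalars per block\n";
}

Dropping the member removes duplicated state from every evaluator; the cost is one integer division per getResourceRequirements() call.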
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // Block based access to the XprType (input) tensor.
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;
-  // We do block based broadcasting using a a trick with 2x tensor rank and 0
+  // We do block based broadcasting using a trick with 2x tensor rank and 0
   // strides. See block method implementation for details.
   using BroadcastDimensions = DSizes<Index, 2 * NumDims>;
   using BroadcastTensorBlock = internal::TensorBlock<ScalarNoConst, Index, 2 * NumDims, Layout>;
@@ -589,8 +589,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    Index l1_cache_scalars = m_device.firstLevelCacheSize() / sizeof(Scalar);
-    Index block_total_size_max = numext::maxi(Index(1), l1_cache_scalars);
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));

     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
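For a sense of scale (assumed numbers, not measurements from this commit): with a 32 KB first-level cache and Scalar = float, the expression above caps blocks at 32768 / 4 = 8192 scalars; the maxi against 1 only matters when the reported cache size is smaller than one scalar. A tiny standalone check of that arithmetic:

#include <algorithm>
#include <iostream>

int main() {
  const long l1_bytes = 32 * 1024;  // assumed L1 size; not queried from a real device here
  const long cap = std::max<long>(1, l1_bytes / static_cast<long>(sizeof(float)));
  std::cout << cap << "\n";  // prints 8192
}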
@@ -202,9 +202,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
         m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
     }
   }

@@ -290,9 +287,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }

@@ -370,13 +369,14 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   {
     Index inputIndex;
     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(m_stride == 1);
       inputIndex = index * m_inputStride + m_inputOffset;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
+    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
                (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
-      // m_stride is aways greater than index, so let's avoid the integer division.
+      // m_stride is aways greater than index, so let's avoid the integer
+      // division.
       eigen_assert(m_stride > index);
       inputIndex = index + m_inputOffset;
     } else {
@@ -392,7 +392,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   Index m_stride;
   Index m_inputOffset;
   Index m_inputStride;
-  Index m_block_total_size_max;
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
@@ -259,7 +259,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
 #else
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
 #endif
-      : m_impl(op.expression(), device)
+      : m_device(device), m_impl(op.expression(), device)
 #ifdef EIGEN_USE_SYCL
       , m_op(op)
 #endif
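The constructor change above exists because this evaluator gains a const Device& m_device member (see the member-list hunk further down): a reference member can only be bound in the mem-initializer list, and listing it before m_impl matches the declaration order, so no reorder warning is introduced. A minimal illustration with placeholder types (Device, Impl and Evaluator here are stand-ins, not the Eigen classes):

struct Device {};
struct Impl {
  explicit Impl(const Device&) {}
};

struct Evaluator {
  explicit Evaluator(const Device& device)
      : m_device(device),  // reference member: must be initialized here
        m_impl(device) {}  // initializer order mirrors declaration order

  const Device& m_device;
  Impl m_impl;
};

int main() {
  Device d;
  Evaluator e(d);
  (void)e;
}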
@@ -404,9 +404,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     } else {
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -551,9 +548,11 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
@@ -743,8 +742,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   internal::TensorIntDivisor<Index> m_fastOutputDepth;

   Scalar m_paddingValue;
-  Index m_block_total_size_max;

+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
 #ifdef EIGEN_USE_SYCL
   // Required for SYCL in order to construct the expression tree on the device
@@ -560,9 +560,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
       m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
     }
   }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -672,9 +669,11 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }

@@ -761,7 +760,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   Dimensions m_dimensions;
   bool m_is_identity;
   const StartIndices m_offsets;
-  Index m_block_total_size_max;
 };


@@ -1047,9 +1045,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
       m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
     }
   }
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                          device.lastLevelCacheSize() /
-                                              sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -1128,7 +1123,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
   DSizes<Index, NumDims> m_dimensions;
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
-  std::size_t m_block_total_size_max;
   //use by sycl
   const StartIndices m_exprStartIndices;
   //use by sycl
@@ -572,9 +572,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
             : (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                   ? m_preservedStrides[0]
                   : m_preservedStrides[NumOutputDims - 1];
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -771,9 +768,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }

@@ -1204,9 +1203,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
   // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedDims;

-  // Block size for tiled (aka TensorBlock) evaluation.
-  Index m_block_total_size_max;
-
   // Evaluator for the input expression.
   TensorEvaluator<ArgType, Device> m_impl;

@@ -124,8 +124,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
+                                                        const Device& device)
+      : m_device(device),
+        m_impl(op.expression(), device),
+        m_shuffle(op.shufflePermutation())
   {
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
     const Shuffle& shuffle = op.shufflePermutation();
@@ -162,9 +165,6 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     for (int i = 0; i < NumDims; ++i) {
       m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.firstLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -226,9 +226,10 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::TensorBlockShapeType::kUniformAllDims,
-        m_block_total_size_max));
+        internal::TensorBlockShapeType::kUniformAllDims, block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }

@@ -384,7 +385,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   array<Index, NumDims> m_unshuffledInputStrides;
-  Index m_block_total_size_max;
+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
   /// required by sycl
   Shuffle m_shuffle;