Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-03-07 18:27:40 +08:00)
Do not use std::vector in getResourceRequirements

parent 8056a05b54
commit 2918f85ba9
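Summary of the change applied across all hunks below: the old getResourceRequirements took a std::vector<internal::TensorOpResourceRequirements>* out-parameter, appended its own entry, and recursed into child evaluators; the executor then flattened the vector with MergeResourceRequirements. After this commit each evaluator returns a single internal::TensorBlockV2ResourceRequirements by value and merges its children's requirements pairwise, so no heap-allocated vector appears on the query path (the methods are EIGEN_DEVICE_FUNC, where std::vector is a liability). A minimal sketch of the two protocols with simplified stand-in types — Req and Shape here are illustrative, not Eigen's declarations:

#include <algorithm>
#include <cstddef>
#include <vector>

enum class Shape { kUniformAllDims, kSkewedInnerDims };

struct Req {
  Shape shape;
  size_t size;

  // Pairwise merge: a skewed-inner-dims preference wins, and the target
  // block size is the max of the two requests (the same policy as the
  // removed MergeResourceRequirements loop).
  static Req merge(const Req& lhs, const Req& rhs) {
    return {(lhs.shape == Shape::kSkewedInnerDims ||
             rhs.shape == Shape::kSkewedInnerDims)
                ? Shape::kSkewedInnerDims
                : Shape::kUniformAllDims,
            std::max(lhs.size, rhs.size)};
  }
};

// Old protocol: each node appends to a caller-owned vector, then recurses
// into its child evaluators with the same pointer.
void getResourceRequirementsOld(std::vector<Req>* resources) {
  resources->push_back({Shape::kSkewedInnerDims, 1024});
}

// New protocol: each node returns a value and merges its children.
Req getResourceRequirementsNew(const Req& left_child, const Req& right_child) {
  return Req::merge(left_child, right_child);
}

Under the new protocol a binary node such as TensorAssignOp reduces to a single merge of its two children's requirements, exactly as in the first hunk below.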
@@ -208,10 +208,11 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
            TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_leftImpl.getResourceRequirements(resources);
-    m_rightImpl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        m_leftImpl.getResourceRequirements(),
+        m_rightImpl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
@@ -65,40 +65,6 @@ enum TensorBlockShapeType {
   kSkewedInnerDims
 };
 
-struct TensorOpResourceRequirements {
-  TensorBlockShapeType block_shape;
-  Index block_total_size;
-  // TODO(andydavis) Add 'target_num_threads' to support communication of
-  // thread-resource requirements. This will allow ops deep in the
-  // expression tree (like reductions) to communicate resources
-  // requirements based on local state (like the total number of reductions
-  // to be computed).
-  TensorOpResourceRequirements(TensorBlockShapeType shape,
-                               const Index size)
-      : block_shape(shape), block_total_size(size) {}
-};
-
-// Tries to merge multiple resource requirements.
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MergeResourceRequirements(
-    const std::vector<TensorOpResourceRequirements>& resources,
-    TensorBlockShapeType* block_shape, Index* block_total_size) {
-  if (resources.empty()) {
-    return;
-  }
-  // TODO(andydavis) Implement different policies (i.e. revert to a default
-  // policy if block shapes/sizes conflict).
-  *block_shape = resources[0].block_shape;
-  *block_total_size = resources[0].block_total_size;
-  for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
-    if (resources[i].block_shape == kSkewedInnerDims &&
-        *block_shape != kSkewedInnerDims) {
-      *block_shape = kSkewedInnerDims;
-    }
-    *block_total_size =
-        numext::maxi(*block_total_size, resources[i].block_total_size);
-  }
-}
-
 /**
  * \class TensorBlock
  * \ingroup CXX11_Tensor_Module
@@ -57,6 +57,60 @@ EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(
   return strides<Layout>(DSizes<std::ptrdiff_t, sizeof...(Indices)>(sizes));
 }
 
+// -------------------------------------------------------------------------- //
+
+// Tensor block shape type defines what are the shape preference for the blocks
+// extracted from the larger tensor.
+//
+// Example: blocks of 100 elements from the large 100x100 tensor:
+// - tensor: 100x100
+// - target_block_size: 100
+//
+// TensorBlockShapeType:
+//  - kUniformAllDims: 100 blocks of size 10x10
+//  - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
+//    or row major layout)
+enum class TensorBlockV2ShapeType { kUniformAllDims, kSkewedInnerDims };
+
+struct TensorBlockV2ResourceRequirements {
+  TensorBlockV2ShapeType shape_type;
+  size_t size;
+
+  TensorBlockShapeType shapeV1() const {
+    return shape_type == TensorBlockV2ShapeType::kUniformAllDims
+               ? internal::kUniformAllDims
+               : internal::kSkewedInnerDims;
+  }
+
+  static TensorBlockV2ResourceRequirements
+  merge(const TensorBlockV2ResourceRequirements& lhs,
+        const TensorBlockV2ResourceRequirements& rhs) {
+    return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
+  }
+
+  // This is a resource requirement that should be returned from expressions
+  // that do not have any block evaluation preference (e.g. default tensor
+  // expression with raw buffer access).
+  static TensorBlockV2ResourceRequirements any() {
+    return {TensorBlockV2ShapeType::kUniformAllDims, 1};
+  }
+
+ private:
+  using Requirements = TensorBlockV2ResourceRequirements;
+
+  static size_t merge(size_t lhs_size, size_t rhs_size) {
+    return numext::maxi(lhs_size, rhs_size);
+  }
+
+  static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
+                                      TensorBlockV2ShapeType rhs) {
+    return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
+            rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
+               ? TensorBlockV2ShapeType::kSkewedInnerDims
+               : TensorBlockV2ShapeType::kUniformAllDims;
+  }
+};
+
+// -------------------------------------------------------------------------- //
 // TensorBlockDescriptor specifies a block offset within a tensor and the block
 // sizes along each of the tensor dimensions.
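To make the 100x100 example in the comment above concrete, here is a small standalone sketch (a hypothetical helper, not Eigen's TensorBlockMapper, which handles arbitrary rank and both layouts) that derives block dimensions for a rank-2 column-major tensor under the two policies:

#include <algorithm>
#include <array>
#include <cstdio>

// Hypothetical 2-D illustration of the two block shape policies.
std::array<int, 2> blockDims(bool skewed_inner_dims, std::array<int, 2> dims,
                             int target_block_size) {
  if (skewed_inner_dims) {
    // Fill the innermost dimension first: 100x1 blocks for target 100.
    const int inner = std::min(dims[0], target_block_size);
    return {inner, std::max(1, target_block_size / inner)};
  }
  // Uniform: keep all dimensions roughly equal: 10x10 blocks for target 100.
  int side = 1;
  while ((side + 1) * (side + 1) <= target_block_size) ++side;
  return {std::min(side, dims[0]), std::min(side, dims[1])};
}

int main() {
  const auto uniform = blockDims(false, {100, 100}, 100);  // {10, 10}
  const auto skewed = blockDims(true, {100, 100}, 100);    // {100, 1}
  std::printf("uniform: %dx%d, skewed: %dx%d\n", uniform[0], uniform[1],
              skewed[0], skewed[1]);
}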
@@ -616,17 +616,16 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
+    const size_t target_block_size = numext::maxi<size_t>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
 
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-
-    m_impl.getResourceRequirements(resources);
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+        m_impl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
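A note on the block-size targets in these hunks: broadcasting (like the generator and shuffle evaluators later in the diff) divides the first-level cache size by sizeof(Scalar), per the TODO(wuke) comment above, while chipping, slicing, padding, and reverse use the last-level cache. The computation is just a guarded division; a minimal restatement, where the 32 KiB figure is only an example and not a value from this patch:

#include <algorithm>
#include <cstddef>

// Elements of a scalar type that fit in a cache of cache_bytes, never zero.
size_t targetBlockSize(size_t cache_bytes, size_t scalar_bytes) {
  return std::max<size_t>(1, cache_bytes / scalar_bytes);
}
// e.g. targetBlockSize(32 * 1024, sizeof(float)) == 8192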
@@ -294,13 +294,14 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
            TensorOpCost(0, 0, cost, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-    m_impl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    const size_t target_block_size =
+        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+        m_impl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -397,9 +397,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_impl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return m_impl.getResourceRequirements();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -164,9 +164,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
     internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i));
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_impl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return m_impl.getResourceRequirements();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
@@ -149,8 +149,10 @@ struct TensorEvaluator
                         PacketType<CoeffReturnType, Device>::size);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>*) const {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::any();
+  }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
   blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
@@ -320,8 +322,10 @@ struct TensorEvaluator<const Derived, Device>
                         PacketType<CoeffReturnType, Device>::size);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>*) const {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::any();
+  }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
   blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
@@ -517,9 +521,9 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_argImpl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return m_argImpl.getResourceRequirements();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -655,10 +659,11 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_leftImpl.getResourceRequirements(resources);
-    m_rightImpl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        m_leftImpl.getResourceRequirements(),
+        m_rightImpl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -934,11 +939,13 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
         .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    m_condImpl.getResourceRequirements(resources);
-    m_thenImpl.getResourceRequirements(resources);
-    m_elseImpl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        m_condImpl.getResourceRequirements(),
+        internal::TensorBlockV2ResourceRequirements::merge(
+            m_thenImpl.getResourceRequirements(),
+            m_elseImpl.getResourceRequirements()));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -182,25 +182,18 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
         TensorBlockScratch;
 
     Evaluator evaluator(expr, device);
-    Index total_size = array_prod(evaluator.dimensions());
-    Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
 
     // TODO(ezhulenev): Do not use tiling for small tensors?
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
 
     if (needs_assign) {
-      // Size tensor blocks to fit in cache (or requested target block size).
-      Index block_total_size = numext::mini(cache_size, total_size);
-      TensorBlockShapeType block_shape = kSkewedInnerDims;
       // Query expression tree for desired block size/shape.
-      std::vector<TensorOpResourceRequirements> resources;
-      evaluator.getResourceRequirements(&resources);
-      MergeResourceRequirements(resources, &block_shape, &block_total_size);
+      const TensorBlockV2ResourceRequirements requirements =
+          evaluator.getResourceRequirements();
 
-      TensorBlockMapper block_mapper(
-          TensorBlockDimensions(evaluator.dimensions()), block_shape,
-          block_total_size);
-      block_total_size = block_mapper.block_dims_total_size();
+      const TensorBlockMapper block_mapper(
+          TensorBlockDimensions(evaluator.dimensions()), requirements.shapeV1(),
+          requirements.size);
 
       // Share scratch memory allocator between all blocks.
       TensorBlockScratch scratch(device);
@@ -268,14 +261,10 @@ template <typename Evaluator, typename TensorBlockMapper, bool Vectorizable>
 TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
     const ThreadPoolDevice& device, const Evaluator& evaluator,
     bool allocate_buffer = true) {
-  // Prefer blocks skewed toward inner dimension.
-  TensorBlockShapeType block_shape = kSkewedInnerDims;
-  Index block_total_size = 0;
 
   // Query expression tree for desired block size/shape.
-  std::vector<TensorOpResourceRequirements> resources;
-  evaluator.getResourceRequirements(&resources);
-  MergeResourceRequirements(resources, &block_shape, &block_total_size);
+  const TensorBlockV2ResourceRequirements requirements =
+      evaluator.getResourceRequirements();
 
   int num_threads = device.numThreads();
 
@@ -285,7 +274,7 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
 
   TensorBlockMapper block_mapper(
       typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
-      block_shape, block_size);
+      requirements.shapeV1(), block_size);
 
   block_size = block_mapper.block_dims_total_size();
   const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);
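Both executor paths above now follow the same two-step recipe: query the evaluator tree once for its already-merged requirements, then build a block mapper from requirements.shapeV1() and requirements.size. A condensed sketch of that control flow; EvaluatorLike and the block-count helper are stand-ins for exposition, not Eigen's TensorExecutor internals:

#include <cstddef>

enum class Shape { kUniformAllDims, kSkewedInnerDims };
struct Requirements { Shape shape; size_t size; };

struct EvaluatorLike {
  // Already merged bottom-up by the expression tree; one call replaces the
  // old std::vector fill plus the MergeResourceRequirements pass.
  Requirements getResourceRequirements() const {
    return {Shape::kSkewedInnerDims, 1024};
  }
  size_t totalBlockCount(const Requirements& req) const {
    // Pretend the output has 4096 coefficients split into req.size tiles.
    return (4096 + req.size - 1) / req.size;
  }
  void evalBlock(size_t block_index) const { (void)block_index; }
};

void runTiled(const EvaluatorLike& evaluator) {
  const Requirements requirements = evaluator.getResourceRequirements();
  const size_t block_count = evaluator.totalBlockCount(requirements);
  for (size_t i = 0; i < block_count; ++i) evaluator.evalBlock(i);
}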
@@ -176,8 +176,10 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
     return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>*) const {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::any();
+  }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
   blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
@@ -167,12 +167,12 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
     return rslt;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    const size_t target_block_size = numext::maxi<size_t>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
+    return {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+            target_block_size};
   }
 
   struct BlockIteratorState {
@@ -540,14 +540,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-  }
-
  protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
   {
@@ -198,10 +198,9 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
     return m_impl.costPerCoeff(vectorized);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>*) const {
-    // TODO(ezhulenev): If we'll ever support block evaluation without raw
-    // access we'll need to get requirements from `m_impl`.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    return internal::TensorBlockV2ResourceRequirements::any();
   }
 
   // required in block(OutputTensorBlock* output_block) const
@@ -636,13 +635,13 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-    m_impl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    const size_t target_block_size =
+        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+        m_impl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -227,14 +227,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
     return cost;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-
-    m_impl.getResourceRequirements(resources);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    const size_t target_block_size =
+        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+        m_impl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -905,15 +905,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
-    m_impl.getResourceRequirements(resources);
-  }
-
   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
   EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
   EIGEN_DEVICE_FUNC const Device& device() const { return m_device; }
@@ -248,12 +248,13 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     return rslt;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
-        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kSkewedInnerDims, block_total_size_max));
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+    const size_t target_block_size =
+        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    return internal::TensorBlockV2ResourceRequirements::merge(
+        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+        m_impl.getResourceRequirements());
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -244,18 +244,19 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-      std::vector<internal::TensorOpResourceRequirements>* resources) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
    static const int inner_dim =
        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
-    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
 
-    Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
+    const size_t target_block_size = numext::maxi<size_t>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-    resources->push_back(internal::TensorOpResourceRequirements(
-        inner_dim_shuffled ? internal::kUniformAllDims
-                           : internal::kSkewedInnerDims,
-        block_total_size_max));
+
+    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
+    return {inner_dim_shuffled
+                ? internal::TensorBlockV2ShapeType::kUniformAllDims
+                : internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+            target_block_size};
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
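One detail worth calling out in the last hunk: shuffling is the only evaluator in this commit whose shape preference is conditional. When the shuffle moves the innermost dimension, iterating along it no longer walks contiguous input memory, so the evaluator presumably gains nothing from skewed blocks and requests uniform ones instead. The decision in isolation, with the same illustrative stand-in types as the earlier sketches:

#include <cstddef>

enum class Shape { kUniformAllDims, kSkewedInnerDims };
struct Requirements { Shape shape; size_t size; };

// Prefer skewed blocks only while the innermost dimension stays in place.
Requirements shuffleRequirements(bool inner_dim_shuffled,
                                 size_t target_block_size) {
  return {inner_dim_shuffled ? Shape::kUniformAllDims
                             : Shape::kSkewedInnerDims,
          target_block_size};
}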