Mirror of https://gitlab.com/libeigen/eigen.git
Synced 2024-12-27 07:29:52 +08:00

Remove V2 suffix from TensorBlock

parent dbca11e880
commit 1c879eb010
@@ -97,7 +97,7 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorGlobalFunctions.h"

#include "src/Tensor/TensorBase.h"
-#include "src/Tensor/TensorBlockV2.h"
+#include "src/Tensor/TensorBlock.h"

#include "src/Tensor/TensorEvaluator.h"
#include "src/Tensor/TensorExpr.h"
@@ -88,7 +88,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -96,7 +96,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -229,7 +229,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -237,7 +237,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -108,8 +108,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
TensorEvaluator<RightArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
TensorEvaluator<RightArgType, Device>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<LeftArgType, Device>::BlockAccessV2 &
-TensorEvaluator<RightArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
+TensorEvaluator<RightArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
RightTensorBlock;
//===--------------------------------------------------------------------===//

@@ -201,13 +201,13 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
-return internal::TensorBlockV2ResourceRequirements::merge(
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
+return internal::TensorBlockResourceRequirements::merge(
m_leftImpl.getResourceRequirements(),
m_rightImpl.getResourceRequirements());
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(
TensorBlockDesc& desc, TensorBlockScratch& scratch) {
if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
m_leftImpl.data() != NULL) {
@@ -218,10 +218,10 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
/*dst_strides=*/internal::strides<Layout>(m_leftImpl.dimensions()));
}

-RightTensorBlock block = m_rightImpl.blockV2(desc, scratch, /*root_of_expr_ast=*/true);
+RightTensorBlock block = m_rightImpl.block(desc, scratch, /*root_of_expr_ast=*/true);
// If block was evaluated into a destination, there is no need to do assignment.
if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
-m_leftImpl.writeBlockV2(desc, block);
+m_leftImpl.writeBlock(desc, block);
}
block.cleanup();
}
@@ -5,8 +5,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_V2_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_V2_H
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
+#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H

namespace Eigen {
namespace internal {
@@ -14,7 +14,7 @@ namespace internal {
// -------------------------------------------------------------------------- //
// Forward declarations for templates defined below.
template <typename Scalar, typename IndexType, int NumDims, int Layout>
-class TensorBlockIOV2;
+class TensorBlockIO;

// -------------------------------------------------------------------------- //
// Helper function to compute strides for densely stored buffer of given
@@ -70,16 +70,16 @@ EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(
// - kUniformAllDims: 100 blocks of size 10x10
// - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
// or row major layout)
-enum class TensorBlockV2ShapeType { kUniformAllDims, kSkewedInnerDims };
+enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };

-struct TensorBlockV2ResourceRequirements {
-TensorBlockV2ShapeType shape_type;
+struct TensorBlockResourceRequirements {
+TensorBlockShapeType shape_type;
size_t size;

EIGEN_DEVICE_FUNC
-static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
-merge(const TensorBlockV2ResourceRequirements &lhs,
-const TensorBlockV2ResourceRequirements &rhs) {
+static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
+merge(const TensorBlockResourceRequirements &lhs,
+const TensorBlockResourceRequirements &rhs) {
return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
}

@@ -87,12 +87,12 @@ struct TensorBlockV2ResourceRequirements {
// that do not have any block evaluation preference (e.g. default tensor
// expression with raw buffer access).
EIGEN_DEVICE_FUNC
-static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements any() {
-return {TensorBlockV2ShapeType::kUniformAllDims, 1};
+static EIGEN_STRONG_INLINE TensorBlockResourceRequirements any() {
+return {TensorBlockShapeType::kUniformAllDims, 1};
}

private:
-using Requirements = TensorBlockV2ResourceRequirements;
+using Requirements = TensorBlockResourceRequirements;

EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) {
@@ -100,12 +100,12 @@ private:
}

EIGEN_DEVICE_FUNC
-static EIGEN_STRONG_INLINE TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
-TensorBlockV2ShapeType rhs) {
-return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
-rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
-? TensorBlockV2ShapeType::kSkewedInnerDims
-: TensorBlockV2ShapeType::kUniformAllDims;
+static EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs,
+TensorBlockShapeType rhs) {
+return (lhs == TensorBlockShapeType::kSkewedInnerDims ||
+rhs == TensorBlockShapeType::kSkewedInnerDims)
+? TensorBlockShapeType::kSkewedInnerDims
+: TensorBlockShapeType::kUniformAllDims;
}
};
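For orientation, a minimal usage sketch of the renamed requirements type from the hunks above. The function name mergedExample, the include path, and the sizes 128 and 512 are illustrative assumptions, not part of this commit; per the merge() overloads shown, a kSkewedInnerDims request on either side dominates, and the two sizes are combined by the private merge(size_t, size_t) overload whose body lies outside these hunks.

#include <unsupported/Eigen/CXX11/Tensor>

// Hypothetical illustration only (assumed names, not part of the diff).
Eigen::internal::TensorBlockResourceRequirements mergedExample() {
  using namespace Eigen::internal;
  // Aggregate-initialize the renamed struct: {shape_type, size}.
  TensorBlockResourceRequirements lhs = {TensorBlockShapeType::kUniformAllDims, 128};
  TensorBlockResourceRequirements rhs = {TensorBlockShapeType::kSkewedInnerDims, 512};
  // Shape merge: a skewed request wins over uniform, so the result is kSkewedInnerDims.
  return TensorBlockResourceRequirements::merge(lhs, rhs);
}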
@@ -272,15 +272,15 @@ class TensorBlockDescriptor {
// TensorBlockMapper is responsible for iterating over the blocks of a tensor.

template <int NumDims, int Layout, typename IndexType = Eigen::Index>
-class TensorBlockV2Mapper {
+class TensorBlockMapper {
typedef TensorBlockDescriptor<NumDims, IndexType> BlockDescriptor;

public:
typedef DSizes<IndexType, NumDims> Dimensions;

-TensorBlockV2Mapper() = default;
-TensorBlockV2Mapper(const DSizes<IndexType, NumDims>& dimensions,
-const TensorBlockV2ResourceRequirements& requirements)
+TensorBlockMapper() = default;
+TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
+const TensorBlockResourceRequirements& requirements)
: m_tensor_dimensions(dimensions), m_requirements(requirements) {
// Initialize `m_block_dimensions`.
InitializeBlockDimensions();
@@ -338,7 +338,7 @@ class TensorBlockV2Mapper {
private:
void InitializeBlockDimensions() {
// Requested block shape and size.
-const TensorBlockV2ShapeType shape_type = m_requirements.shape_type;
+const TensorBlockShapeType shape_type = m_requirements.shape_type;
const IndexType target_block_size =
numext::maxi<IndexType>(1, static_cast<IndexType>(m_requirements.size));

@@ -362,7 +362,7 @@ class TensorBlockV2Mapper {
static const bool isColMajor = Layout == static_cast<int>(ColMajor);

// Block shape skewed towards inner dimension.
-if (shape_type == TensorBlockV2ShapeType::kSkewedInnerDims) {
+if (shape_type == TensorBlockShapeType::kSkewedInnerDims) {
IndexType coeff_to_allocate = target_block_size;

for (int i = 0; i < NumDims; ++i) {
@@ -375,7 +375,7 @@ class TensorBlockV2Mapper {
}
eigen_assert(coeff_to_allocate == 1);

-} else if (shape_type == TensorBlockV2ShapeType::kUniformAllDims) {
+} else if (shape_type == TensorBlockShapeType::kUniformAllDims) {
// Tensor will not fit within 'target_block_size' budget: calculate tensor
// block dimension sizes based on "square" dimension size target.
const IndexType dim_size_target = convert_index<IndexType>(
@@ -421,7 +421,7 @@ class TensorBlockV2Mapper {
}

DSizes<IndexType, NumDims> m_tensor_dimensions;
-TensorBlockV2ResourceRequirements m_requirements;
+TensorBlockResourceRequirements m_requirements;

DSizes<IndexType, NumDims> m_block_dimensions;
IndexType m_total_block_count;
@@ -722,7 +722,7 @@ class TensorMaterializedBlock {
// Reuse destination buffer or allocate new buffer with scratch allocator.
const Storage storage = prepareStorage(desc, scratch);

-typedef internal::TensorBlockIOV2<Scalar, IndexType, NumDims, Layout>
+typedef internal::TensorBlockIO<Scalar, IndexType, NumDims, Layout>
TensorBlockIO;
typedef typename TensorBlockIO::Dst TensorBlockIODst;
typedef typename TensorBlockIO::Src TensorBlockIOSrc;
@@ -1062,7 +1062,7 @@ class StridedLinearBufferCopy {
// `src` we need to know only stride to navigate through source memory buffer.

template <typename Scalar, typename IndexType, int NumDims, int Layout>
-class TensorBlockIOV2 {
+class TensorBlockIO {
static const bool IsColMajor = (Layout == ColMajor);

typedef StridedLinearBufferCopy<Scalar, IndexType> LinCopy;
@@ -1478,4 +1478,4 @@ class TensorBlockAssignment {
} // namespace internal
} // namespace Eigen

-#endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_V2_H
+#endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
@@ -114,7 +114,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
enum {
IsAligned = true,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
@@ -130,12 +130,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
Layout, Index>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
@@ -617,19 +617,19 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
// TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
// tensors. But this might need further tuning.
const size_t target_block_size = numext::maxi<size_t>(
1, m_device.firstLevelCacheSize() / sizeof(Scalar));

-return internal::TensorBlockV2ResourceRequirements::merge(
-{internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+return internal::TensorBlockResourceRequirements::merge(
+{internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
m_impl.getResourceRequirements());
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
BlockBroadcastingParams params = blockBroadcastingParams(desc);

@@ -638,8 +638,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}

// Prepare storage for the materialized broadcasting result.
-const typename TensorBlockV2::Storage block_storage =
-TensorBlockV2::prepareStorage(desc, scratch);
+const typename TensorBlock::Storage block_storage =
+TensorBlock::prepareStorage(desc, scratch);
ScalarNoConst* materialized_output = block_storage.data();

// We potentially will need to materialize input blocks.
@@ -843,10 +843,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
return params;
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 emptyBlock() const {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock emptyBlock() const {
DSizes<Index, NumDims> dimensions;
for (int i = 0; i < NumDims; ++i) dimensions[i] = 0;
-return TensorBlockV2(internal::TensorBlockKind::kView, NULL, dimensions);
+return TensorBlock(internal::TensorBlockKind::kView, NULL, dimensions);
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockAlongBcastDim(
@@ -856,7 +856,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
size_t* materialized_input_size) const {
if (params.bcast_dim_size == 1) {
// We just need one block read using the ready-set values above.
-return BroadcastBlockV2(
+return BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, 0, scratch,
@@ -873,7 +873,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
params.bcast_block_strides[broadcast_bcast_dim] =
params.output_strides[params.bcast_dim];

-return BroadcastBlockV2(
+return BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, 0, scratch,
@@ -942,7 +942,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
params.output_strides[params.bcast_dim] *
params.input_dims[params.bcast_dim];

-num_output_coeffs += BroadcastBlockV2(
+num_output_coeffs += BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, 0, scratch,
@@ -964,7 +964,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
const Index offset = (first_multiple - bcast_dim_left_index) *
m_outputStrides[params.bcast_dim];

-num_output_coeffs += BroadcastBlockV2(
+num_output_coeffs += BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, offset, scratch,
@@ -987,7 +987,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
const Index offset = (last_multiple - bcast_dim_left_index) *
m_outputStrides[params.bcast_dim];

-num_output_coeffs += BroadcastBlockV2(
+num_output_coeffs += BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, offset, scratch,
@@ -1005,7 +1005,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
params.bcast_block_strides[copy_bcast_dim] =
params.output_strides[params.bcast_dim];

-num_output_coeffs += BroadcastBlockV2(
+num_output_coeffs += BroadcastBlock(
params.input_block_sizes, params.input_block_strides,
params.bcast_block_sizes, params.bcast_block_strides,
params.bcast_input_strides, bcast_offset, 0, scratch,
@@ -1016,7 +1016,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockV2(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlock(
const Dimensions& input_block_sizes,
const Dimensions& input_block_strides,
const BroadcastDimensions& bcast_block_sizes,
@@ -1032,7 +1032,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset),
input_block_sizes);

-ArgTensorBlock input_block = m_impl.blockV2(input_desc, scratch);
+ArgTensorBlock input_block = m_impl.block(input_desc, scratch);

// ---------------------------------------------------------------------- //
// Materialize input block into a temporary memory buffer only if it's not
@@ -1071,14 +1071,14 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
// ---------------------------------------------------------------------- //
// Copy data from materialized input block to the materialized output, using
// given broadcast strides (strides with zeroes).
-typedef internal::TensorBlockIOV2<ScalarNoConst, Index, 2 * NumDims, Layout>
-TensorBlockIOV2;
+typedef internal::TensorBlockIO<ScalarNoConst, Index, 2 * NumDims, Layout>
+TensorBlockIO;

-typename TensorBlockIOV2::Src src(bcast_input_strides, input_buffer);
-typename TensorBlockIOV2::Dst dst(bcast_block_sizes, bcast_block_strides,
+typename TensorBlockIO::Src src(bcast_input_strides, input_buffer);
+typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides,
materialized_output + offset);

-return TensorBlockIOV2::Copy(dst, src);
+return TensorBlockIO::Copy(dst, src);
}

protected:
@@ -148,7 +148,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
IsAligned = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
// Chipping of outer-most dimension is a trivial operation, because we can
// read and write directly from the underlying tensor using single offset.
IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
@@ -172,12 +172,12 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>

typedef internal::TensorBlockDescriptor<NumInputDims, Index>
ArgTensorBlockDesc;
-typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
Layout, Index>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -295,17 +295,17 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
const size_t target_block_size =
numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));

-return internal::TensorBlockV2ResourceRequirements::merge(
-{internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
+return internal::TensorBlockResourceRequirements::merge(
+{internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
m_impl.getResourceRequirements());
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool root_of_expr_ast = false) const {
const Index chip_dim = m_dim.actualDim();

@@ -334,20 +334,20 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
arg_destination_strides);
}

-ArgTensorBlock arg_block = m_impl.blockV2(arg_desc, scratch, root_of_expr_ast);
+ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

if (arg_block.data() != NULL) {
// Forward argument block buffer if possible.
-return TensorBlockV2(arg_block.kind(), arg_block.data(),
+return TensorBlock(arg_block.kind(), arg_block.data(),
desc.dimensions());

} else {
// Assign argument block expression to a buffer.

// Prepare storage for the materialized chipping result.
-const typename TensorBlockV2::Storage block_storage =
-TensorBlockV2::prepareStorage(desc, scratch);
+const typename TensorBlock::Storage block_storage =
+TensorBlock::prepareStorage(desc, scratch);

typedef internal::TensorBlockAssignment<
ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
@@ -442,7 +442,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
+BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};
@@ -499,9 +499,9 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
}
}

-template <typename TensorBlockV2>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
-const TensorBlockDesc& desc, const TensorBlockV2& block) {
+template <typename TensorBlock>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
+const TensorBlockDesc& desc, const TensorBlock& block) {
assert(this->m_impl.data() != NULL);

const Index chip_dim = this->m_dim.actualDim();
@@ -514,7 +514,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
}

typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
-const typename TensorBlockV2::XprType>
+const typename TensorBlock::XprType>
TensorBlockExpr;

typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
@@ -125,7 +125,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
TensorEvaluator<RightArgType, Device>::PacketAccess,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
@@ -133,7 +133,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -324,7 +324,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &&
TensorEvaluator<RightArgType, Device>::PacketAccess,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess ||
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
@@ -332,7 +332,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)
@@ -381,7 +381,7 @@ struct TensorContractionEvaluatorBase
enum {
IsAligned = true,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -389,7 +389,7 @@ struct TensorContractionEvaluatorBase
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

// Most of the code is assuming that both input tensors are ColMajor. If the
@@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
TensorEvaluator<ArgType, Device>::PacketAccess &
internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
#endif
-BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
@@ -314,7 +314,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

struct TensorConversionOpBlockFactory {
@@ -331,7 +331,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>

typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
ArgTensorBlock>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -398,14 +398,14 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
return m_impl.getResourceRequirements();
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
-return TensorBlockV2(m_impl.blockV2(desc, scratch),
+return TensorBlock(m_impl.block(desc, scratch),
TensorConversionOpBlockFactory());
}
@@ -309,7 +309,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
enum {
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -317,7 +317,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
enum {
IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
PacketAccess = false,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
CoordAccess = false, // to be implemented
@@ -794,7 +794,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
@@ -294,7 +294,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
IsAligned = TensorEvaluator<InputArgType, Eigen::SyclDevice>::IsAligned &
TensorEvaluator<KernelArgType, Eigen::SyclDevice>::IsAligned,
PacketAccess = false,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<InputArgType, Eigen::SyclDevice>::Layout,
CoordAccess = false, // to be implemented
@@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC TensorEvaluator(const XprType &op, const Eigen::SyclDevice &device)
@@ -95,7 +95,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<XprType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -103,7 +103,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
@@ -268,7 +268,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -276,7 +276,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -110,7 +110,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-BlockAccessV2 = true,
+BlockAccess = true,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -123,7 +123,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

typedef internal::TensorBlockAssignment<
@@ -165,11 +165,11 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
return m_impl.getResourceRequirements();
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(
TensorBlockDesc& desc, TensorBlockScratch& scratch) {
// Add `m_buffer` as destination buffer to the block descriptor.
desc.template AddDestinationBuffer<Layout>(
@@ -177,7 +177,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
/*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()));

ArgTensorBlock block =
-m_impl.blockV2(desc, scratch, /*root_of_expr_ast=*/true);
+m_impl.block(desc, scratch, /*root_of_expr_ast=*/true);

// If block was evaluated into a destination buffer, there is no need to do
// an assignment.
@@ -45,7 +45,7 @@ struct TensorEvaluator
enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-BlockAccessV2 = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
+BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
@@ -60,7 +60,7 @@ struct TensorEvaluator

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
Layout, Index>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
@@ -150,23 +150,23 @@ struct TensorEvaluator
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
-return internal::TensorBlockV2ResourceRequirements::any();
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
+return internal::TensorBlockResourceRequirements::any();
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
-return TensorBlockV2::materialize(m_data, m_dims, desc, scratch);
+return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}

-template<typename TensorBlockV2>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
-const TensorBlockDesc& desc, const TensorBlockV2& block) {
+template<typename TensorBlock>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
+const TensorBlockDesc& desc, const TensorBlock& block) {
assert(m_data != NULL);

-typedef typename TensorBlockV2::XprType TensorBlockExpr;
+typedef typename TensorBlock::XprType TensorBlockExpr;
typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
Index>
TensorBlockAssign;
@@ -246,7 +246,7 @@ struct TensorEvaluator<const Derived, Device>
enum {
IsAligned = Derived::IsAligned,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
-BlockAccessV2 = internal::is_arithmetic<ScalarNoConst>::value,
+BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
PreferBlockAccess = false,
Layout = Derived::Layout,
CoordAccess = NumCoords > 0,
@@ -259,7 +259,7 @@ struct TensorEvaluator<const Derived, Device>

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
Layout, Index>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
@@ -323,15 +323,15 @@ struct TensorEvaluator<const Derived, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
-return internal::TensorBlockV2ResourceRequirements::any();
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
+return internal::TensorBlockResourceRequirements::any();
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_data != NULL);
-return TensorBlockV2::materialize(m_data, m_dims, desc, scratch);
+return TensorBlock::materialize(m_data, m_dims, desc, scratch);
}

EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
@@ -378,7 +378,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
&& (PacketType<CoeffReturnType, Device>::size >1)
#endif
,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -386,7 +386,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -448,7 +448,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
internal::functor_traits<UnaryOp>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@@ -476,11 +476,11 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -520,14 +520,14 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
return m_argImpl.getResourceRequirements();
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
-return TensorBlockV2(m_argImpl.blockV2(desc, scratch), m_functor);
+return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
}

EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
@@ -560,8 +560,8 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
TensorEvaluator<RightArgType, Device>::PacketAccess &
internal::functor_traits<BinaryOp>::PacketAccess,
-BlockAccessV2 = TensorEvaluator<LeftArgType, Device>::BlockAccessV2 &
-TensorEvaluator<RightArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess &
+TensorEvaluator<RightArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess |
TensorEvaluator<RightArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
@@ -595,14 +595,14 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
LeftTensorBlock;
-typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
RightTensorBlock;

typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
RightTensorBlock>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
@@ -653,18 +653,18 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
-return internal::TensorBlockV2ResourceRequirements::merge(
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
+return internal::TensorBlockResourceRequirements::merge(
m_leftImpl.getResourceRequirements(),
m_rightImpl.getResourceRequirements());
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
desc.DropDestinationBuffer();
-return TensorBlockV2(m_leftImpl.blockV2(desc, scratch),
-m_rightImpl.blockV2(desc, scratch), m_functor);
+return TensorBlock(m_leftImpl.block(desc, scratch),
+m_rightImpl.block(desc, scratch), m_functor);
}

EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
@@ -696,7 +696,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
TensorEvaluator<Arg2Type, Device>::PacketAccess &&
TensorEvaluator<Arg3Type, Device>::PacketAccess &&
internal::functor_traits<TernaryOp>::PacketAccess,
-BlockAccessV2 = false,
+BlockAccess = false,
PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
@@ -739,7 +739,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename Storage::Type EvaluatorPointerType;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-typedef internal::TensorBlockNotImplemented TensorBlockV2;
+typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
@@ -814,9 +814,9 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
TensorEvaluator<ElseArgType, Device>::PacketAccess &
PacketType<Scalar, Device>::HasBlend,
-BlockAccessV2 = TensorEvaluator<IfArgType, Device>::BlockAccessV2 &&
-TensorEvaluator<ThenArgType, Device>::BlockAccessV2 &&
-TensorEvaluator<ElseArgType, Device>::BlockAccessV2,
+BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess &&
+TensorEvaluator<ThenArgType, Device>::BlockAccess &&
+TensorEvaluator<ElseArgType, Device>::BlockAccess,
PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
@@ -850,11 +850,11 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
IfArgTensorBlock;
-typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
ThenArgTensorBlock;
-typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlockV2
+typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
ElseArgTensorBlock;

struct TensorSelectOpBlockFactory {
@@ -873,7 +873,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
IfArgTensorBlock, ThenArgTensorBlock,
ElseArgTensorBlock>
-TensorBlockV2;
+TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
@@ -933,24 +933,24 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
-return internal::TensorBlockV2ResourceRequirements::merge(
+internal::TensorBlockResourceRequirements getResourceRequirements() const {
+return internal::TensorBlockResourceRequirements::merge(
m_condImpl.getResourceRequirements(),
-internal::TensorBlockV2ResourceRequirements::merge(
+internal::TensorBlockResourceRequirements::merge(
m_thenImpl.getResourceRequirements(),
m_elseImpl.getResourceRequirements()));
}

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
-blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
// It's unsafe to pass destination buffer to underlying expressions, because
// output might be aliased with one of the inputs.
desc.DropDestinationBuffer();

-return TensorBlockV2(
-m_condImpl.blockV2(desc, scratch), m_thenImpl.blockV2(desc, scratch),
-m_elseImpl.blockV2(desc, scratch), TensorSelectOpBlockFactory());
+return TensorBlock(
+m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
+m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
@@ -172,7 +172,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(const Expression& expr,
const DefaultDevice& device = DefaultDevice()) {
-typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, StorageIndex>
+typedef TensorBlockMapper<NumDims, Evaluator::Layout, StorageIndex>
TensorBlockMapper;

typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
@@ -187,7 +187,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,

if (needs_assign) {
// Query expression tree for desired block size/shape.
-const TensorBlockV2ResourceRequirements requirements =
+const TensorBlockResourceRequirements requirements =
evaluator.getResourceRequirements();

const TensorBlockMapper block_mapper(
@@ -200,7 +200,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
const StorageIndex total_block_count = block_mapper.blockCount();
for (StorageIndex i = 0; i < total_block_count; ++i) {
TensorBlockDesc desc = block_mapper.blockDescriptor(i);
-evaluator.evalBlockV2(desc, scratch);
+evaluator.evalBlock(desc, scratch);
scratch.reset();
}
}
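As a reading aid, a small standalone sketch of the block-evaluation driver loop that this hunk renames. It mirrors the code above after the V2 suffix removal; the function name evalAllBlocksExample, the argument list, and the scratch-allocator setup outside the shown hunks are assumptions for illustration, not part of this commit.

// Hypothetical sketch only (assumed wrapper, not part of the diff).
#include <unsupported/Eigen/CXX11/Tensor>

template <typename Evaluator, int NumDims, typename StorageIndex>
void evalAllBlocksExample(Evaluator& evaluator,
                          const Eigen::DSizes<StorageIndex, NumDims>& dims,
                          const Eigen::DefaultDevice& device) {
  typedef Eigen::internal::TensorBlockMapper<NumDims, Evaluator::Layout, StorageIndex>
      BlockMapper;
  typedef Eigen::internal::TensorBlockDescriptor<NumDims, StorageIndex> BlockDesc;
  typedef Eigen::internal::TensorBlockScratchAllocator<Eigen::DefaultDevice> BlockScratch;

  // Ask the expression tree how it prefers to be tiled (renamed type).
  const Eigen::internal::TensorBlockResourceRequirements requirements =
      evaluator.getResourceRequirements();

  const BlockMapper block_mapper(dims, requirements);
  BlockScratch scratch(device);

  // Visit every block, evaluating each one with the renamed evalBlock().
  for (StorageIndex i = 0; i < block_mapper.blockCount(); ++i) {
    BlockDesc desc = block_mapper.blockDescriptor(i);
    evaluator.evalBlock(desc, scratch);
    scratch.reset();
  }
}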
@ -257,7 +257,7 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
|
||||
const ThreadPoolDevice& device, const Evaluator& evaluator,
|
||||
bool allocate_buffer = true) {
|
||||
// Query expression tree for desired block size/shape.
|
||||
const TensorBlockV2ResourceRequirements requirements =
|
||||
const TensorBlockResourceRequirements requirements =
|
||||
evaluator.getResourceRequirements();
|
||||
|
||||
int num_threads = device.numThreads();
|
||||
@ -377,7 +377,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
|
||||
static const int NumDims = traits<Expression>::NumDimensions;
|
||||
|
||||
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
|
||||
typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
|
||||
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
|
||||
typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
|
||||
|
||||
typedef internal::TensorBlockDescriptor<NumDims, IndexType>
|
||||
@ -402,7 +402,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
|
||||
|
||||
for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx; ++block_idx) {
|
||||
TensorBlockDesc desc = tiling.block_mapper.blockDescriptor(block_idx);
|
||||
evaluator.evalBlockV2(desc, scratch);
|
||||
evaluator.evalBlock(desc, scratch);
|
||||
scratch.reset();
|
||||
}
|
||||
};
|
||||
@ -478,7 +478,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
|
||||
static const int NumDims = traits<Expression>::NumDimensions;
|
||||
|
||||
typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
|
||||
typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
|
||||
typedef TensorBlockMapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
|
||||
typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
|
||||
|
||||
typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;
|
||||
@ -510,7 +510,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
|
||||
++block_idx) {
|
||||
TensorBlockDesc desc =
|
||||
ctx->tiling.block_mapper.blockDescriptor(block_idx);
|
||||
ctx->evaluator.evalBlockV2(desc, scratch);
|
||||
ctx->evaluator.evalBlock(desc, scratch);
|
||||
scratch.reset();
|
||||
}
|
||||
};
|
||||
|
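Note (editor): the executor hunks above all drive the same tiled-evaluation loop — query the expression for its block-size requirements, build a block mapper over the output shape, then evaluate one block descriptor at a time. The following is a minimal, hedged sketch of that loop written against the renamed API; it is not part of this commit, the dimensions and block size are made up, and TensorBlockMapper/TensorBlockDescriptor are internal Eigen types, so treat it as illustration rather than supported public usage.

#include <unsupported/Eigen/CXX11/Tensor>

// Hypothetical helper: walk every block of a 100x100 column-major tensor,
// mirroring the executor loop shown in the hunks above.
void visit_all_blocks() {
  using namespace Eigen;
  typedef internal::TensorBlockMapper<2, ColMajor, Index> BlockMapper;
  typedef internal::TensorBlockDescriptor<2, Index> BlockDesc;

  DSizes<Index, 2> dims(100, 100);
  BlockMapper block_mapper(
      dims, {internal::TensorBlockShapeType::kSkewedInnerDims, /*size=*/100});

  for (Index i = 0; i < block_mapper.blockCount(); ++i) {
    BlockDesc desc = block_mapper.blockDescriptor(i);
    // desc.offset() and desc.dimensions() identify one tile; a real executor
    // would call evaluator.evalBlock(desc, scratch) here and then
    // scratch.reset() before moving on to the next tile.
  }
}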
@ -133,7 +133,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
enum {
IsAligned = false,
PacketAccess = true,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -141,7 +141,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {

@ -41,7 +41,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
enum {
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = Options_ & RowMajor ? RowMajor : ColMajor,
CoordAccess = true,
@ -49,7 +49,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

typedef Dimensions_ Dimensions;

@ -96,7 +96,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
enum {
IsAligned = true,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccessV2 = internal::is_arithmetic<CoeffReturnType>::value,
BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = true
@ -110,7 +110,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>

typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
@ -177,15 +177,15 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
return internal::TensorBlockV2ResourceRequirements::any();
internal::TensorBlockResourceRequirements getResourceRequirements() const {
return internal::TensorBlockResourceRequirements::any();
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
assert(m_buffer != NULL);
return TensorBlockV2::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {

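Note (editor): for evaluators that do support block access (ForcedEval above, Generator and Reshaping further down), the renamed interface is the trio shown in these hunks — a TensorBlock typedef, getResourceRequirements(), and block(desc, scratch). A hedged, caller-side sketch follows; the evaluator, descriptor and scratch objects are assumed to exist, and the helper itself is hypothetical rather than part of this commit.

// Hypothetical generic caller of the renamed block interface.
template <typename Evaluator, typename BlockDesc, typename Scratch>
void materialize_one_block(const Evaluator& eval, BlockDesc& desc, Scratch& scratch) {
  // The evaluator decides whether the result is a cheap view into an existing
  // buffer or a copy materialized into scratch/destination memory; callers can
  // inspect block.kind() (e.g. TensorBlockKind::kView) to tell the cases apart.
  typename Evaluator::TensorBlock block = eval.block(desc, scratch);
  (void)block;
}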
@ -157,7 +157,7 @@ struct IsVectorizable<GpuDevice, Expression> {
// Tiled evaluation strategy.
enum TiledEvaluation {
Off = 0, // tiled evaluation is not supported
On = 1, // still work in progress (see TensorBlockV2.h)
On = 1, // still work in progress (see TensorBlock.h)
};

template <typename Device, typename Expression>
@ -165,12 +165,12 @@ struct IsTileable {
// Check that block evaluation is supported and it's a preferred option (at
// least one sub-expression has much faster block evaluation, e.g.
// broadcasting).
static const bool BlockAccessV2 =
TensorEvaluator<Expression, Device>::BlockAccessV2 &&
static const bool BlockAccess =
TensorEvaluator<Expression, Device>::BlockAccess &&
TensorEvaluator<Expression, Device>::PreferBlockAccess;

static const TiledEvaluation value =
BlockAccessV2 ? TiledEvaluation::On : TiledEvaluation::Off;
BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off;
};

template <typename Expression, typename Device,

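Note (editor): the IsTileable trait above is what the executors consult when choosing between scalar/vectorized and tiled evaluation. A small illustrative check, assuming these declarations live in Eigen::internal as in the hunk above (the helper itself is hypothetical):

template <typename Device, typename Expression>
constexpr bool tiled_evaluation_enabled() {
  // True only when the whole expression supports block access and at least one
  // sub-expression actually prefers it, per IsTileable above.
  return Eigen::internal::IsTileable<Device, Expression>::value ==
         Eigen::internal::TiledEvaluation::On;
}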
@ -93,7 +93,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccessV2 = true,
BlockAccess = true,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -108,7 +108,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>

typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -165,10 +165,10 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
internal::TensorBlockResourceRequirements getResourceRequirements() const {
const size_t target_block_size = numext::maxi<size_t>(
1, m_device.firstLevelCacheSize() / sizeof(Scalar));
return {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
return {internal::TensorBlockShapeType::kSkewedInnerDims,
target_block_size};
}

@ -179,8 +179,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
Index count;
};

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
static const bool is_col_major =
static_cast<int>(Layout) == static_cast<int>(ColMajor);
@ -206,8 +206,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
eigen_assert(it[0].stride == 1);

// Prepare storage for the materialized generator result.
const typename TensorBlockV2::Storage block_storage =
TensorBlockV2::prepareStorage(desc, scratch);
const typename TensorBlock::Storage block_storage =
TensorBlock::prepareStorage(desc, scratch);

CoeffReturnType* block_buffer = block_storage.data();

@ -231,7 +231,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -239,7 +239,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)

@ -92,7 +92,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -100,7 +100,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -119,7 +119,7 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
CoordAccess = false, // to be implemented
@ -127,7 +127,7 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -198,14 +198,14 @@ template<typename ArgType, typename Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
CoordAccess = false // to be implemented
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -138,7 +138,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
// For trivial reshapes with raw access to underlying data we will provide
// zero overhead block access.
// TODO(ezhulenev): Consider adding block access without raw access?
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess &&
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess &&
NumInputDims > 0 && NumOutputDims > 0,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@ -155,7 +155,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
typedef
typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -199,8 +199,8 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
return internal::TensorBlockV2ResourceRequirements::any();
internal::TensorBlockResourceRequirements getResourceRequirements() const {
return internal::TensorBlockResourceRequirements::any();
}

// required in block(OutputTensorBlock* output_block) const
@ -212,8 +212,8 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
Index count;
};

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
eigen_assert(m_impl.data() != NULL);
eigen_assert((kind == Runtime) ||
@ -223,12 +223,12 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
if (kind == OneByN || kind == NByOne) {
// We can guarantee at compile time that block is just a contiguous slice
// of the underlying expression memory buffer.
return TensorBlockV2(internal::TensorBlockKind::kView,
return TensorBlock(internal::TensorBlockKind::kView,
m_impl.data() + desc.offset(), desc.dimensions());
} else {
// This will do additional runtime checks, and in the end it might be also
// a view, or it might be a block materialized in the temporary buffer.
return TensorBlockV2::materialize(m_impl.data(), m_dimensions, desc,
return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
scratch);
}
}
@ -264,7 +264,7 @@ template<typename NewDimensions, typename ArgType, typename Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -297,7 +297,7 @@ template<typename NewDimensions, typename ArgType, typename Device>
}

template <typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
assert(this->m_impl.data() != NULL);

@ -456,7 +456,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// slice offsets and sizes.
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -470,8 +470,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

// Tensor slicing does not change the block type.
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
TensorBlockV2;
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -547,7 +547,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
}
// Use memcpy if it's going to be faster than using the regular evaluation.
const MemcpyTriggerForSlicing<Index, Device, BlockAccessV2> trigger(m_device);
const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
@ -633,19 +633,19 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
internal::TensorBlockResourceRequirements getResourceRequirements() const {
const size_t target_block_size =
numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
return internal::TensorBlockV2ResourceRequirements::merge(
{internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
return internal::TensorBlockResourceRequirements::merge(
{internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
m_impl.getResourceRequirements());
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
TensorBlockV2 block = m_impl.blockV2(arg_desc, scratch);
TensorBlock block = m_impl.block(arg_desc, scratch);
if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
return block;
}
@ -745,7 +745,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -823,11 +823,11 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
}
}

template<typename TensorBlockV2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
const TensorBlockDesc& desc, const TensorBlockV2& block) {
template<typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
this->m_impl.writeBlockV2(arg_desc, block);
this->m_impl.writeBlock(arg_desc, block);
}
};

@ -935,14 +935,14 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
// slice offsets and sizes.
IsAligned = false,
PacketAccess = false,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -1116,7 +1116,7 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
enum {
IsAligned = false,
PacketAccess = false,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
@ -1124,7 +1124,7 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -98,7 +98,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
enum {
IsAligned = true,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = true,
@ -113,7 +113,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@ -228,20 +228,20 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
internal::TensorBlockResourceRequirements getResourceRequirements() const {
const size_t target_block_size =
numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
return internal::TensorBlockV2ResourceRequirements::merge(
{internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
return internal::TensorBlockResourceRequirements::merge(
{internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
m_impl.getResourceRequirements());
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
// If one of the dimensions is zero, return empty block view.
if (desc.size() == 0) {
return TensorBlockV2(internal::TensorBlockKind::kView, NULL,
return TensorBlock(internal::TensorBlockKind::kView, NULL,
desc.dimensions());
}

@ -355,8 +355,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

// Prepare storage for the materialized padding result.
const typename TensorBlockV2::Storage block_storage =
TensorBlockV2::prepareStorage(desc, scratch);
const typename TensorBlock::Storage block_storage =
TensorBlock::prepareStorage(desc, scratch);

// Iterate copying data from `m_impl.data()` to the output buffer.
for (Index size = 0; size < output_size; size += output_inner_dim_size) {

@ -96,7 +96,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -104,7 +104,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -584,7 +584,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
enum {
IsAligned = false,
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -594,7 +594,7 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;

@ -141,7 +141,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
enum {
IsAligned = false,
PacketAccess = false,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = PlainObjectType::Layout,
CoordAccess = false, // to be implemented
@ -149,7 +149,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===------------------------------------------------------------------===//

EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {
@ -377,7 +377,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
enum {
IsAligned = false,
PacketAccess = false,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorRef<Derived>::Layout,
CoordAccess = false, // to be implemented
@ -385,7 +385,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&)
@ -430,13 +430,13 @@ struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<cons
enum {
IsAligned = false,
PacketAccess = false,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
RawAccess = false
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d)

@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = NumDims > 0,
BlockAccess = NumDims > 0,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -128,12 +128,12 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
ArgTensorBlock;

typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
@ -245,15 +245,15 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
internal::TensorBlockResourceRequirements getResourceRequirements() const {
const size_t target_block_size =
numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
return {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
return {internal::TensorBlockShapeType::kSkewedInnerDims,
target_block_size};
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
// TODO(ezhulenev): If underlying tensor expression supports and prefers
// block evaluation we must use it. Currently we use coeff and packet
@ -322,8 +322,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
const Index inner_dim_size = it[effective_inner_dim].size;

// Prepare storage for the materialized reverse result.
const typename TensorBlockV2::Storage block_storage =
TensorBlockV2::prepareStorage(desc, scratch);
const typename TensorBlock::Storage block_storage =
TensorBlock::prepareStorage(desc, scratch);
CoeffReturnType* block_buffer = block_storage.data();

while (it[NumDims - 1].count < it[NumDims - 1].size) {
@ -433,7 +433,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -449,7 +449,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

@ -99,7 +99,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -107,7 +107,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,

@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -130,7 +130,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>

typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
Layout, Index>
TensorBlockV2;
TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
@ -245,7 +245,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
internal::TensorBlockResourceRequirements getResourceRequirements() const {
static const int inner_dim =
Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;

@ -254,23 +254,23 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>

const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
return {inner_dim_shuffled
? internal::TensorBlockV2ShapeType::kUniformAllDims
: internal::TensorBlockV2ShapeType::kSkewedInnerDims,
? internal::TensorBlockShapeType::kUniformAllDims
: internal::TensorBlockShapeType::kSkewedInnerDims,
target_block_size};
}

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool root_of_expr_ast = false) const {
assert(m_impl.data() != NULL);

typedef internal::TensorBlockIOV2<ScalarNoConst, Index, NumDims, Layout>
typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
TensorBlockIO;
typedef typename TensorBlockIO::Dst TensorBlockIODst;
typedef typename TensorBlockIO::Src TensorBlockIOSrc;

const typename TensorBlockV2::Storage block_storage =
TensorBlockV2::prepareStorage(
const typename TensorBlock::Storage block_storage =
TensorBlock::prepareStorage(
desc, scratch, /*allow_strided_storage=*/root_of_expr_ast);

typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
@ -380,7 +380,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false
@ -414,12 +414,12 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
}
}

template <typename TensorBlockV2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
const TensorBlockDesc& desc, const TensorBlockV2& block) {
template <typename TensorBlock>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
const TensorBlockDesc& desc, const TensorBlock& block) {
eigen_assert(this->m_impl.data() != NULL);

typedef internal::TensorBlockIOV2<ScalarNoConst, Index, NumDims, Layout>
typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
TensorBlockIO;
typedef typename TensorBlockIO::Dst TensorBlockIODst;
typedef typename TensorBlockIO::Src TensorBlockIOSrc;
@ -434,7 +434,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);

typedef internal::TensorBlockAssignment<
ScalarNoConst, NumDims, typename TensorBlockV2::XprType, Index>
ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
TensorBlockAssignment;

TensorBlockAssignment::Run(

@ -114,7 +114,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
@ -122,7 +122,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -97,7 +97,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

@ -183,7 +183,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccessV2 = false,
BlockAccess = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -191,7 +191,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
};

//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
typedef internal::TensorBlockNotImplemented TensorBlock;
//===--------------------------------------------------------------------===//

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) :

@ -19,7 +19,7 @@ using Eigen::Tensor;
using Eigen::Index;
using Eigen::RowMajor;
using Eigen::ColMajor;
using Eigen::internal::TensorBlockV2ShapeType;
using Eigen::internal::TensorBlockShapeType;


template<typename T>
@ -27,10 +27,10 @@ static const T& choose(int layout, const T& col, const T& row) {
return layout == ColMajor ? col : row;
}

static TensorBlockV2ShapeType RandomShape() {
static TensorBlockShapeType RandomShape() {
return internal::random<bool>()
? TensorBlockV2ShapeType::kUniformAllDims
: TensorBlockV2ShapeType::kSkewedInnerDims;
? TensorBlockShapeType::kUniformAllDims
: TensorBlockShapeType::kSkewedInnerDims;
}

template <int NumDims>
@ -67,13 +67,13 @@ static void Debug(DSizes<Index, NumDims> dims) {
template <int Layout>
static void test_block_mapper_sanity()
{
typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<2, Layout> TensorBlockMapper;

DSizes<Index, 2> tensor_dims(100, 100);

// Test uniform blocks.
TensorBlockMapper uniform_block_mapper(
tensor_dims, {TensorBlockV2ShapeType::kUniformAllDims, 100});
tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100});

VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);
@ -85,7 +85,7 @@ static void test_block_mapper_sanity()

// Test skewed to inner dims blocks.
TensorBlockMapper skewed_block_mapper(
tensor_dims, {TensorBlockV2ShapeType::kSkewedInnerDims, 100});
tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100});

VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);
@ -121,7 +121,7 @@ static void UpdateCoeffSet(

template <typename T, int NumDims, int Layout>
static void test_block_mapper_maps_every_element() {
typedef internal::TensorBlockV2Mapper<NumDims, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<NumDims, Layout> TensorBlockMapper;

DSizes<Index, NumDims> dims = RandomDims<NumDims>();
DSizes<Index, NumDims> strides = internal::strides<Layout>(dims);
@ -227,14 +227,14 @@ template <int Layout>
static void test_uniform_block_shape()
{
typedef internal::TensorBlockDescriptor<5> TensorBlock;
typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<5, Layout> TensorBlockMapper;

{
// Test shape 'UniformAllDims' with uniform 'max_coeff count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
for (int i = 0; i < 5; ++i) {
@ -249,7 +249,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
@ -261,7 +261,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
@ -277,7 +277,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
@ -289,7 +289,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -305,7 +305,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
@ -318,7 +318,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -334,7 +334,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[0]);
@ -347,7 +347,7 @@ static void test_uniform_block_shape()
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -363,14 +363,14 @@ template <int Layout>
static void test_skewed_inner_dim_block_shape()
{
typedef internal::TensorBlockDescriptor<5> TensorBlock;
typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<5, Layout> TensorBlockMapper;

// Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(10, block.dimensions()[0]);
@ -382,7 +382,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(6, block.dimensions()[4]);
@ -397,7 +397,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
@ -409,7 +409,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -425,7 +425,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
@ -438,7 +438,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -455,7 +455,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
@ -469,7 +469,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -486,7 +486,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(11, block.dimensions()[0]);
@ -499,7 +499,7 @@ static void test_skewed_inner_dim_block_shape()
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper
block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
max_coeff_count});
TensorBlock block = block_mapper.blockDescriptor(0);
VERIFY_IS_EQUAL(7, block.dimensions()[4]);
@ -512,7 +512,7 @@ static void test_skewed_inner_dim_block_shape()
}

template <int Layout>
static void test_empty_dims(const internal::TensorBlockV2ShapeType block_shape)
static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
{
// Test blocking of tensors with zero dimensions:
// - we must not crash on asserts and divisions by zero
@ -520,7 +520,7 @@ static void test_empty_dims(const internal::TensorBlockV2ShapeType block_shape)
// (recipe for overflows/underflows, divisions by zero and NaNs later)
// - total block count must be zero
{
typedef internal::TensorBlockV2Mapper<1, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<1, Layout> TensorBlockMapper;

DSizes<Index, 1> dims(0);
for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
@ -531,7 +531,7 @@ static void test_empty_dims(const internal::TensorBlockV2ShapeType block_shape)
}

{
typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<2, Layout> TensorBlockMapper;

for (int dim1 = 0; dim1 < 3; ++dim1) {
for (int dim2 = 0; dim2 < 3; ++dim2) {
@ -573,8 +573,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
TEST_LAYOUTS_AND_DIMS(float, test_block_mapper_maps_every_element);
TEST_LAYOUTS(test_uniform_block_shape);
TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kUniformAllDims);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kSkewedInnerDims);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
}

#undef TEST_LAYOUTS

@ -61,9 +61,9 @@ static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
|
||||
template <int Layout, int NumDims>
|
||||
static TensorBlockParams<NumDims> SkewedInnerBlock(
|
||||
DSizes<Index, NumDims> dims) {
|
||||
using BlockMapper = internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
|
||||
using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
|
||||
BlockMapper block_mapper(dims,
|
||||
{internal::TensorBlockV2ShapeType::kSkewedInnerDims,
|
||||
{internal::TensorBlockShapeType::kSkewedInnerDims,
|
||||
internal::random<size_t>(1, dims.TotalSize())});
|
||||
|
||||
Index total_blocks = block_mapper.blockCount();
|
||||
@ -158,7 +158,7 @@ static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
|
||||
}
|
||||
|
||||
const bool root_of_expr = internal::random<bool>();
|
||||
auto tensor_block = eval.blockV2(block_params.desc, scratch, root_of_expr);
|
||||
auto tensor_block = eval.block(block_params.desc, scratch, root_of_expr);
|
||||
|
||||
if (tensor_block.kind() == internal::TensorBlockKind::kMaterializedInOutput) {
|
||||
// Copy data from destination buffer.
|
||||
@ -596,7 +596,7 @@ static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
|
||||
tensor.setZero();
|
||||
|
||||
// Use evaluator to write block into a tensor.
|
||||
eval.writeBlockV2(block_params.desc, blk);
|
||||
eval.writeBlock(block_params.desc, blk);
|
||||
|
||||
// Make a copy of the result after assignment.
|
||||
Tensor<T, NumDims, Layout> block_assigned = tensor;
|
||||
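The two hunks above are the evaluator-facing part of the rename: blockV2() becomes block() and writeBlockV2() becomes writeBlock(). A sketch of how those entry points fit together follows; the evaluator, descriptor, scratch allocator and block are taken as template parameters because their concrete types are set up by the VerifyBlockEvaluator/VerifyBlockAssignment helpers named above and are not shown in this diff.

#include <unsupported/Eigen/CXX11/Tensor>

// Sketch only: read one block of an expression through its evaluator and
// write a previously built block back. All parameter types are assumptions
// standing in for the helpers exercised in the hunks above.
template <typename Evaluator, typename Desc, typename Scratch, typename Block>
void sketch_block_roundtrip(Evaluator& eval, Desc& desc, Scratch& scratch,
                            const Block& blk) {
  const bool root_of_expr = Eigen::internal::random<bool>();

  // Evaluate one block; depending on the expression it may come back as a
  // view, a lazy expression, or a buffer materialized in scratch or in the
  // destination.
  auto tensor_block = eval.block(desc, scratch, root_of_expr);
  if (tensor_block.kind() ==
      Eigen::internal::TensorBlockKind::kMaterializedInOutput) {
    // Coefficients were written straight into the destination buffer.
  }

  // Assign a block produced elsewhere back through the evaluator.
  eval.writeBlock(desc, blk);
}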
@ -22,10 +22,10 @@ static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
return DSizes<Index, NumDims>(dims);
}

static internal::TensorBlockV2ShapeType RandomBlockShape() {
static internal::TensorBlockShapeType RandomBlockShape() {
return internal::random<bool>()
? internal::TensorBlockV2ShapeType::kUniformAllDims
: internal::TensorBlockV2ShapeType::kSkewedInnerDims;
? internal::TensorBlockShapeType::kUniformAllDims
: internal::TensorBlockShapeType::kSkewedInnerDims;
}

template <int NumDims>
@ -60,7 +60,7 @@ static Index GetInputIndex(Index output_index,

template <typename T, int NumDims, int Layout>
static void test_block_io_copy_data_from_source_to_target() {
using TensorBlockIO = internal::TensorBlockIOV2<T, Index, NumDims, Layout>;
using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;

@ -74,7 +74,7 @@ static void test_block_io_copy_data_from_source_to_target() {

// Construct a tensor block mapper.
using TensorBlockMapper =
internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
internal::TensorBlockMapper<NumDims, Layout, Index>;
TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
RandomTargetBlockSize(dims)});

@ -145,7 +145,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
// Construct a tensor block mapper.
// NOTE: Tensor block mapper works with shuffled dimensions.
using TensorBlockMapper =
internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
internal::TensorBlockMapper<NumDims, Layout, Index>;
TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
RandomTargetBlockSize(output_tensor_dims)});

@ -169,7 +169,7 @@ static void test_block_io_copy_using_reordered_dimensions() {

// NOTE: Block dimensions are in the same order as output dimensions.

using TensorBlockIO = internal::TensorBlockIOV2<T, Index, NumDims, Layout>;
using TensorBlockIO = internal::TensorBlockIO<T, Index, NumDims, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;

@ -181,7 +181,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
IODst dst(blk_dims, blk_strides, block_data, 0);
IOSrc src(input_strides, input_data, first_coeff_index);

// TODO(ezhulenev): Remove when fully switched to TensorBlockV2.
// TODO(ezhulenev): Remove when fully switched to TensorBlock.
DSizes<int, NumDims> dim_map;
for (int j = 0; j < NumDims; ++j)
dim_map[j] = static_cast<int>(output_to_input_dim_map[j]);
@ -199,7 +199,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
IODst dst(dst_dims, input_strides, output_data, first_coeff_index);
IOSrc src(blk_strides, block_data, 0);

// TODO(ezhulenev): Remove when fully switched to TensorBlockV2.
// TODO(ezhulenev): Remove when fully switched to TensorBlock.
DSizes<int, NumDims> dim_map;
for (int j = 0; j < NumDims; ++j)
dim_map[j] = static_cast<int>(input_to_output_dim_map[j]);
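These hunks rename the block I/O helper; the Dst/Src constructors and the dim_map remapping above are the pieces a caller needs. Below is a sketch of one block copy; the Copy() call itself does not appear in this diff and is an assumption about the TensorBlockIO interface, as are the parameter types.

#include <unsupported/Eigen/CXX11/Tensor>

// Sketch only: copy one block out of a tensor using the renamed TensorBlockIO.
// The Copy() entry point and the exact parameter types are assumptions; the
// Dst/Src construction mirrors the calls visible in the hunks above.
template <typename T, int NumDims, int Layout>
void sketch_block_copy(const Eigen::DSizes<Eigen::Index, NumDims>& blk_dims,
                       const Eigen::DSizes<Eigen::Index, NumDims>& blk_strides,
                       const Eigen::DSizes<Eigen::Index, NumDims>& input_strides,
                       T* block_data, const T* input_data,
                       Eigen::Index first_coeff_index,
                       const Eigen::DSizes<int, NumDims>& dim_map) {
  using TensorBlockIO =
      Eigen::internal::TensorBlockIO<T, Eigen::Index, NumDims, Layout>;
  using IODst = typename TensorBlockIO::Dst;
  using IOSrc = typename TensorBlockIO::Src;

  // Destination is the block buffer (offset 0); source is the original tensor
  // starting at the block's first coefficient.
  IODst dst(blk_dims, blk_strides, block_data, 0);
  IOSrc src(input_strides, input_data, first_coeff_index);

  // Assumed API: copy src into dst, remapping dimensions through dim_map.
  TensorBlockIO::Copy(dst, src, dim_map);
}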
@ -235,7 +235,7 @@ static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() {
float* tensor_data = tensor.data();
float* block_data = block.data();

using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 3, Layout>;
using TensorBlockIO = internal::TensorBlockIO<float, Index, 3, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;

@ -283,7 +283,7 @@ static void test_block_io_copy_using_reordered_dimensions_squeeze() {
float* tensor_data = tensor.data();
float* block_data = block.data();

using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 4, Layout>;
using TensorBlockIO = internal::TensorBlockIO<float, Index, 4, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;

@ -334,7 +334,7 @@ static void test_block_io_zero_stride() {
Tensor<float, 5, Layout> output(output_tensor_dims);
output.setRandom();

using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 5, Layout>;
using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;

@ -360,7 +360,7 @@ static void test_block_io_zero_stride() {

template <int Layout>
static void test_block_io_squeeze_ones() {
using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 5, Layout>;
using TensorBlockIO = internal::TensorBlockIO<float, Index, 5, Layout>;
using IODst = typename TensorBlockIO::Dst;
using IOSrc = typename TensorBlockIO::Src;