// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // clang-format off #include "main.h" #include // clang-format on using Eigen::internal::TensorBlockDescriptor; using Eigen::internal::TensorExecutor; // -------------------------------------------------------------------------- // // Utility functions to generate random tensors, blocks, and evaluate them. template static DSizes RandomDims(Index min, Index max) { DSizes dims; for (int i = 0; i < NumDims; ++i) { dims[i] = internal::random(min, max); } return DSizes(dims); } // Block offsets and extents allows to construct a TensorSlicingOp corresponding // to a TensorBlockDescriptor. template struct TensorBlockParams { DSizes offsets; DSizes sizes; TensorBlockDescriptor desc; }; template static TensorBlockParams RandomBlock(DSizes dims, Index min, Index max) { // Choose random offsets and sizes along all tensor dimensions. DSizes offsets(RandomDims(min, max)); DSizes sizes(RandomDims(min, max)); // Make sure that offset + size do not overflow dims. for (int i = 0; i < NumDims; ++i) { offsets[i] = numext::mini(dims[i] - 1, offsets[i]); sizes[i] = numext::mini(sizes[i], dims[i] - offsets[i]); } Index offset = 0; DSizes strides = Eigen::internal::strides(dims); for (int i = 0; i < NumDims; ++i) { offset += strides[i] * offsets[i]; } return {offsets, sizes, TensorBlockDescriptor(offset, sizes)}; } // Generate block with block sizes skewed towards inner dimensions. This type of // block is required for evaluating broadcast expressions. template static TensorBlockParams SkewedInnerBlock( DSizes dims) { using BlockMapper = internal::TensorBlockMapper; BlockMapper block_mapper(dims, internal::TensorBlockShapeType::kSkewedInnerDims, internal::random(1, dims.TotalSize())); Index total_blocks = block_mapper.total_block_count(); Index block_index = internal::random(0, total_blocks - 1); auto block = block_mapper.GetBlockForIndex(block_index, nullptr); DSizes sizes = block.block_sizes(); auto strides = internal::strides(dims); DSizes offsets; // Compute offsets for the first block coefficient. Index index = block.first_coeff_index(); if (static_cast(Layout) == static_cast(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / strides[i]; index -= idx * strides[i]; offsets[i] = idx; } offsets[0] = index; } else { for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / strides[i]; index -= idx * strides[i]; offsets[i] = idx; } offsets[NumDims - 1] = index; } auto desc = TensorBlockDescriptor(block.first_coeff_index(), sizes); return {offsets, sizes, desc}; } template static TensorBlockParams FixedSizeBlock(DSizes dims) { DSizes offsets; for (int i = 0; i < NumDims; ++i) offsets[i] = 0; return {offsets, dims, TensorBlockDescriptor(0, dims)}; } inline Eigen::IndexList> NByOne(Index n) { Eigen::IndexList> ret; ret.set(0, n); return ret; } inline Eigen::IndexList, Index> OneByM(Index m) { Eigen::IndexList, Index> ret; ret.set(1, m); return ret; } // -------------------------------------------------------------------------- // // Verify that block expression evaluation produces the same result as a // TensorSliceOp (reading a tensor block is same to taking a tensor slice). template static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) { using Device = DefaultDevice; auto d = Device(); // Scratch memory allocator for block evaluation. typedef internal::TensorBlockScratchAllocator TensorBlockScratch; TensorBlockScratch scratch(d); // TensorEvaluator is needed to produce tensor blocks of the expression. auto eval = TensorEvaluator(expr, d); eval.evalSubExprsIfNeeded(nullptr); // Choose a random offsets, sizes and TensorBlockDescriptor. TensorBlockParams block_params = gen_block(); // Evaluate TensorBlock expression into a tensor. Tensor block(block_params.desc.dimensions()); // Maybe use this tensor as a block desc destination. Tensor dst(block_params.desc.dimensions()); if (internal::random()) { block_params.desc.template AddDestinationBuffer( dst.data(), internal::strides(dst.dimensions()), dst.dimensions().TotalSize() * sizeof(T)); } auto tensor_block = eval.blockV2(block_params.desc, scratch); auto b_expr = tensor_block.expr(); // We explicitly disable vectorization and tiling, to run a simple coefficient // wise assignment loop, because it's very simple and should be correct. using BlockAssign = TensorAssignOp; using BlockExecutor = TensorExecutor; BlockExecutor::run(BlockAssign(block, b_expr), d); // Cleanup temporary buffers owned by a tensor block. tensor_block.cleanup(); // Compute a Tensor slice corresponding to a Tensor block. Tensor slice(block_params.desc.dimensions()); auto s_expr = expr.slice(block_params.offsets, block_params.sizes); // Explicitly use coefficient assignment to evaluate slice expression. using SliceAssign = TensorAssignOp; using SliceExecutor = TensorExecutor; SliceExecutor::run(SliceAssign(slice, s_expr), d); // Tensor block and tensor slice must be the same. for (Index i = 0; i < block.dimensions().TotalSize(); ++i) { VERIFY_IS_EQUAL(block.coeff(i), slice.coeff(i)); } } // -------------------------------------------------------------------------- // template static void test_eval_tensor_block() { DSizes dims = RandomDims(10, 20); Tensor input(dims); input.setRandom(); // Identity tensor expression transformation. VerifyBlockEvaluator( input, [&dims]() { return RandomBlock(dims, 1, 10); }); } template static void test_eval_tensor_unary_expr_block() { DSizes dims = RandomDims(10, 20); Tensor input(dims); input.setRandom(); VerifyBlockEvaluator( input.square(), [&dims]() { return RandomBlock(dims, 1, 10); }); } template static void test_eval_tensor_binary_expr_block() { DSizes dims = RandomDims(10, 20); Tensor lhs(dims), rhs(dims); lhs.setRandom(); rhs.setRandom(); VerifyBlockEvaluator( lhs + rhs, [&dims]() { return RandomBlock(dims, 1, 10); }); } template static void test_eval_tensor_binary_with_unary_expr_block() { DSizes dims = RandomDims(10, 20); Tensor lhs(dims), rhs(dims); lhs.setRandom(); rhs.setRandom(); VerifyBlockEvaluator( (lhs.square() + rhs.square()).sqrt(), [&dims]() { return RandomBlock(dims, 1, 10); }); } template static void test_eval_tensor_broadcast() { DSizes dims = RandomDims(1, 10); Tensor input(dims); input.setRandom(); DSizes bcast = RandomDims(1, 5); DSizes bcasted_dims; for (int i = 0; i < NumDims; ++i) bcasted_dims[i] = dims[i] * bcast[i]; VerifyBlockEvaluator( input.broadcast(bcast), [&bcasted_dims]() { return SkewedInnerBlock(bcasted_dims); }); VerifyBlockEvaluator( input.broadcast(bcast), [&bcasted_dims]() { return FixedSizeBlock(bcasted_dims); }); // Check that desc.destination() memory is not shared between two broadcast // materializations. VerifyBlockEvaluator( input.broadcast(bcast) + input.square().broadcast(bcast), [&bcasted_dims]() { return SkewedInnerBlock(bcasted_dims); }); } template static void test_eval_tensor_reshape() { DSizes dims = RandomDims(1, 10); DSizes shuffled = dims; std::shuffle(&shuffled[0], &shuffled[NumDims - 1], std::mt19937(g_seed)); Tensor input(dims); input.setRandom(); VerifyBlockEvaluator( input.reshape(shuffled), [&shuffled]() { return RandomBlock(shuffled, 1, 10); }); VerifyBlockEvaluator( input.reshape(shuffled), [&shuffled]() { return SkewedInnerBlock(shuffled); }); } template static void test_eval_tensor_cast() { DSizes dims = RandomDims(10, 20); Tensor input(dims); input.setRandom(); VerifyBlockEvaluator( input.template cast().template cast(), [&dims]() { return RandomBlock(dims, 1, 10); }); } template static void test_eval_tensor_select() { DSizes dims = RandomDims(10, 20); Tensor lhs(dims); Tensor rhs(dims); Tensor cond(dims); lhs.setRandom(); rhs.setRandom(); cond.setRandom(); VerifyBlockEvaluator(cond.select(lhs, rhs), [&dims]() { return RandomBlock(dims, 1, 20); }); } template static void test_eval_tensor_padding() { const int inner_dim = Layout == static_cast(ColMajor) ? 0 : NumDims - 1; DSizes dims = RandomDims(10, 20); Tensor input(dims); input.setRandom(); DSizes pad_before = RandomDims(0, 4); DSizes pad_after = RandomDims(0, 4); array, NumDims> paddings; for (int i = 0; i < NumDims; ++i) { paddings[i] = std::make_pair(pad_before[i], pad_after[i]); } // Test squeezing reads from inner dim. if (internal::random()) { pad_before[inner_dim] = 0; pad_after[inner_dim] = 0; paddings[inner_dim] = std::make_pair(0, 0); } DSizes padded_dims; for (int i = 0; i < NumDims; ++i) { padded_dims[i] = dims[i] + pad_before[i] + pad_after[i]; } VerifyBlockEvaluator( input.pad(paddings), [&padded_dims]() { return FixedSizeBlock(padded_dims); }); VerifyBlockEvaluator( input.pad(paddings), [&padded_dims]() { return RandomBlock(padded_dims, 1, 10); }); VerifyBlockEvaluator( input.pad(paddings), [&padded_dims]() { return SkewedInnerBlock(padded_dims); }); } template static void test_eval_tensor_reshape_with_bcast() { Index dim = internal::random(1, 100); Tensor lhs(1, dim); Tensor rhs(dim, 1); lhs.setRandom(); rhs.setRandom(); auto reshapeLhs = NByOne(dim); auto reshapeRhs = OneByM(dim); auto bcastLhs = OneByM(dim); auto bcastRhs = NByOne(dim); DSizes dims(dim, dim); VerifyBlockEvaluator( lhs.reshape(reshapeLhs).broadcast(bcastLhs) + rhs.reshape(reshapeRhs).broadcast(bcastRhs), [dims]() { return SkewedInnerBlock(dims); }); } template static void test_eval_tensor_forced_eval() { Index dim = internal::random(1, 100); Tensor lhs(dim, 1); Tensor rhs(1, dim); lhs.setRandom(); rhs.setRandom(); auto bcastLhs = OneByM(dim); auto bcastRhs = NByOne(dim); DSizes dims(dim, dim); VerifyBlockEvaluator( (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), [dims]() { return SkewedInnerBlock(dims); }); VerifyBlockEvaluator( (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims), [dims]() { return RandomBlock(dims, 1, 50); }); } // -------------------------------------------------------------------------- // // Verify that assigning block to a Tensor expression produces the same result // as an assignment to TensorSliceOp (writing a block is is identical to // assigning one tensor to a slice of another tensor). template static void VerifyBlockAssignment(Tensor& tensor, Expression expr, GenBlockParams gen_block) { using Device = DefaultDevice; auto d = Device(); // We use tensor evaluator as a target for block and slice assignments. auto eval = TensorEvaluator(expr, d); // Generate a random block, or choose a block that fits in full expression. TensorBlockParams block_params = gen_block(); // Generate random data of the selected block size. Tensor block(block_params.desc.dimensions()); block.setRandom(); // ************************************************************************ // // (1) Assignment from a block. // Construct a materialize block from a random generated block tensor. internal::TensorMaterializedBlock blk( internal::TensorBlockKind::kView, block.data(), block.dimensions()); // Reset all underlying tensor values to zero. tensor.setZero(); // Use evaluator to write block into a tensor. eval.writeBlockV2(block_params.desc, blk); // Make a copy of the result after assignment. Tensor block_assigned = tensor; // ************************************************************************ // // (2) Assignment to a slice // Reset all underlying tensor values to zero. tensor.setZero(); // Assign block to a slice of original expression auto s_expr = expr.slice(block_params.offsets, block_params.sizes); // Explicitly use coefficient assignment to evaluate slice expression. using SliceAssign = TensorAssignOp; using SliceExecutor = TensorExecutor; SliceExecutor::run(SliceAssign(s_expr, block), d); // Make a copy of the result after assignment. Tensor slice_assigned = tensor; for (Index i = 0; i < tensor.dimensions().TotalSize(); ++i) { VERIFY_IS_EQUAL(block_assigned.coeff(i), slice_assigned.coeff(i)); } } // -------------------------------------------------------------------------- // template static void test_assign_to_tensor() { DSizes dims = RandomDims(10, 20); Tensor tensor(dims); TensorMap> map(tensor.data(), dims); VerifyBlockAssignment( tensor, map, [&dims]() { return RandomBlock(dims, 10, 20); }); VerifyBlockAssignment( tensor, map, [&dims]() { return FixedSizeBlock(dims); }); } template static void test_assign_to_tensor_reshape() { DSizes dims = RandomDims(10, 20); Tensor tensor(dims); TensorMap> map(tensor.data(), dims); DSizes shuffled = dims; std::shuffle(&shuffled[0], &shuffled[NumDims - 1], std::mt19937(g_seed)); VerifyBlockAssignment( tensor, map.reshape(shuffled), [&shuffled]() { return RandomBlock(shuffled, 1, 10); }); VerifyBlockAssignment( tensor, map.reshape(shuffled), [&shuffled]() { return SkewedInnerBlock(shuffled); }); VerifyBlockAssignment( tensor, map.reshape(shuffled), [&shuffled]() { return FixedSizeBlock(shuffled); }); } // -------------------------------------------------------------------------- // #define CALL_SUBTESTS_DIMS_LAYOUTS(NAME) \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())) #define CALL_SUBTESTS_LAYOUTS(NAME) \ CALL_SUBTEST((NAME())); \ CALL_SUBTEST((NAME())) EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) { // clang-format off CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_block); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_unary_expr_block); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_binary_expr_block); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_binary_with_unary_expr_block); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_broadcast); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_reshape); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_cast); CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding); CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast); CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval); CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor); CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape); // clang-format on }