diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index 3880e7ed30..b8c5925430 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -418,12 +418,22 @@ class TensorMaterializedBlock {
     if (can_use_direct_access) {
       const Scalar* block_start = data + desc.offset();
-      return TensorMaterializedBlock(internal::TensorBlockKind::kView, block_start,
-                                     desc.dimensions());
+      return TensorMaterializedBlock(internal::TensorBlockKind::kView,
+                                     block_start, desc.dimensions());
     } else {
-      void* mem = scratch.allocate(desc.size() * sizeof(Scalar));
-      Scalar* block_buffer = static_cast<Scalar*>(mem);
+      // Try to reuse destination as an output block buffer.
+      Scalar* block_buffer = desc.template destination<Scalar, Layout>();
+      bool materialized_in_output;
+
+      if (block_buffer != NULL) {
+        materialized_in_output = true;
+
+      } else {
+        materialized_in_output = false;
+        void* mem = scratch.allocate(desc.size() * sizeof(Scalar));
+        block_buffer = static_cast<Scalar*>(mem);
+      }

       typedef internal::TensorBlockIOV2<Scalar, IndexType, NumDims, Layout>
           TensorBlockIO;
@@ -438,8 +448,11 @@ class TensorMaterializedBlock {
       TensorBlockIO::Copy(dst, src);

-      return TensorMaterializedBlock(internal::TensorBlockKind::kMaterializedInScratch,
-                                     block_buffer, desc.dimensions());
+      return TensorMaterializedBlock(
+          materialized_in_output
+              ? internal::TensorBlockKind::kMaterializedInOutput
+              : internal::TensorBlockKind::kMaterializedInScratch,
+          block_buffer, desc.dimensions());
     }
   }
@@ -1141,7 +1154,7 @@ class TensorBlockAssignment {
       it[idx].count = 0;
       it[idx].size = target.dims[dim];
       it[idx].output_stride = target.strides[dim];
-      it[idx].output_span = it[i].output_stride * (it[i].size - 1);
+      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
       idx++;
     }
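The materialize change above boils down to one decision: prefer a caller-provided destination buffer and fall back to scratch memory only when none is offered, recording the choice in the returned block kind. A minimal, self-contained sketch of that decision, using hypothetical stand-in types rather than Eigen's actual descriptor/scratch machinery:

```cpp
#include <cstddef>
#include <utility>
#include <vector>

// Hypothetical stand-ins for TensorBlockDescriptor / TensorBlockScratchAllocator.
enum class BlockKind { kMaterializedInScratch, kMaterializedInOutput };

struct Desc {
  float* destination = nullptr;  // non-null when the caller offers its buffer
  std::size_t size = 0;          // number of coefficients in the block
};

struct Scratch {
  std::vector<float> mem;
  float* allocate(std::size_t n) { mem.resize(n); return mem.data(); }
};

// Prefer the destination buffer; fall back to scratch. The returned kind
// tells the caller whether the data already landed in the output.
std::pair<float*, BlockKind> pickBlockBuffer(Desc& desc, Scratch& scratch) {
  if (desc.destination != nullptr)
    return {desc.destination, BlockKind::kMaterializedInOutput};
  return {scratch.allocate(desc.size), BlockKind::kMaterializedInScratch};
}
```

When the block lands in the output buffer, the executor can skip the final copy, which is the point of threading `kMaterializedInOutput` through the return value.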
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 8860840a7d..20591da332 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -149,7 +149,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    BlockAccessV2 = false,
+    BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
     // Chipping of outer-most dimension is a trivial operation, because we can
     // read and write directly from the underlying tensor using single offset.
     IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                       (static_cast<int>(Layout) == RowMajor && DimId == 0),
@@ -171,7 +171,17 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
       OutputTensorBlock;

   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef internal::TensorBlockDescriptor<NumInputDims, Index>
+      ArgTensorBlockDesc;
+  typedef typename TensorEvaluator<ArgType, Device>::TensorBlockV2
+      ArgTensorBlock;
+
+  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
+                                                     Layout, Index>
+      TensorBlockV2;
   //===--------------------------------------------------------------------===//

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -357,6 +367,72 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     m_impl.block(&input_block);
   }

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+  blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+    const Index chip_dim = m_dim.actualDim();
+
+    DSizes<Index, NumInputDims> input_block_dims;
+    for (int i = 0; i < NumInputDims; ++i) {
+      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
+                          : i > chip_dim ? desc.dimension(i - 1)
+                                         : 1;
+    }
+
+    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);
+
+    // Try to reuse destination buffer for materializing argument block.
+    ScalarNoConst* destination_buffer =
+        desc.template destination<ScalarNoConst, Layout>();
+    if (destination_buffer != NULL) {
+      arg_desc.AddDestinationBuffer(
+          destination_buffer, internal::strides<Layout>(arg_desc.dimensions()),
+          (arg_desc.size() * sizeof(Scalar)));
+    }
+
+    ArgTensorBlock arg_block = m_impl.blockV2(arg_desc, scratch);
+
+    if (arg_block.data() != NULL) {
+      // Forward argument block buffer if possible.
+      return TensorBlockV2(arg_block.kind(), arg_block.data(),
+                           desc.dimensions());
+
+    } else {
+      // Assign argument block expression to a buffer.
+
+      // Try to reuse destination as an output buffer.
+      ScalarNoConst* output_buffer =
+          desc.template destination<ScalarNoConst, Layout>();
+      bool materialized_in_output;
+
+      if (output_buffer != NULL) {
+        materialized_in_output = true;
+
+      } else {
+        materialized_in_output = false;
+        const size_t materialized_output_size = desc.size() * sizeof(Scalar);
+        void* output_scratch_mem = scratch.allocate(materialized_output_size);
+        output_buffer = static_cast<ScalarNoConst*>(output_scratch_mem);
+      }
+
+      typedef internal::TensorBlockAssignment<
+          ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
+          TensorBlockAssignment;
+
+      TensorBlockAssignment::Run(
+          TensorBlockAssignment::target(
+              arg_desc.dimensions(),
+              internal::strides<Layout>(arg_desc.dimensions()),
+              output_buffer),
+          arg_block.expr());
+
+      return TensorBlockV2(
+          materialized_in_output
+              ? internal::TensorBlockKind::kMaterializedInOutput
+              : internal::TensorBlockKind::kMaterializedInScratch,
+          output_buffer, desc.dimensions());
+    }
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
     typename Storage::Type result = constCast(m_impl.data());
     if (isOuterChipping() && result) {
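The index gymnastics in blockV2 reduce to one mapping: a rank N-1 block of the chipped expression reads a rank-N input block with extent 1 along the chipped dimension. A standalone sketch of that mapping, with illustrative names and types (not Eigen's internals):

```cpp
#include <array>
#include <cassert>

// Map block dimensions of a chipped (rank N-1) expression to the dimensions
// of the rank-N input block it reads: extent 1 along the chipped dimension.
template <int NumInputDims>
std::array<long, NumInputDims> chippedToInputDims(
    const std::array<long, NumInputDims - 1>& block_dims, int chip_dim) {
  std::array<long, NumInputDims> input_dims;
  for (int i = 0; i < NumInputDims; ++i) {
    input_dims[i] = i < chip_dim ? block_dims[i]
                  : i > chip_dim ? block_dims[i - 1]
                                 : 1;
  }
  return input_dims;
}

int main() {
  // Chipping dim 1 of a rank-3 tensor: a 4x6 output block reads a 4x1x6
  // input block.
  const std::array<long, 2> block = {4, 6};
  const std::array<long, 3> input = chippedToInputDims<3>(block, 1);
  assert(input[0] == 4 && input[1] == 1 && input[2] == 6);
  return 0;
}
```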
@@ -434,11 +510,12 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
-    IsAligned = false,
-    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
-    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
-    Layout = TensorEvaluator<ArgType, Device>::Layout,
-    RawAccess = false
+    IsAligned = false,
+    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
+    BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
+    Layout = TensorEvaluator<ArgType, Device>::Layout,
+    RawAccess = false
   };

   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
@@ -448,6 +525,10 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
   typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
       OutputTensorBlock;

+  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  //===--------------------------------------------------------------------===//
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : Base(op, device) { }
@@ -539,6 +620,36 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
             input_block_strides, this->m_inputStrides,
             const_cast<ScalarNoConst*>(output_block.data())));
   }
+
+  template <typename TensorBlockV2>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
+      const TensorBlockDesc& desc, const TensorBlockV2& block) {
+    assert(this->m_impl.data() != NULL);
+
+    const Index chip_dim = this->m_dim.actualDim();
+
+    DSizes<Index, NumInputDims> input_block_dims;
+    for (int i = 0; i < NumInputDims; ++i) {
+      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
+                          : i > chip_dim ? desc.dimension(i - 1)
+                                         : 1;
+    }
+
+    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
+                              const typename TensorBlockV2::XprType>
+        TensorBlockExpr;
+
+    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
+                                            TensorBlockExpr, Index>
+        TensorBlockAssign;
+
+    TensorBlockAssign::Run(
+        TensorBlockAssign::target(
+            input_block_dims,
+            internal::strides<Layout>(this->m_impl.dimensions()),
+            this->m_impl.data(), this->srcCoeff(desc.offset())),
+        block.expr().reshape(input_block_dims));
+  }
 };
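Semantically, writeBlockV2 is the block-granularity analogue of assigning through a chip: the incoming rank N-1 block expression is reshaped to a rank-N shape with extent 1 along the chipped dimension and assigned into the strided output. In terms of the public Eigen Tensor API, what one call accomplishes is roughly this (illustrative sizes):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

// Whole-tensor equivalent of a single writeBlockV2 call: write a rank-2
// block through a chip of dimension 1 at offset 2 of a rank-3 tensor.
void chipAssignEquivalent() {
  Eigen::Tensor<float, 3> dst(4, 5, 6);
  dst.setZero();

  Eigen::Tensor<float, 2> block(4, 6);
  block.setRandom();

  dst.chip(/*offset=*/2, /*dim=*/1) = block;
}
```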
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index b1d6687449..b77d8fe84d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -53,18 +53,22 @@ struct TensorEvaluator
     RawAccess = true
   };

-  typedef typename internal::TensorBlock<
-      typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
+  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
+
+  typedef typename internal::TensorBlock<ScalarNoConst, Index, NumCoords, Layout>
       TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
+  typedef typename internal::TensorBlockReader<ScalarNoConst, Index, NumCoords, Layout>
       TensorBlockReader;
-  typedef typename internal::TensorBlockWriter<
-      typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
+  typedef typename internal::TensorBlockWriter<ScalarNoConst, Index, NumCoords, Layout>
       TensorBlockWriter;

   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
+                                                     Layout, Index>
+      TensorBlockV2;
   //===--------------------------------------------------------------------===//

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
@@ -161,6 +165,12 @@ struct TensorEvaluator
     TensorBlockReader::Run(block, m_data);
   }

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+  blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+    assert(m_data != NULL);
+    return TensorBlockV2::materialize(m_data, m_dims, desc, scratch);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
       const TensorBlock& block) {
     assert(m_data != NULL);
@@ -269,11 +279,6 @@ struct TensorEvaluator
   typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

-  typedef internal::TensorBlockIOV2<ScalarNoConst, Index, NumCoords, Layout>
-      TensorBlockIO;
-  typedef typename TensorBlockIO::Dst TensorBlockIODst;
-  typedef typename TensorBlockIO::Src TensorBlockIOSrc;
-
   typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                      Layout, Index>
       TensorBlockV2;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 97ac96db13..6ad6327a65 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -521,6 +521,19 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
   static EIGEN_STRONG_INLINE void run(const Expression& expr,
                                       const ThreadPoolDevice& device) {
     Evaluator evaluator(expr, device);
+
+    // Fall back to the legacy (V1) tiled executor if the expression does not
+    // support block evaluation V2.
+    if (internal::is_same<typename Evaluator::TensorBlockV2,
+                          internal::TensorBlockNotImplemented>::value) {
+      internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
+                               /*Tiling=*/TiledEvaluation::Legacy>::run(expr,
+                                                                        device);
+      evaluator.cleanup();
+      return;
+    }
+
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
     if (needs_assign) {
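The executor change gates the V2 code path on a compile-time inspection of the evaluator's TensorBlockV2 typedef. A self-contained sketch of that dispatch pattern, with stand-in types rather than the actual executor classes:

```cpp
#include <type_traits>

// Stand-ins mirroring the shape of the dispatch in the patch.
struct TensorBlockNotImplemented {};
struct EvaluatorWithV2 { using TensorBlockV2 = int; };
struct EvaluatorWithoutV2 { using TensorBlockV2 = TensorBlockNotImplemented; };

// True when the evaluator does not implement V2 blocks and the executor
// should fall back to the legacy (V1) tiled path.
template <typename Evaluator>
constexpr bool fallBackToV1() {
  return std::is_same<typename Evaluator::TensorBlockV2,
                      TensorBlockNotImplemented>::value;
}

static_assert(!fallBackToV1<EvaluatorWithV2>(), "V2 path taken");
static_assert(fallBackToV1<EvaluatorWithoutV2>(), "falls back to V1");
```

Because the condition is a compile-time constant, the dead branch is eliminated, so expressions that do implement V2 blocks pay nothing for the fallback.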
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index 8d45bd62a2..d98af13552 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -97,21 +97,26 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     IsAligned = true,
     PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
-    BlockAccessV2 = false,
+    BlockAccessV2 = internal::is_arithmetic<CoeffReturnType>::value,
     PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     RawAccess = true
   };

-  typedef typename internal::TensorBlock<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
+  static const int NumDims = internal::traits<ArgType>::NumDimensions;
+
+  typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout>
       TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
+  typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout>
       TensorBlockReader;

   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
+                                                     Layout, Index>
+      TensorBlockV2;
   //===--------------------------------------------------------------------===//

   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
@@ -170,6 +175,12 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     TensorBlockReader::Run(block, m_buffer);
   }

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+  blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+    assert(m_buffer != NULL);
+    return TensorBlockV2::materialize(m_buffer, m_impl.dimensions(), desc,
+                                      scratch);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 5d4b0f061d..c9d78ba9b7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -644,6 +644,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     }
   }

+  // No strides for scalars.
+  if (NumDims == 0) return;
+
   const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
   const Sizes& output_dims = op.sizes();
   if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index f3907be6ed..a0b4e04b1a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -334,8 +334,12 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
           // Want to copy from input.
          (output_inner_dim_size - output_inner_pad_before_size),
           // Can copy from input.
-          (static_cast<Index>(m_impl.dimensions()[inner_dim_idx]) -
-           numext::maxi(input_offsets[inner_dim_idx], Index(0))));
+          numext::maxi(
+              static_cast<Index>(m_impl.dimensions()[inner_dim_idx]) -
+                  (input_offsets[inner_dim_idx] + output_inner_pad_before_size),
+              Index(0)));
+
+      eigen_assert(output_inner_copy_size >= 0);

       // How many values to fill with padding AFTER reading from the input inner
       // dimension.
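The padding fix matters when a block lies past the usable part of the input's inner dimension: the old expression could produce a negative "copy size", while the new one subtracts the leading padding as well and clamps at zero. A worked example with illustrative numbers (my reading of the change, not code from the patch):

```cpp
#include <algorithm>
#include <cassert>

int main() {
  // Input inner dimension of size 3; the block starts at input offset 5,
  // i.e. entirely inside the trailing padding, with no leading padding.
  const long input_dim_size = 3;
  const long input_offset = 5;  // input_offsets[inner_dim_idx]
  const long pad_before = 0;    // output_inner_pad_before_size

  // Before the fix: the "can copy from input" term can go negative.
  const long old_copy_size = input_dim_size - std::max(input_offset, 0L);
  // After the fix: clamped at zero.
  const long new_copy_size =
      std::max(input_dim_size - (input_offset + pad_before), 0L);

  assert(old_copy_size == -2);
  assert(new_copy_size == 0);
  return 0;
}
```

The added eigen_assert documents the invariant the clamp now guarantees.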
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 1dc0a9e2c7..e11092af37 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -82,14 +82,14 @@ static TensorBlockParams<NumDims> SkewedInnerBlock(
       index -= idx * strides[i];
       offsets[i] = idx;
     }
-    offsets[0] = index;
+    if (NumDims > 0) offsets[0] = index;
   } else {
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index idx = index / strides[i];
       index -= idx * strides[i];
       offsets[i] = idx;
     }
-    offsets[NumDims - 1] = index;
+    if (NumDims > 0) offsets[NumDims - 1] = index;
   }

   auto desc = TensorBlockDescriptor<NumDims>(block.first_coeff_index(), sizes);
@@ -333,6 +333,42 @@ static void test_eval_tensor_padding() {
       [&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
 }

+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_chipping() {
+  DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+  Tensor<T, NumDims, Layout> input(dims);
+  input.setRandom();
+
+  Index chip_dim = internal::random<int>(0, NumDims - 1);
+  Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
+
+  DSizes<Index, NumDims - 1> chipped_dims;
+  for (Index i = 0; i < chip_dim; ++i) {
+    chipped_dims[i] = dims[i];
+  }
+  for (Index i = chip_dim + 1; i < NumDims; ++i) {
+    chipped_dims[i - 1] = dims[i];
+  }
+
+  // Block buffer forwarding.
+  VerifyBlockEvaluator<T, NumDims - 1, Layout>(
+      input.chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
+
+  VerifyBlockEvaluator<T, NumDims - 1, Layout>(
+      input.chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
+
+  // Block expression assignment.
+  VerifyBlockEvaluator<T, NumDims - 1, Layout>(
+      input.square().chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
+
+  VerifyBlockEvaluator<T, NumDims - 1, Layout>(
+      input.square().chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
+}
+
 template <int Layout>
 static void test_eval_tensor_reshape_with_bcast() {
   Index dim = internal::random<Index>(1, 100);
@@ -384,8 +420,8 @@ static void test_eval_tensor_forced_eval() {
 // as an assignment to TensorSliceOp (writing a block is identical to
 // assigning one tensor to a slice of another tensor).
-template <typename T, int NumDims, int Layout, typename Expression,
-          typename GenBlockParams>
+template <typename T, int NumDims, int Layout, int NumExprDims = NumDims,
+          typename Expression, typename GenBlockParams>
 static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
                                   Expression expr, GenBlockParams gen_block) {
   using Device = DefaultDevice;
@@ -395,17 +431,17 @@ static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
   auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);

   // Generate a random block, or choose a block that fits in full expression.
-  TensorBlockParams<NumDims> block_params = gen_block();
+  TensorBlockParams<NumExprDims> block_params = gen_block();

   // Generate random data of the selected block size.
-  Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());
+  Tensor<T, NumExprDims, Layout> block(block_params.desc.dimensions());
   block.setRandom();

   // ************************************************************************ //
   // (1) Assignment from a block.

   // Construct a materialize block from a random generated block tensor.
-  internal::TensorMaterializedBlock<T, NumDims, Layout> blk(
+  internal::TensorMaterializedBlock<T, NumExprDims, Layout> blk(
       internal::TensorBlockKind::kView, block.data(), block.dimensions());

   // Reset all underlying tensor values to zero.
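The chipping tests added here all verify one invariant: block-based evaluation of a chipped expression must agree with coefficient-based evaluation. Reduced to the public Tensor API, with illustrative sizes, the property being checked is:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

// Invariant behind VerifyBlockEvaluator for chipping: chip(offset, dim)
// selects the slice input(..., offset, ...) along `dim`.
void chipEquivalence() {
  Eigen::Tensor<float, 3> input(4, 5, 6);
  input.setRandom();

  Eigen::Tensor<float, 2> chipped = input.chip(/*offset=*/2, /*dim=*/1);
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 6; ++j)
      assert(chipped(i, j) == input(i, 2, j));
}
```

The `input.square().chip(...)` variants additionally force the argument block to be an unevaluated expression, exercising the assignment-to-buffer branch of blockV2 rather than plain buffer forwarding.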
@@ -478,6 +514,37 @@ static void test_assign_to_tensor_reshape() {
       [&shuffled]() { return FixedSizeBlock(shuffled); });
 }

+template <typename T, int NumDims, int Layout>
+static void test_assign_to_tensor_chipping() {
+  DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+  Tensor<T, NumDims, Layout> tensor(dims);
+
+  Index chip_dim = internal::random<int>(0, NumDims - 1);
+  Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
+
+  DSizes<Index, NumDims - 1> chipped_dims;
+  for (Index i = 0; i < chip_dim; ++i) {
+    chipped_dims[i] = dims[i];
+  }
+  for (Index i = chip_dim + 1; i < NumDims; ++i) {
+    chipped_dims[i - 1] = dims[i];
+  }
+
+  TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
+
+  VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
+      tensor, map.chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
+
+  VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
+      tensor, map.chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return SkewedInnerBlock<Layout>(chipped_dims); });
+
+  VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
+      tensor, map.chip(chip_offset, chip_dim),
+      [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
+}
+
 // -------------------------------------------------------------------------- //

 #define CALL_SUBTESTS_DIMS_LAYOUTS(NAME) \
@@ -503,12 +570,15 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_broadcast);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_reshape);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_cast);
+  CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_select);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
+  CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_chipping);
   CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
   CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval);

   CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape);
+  CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_chipping);

   // clang-format on
 }
diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
index efae819619..8fb4ba7525 100644
--- a/unsupported/test/cxx11_tensor_executor.cpp
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -180,9 +180,8 @@ static void test_execute_chipping_lvalue(Device d)
                                                                           \
   const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1);   \
                                                                           \
-  /* Generate random data to fill non-chipped dimensions*/                \
   Tensor<T, NumDims, Layout, Index> random(dims);                         \
-  random.setRandom();                                                     \
+  random.setZero();                                                       \
                                                                           \
   Tensor<T, NumDims, Layout, Index> golden(dims);                         \
   golden = random;                                                        \
@@ -716,13 +715,13 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   CALL_SUBTEST_COMBINATIONS_V2(3, test_execute_broadcasting, float, 4);
   CALL_SUBTEST_COMBINATIONS_V2(3, test_execute_broadcasting, float, 5);

-  CALL_SUBTEST_COMBINATIONS_V1(4, test_execute_chipping_rvalue, float, 3);
-  CALL_SUBTEST_COMBINATIONS_V1(4, test_execute_chipping_rvalue, float, 4);
-  CALL_SUBTEST_COMBINATIONS_V1(4, test_execute_chipping_rvalue, float, 5);
+  CALL_SUBTEST_COMBINATIONS_V2(4, test_execute_chipping_rvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS_V2(4, test_execute_chipping_rvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS_V2(4, test_execute_chipping_rvalue, float, 5);

-  CALL_SUBTEST_COMBINATIONS_V1(5, test_execute_chipping_lvalue, float, 3);
-  CALL_SUBTEST_COMBINATIONS_V1(5, test_execute_chipping_lvalue, float, 4);
-  CALL_SUBTEST_COMBINATIONS_V1(5, test_execute_chipping_lvalue, float, 5);
+  CALL_SUBTEST_COMBINATIONS_V2(5, test_execute_chipping_lvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS_V2(5, test_execute_chipping_lvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS_V2(5, test_execute_chipping_lvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS_V1(6, test_execute_shuffle_rvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS_V1(6, test_execute_shuffle_rvalue, float, 4);

@@ -752,10 +751,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   CALL_SUBTEST_COMBINATIONS_V1(11, test_execute_slice_lvalue, float, 4);
   CALL_SUBTEST_COMBINATIONS_V1(11, test_execute_slice_lvalue, float, 5);

-  CALL_SUBTEST_COMBINATIONS_V1(12, test_execute_broadcasting_of_forced_eval, float, 2);
-  CALL_SUBTEST_COMBINATIONS_V1(12, test_execute_broadcasting_of_forced_eval, float, 3);
-  CALL_SUBTEST_COMBINATIONS_V1(12, test_execute_broadcasting_of_forced_eval, float, 4);
-  CALL_SUBTEST_COMBINATIONS_V1(12, test_execute_broadcasting_of_forced_eval, float, 5);
+  CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 2);
+  CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 3);
+  CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 4);
+  CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 5);

   CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 2);
   CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 3);