From 900c7c61bb6abca5b3324c11ba1b45fa3e31c5fa Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 15 Oct 2018 16:52:33 -0700 Subject: [PATCH] Check if it's allowed to squeeze inner dimensions in TensorBlockIO --- .../Eigen/CXX11/src/Tensor/TensorBlock.h | 36 +++++- .../test/cxx11_tensor_block_access.cpp | 113 +++++++++++++++++- 2 files changed, 144 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index a59a5d5b2..91c77b05a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -246,6 +246,8 @@ class TensorBlockIO { typedef TensorBlockCopyOp BlockCopyOp; protected: + typedef array Dimensions; + struct BlockIteratorState { StorageIndex input_stride; StorageIndex output_stride; @@ -262,22 +264,46 @@ class TensorBlockIO { count(0) {} }; + // Compute how many inner dimensions it's allowed to squeeze when doing IO + // between a tensor and a block. It's safe to squeeze inner dimensions, only + // if they are not reordered. + static int NumSqueezableInnerDims(const Dimensions& tensor_to_block_dim_map) { + int num_squeezable_dims = 0; + if (Layout == ColMajor) { + for (int i = 0; i < NumDims; ++i) { + if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++; + else break; + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++; + else break; + } + } + return num_squeezable_dims; + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( const Block& block, StorageIndex first_coeff_index, - const array& tensor_to_block_dim_map, - const array& tensor_strides, const Scalar* src_data, + const Dimensions& tensor_to_block_dim_map, + const Dimensions& tensor_strides, + const Scalar* src_data, Scalar* dst_data) { + // Do not squeeze reordered inner dimensions. 
+ int num_squeezable_dims = NumSqueezableInnerDims(tensor_to_block_dim_map); + // Find the innermost tensor dimension whose size is not 1. This is the // effective inner dim. If all dimensions are of size 1, then fallback to // using the actual innermost dim to avoid out-of-bound access. StorageIndex num_size_one_inner_dims = 0; - for (int i = 0; i < NumDims; ++i) { + for (int i = 0; i < num_squeezable_dims; ++i) { const int dim = cond()(i, NumDims - i - 1); if (block.block_sizes()[tensor_to_block_dim_map[dim]] != 1) { num_size_one_inner_dims = i; break; } } + // Calculate strides and dimensions. const StorageIndex tensor_stride1_dim = cond()( num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1); @@ -286,7 +312,9 @@ class TensorBlockIO { StorageIndex block_inner_dim_size = NumDims == 0 ? 1 : block.block_sizes()[block_dim_for_tensor_stride1_dim]; - for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) { + + // Squeeze multiple inner dims into one for larger inner dim size. + for (Index i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) { const Index dim = cond()(i, NumDims - i - 1); const StorageIndex block_stride = block.block_strides()[tensor_to_block_dim_map[dim]]; diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp index ad12ae557..64b3a1202 100644 --- a/unsupported/test/cxx11_tensor_block_access.cpp +++ b/unsupported/test/cxx11_tensor_block_access.cpp @@ -367,6 +367,116 @@ static void test_block_io_copy_using_reordered_dimensions() { delete[] output_data; } +// This is the special case for reading data with reordering, when dimensions +// before/after reordering are the same. Squeezing reads along inner dimensions +// in this case is illegal, because we reorder innermost dimension. 
+template +static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() +{ + typedef internal::TensorBlock TensorBlock; + typedef internal::TensorBlockReader + TensorBlockReader; + + DSizes tensor_dims; + tensor_dims[0] = 7; + tensor_dims[1] = 9; + tensor_dims[2] = 7; + + DSizes block_dims = tensor_dims; + + DSizes tensor_to_block_dim_map; + tensor_to_block_dim_map[0] = 2; + tensor_to_block_dim_map[1] = 1; + tensor_to_block_dim_map[2] = 0; + + DSizes tensor_strides(ComputeStrides(tensor_dims)); + DSizes block_strides(ComputeStrides(block_dims)); + + const Index tensor_size = tensor_dims.TotalSize(); + float* tensor_data = GenerateRandomData(tensor_size); + float* block_data = new float[tensor_size]; + + TensorBlock block(0, block_dims, block_strides, tensor_strides, block_data); + TensorBlockReader::Run(&block, + 0, + tensor_to_block_dim_map, + tensor_strides, + tensor_data); + + TensorMap > block_tensor(block_data, block_dims); + TensorMap > tensor_tensor(tensor_data, tensor_dims); + + for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) { + for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) { + for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) { + float block_value = block_tensor(d2, d1, d0); + float tensor_value = tensor_tensor(d0, d1, d2); + VERIFY_IS_EQUAL(block_value, tensor_value); + } + } + } + + delete[] block_data; + delete[] tensor_data; +} + +// This is the special case for reading data with reordering, when dimensions +// before/after reordering are the same. Squeezing reads in this case is allowed +// because we reorder outer dimensions. 
+template +static void test_block_io_copy_using_reordered_dimensions_squeeze() +{ + typedef internal::TensorBlock TensorBlock; + typedef internal::TensorBlockReader + TensorBlockReader; + + DSizes tensor_dims; + tensor_dims[0] = 7; + tensor_dims[1] = 5; + tensor_dims[2] = 9; + tensor_dims[3] = 9; + + DSizes block_dims = tensor_dims; + + DSizes tensor_to_block_dim_map; + tensor_to_block_dim_map[0] = 0; + tensor_to_block_dim_map[1] = 1; + tensor_to_block_dim_map[2] = 3; + tensor_to_block_dim_map[3] = 2; + + DSizes tensor_strides(ComputeStrides(tensor_dims)); + DSizes block_strides(ComputeStrides(block_dims)); + + const Index tensor_size = tensor_dims.TotalSize(); + float* tensor_data = GenerateRandomData(tensor_size); + float* block_data = new float[tensor_size]; + + TensorBlock block(0, block_dims, block_strides, tensor_strides, block_data); + TensorBlockReader::Run(&block, + 0, + tensor_to_block_dim_map, + tensor_strides, + tensor_data); + + TensorMap > block_tensor(block_data, block_dims); + TensorMap > tensor_tensor(tensor_data, tensor_dims); + + for (Index d0 = 0; d0 < tensor_dims[0]; ++d0) { + for (Index d1 = 0; d1 < tensor_dims[1]; ++d1) { + for (Index d2 = 0; d2 < tensor_dims[2]; ++d2) { + for (Index d3 = 0; d3 < tensor_dims[3]; ++d3) { + float block_value = block_tensor(d0, d1, d3, d2); + float tensor_value = tensor_tensor(d0, d1, d2, d3); + VERIFY_IS_EQUAL(block_value, tensor_value); + } + } + } + } + + delete[] block_data; + delete[] tensor_data; +} + template class EqualityChecker { @@ -400,7 +510,6 @@ public: } }; - template static void test_block_io_zero_stride() { @@ -1092,6 +1201,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) { TEST_LAYOUTS_AND_DIMS(Data, test_block_io_copy_data_from_source_to_target); TEST_LAYOUTS_AND_DIMS(float, test_block_io_copy_using_reordered_dimensions); TEST_LAYOUTS_AND_DIMS(Data, test_block_io_copy_using_reordered_dimensions); + TEST_LAYOUTS(test_block_io_copy_using_reordered_dimensions_do_not_squeeze); + 
TEST_LAYOUTS(test_block_io_copy_using_reordered_dimensions_squeeze); TEST_LAYOUTS(test_block_io_zero_stride); TEST_LAYOUTS(test_block_io_squeeze_ones); TEST_LAYOUTS_AND_DIMS(float, test_block_cwise_unary_io_basic);