From 2fa2b617a97ba254343c7c1635a9b6d617a100e8 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Fri, 24 Feb 2017 19:16:24 +0000
Subject: [PATCH] Adding TensorVolumePatchOP.h for sycl

---
 .../TensorSyclConvertToDeviceExpression.h     |  10 +
 .../src/Tensor/TensorSyclExprConstructor.h    |  22 ++
 .../src/Tensor/TensorSyclExtractAccessor.h    |  15 ++
 .../src/Tensor/TensorSyclExtractFunctors.h    |  44 ++++
 .../CXX11/src/Tensor/TensorSyclLeafCount.h    |  13 +-
 .../src/Tensor/TensorSyclPlaceHolderExpr.h    |  14 ++
 .../CXX11/src/Tensor/TensorVolumePatch.h      |  30 ++-
 unsupported/test/CMakeLists.txt               |   5 +-
 .../test/cxx11_tensor_volume_patchOP_sycl.cpp | 222 ++++++++++++++++++
 9 files changed, 359 insertions(+), 16 deletions(-)
 create mode 100644 unsupported/test/cxx11_tensor_volume_patchOP_sycl.cpp

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
index 5b4a9af9f..dd63a2e2f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@@ -177,6 +177,16 @@ KERNELBROKERCONVERTIMAGEPATCHOP()
 #undef KERNELBROKERCONVERTIMAGEPATCHOP
 
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorVolumePatchOp
+#define KERNELBROKERCONVERTVOLUMEPATCHOP(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>\
+struct ConvertToDeviceExpression<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType> >{\
+  typedef CVQual TensorVolumePatchOp<Planes, Rows, Cols, typename ConvertToDeviceExpression<XprType>::Type> Type;\
+};
+KERNELBROKERCONVERTVOLUMEPATCHOP(const)
+KERNELBROKERCONVERTVOLUMEPATCHOP()
+#undef KERNELBROKERCONVERTVOLUMEPATCHOP
+
 } // namespace internal
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
index 57a10d06b..117b368ec 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@@ -404,6 +404,28 @@ SYCLTENSORIMAGEPATCHOPEXPR(const)
 SYCLTENSORIMAGEPATCHOPEXPR()
 #undef SYCLTENSORIMAGEPATCHOPEXPR
 
+// TensorVolumePatchOp
+#define SYCLTENSORVOLUMEPATCHOPEXPR(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename OrigXprType, typename XprType, typename... Params>\
+struct ExprConstructor<CVQual TensorVolumePatchOp<Planes, Rows, Cols, OrigXprType>, CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Params... > {\
+  typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
+  typedef CVQual TensorVolumePatchOp<Planes, Rows, Cols, typename my_xpr_type::Type> Type;\
+  my_xpr_type xprExpr;\
+  Type expr;\
+  template <typename FuncDetector>\
+  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+  : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.m_patch_planes, funcD.m_patch_rows, funcD.m_patch_cols, funcD.m_plane_strides, funcD.m_row_strides, funcD.m_col_strides,\
+            funcD.m_in_plane_strides, funcD.m_in_row_strides, funcD.m_in_col_strides, funcD.m_plane_inflate_strides, funcD.m_row_inflate_strides, funcD.m_col_inflate_strides,\
+            funcD.m_padding_top_z, funcD.m_padding_bottom_z, funcD.m_padding_top, funcD.m_padding_bottom, funcD.m_padding_left, funcD.m_padding_right, funcD.m_padding_value,\
+            funcD.m_padding_type, funcD.m_padding_explicit){\
+  }\
+};
+
+SYCLTENSORVOLUMEPATCHOPEXPR(const)
+SYCLTENSORVOLUMEPATCHOPEXPR()
+#undef SYCLTENSORVOLUMEPATCHOPEXPR
+
+
 // TensorLayoutSwapOp
 #define SYCLTENSORLAYOUTSWAPOPEXPR(CVQual)\
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
index 2be6f3710..4a6322d44 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@@ -240,6 +240,21 @@ SYCLTENSORIMAGEPATCHOPEXTACC()
 #undef SYCLTENSORIMAGEPATCHOPEXTACC
 
+
+// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorVolumePatchOp.
+#define SYCLTENSORVOLUMEPATCHOPEXTACC(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Dev> >{\
+  static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Dev>& eval)\
+  RETURN_CPP11(AccessorConstructor::getTuple(cgh, eval.impl()))\
+};
+
+SYCLTENSORVOLUMEPATCHOPEXTACC(const)
+SYCLTENSORVOLUMEPATCHOPEXTACC()
+#undef SYCLTENSORVOLUMEPATCHOPEXTACC
+
+
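A note for reviewers unfamiliar with the idiom used throughout these files: each specialisation is generated through a CVQual macro that is expanded twice, once for the const-qualified node type and once for the unqualified one. A minimal self-contained sketch of the same pattern follows; the names Node, Wrap, and SPECIALISE are hypothetical illustrations, not part of Eigen:

    #include <type_traits>

    template <typename T> struct Node {};  // hypothetical expression node
    template <typename T> struct Wrap;     // primary template, never defined

    // One macro body, stamped out for both cv-qualifications of the node.
    #define SPECIALISE(CVQual)\
    template <typename T> struct Wrap<CVQual Node<T> > { typedef CVQual Node<T> type; };

    SPECIALISE(const)  // matches const Node<T>
    SPECIALISE()       // matches Node<T>
    #undef SPECIALISE

    static_assert(std::is_same<Wrap<const Node<int> >::type, const Node<int> >::value,
                  "const qualification is preserved");

This is why every new block below ends with a pair of invocations, one with `const` and one empty, followed by an `#undef`.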
 // specialisation of the \ref ExtractAccessor struct when the node type is
 /// TensorLayoutSwapOp.
 #define SYCLTENSORLAYOUTSWAPOPEXTACC(CVQual)\
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
index dbac01138..8828a0495 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@@ -344,6 +344,50 @@ FunctorExtractor(const TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Device>& expr)\
 SYCLEXTRFUNCIMAGEPATCHOP(const)
 SYCLEXTRFUNCIMAGEPATCHOP()
 #undef SYCLEXTRFUNCIMAGEPATCHOP
+
+/// TensorVolumePatchOp
+#define SYCLEXTRFUNCVOLUMEPATCHOP(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType, typename Device>\
+struct FunctorExtractor<TensorEvaluator<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Device> >{\
+typedef CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType> Self;\
+FunctorExtractor<TensorEvaluator<XprType, Device> > xprExpr;\
+const DenseIndex m_patch_planes;\
+const DenseIndex m_patch_rows;\
+const DenseIndex m_patch_cols;\
+const DenseIndex m_plane_strides;\
+const DenseIndex m_row_strides;\
+const DenseIndex m_col_strides;\
+const DenseIndex m_in_plane_strides;\
+const DenseIndex m_in_row_strides;\
+const DenseIndex m_in_col_strides;\
+const DenseIndex m_plane_inflate_strides;\
+const DenseIndex m_row_inflate_strides;\
+const DenseIndex m_col_inflate_strides;\
+const bool m_padding_explicit;\
+const DenseIndex m_padding_top_z;\
+const DenseIndex m_padding_bottom_z;\
+const DenseIndex m_padding_top;\
+const DenseIndex m_padding_bottom;\
+const DenseIndex m_padding_left;\
+const DenseIndex m_padding_right;\
+const PaddingType m_padding_type;\
+const typename Self::Scalar m_padding_value;\
+FunctorExtractor(const TensorEvaluator<Self, Device>& expr)\
+: xprExpr(expr.impl()), m_patch_planes(expr.xpr().patch_planes()), m_patch_rows(expr.xpr().patch_rows()), m_patch_cols(expr.xpr().patch_cols()),\
+  m_plane_strides(expr.xpr().plane_strides()), m_row_strides(expr.xpr().row_strides()), m_col_strides(expr.xpr().col_strides()),\
+  m_in_plane_strides(expr.xpr().in_plane_strides()), m_in_row_strides(expr.xpr().in_row_strides()), m_in_col_strides(expr.xpr().in_col_strides()),\
+  m_plane_inflate_strides(expr.xpr().plane_inflate_strides()), m_row_inflate_strides(expr.xpr().row_inflate_strides()),\
+  m_col_inflate_strides(expr.xpr().col_inflate_strides()), m_padding_explicit(expr.xpr().padding_explicit()),\
+  m_padding_top_z(expr.xpr().padding_top_z()), m_padding_bottom_z(expr.xpr().padding_bottom_z()),\
+  m_padding_top(expr.xpr().padding_top()), m_padding_bottom(expr.xpr().padding_bottom()), m_padding_left(expr.xpr().padding_left()),\
+  m_padding_right(expr.xpr().padding_right()), m_padding_type(expr.xpr().padding_type()), m_padding_value(expr.xpr().padding_value()){}\
+};
+SYCLEXTRFUNCVOLUMEPATCHOP(const)
+SYCLEXTRFUNCVOLUMEPATCHOP()
+#undef SYCLEXTRFUNCVOLUMEPATCHOP
+
+
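The functor extractor above exists because a SYCL kernel cannot capture the host-side expression tree directly: everything the device needs to rebuild the TensorVolumePatchOp (patch sizes, strides, inflation strides, padding) is copied by value into a trivially copyable struct and handed to the kernel. A reduced sketch of the idea, using a hypothetical one-parameter node (Node and NodeFunctorExtractor are illustrative names only, not Eigen types):

    // Hypothetical host-side expression node holding one runtime parameter.
    struct Node {
      long stride;
      long stride_val() const { return stride; }
    };

    // Device-copyable snapshot: plain values only, no pointers into the host tree.
    struct NodeFunctorExtractor {
      const long m_stride;
      NodeFunctorExtractor(const Node& n) : m_stride(n.stride_val()) {}
    };
    // A kernel functor can take NodeFunctorExtractor by value; on the device,
    // ExprConstructor (previous file) uses these stored values to rebuild an
    // equivalent node.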
 /// template deduction function for FunctorExtractor
 template <typename Evaluator>
 auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
index b8e658824..50f4595fc 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@@ -151,7 +151,7 @@ CHIPPINGOPLEAFCOUNT(const)
 CHIPPINGOPLEAFCOUNT()
 #undef CHIPPINGOPLEAFCOUNT
 
-
+///TensorStridingSlicingOp
 #define SLICESTRIDEOPLEAFCOUNT(CVQual)\
 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>\
 struct LeafCount<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >:CategoryCount<XprType>{};
@@ -160,7 +160,7 @@ SLICESTRIDEOPLEAFCOUNT(const)
 SLICESTRIDEOPLEAFCOUNT()
 #undef SLICESTRIDEOPLEAFCOUNT
 
-
+//TensorImagePatchOp
 #define TENSORIMAGEPATCHOPLEAFCOUNT(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType>\
 struct LeafCount<CVQual TensorImagePatchOp<Rows, Cols, XprType> >:CategoryCount<XprType>{};
@@ -170,6 +170,15 @@ TENSORIMAGEPATCHOPLEAFCOUNT(const)
 TENSORIMAGEPATCHOPLEAFCOUNT()
 #undef TENSORIMAGEPATCHOPLEAFCOUNT
+// TensorVolumePatchOp
+#define TENSORVOLUMEPATCHOPLEAFCOUNT(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>\
+struct LeafCount<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType> >:CategoryCount<XprType>{};
+
+
+TENSORVOLUMEPATCHOPLEAFCOUNT(const)
+TENSORVOLUMEPATCHOPLEAFCOUNT()
+#undef TENSORVOLUMEPATCHOPLEAFCOUNT
 
 } /// namespace TensorSycl
 } /// namespace internal
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
index ab97235ae..fcef0be04 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@@ -235,6 +235,20 @@ SYCLTENSORIMAGEPATCHOP()
 #undef SYCLTENSORIMAGEPATCHOP
 
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorVolumePatchOp
+#define SYCLTENSORVOLUMEPATCHOP(CVQual)\
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType, size_t N>\
+struct PlaceHolderExpression<CVQual TensorVolumePatchOp<Planes, Rows, Cols, XprType>, N> {\
+  typedef CVQual TensorVolumePatchOp<Planes, Rows, Cols, typename CalculateIndex<N, XprType>::ArgType> Type;\
+};
+
+SYCLTENSORVOLUMEPATCHOP(const)
+SYCLTENSORVOLUMEPATCHOP()
+#undef SYCLTENSORVOLUMEPATCHOP
+
+
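LeafCount and PlaceHolderExpression work together: the leaf count numbers every accessor leaf in the expression tree at compile time, so that on the device each placeholder can be matched back to the accessor extracted for it. A minimal compile-time leaf count over a toy tree, where Leaf and Plus are hypothetical node types standing in for Eigen's tensor expression nodes:

    #include <cstddef>

    struct Leaf {};
    template <typename L, typename R> struct Plus {};

    // Default: any unrecognised node is a single leaf.
    template <typename T> struct Count { static const size_t n = 1; };
    // An interior node contributes the sum of its children's leaves.
    template <typename L, typename R>
    struct Count<Plus<L, R> > { static const size_t n = Count<L>::n + Count<R>::n; };

    static_assert(Count<Plus<Leaf, Plus<Leaf, Leaf> > >::n == 3, "three leaves");

TensorVolumePatchOp has a single child, so its leaf count simply forwards to CategoryCount of its argument, exactly as the image patch specialisation does.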
 /// template deduction for \ref PlaceHolderExpression struct
 template <typename Expr, size_t N>
 struct createPlaceHolderExpression {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
index 0ca2cac84..64474ee80 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
@@ -65,12 +65,8 @@ class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors>
     CoordAccess = false,
     RawAccess = false
   };
 
+#ifdef __SYCL_DEVICE_ONLY__
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType op, const Device& device)
+#else
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
+#endif
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_impl(op.expression(), device)
+    : m_impl(op.expression(), device), m_op(op)
   {
     EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -321,7 +321,9 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
     m_outputPlanesRows = m_outputPlanes * m_outputRows;
 
     // Fast representations of different variables.
     m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
+
     m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
     m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
     m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
@@ -338,7 +340,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
   }
-
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
@@ -352,6 +353,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
   {
+    // Patch index corresponding to the passed in index.
     const Index patchIndex = index / m_fastPatchStride;
@@ -505,6 +507,8 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
 
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+  // required by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
 
   Index planePaddingTop() const { return m_planePaddingTop; }
   Index rowPaddingTop() const { return m_rowPaddingTop; }
@@ -600,6 +604,8 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
   Scalar m_paddingValue;
 
   TensorEvaluator<ArgType, Device> m_impl;
+  // required by sycl
+  XprType m_op;
 };
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 69c892362..508f29446 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -167,11 +167,12 @@ if(EIGEN_TEST_CXX11)
     ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_image_patchOP_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_inflation_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_generator_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_image_patchOP_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_volume_patchOP_sycl "-std=c++11")
   endif(EIGEN_TEST_SYCL)
 # It should be safe to always run these tests as there is some fallback code for
 # older compiler that don't support cxx11.
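Before reading the new SYCL test, it may help to see the operation on the host. The sketch below is independent of this patch (no SYCL device required) and shows the dimension layout the test verifies: for a ColMajor (depth, z, y, x, batch) input, extract_volume_patches(1, 1, 1) produces one single-voxel patch per input voxel, with the patch count appearing as dimension 4:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 5> input(4, 2, 3, 5, 7);  // depth, z, y, x, batch
      input.setRandom();
      // One 1x1x1 patch per voxel: ColMajor result is (4, 1, 1, 1, 2*3*5, 7).
      Eigen::Tensor<float, 6> patches = input.extract_volume_patches(1, 1, 1);
      return patches.dimension(4) == 2 * 3 * 5 ? 0 : 1;
    }

The test that follows runs the same expression on the SYCL device, copies the result back, and compares dimensions and coefficients against this host-side behaviour, for both ColMajor and RowMajor layouts.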
diff --git a/unsupported/test/cxx11_tensor_volume_patchOP_sycl.cpp b/unsupported/test/cxx11_tensor_volume_patchOP_sycl.cpp
new file mode 100644
index 000000000..ddc9e0d46
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_volume_patchOP_sycl.cpp
@@ -0,0 +1,222 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_volume_patchOP_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+static const int DataLayout = ColMajor;
+
+template <typename DataType, typename IndexType>
+static void test_single_voxel_patch_sycl(const Eigen::SyclDevice& sycl_device)
+{
+  IndexType sizeDim0 = 4;
+  IndexType sizeDim1 = 2;
+  IndexType sizeDim2 = 3;
+  IndexType sizeDim3 = 5;
+  IndexType sizeDim4 = 7;
+  array<IndexType, 5> tensorColMajorRange = {{sizeDim0, sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
+  array<IndexType, 5> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1, sizeDim0}};
+  Tensor<DataType, 5, DataLayout, IndexType> tensor_col_major(tensorColMajorRange);
+  Tensor<DataType, 5, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
+  tensor_col_major.setRandom();
+
+  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
+  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
+  TensorMap<Tensor<DataType, 5, DataLayout, IndexType> > gpu_col_major(gpu_data_col_major, tensorColMajorRange);
+  TensorMap<Tensor<DataType, 5, RowMajor, IndexType> > gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
+
+  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), (tensor_col_major.size())*sizeof(DataType));
+  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
+
+  // single volume patch: ColMajor
+  array<IndexType, 6> patchColMajorTensorRange = {{sizeDim0, 1, 1, 1, sizeDim1*sizeDim2*sizeDim3, sizeDim4}};
+  Tensor<DataType, 6, DataLayout, IndexType> single_voxel_patch_col_major(patchColMajorTensorRange);
+  size_t patchTensorBuffSize = single_voxel_patch_col_major.size()*sizeof(DataType);
+  DataType* gpu_data_single_voxel_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
+  TensorMap<Tensor<DataType, 6, DataLayout, IndexType> > gpu_single_voxel_patch_col_major(gpu_data_single_voxel_patch_col_major, patchColMajorTensorRange);
+  gpu_single_voxel_patch_col_major.device(sycl_device) = gpu_col_major.extract_volume_patches(1, 1, 1);
+  sycl_device.memcpyDeviceToHost(single_voxel_patch_col_major.data(), gpu_data_single_voxel_patch_col_major, patchTensorBuffSize);
+
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(0), 4);
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(1), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(2), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(3), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(4), 2 * 3 * 5);
+  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(5), 7);
+
+  // single volume patch: RowMajor
+  array<IndexType, 6> patchRowMajorTensorRange = {{sizeDim4, sizeDim1*sizeDim2*sizeDim3, 1, 1, 1, sizeDim0}};
+  Tensor<DataType, 6, RowMajor, IndexType> single_voxel_patch_row_major(patchRowMajorTensorRange);
+  patchTensorBuffSize = single_voxel_patch_row_major.size()*sizeof(DataType);
+  DataType* gpu_data_single_voxel_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
+  TensorMap<Tensor<DataType, 6, RowMajor, IndexType> > gpu_single_voxel_patch_row_major(gpu_data_single_voxel_patch_row_major, patchRowMajorTensorRange);
+  gpu_single_voxel_patch_row_major.device(sycl_device) = gpu_row_major.extract_volume_patches(1, 1, 1);
+  sycl_device.memcpyDeviceToHost(single_voxel_patch_row_major.data(), gpu_data_single_voxel_patch_row_major, patchTensorBuffSize);
+
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
+  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);
+
+  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
+  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
+    VERIFY_IS_EQUAL(tensor_col_major.data()[i], single_voxel_patch_col_major.data()[i]);
+    VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]);
+    VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
+  }
+
+  sycl_device.deallocate(gpu_data_col_major);
+  sycl_device.deallocate(gpu_data_row_major);
+  sycl_device.deallocate(gpu_data_single_voxel_patch_col_major);
+  sycl_device.deallocate(gpu_data_single_voxel_patch_row_major);
+}
+
+template <typename DataType, typename IndexType>
+static void test_entire_volume_patch_sycl(const Eigen::SyclDevice& sycl_device)
+{
+  const int depth = 4;
+  const int patch_z = 2;
+  const int patch_y = 3;
+  const int patch_x = 5;
+  const int batch = 7;
+
+  array<IndexType, 5> tensorColMajorRange = {{depth, patch_z, patch_y, patch_x, batch}};
+  array<IndexType, 5> tensorRowMajorRange = {{batch, patch_x, patch_y, patch_z, depth}};
+  Tensor<DataType, 5, DataLayout, IndexType> tensor_col_major(tensorColMajorRange);
+  Tensor<DataType, 5, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
+  tensor_col_major.setRandom();
+
+  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
+  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
+  TensorMap<Tensor<DataType, 5, DataLayout, IndexType> > gpu_col_major(gpu_data_col_major, tensorColMajorRange);
+  TensorMap<Tensor<DataType, 5, RowMajor, IndexType> > gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
+
+  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), (tensor_col_major.size())*sizeof(DataType));
+  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
+  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
+
+  // entire volume patch: ColMajor
+  array<IndexType, 6> patchColMajorTensorRange = {{depth, patch_z, patch_y, patch_x, patch_z*patch_y*patch_x, batch}};
+  Tensor<DataType, 6, DataLayout, IndexType> entire_volume_patch_col_major(patchColMajorTensorRange);
+  size_t patchTensorBuffSize = entire_volume_patch_col_major.size()*sizeof(DataType);
+  DataType* gpu_data_entire_volume_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
+  TensorMap<Tensor<DataType, 6, DataLayout, IndexType> > gpu_entire_volume_patch_col_major(gpu_data_entire_volume_patch_col_major, patchColMajorTensorRange);
+  gpu_entire_volume_patch_col_major.device(sycl_device) = gpu_col_major.extract_volume_patches(patch_z, patch_y, patch_x);
+  sycl_device.memcpyDeviceToHost(entire_volume_patch_col_major.data(), gpu_data_entire_volume_patch_col_major, patchTensorBuffSize);
+
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(0), depth);
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(1), patch_z);
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(2), patch_y);
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(3), patch_x);
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(4), patch_z * patch_y * patch_x);
+  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(5), batch);
+
+  // entire volume patch: RowMajor
+  array<IndexType, 6> patchRowMajorTensorRange = {{batch, patch_z*patch_y*patch_x, patch_x, patch_y, patch_z, depth}};
+  Tensor<DataType, 6, RowMajor, IndexType> entire_volume_patch_row_major(patchRowMajorTensorRange);
+  patchTensorBuffSize = entire_volume_patch_row_major.size()*sizeof(DataType);
+  DataType* gpu_data_entire_volume_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
+  TensorMap<Tensor<DataType, 6, RowMajor, IndexType> > gpu_entire_volume_patch_row_major(gpu_data_entire_volume_patch_row_major, patchRowMajorTensorRange);
+  gpu_entire_volume_patch_row_major.device(sycl_device) = gpu_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
+  sycl_device.memcpyDeviceToHost(entire_volume_patch_row_major.data(), gpu_data_entire_volume_patch_row_major, patchTensorBuffSize);
+
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
+  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);
+
+  const int dz = patch_z - 1;
+  const int dy = patch_y - 1;
+  const int dx = patch_x - 1;
+
+  const int forward_pad_z = dz - dz / 2;
+  const int forward_pad_y = dy - dy / 2;
+  const int forward_pad_x = dx - dx / 2;
+
+  for (int pz = 0; pz < patch_z; pz++) {
+    for (int py = 0; py < patch_y; py++) {
+      for (int px = 0; px < patch_x; px++) {
+        const int patchId = pz + patch_z * (py + px * patch_y);
+        for (int z = 0; z < patch_z; z++) {
+          for (int y = 0; y < patch_y; y++) {
+            for (int x = 0; x < patch_x; x++) {
+              for (int b = 0; b < batch; b++) {
+                for (int d = 0; d < depth; d++) {
+                  float expected = 0.0f;
+                  float expected_row_major = 0.0f;
+                  const int eff_z = z - forward_pad_z + pz;
+                  const int eff_y = y - forward_pad_y + py;
+                  const int eff_x = x - forward_pad_x + px;
+                  if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
+                      eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
+                    expected = tensor_col_major(d, eff_z, eff_y, eff_x, b);
+                    expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
+                  }
+                  VERIFY_IS_EQUAL(entire_volume_patch_col_major(d, z, y, x, patchId, b), expected);
+                  VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  sycl_device.deallocate(gpu_data_col_major);
+  sycl_device.deallocate(gpu_data_row_major);
+  sycl_device.deallocate(gpu_data_entire_volume_patch_col_major);
+  sycl_device.deallocate(gpu_data_entire_volume_patch_row_major);
+}
+
+template <typename DataType, typename dev_Selector>
+void sycl_tensor_volume_patch_test_per_device(dev_Selector s)
+{
+  QueueInterface queueInterface(s);
+  auto sycl_device = Eigen::SyclDevice(&queueInterface);
+  std::cout << "Running on " << s.template get_info<cl::sycl::info::device::name>() << std::endl;
+  test_single_voxel_patch_sycl<DataType, int64_t>(sycl_device);
+  test_entire_volume_patch_sycl<DataType, int64_t>(sycl_device);
+}
+
+void test_cxx11_tensor_volume_patchOP_sycl()
+{
+  for (const auto& device : Eigen::get_sycl_supported_devices()) {
+    CALL_SUBTEST(sycl_tensor_volume_patch_test_per_device<float>(device));
+  }
+}
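One detail of the entire-volume checks above deserves a note: when patches are extracted at every position, the implicit padding of p - 1 elements per dimension is split into d/2 and d - d/2, and the test subtracts the d - d/2 half as the forward (leading) offset when mapping a patch coordinate back to an input coordinate. A standalone restatement of that arithmetic, where forward_pad is an illustrative helper mirroring the test's computation rather than an Eigen API:

    #include <cassert>

    // Forward offset used by the test's index checks for a patch extent p:
    // total implicit padding is d = p - 1, split as d/2 (trailing) and
    // d - d/2 (leading).
    int forward_pad(int p) {
      const int d = p - 1;
      return d - d / 2;
    }

    int main() {
      assert(forward_pad(2) == 1);  // d = 1: split 0 trailing / 1 leading
      assert(forward_pad(3) == 1);  // d = 2: split 1 / 1
      assert(forward_pad(5) == 2);  // d = 4: split 2 / 2
      return 0;
    }

Any (d, z, y, x, b) whose effective input coordinate falls outside the input volume must read back as the padding value, which is why the expected values default to 0.0f in the loops above.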