Adding TensorLayoutSwapOp for sycl.

2024-12-21 07:19:46 +08:00 · 2017-02-15 16:28:12 +00:00 · 2017-02-15 16:28:12 +00:00 · 91982b91c0
commit 91982b91c0
parent b1e312edd6
9 changed files with 217 additions and 44 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@ -91,28 +91,35 @@ ASSIGNCONVERT(, false)
 #undef ASSIGNCONVERT
 /// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is either TensorForcedEvalOp or TensorEvalToOp
+/// type is  TensorEvalToOp
 #define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\
 template <typename Expr>\
 struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \
 : DeviceConvertor<ExprNode, Res, Expr>{};
 /// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorForcedEvalOp
 #define KERNELBROKERCONVERTFORCEDEVAL(CVQual)\
 template <typename Expr>\
 struct ConvertToDeviceExpression<CVQual TensorForcedEvalOp<Expr> > {\
  typedef CVQual TensorForcedEvalOp< typename ConvertToDeviceExpression<Expr>::Type> Type;\
 };
 KERNELBROKERCONVERTFORCEDEVAL(const)
 KERNELBROKERCONVERTFORCEDEVAL()
 #undef KERNELBROKERCONVERTFORCEDEVAL
 KERNELBROKERCONVERT(const, true, TensorEvalToOp)
 KERNELBROKERCONVERT(, false, TensorEvalToOp)
 #undef KERNELBROKERCONVERT
 /// specialisation of the \ref ConvertToDeviceExpression struct when the node types are TensorForcedEvalOp and TensorLayoutSwapOp
 #define KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP(CVQual, ExprNode)\
 template <typename Expr>\
 struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > {\
  typedef CVQual ExprNode< typename ConvertToDeviceExpression<Expr>::Type> Type;\
 };
 // TensorForcedEvalOp
 KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP(const,TensorForcedEvalOp)
 KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP(,TensorForcedEvalOp)
 // TensorLayoutSwapOp
 KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP(const,TensorLayoutSwapOp)
 KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP(,TensorLayoutSwapOp)
 #undef KERNELBROKERCONVERTFORCEDEVALLAYOUTSWAP
 /// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorReductionOp
 #define KERNELBROKERCONVERTREDUCTION(CVQual)\
 template <typename OP, typename Dim, typename subExpr, template <class> class MakePointer_>\
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@ -223,7 +223,7 @@ struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQua
  Type expr;\
  template <typename FuncDetector>\
  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
-  : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\
+  : nestedExpression(funcD.xprExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\
 };
 EVALTO(const)
@ -386,6 +386,25 @@ SYCLTENSORCHIPPINGOPEXPR()
 #undef SYCLTENSORCHIPPINGOPEXPR
 // TensorLayoutSwapOp
 #define SYCLTENSORLAYOUTSWAPOPEXPR(CVQual)\
 template<typename OrigXprType, typename XprType, typename... Params>\
 struct ExprConstructor<CVQual TensorLayoutSwapOp <OrigXprType> , CVQual TensorLayoutSwapOp<XprType>, Params... >{\
  typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
  typedef CVQual TensorLayoutSwapOp<typename my_xpr_type::Type> Type;\
  my_xpr_type xprExpr;\
  Type expr;\
  template <typename FuncDetector>\
  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
  : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr) {}\
 };
 SYCLTENSORLAYOUTSWAPOPEXPR(const)
 SYCLTENSORLAYOUTSWAPOPEXPR()
 #undef SYCLTENSORLAYOUTSWAPOPEXPR
 /// template deduction for \ref ExprConstructor struct
 template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
 auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t)
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@ -226,6 +226,21 @@ SYCLTENSORCHIPPINGOPEXTACC()
 #undef SYCLTENSORCHIPPINGOPEXTACC
 // specialisation of the \ref ExtractAccessor struct when the node type is
 /// TensorLayoutSwapOp.
 #define SYCLTENSORLAYOUTSWAPOPEXTACC(CVQual)\
 template<typename XprType, typename Dev>\
 struct ExtractAccessor<TensorEvaluator<CVQual TensorLayoutSwapOp<XprType>, Dev> >{\
  static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorLayoutSwapOp<XprType>, Dev>& eval)\
  RETURN_CPP11(AccessorConstructor::getTuple(cgh, eval.impl()))\
 };
 SYCLTENSORLAYOUTSWAPOPEXTACC(const)
 SYCLTENSORLAYOUTSWAPOPEXTACC()
 #undef SYCLTENSORLAYOUTSWAPOPEXTACC
 /// template deduction for \ref ExtractAccessor
 template <typename Evaluator>
 auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& eval)
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@ -39,7 +39,6 @@ template <typename Evaluator> struct FunctorExtractor{
  EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
  FunctorExtractor(const Evaluator& expr)
  : m_dimensions(expr.dimensions()) {}
 };
 /// specialisation of the \ref FunctorExtractor struct when the node type does not require anything
@ -143,19 +142,23 @@ SYCLEXTRFUNCASSIGNOP(const)
 SYCLEXTRFUNCASSIGNOP()
 #undef SYCLEXTRFUNCASSIGNOP
-/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// specialisation of the \ref FunctorExtractor struct when the node types are
-/// TensorEvalToOp, This is an specialisation without OP so it has to be separated.
+/// TensorEvalToOp, TensorLayoutSwapOp. This is an specialisation without OP so it has to be separated.
-#define SYCLEXTRFUNCEVALTOOP(CVQual)\
+#define SYCLEXTRFUNCEVALTOOPSWAPLAYOUT(CVQual, ExprNode)\
-template <typename RHSExpr, typename Dev>\
+template <typename Expr, typename Dev>\
-struct FunctorExtractor<TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev> > {\
+struct FunctorExtractor<TensorEvaluator<CVQual ExprNode<Expr>, Dev> > {\
-  FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\
+  FunctorExtractor<TensorEvaluator<Expr, Dev> > xprExpr;\
-  FunctorExtractor(const TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev>& expr)\
+  FunctorExtractor(const TensorEvaluator<CVQual ExprNode<Expr>, Dev>& expr)\
-  : rhsExpr(expr.impl()) {}\
+  : xprExpr(expr.impl()) {}\
 };
 //TensorEvalToOp
 SYCLEXTRFUNCEVALTOOPSWAPLAYOUT(const, TensorEvalToOp)
 SYCLEXTRFUNCEVALTOOPSWAPLAYOUT(, TensorEvalToOp)
 // TensorLayoutSwapOp
 SYCLEXTRFUNCEVALTOOPSWAPLAYOUT(const, TensorLayoutSwapOp)
 SYCLEXTRFUNCEVALTOOPSWAPLAYOUT(, TensorLayoutSwapOp)
-SYCLEXTRFUNCEVALTOOP(const)
+#undef SYCLEXTRFUNCEVALTOOPSWAPLAYOUT
 SYCLEXTRFUNCEVALTOOP()
 #undef SYCLEXTRFUNCEVALTOOP
 template<typename Dim, size_t NumOutputDim> struct DimConstr {
 template<typename InDim>
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@ -94,15 +94,17 @@ SYCLFORCEDEVALLEAFCOUNT()
 #undef SYCLFORCEDEVALLEAFCOUNT
 /// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp
-#define EVALTOLEAFCOUNT(CVQual)\
+#define EVALTOLAYOUTSWAPLEAFCOUNT(CVQual , ExprNode, Num)\
 template <typename Expr>\
-struct LeafCount<CVQual TensorEvalToOp<Expr> > {\
+struct LeafCount<CVQual ExprNode<Expr> > {\
-  static const size_t Count = 1 + CategoryCount<Expr>::Count;\
+  static const size_t Count = Num + CategoryCount<Expr>::Count;\
 };
-EVALTOLEAFCOUNT(const)
+EVALTOLAYOUTSWAPLEAFCOUNT(const, TensorEvalToOp, 1)
-EVALTOLEAFCOUNT()
+EVALTOLAYOUTSWAPLEAFCOUNT(, TensorEvalToOp, 1)
-#undef EVALTOLEAFCOUNT
+EVALTOLAYOUTSWAPLEAFCOUNT(const, TensorLayoutSwapOp, 0)
 EVALTOLAYOUTSWAPLEAFCOUNT(, TensorLayoutSwapOp, 0)
 #undef EVALTOLAYOUTSWAPLEAFCOUNT
 /// specialisation of the \ref LeafCount struct when the node type is const TensorReductionOp
 #define REDUCTIONLEAFCOUNT(CVQual)\
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@ -144,16 +144,19 @@ FORCEDEVAL()
 #undef FORCEDEVAL
 /// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorEvalToOp
+/// TensorEvalToOp, TensorLayoutSwapOp
-#define EVALTO(CVQual)\
+#define EVALTOLAYOUTSWAP(CVQual, ExprNode)\
 template <typename Expr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\
+struct PlaceHolderExpression<CVQual ExprNode<Expr>, N> {\
-  typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\
+  typedef CVQual ExprNode<typename CalculateIndex <N, Expr>::ArgType> Type;\
 };
-EVALTO(const)
+EVALTOLAYOUTSWAP(const, TensorEvalToOp)
-EVALTO()
+EVALTOLAYOUTSWAP(, TensorEvalToOp)
-#undef EVALTO
+EVALTOLAYOUTSWAP(const, TensorLayoutSwapOp)
 EVALTOLAYOUTSWAP(, TensorLayoutSwapOp)
 #undef EVALTOLAYOUTSWAP
 /// specialisation of the \ref PlaceHolderExpression when the node is
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@ -168,6 +168,7 @@ if(EIGEN_TEST_CXX11)
    ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
  endif(EIGEN_TEST_SYCL)
  # It should be safe to always run these tests as there is some fallback code for
  # older compiler that don't support cxx11.
--- a/unsupported/test/cxx11_tensor_layout_swap_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_layout_swap_sycl.cpp
@ -0,0 +1,126 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2016
 // Mehdi Goli    Codeplay Software Ltd.
 // Ralph Potter  Codeplay Software Ltd.
 // Luke Iwanski  Codeplay Software Ltd.
 // Contact: <eigen@codeplay.com>
 // Benoit Steiner <benoit.steiner.goog@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_layout_swap_sycl
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
 #define EIGEN_USE_SYCL
 #include "main.h"
 #include <Eigen/CXX11/Tensor>
 using Eigen::Tensor;
 template <typename DataType, typename IndexType>
 static void test_simple_swap_sycl(const Eigen::SyclDevice& sycl_device)
 {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 7;
  array<IndexType, 3> tensorColRange = {{sizeDim1, sizeDim2, sizeDim3}};
  array<IndexType, 3> tensorRowRange = {{sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 3, ColMajor, IndexType> tensor1(tensorColRange);
  Tensor<DataType, 3, RowMajor, IndexType> tensor2(tensorRowRange);
  tensor1.setRandom();
  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu1(gpu_data1, tensorColRange);
  TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu2(gpu_data2, tensorRowRange);
  sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
  gpu2.device(sycl_device)=gpu1.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
 //  Tensor<float, 3, ColMajor> tensor(2,3,7);
  //tensor.setRandom();
 //  Tensor<float, 3, RowMajor> tensor2 = tensor.swap_layout();
  VERIFY_IS_EQUAL(tensor1.dimension(0), tensor2.dimension(2));
  VERIFY_IS_EQUAL(tensor1.dimension(1), tensor2.dimension(1));
  VERIFY_IS_EQUAL(tensor1.dimension(2), tensor2.dimension(0));
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 7; ++k) {
        VERIFY_IS_EQUAL(tensor1(i,j,k), tensor2(k,j,i));
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
 }
 template <typename DataType, typename IndexType>
 static void test_swap_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
 {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 7;
  array<IndexType, 3> tensorColRange = {{sizeDim1, sizeDim2, sizeDim3}};
  array<IndexType, 3> tensorRowRange = {{sizeDim3, sizeDim2, sizeDim1}};
  Tensor<DataType, 3, ColMajor, IndexType> tensor1(tensorColRange);
  Tensor<DataType, 3, RowMajor, IndexType> tensor2(tensorRowRange);
  tensor1.setRandom();
  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu1(gpu_data1, tensorColRange);
  TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu2(gpu_data2, tensorRowRange);
  sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
  gpu2.swap_layout().device(sycl_device)=gpu1;
  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
 //  Tensor<float, 3, ColMajor> tensor(2,3,7);
 //  tensor.setRandom();
  //Tensor<float, 3, RowMajor> tensor2(7,3,2);
 //  tensor2.swap_layout() = tensor;
  VERIFY_IS_EQUAL(tensor1.dimension(0), tensor2.dimension(2));
  VERIFY_IS_EQUAL(tensor1.dimension(1), tensor2.dimension(1));
  VERIFY_IS_EQUAL(tensor1.dimension(2), tensor2.dimension(0));
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 7; ++k) {
        VERIFY_IS_EQUAL(tensor1(i,j,k), tensor2(k,j,i));
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
 }
 template<typename DataType, typename dev_Selector> void sycl_tensor_layout_swap_test_per_device(dev_Selector s){
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_swap_sycl<DataType, int64_t>(sycl_device);
  test_swap_as_lvalue_sycl<DataType, int64_t>(sycl_device);
 }
 void test_cxx11_tensor_layout_swap_sycl()
 {
  for (const auto& device :Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_tensor_layout_swap_test_per_device<float>(device));
  }
 }
--- a/unsupported/test/cxx11_tensor_patch_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_patch_sycl.cpp
@ -12,7 +12,6 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_patch_sycl
@ -80,10 +79,12 @@ static void test_simple_patch_sycl(const Eigen::SyclDevice& sycl_device){
  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]);
  }
  patch_dims[0] = 2;
  patch_dims[1] = 3;
  patch_dims[2] = 5;
  patch_dims[3] = 7;
  if (DataLayout == ColMajor) {
   patchTensorRange = {{sizeDim1,sizeDim2,sizeDim3,sizeDim4,1}};
  }else{
@ -114,15 +115,11 @@ static void test_simple_patch_sycl(const Eigen::SyclDevice& sycl_device){
  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]);
  }
  patch_dims[0] = 1;
  patch_dims[1] = 2;
  patch_dims[2] = 2;
  patch_dims[3] = 1;
  if (DataLayout == ColMajor) {
   patchTensorRange = {{1,2,2,1,2*2*4*7}};
  }else{