Adding SYCL backend for TensorImagePatchOp.h; adding SYCL backend for TensorInflation.h.

This commit is contained in:
Mehdi Goli 2017-02-20 12:11:05 +00:00
parent 91982b91c0
commit 79ebc8f761
11 changed files with 1356 additions and 12 deletions

View File

@ -70,12 +70,8 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
DenseIndex in_row_strides, DenseIndex in_col_strides,
DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
PaddingType padding_type, Scalar padding_value)
: m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
m_row_strides(row_strides), m_col_strides(col_strides),
m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
m_padding_type(padding_type), m_padding_value(padding_value) {}
: TensorImagePatchOp(expr, patch_rows, patch_cols, row_strides,col_strides, in_row_strides, in_col_strides, row_inflate_strides,
col_inflate_strides, 0,0,0,0,padding_value, padding_type, false ){}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
DenseIndex row_strides, DenseIndex col_strides,
@ -83,14 +79,15 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
DenseIndex padding_top, DenseIndex padding_bottom,
DenseIndex padding_left, DenseIndex padding_right,
Scalar padding_value)
Scalar padding_value, PaddingType padding_type=PADDING_VALID,
bool padding_explicit=true)
: m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
m_row_strides(row_strides), m_col_strides(col_strides),
m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
m_padding_explicit(padding_explicit), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
m_padding_left(padding_left), m_padding_right(padding_right),
m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
m_padding_type(padding_type), m_padding_value(padding_value) {}
EIGEN_DEVICE_FUNC
DenseIndex patch_rows() const { return m_patch_rows; }
@ -172,7 +169,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
: m_impl(op.expression(), device), m_op(op)
{
EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
@ -241,6 +238,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
break;
default:
eigen_assert(false && "unexpected padding");
m_outputCols=0; // silence the uninitialised warnig;
m_outputRows=0; //// silence the uninitialised warnig;
}
}
eigen_assert(m_outputRows > 0);
@ -420,7 +419,10 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
// required by sycl
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
Index rowPaddingTop() const { return m_rowPaddingTop; }
Index colPaddingLeft() const { return m_colPaddingLeft; }
@ -501,6 +503,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
Scalar m_paddingValue;
TensorEvaluator<ArgType, Device> m_impl;
// required for sycl
const XprType& m_op;
};

View File

@ -215,6 +215,12 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
/// required by sycl in order to extract the accessor
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
/// required by sycl in order to extract the accessor
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Strides& functor() const { return m_strides; }
protected:
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;

View File

@ -165,6 +165,20 @@ KERNELBROKERCONVERTCHIPPINGOP()
/// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorImagePatchOp
/// The image-patch node itself is kept as-is; only its nested expression type
/// is recursively converted to its device-side equivalent.
#define KERNELBROKERCONVERTIMAGEPATCHOP(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename XprType>\
struct ConvertToDeviceExpression<CVQual TensorImagePatchOp<Rows, Cols, XprType> >{\
typedef CVQual TensorImagePatchOp<Rows, Cols, typename ConvertToDeviceExpression<XprType>::Type> Type;\
};
KERNELBROKERCONVERTIMAGEPATCHOP(const)
KERNELBROKERCONVERTIMAGEPATCHOP()
#undef KERNELBROKERCONVERTIMAGEPATCHOP
} // namespace internal
} // namespace TensorSycl
} // namespace Eigen

View File

@ -385,6 +385,24 @@ SYCLTENSORCHIPPINGOPEXPR(const)
SYCLTENSORCHIPPINGOPEXPR()
#undef SYCLTENSORCHIPPINGOPEXPR
// TensorImagePatchOp
/// specialisation of the \ref ExprConstructor struct when the node type is
/// TensorImagePatchOp: first reconstructs the nested expression, then rebuilds
/// the image-patch node by forwarding the parameters stored on the functor
/// detector (patch sizes, strides, inflation strides, explicit paddings,
/// padding value/type/explicit flag) to the TensorImagePatchOp constructor.
#define SYCLTENSORIMAGEPATCHOPEXPR(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename OrigXprType, typename XprType, typename... Params>\
struct ExprConstructor<CVQual TensorImagePatchOp<Rows, Cols, OrigXprType>, CVQual TensorImagePatchOp<Rows, Cols, XprType>, Params... > {\
typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
typedef CVQual TensorImagePatchOp<Rows, Cols, typename my_xpr_type::Type> Type;\
my_xpr_type xprExpr;\
Type expr;\
template <typename FuncDetector>\
ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
: xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.m_patch_rows, funcD.m_patch_cols, funcD.m_row_strides, funcD.m_col_strides,\
funcD.m_in_row_strides, funcD.m_in_col_strides, funcD.m_row_inflate_strides, funcD.m_col_inflate_strides, \
funcD.m_padding_top, funcD.m_padding_bottom, funcD.m_padding_left, funcD.m_padding_right, funcD.m_padding_value, funcD.m_padding_type, funcD.m_padding_explicit){}\
};
SYCLTENSORIMAGEPATCHOPEXPR(const)
SYCLTENSORIMAGEPATCHOPEXPR()
#undef SYCLTENSORIMAGEPATCHOPEXPR
// TensorLayoutSwapOp

View File

@ -226,6 +226,20 @@ SYCLTENSORCHIPPINGOPEXTACC()
#undef SYCLTENSORCHIPPINGOPEXTACC
/// specialisation of the \ref ExtractAccessor struct when the node type is
/// TensorImagePatchOp.
/// The image-patch node holds no device buffers of its own, so its accessor
/// tuple is exactly that of the nested evaluator (eval.impl()).
#define SYCLTENSORIMAGEPATCHOPEXTACC(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename XprType, typename Dev>\
struct ExtractAccessor<TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Dev> >{\
static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Dev>& eval)\
RETURN_CPP11(AccessorConstructor::getTuple(cgh, eval.impl()))\
};
SYCLTENSORIMAGEPATCHOPEXTACC(const)
SYCLTENSORIMAGEPATCHOPEXTACC()
#undef SYCLTENSORIMAGEPATCHOPEXTACC
// specialisation of the \ref ExtractAccessor struct when the node type is
/// TensorLayoutSwapOp.
#define SYCLTENSORLAYOUTSWAPOPEXTACC(CVQual)\

View File

@ -296,7 +296,7 @@ SYCLEXTRFUNCCONTRACTCONCAT(TensorConcatenationOp, axis(),)
//TensorChippingOp
#define SYCLEXTRFUNCCHIPPINGOP(CVQual)\
template<DenseIndex DimId, typename XprType, typename Device>\
struct FunctorExtractor<TensorEvaluator<CVQual TensorChippingOp<DimId, XprType>, Device>>{\
struct FunctorExtractor<TensorEvaluator<CVQual TensorChippingOp<DimId, XprType>, Device> >{\
FunctorExtractor<Eigen::TensorEvaluator<XprType, Device> > xprExpr;\
const DenseIndex m_dim;\
const DenseIndex m_offset;\
@ -310,6 +310,40 @@ SYCLEXTRFUNCCHIPPINGOP(const)
SYCLEXTRFUNCCHIPPINGOP()
#undef SYCLEXTRFUNCCHIPPINGOP
/// specialisation of the \ref FunctorExtractor struct when the node type is
/// TensorImagePatchOp: snapshots every patch/stride/inflation/padding
/// parameter of the host-side expression by value (via expr.xpr() getters),
/// so the expression can later be rebuilt by the matching ExprConstructor.
#define SYCLEXTRFUNCIMAGEPATCHOP(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename XprType, typename Device>\
struct FunctorExtractor<TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Device> >{\
typedef CVQual TensorImagePatchOp<Rows, Cols, XprType> Self;\
FunctorExtractor<Eigen::TensorEvaluator<XprType, Device> > xprExpr;\
const DenseIndex m_patch_rows;\
const DenseIndex m_patch_cols;\
const DenseIndex m_row_strides;\
const DenseIndex m_col_strides;\
const DenseIndex m_in_row_strides;\
const DenseIndex m_in_col_strides;\
const DenseIndex m_row_inflate_strides;\
const DenseIndex m_col_inflate_strides;\
const bool m_padding_explicit;\
const DenseIndex m_padding_top;\
const DenseIndex m_padding_bottom;\
const DenseIndex m_padding_left;\
const DenseIndex m_padding_right;\
const PaddingType m_padding_type;\
const typename Self::Scalar m_padding_value;\
FunctorExtractor(const TensorEvaluator<Self, Device>& expr)\
: xprExpr(expr.impl()), m_patch_rows(expr.xpr().patch_rows()), m_patch_cols(expr.xpr().patch_cols()),\
m_row_strides(expr.xpr().row_strides()), m_col_strides(expr.xpr().col_strides()),\
m_in_row_strides(expr.xpr().in_row_strides()), m_in_col_strides(expr.xpr().in_col_strides()),\
m_row_inflate_strides(expr.xpr().row_inflate_strides()), m_col_inflate_strides(expr.xpr().col_inflate_strides()),\
m_padding_explicit(expr.xpr().padding_explicit()),m_padding_top(expr.xpr().padding_top()),\
m_padding_bottom(expr.xpr().padding_bottom()), m_padding_left(expr.xpr().padding_left()),\
m_padding_right(expr.xpr().padding_right()), m_padding_type(expr.xpr().padding_type()),\
m_padding_value(expr.xpr().padding_value()){}\
};
SYCLEXTRFUNCIMAGEPATCHOP(const)
SYCLEXTRFUNCIMAGEPATCHOP()
#undef SYCLEXTRFUNCIMAGEPATCHOP
/// template deduction function for FunctorExtractor
template <typename Evaluator>
auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {

View File

@ -161,6 +161,16 @@ SLICESTRIDEOPLEAFCOUNT()
#undef SLICESTRIDEOPLEAFCOUNT
/// specialisation of the \ref LeafCount struct when the node type is
/// TensorImagePatchOp: the leaf count is delegated entirely to the
/// argument expression's CategoryCount.
#define TENSORIMAGEPATCHOPLEAFCOUNT(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename XprType>\
struct LeafCount<CVQual TensorImagePatchOp<Rows, Cols, XprType> >:CategoryCount<XprType>{};
TENSORIMAGEPATCHOPLEAFCOUNT(const)
TENSORIMAGEPATCHOPLEAFCOUNT()
#undef TENSORIMAGEPATCHOPLEAFCOUNT
} /// namespace TensorSycl
} /// namespace internal
} /// namespace Eigen

View File

@ -221,6 +221,20 @@ SYCLSLICESTRIDEOPPLH()
#undef SYCLSLICESTRIDEOPPLH
/// specialisation of the \ref PlaceHolderExpression when the node is
/// TensorImagePatchOp
/// The placeholder substitution happens only inside the nested expression
/// (via CalculateIndex); the image-patch node and its Rows/Cols template
/// parameters are preserved unchanged.
#define SYCLTENSORIMAGEPATCHOP(CVQual)\
template<DenseIndex Rows, DenseIndex Cols, typename XprType, size_t N>\
struct PlaceHolderExpression<CVQual TensorImagePatchOp<Rows, Cols, XprType>, N> {\
typedef CVQual TensorImagePatchOp<Rows, Cols, typename CalculateIndex <N, XprType>::ArgType> Type;\
};
SYCLTENSORIMAGEPATCHOP(const)
SYCLTENSORIMAGEPATCHOP()
#undef SYCLTENSORIMAGEPATCHOP
/// template deduction for \ref PlaceHolderExpression struct
template <typename Expr>
struct createPlaceHolderExpression {

View File

@ -169,6 +169,8 @@ if(EIGEN_TEST_CXX11)
ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_image_patchOP_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_inflation_sycl "-std=c++11")
endif(EIGEN_TEST_SYCL)
# It should be safe to always run these tests as there is some fallback code for
# older compiler that don't support cxx11.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_inflation_sycl
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
// Inflation definition: for each dimension, the inflated size is
// ((dim - 1) * stride[dim] + 1).
// For a 1-dimensional vector of size 3 with values (4, 4, 4), an inflation
// stride of 3 changes it to a tensor of size (2 * 3) + 1 = 7 with the values
// (4, 0, 0, 4, 0, 0, 4).
template <typename DataType, int DataLayout, typename IndexType>
void test_simple_inflation_sycl(const Eigen::SyclDevice &sycl_device) {
  // Verifies TensorInflationOp on a SYCL device in two steps:
  //  1) all-ones strides must be the identity (output == input), and
  //  2) general strides must place each input value at index*stride in every
  //     dimension and zero-fill all remaining entries.
  const IndexType sizeDim1 = 2;
  const IndexType sizeDim2 = 3;
  const IndexType sizeDim3 = 5;
  const IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> no_stride(tensorRange);
  tensor.setRandom();

  // Step 1: unit strides leave the tensor unchanged.
  array<IndexType, 4> strides;
  strides[0] = 1;
  strides[1] = 1;
  strides[2] = 1;
  strides[3] = 1;

  const size_t tensorBuffSize = tensor.size() * sizeof(DataType);
  DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  DataType* gpu_data_no_stride = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_no_stride(gpu_data_no_stride, tensorRange);

  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
  gpu_no_stride.device(sycl_device) = gpu_tensor.inflate(strides);
  sycl_device.memcpyDeviceToHost(no_stride.data(), gpu_data_no_stride, tensorBuffSize);

  VERIFY_IS_EQUAL(no_stride.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(no_stride.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(no_stride.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(no_stride.dimension(3), sizeDim4);

  // Iterate over the declared sizes rather than repeating them as literals.
  for (IndexType i = 0; i < sizeDim1; ++i) {
    for (IndexType j = 0; j < sizeDim2; ++j) {
      for (IndexType k = 0; k < sizeDim3; ++k) {
        for (IndexType l = 0; l < sizeDim4; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
        }
      }
    }
  }

  // Step 2: general strides. Each inflated dimension follows the formula
  // ((dim - 1) * stride + 1); derive the expected range from it instead of
  // hard-coding the resulting sizes.
  strides[0] = 2;
  strides[1] = 4;
  strides[2] = 2;
  strides[3] = 3;
  array<IndexType, 4> inflatedTensorRange;
  for (int d = 0; d < 4; ++d) {
    inflatedTensorRange[d] = (tensorRange[d] - 1) * strides[d] + 1;
  }

  Tensor<DataType, 4, DataLayout, IndexType> inflated(inflatedTensorRange);
  const size_t inflatedTensorBuffSize = inflated.size() * sizeof(DataType);
  DataType* gpu_data_inflated = static_cast<DataType*>(sycl_device.allocate(inflatedTensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_inflated(gpu_data_inflated, inflatedTensorRange);
  gpu_inflated.device(sycl_device) = gpu_tensor.inflate(strides);
  sycl_device.memcpyDeviceToHost(inflated.data(), gpu_data_inflated, inflatedTensorBuffSize);

  for (int d = 0; d < 4; ++d) {
    VERIFY_IS_EQUAL(inflated.dimension(d), inflatedTensorRange[d]);
  }

  // Input values land at multiples of the stride; everything else is zero.
  for (IndexType i = 0; i < inflatedTensorRange[0]; ++i) {
    for (IndexType j = 0; j < inflatedTensorRange[1]; ++j) {
      for (IndexType k = 0; k < inflatedTensorRange[2]; ++k) {
        for (IndexType l = 0; l < inflatedTensorRange[3]; ++l) {
          if (i % strides[0] == 0 &&
              j % strides[1] == 0 &&
              k % strides[2] == 0 &&
              l % strides[3] == 0) {
            VERIFY_IS_EQUAL(inflated(i,j,k,l),
                            tensor(i/strides[0], j/strides[1], k/strides[2], l/strides[3]));
          } else {
            VERIFY_IS_EQUAL(0, inflated(i,j,k,l));
          }
        }
      }
    }
  }

  sycl_device.deallocate(gpu_data_tensor);
  sycl_device.deallocate(gpu_data_no_stride);
  sycl_device.deallocate(gpu_data_inflated);
}
template<typename DataType, typename dev_Selector> void sycl_inflation_test_per_device(dev_Selector s){
QueueInterface queueInterface(s);
auto sycl_device = Eigen::SyclDevice(&queueInterface);
test_simple_inflation_sycl<DataType, RowMajor, int64_t>(sycl_device);
test_simple_inflation_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
// Test entry point: exercises the SYCL inflation test on every supported
// SYCL device present on the system.
void test_cxx11_tensor_inflation_sycl()
{
  for (const auto& dev : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_inflation_test_per_device<float>(dev));
  }
}