2017-01-19 19:30:59 +08:00
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
# define EIGEN_TEST_NO_LONGDOUBLE
# define EIGEN_TEST_NO_COMPLEX
# define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl
2017-02-01 23:29:53 +08:00
# define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
2017-01-19 19:30:59 +08:00
# define EIGEN_USE_SYCL
# include <iostream>
# include <chrono>
# include <ctime>
# include "main.h"
# include <unsupported/Eigen/CXX11/Tensor>
# include <iomanip>
using Eigen : : array ;
using Eigen : : SyclDevice ;
using Eigen : : Tensor ;
using Eigen : : TensorMap ;
// Precision handed to Eigen::internal::isApprox when the large-expression
// tests compare device results with host results.
static const float error_threshold = 1e-4f;
template < typename DataType , int DataLayout , typename IndexType >
static void test_larg_expr1D ( const Eigen : : SyclDevice & sycl_device )
{
2017-02-01 23:29:53 +08:00
IndexType indim0 = 53 ;
IndexType indim1 = 55 ;
IndexType indim2 = 51 ;
IndexType outdim0 = 50 ;
IndexType outdim1 = 55 ;
IndexType outdim2 = 51 ;
2017-01-19 19:30:59 +08:00
Eigen : : array < IndexType , 3 > input_dims = { { indim0 , indim1 , indim2 } } ;
Eigen : : array < IndexType , 1 > kernel_dims = { { 4 } } ;
Eigen : : array < IndexType , 3 > result_dims = { { outdim0 , outdim1 , outdim2 } } ;
Tensor < DataType , 3 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 1 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result ( result_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result_host ( result_dims ) ;
Eigen : : array < IndexType , 1 > dims3 { { 0 } } ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
result . setZero ( ) ;
result_host . setZero ( ) ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_result ( d_result , result_dims ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . convolve ( gpu_kernel , dims3 ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
result_host = input . convolve ( kernel , dims3 ) ;
2017-02-01 23:29:53 +08:00
for ( IndexType i = 0 ; i < outdim0 ; i + + ) {
for ( IndexType j = 0 ; j < outdim1 ; j + + ) {
for ( IndexType k = 0 ; k < outdim2 ; k + + ) {
2017-01-19 19:30:59 +08:00
if ( ! ( Eigen : : internal : : isApprox ( result ( i , j , k ) , result_host ( i , j , k ) , error_threshold ) ) ) {
std : : cout < < std : : setprecision ( 16 ) < < " mismatch detected at index ( " < < i < < " , " < < j < < " , " < < k < < " ) " < < " \t " < < result ( i , j , k ) < < " vs " < < result_host ( i , j , k ) < < std : : endl ;
assert ( false ) ;
}
}
}
}
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_result ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_larg_expr2D ( const Eigen : : SyclDevice & sycl_device )
{
2017-02-01 23:29:53 +08:00
IndexType indim0 = 53 ;
IndexType indim1 = 55 ;
IndexType indim2 = 51 ;
IndexType outdim0 = 50 ;
IndexType outdim1 = 51 ;
IndexType outdim2 = 51 ;
2017-01-19 19:30:59 +08:00
Eigen : : array < IndexType , 3 > input_dims = { { indim0 , indim1 , indim2 } } ;
Eigen : : array < IndexType , 2 > kernel_dims = { { 4 , 5 } } ;
Eigen : : array < IndexType , 3 > result_dims = { { outdim0 , outdim1 , outdim2 } } ;
Tensor < DataType , 3 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 2 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result ( result_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result_host ( result_dims ) ;
Eigen : : array < IndexType , 2 > dims3 { { 0 , 1 } } ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
result . setZero ( ) ;
result_host . setZero ( ) ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_result ( d_result , result_dims ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . convolve ( gpu_kernel , dims3 ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
result_host = input . convolve ( kernel , dims3 ) ;
2017-02-01 23:29:53 +08:00
for ( IndexType i = 0 ; i < outdim0 ; i + + ) {
for ( IndexType j = 0 ; j < outdim1 ; j + + ) {
for ( IndexType k = 0 ; k < outdim2 ; k + + ) {
2017-01-19 19:30:59 +08:00
if ( ! ( Eigen : : internal : : isApprox ( result ( i , j , k ) , result_host ( i , j , k ) , error_threshold ) ) ) {
std : : cout < < std : : setprecision ( 16 ) < < " mismatch detected at index ( " < < i < < " , " < < j < < " , " < < k < < " ) " < < " \t " < < result ( i , j , k ) < < " vs " < < result_host ( i , j , k ) < < std : : endl ;
assert ( false ) ;
}
}
}
}
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_result ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_larg_expr3D ( const Eigen : : SyclDevice & sycl_device )
{
2017-02-01 23:29:53 +08:00
IndexType indim0 = 53 ;
IndexType indim1 = 55 ;
IndexType indim2 = 51 ;
IndexType outdim0 = 50 ;
IndexType outdim1 = 51 ;
IndexType outdim2 = 49 ;
2017-01-19 19:30:59 +08:00
Eigen : : array < IndexType , 3 > input_dims = { { indim0 , indim1 , indim2 } } ;
Eigen : : array < IndexType , 3 > kernel_dims = { { 4 , 5 , 3 } } ;
Eigen : : array < IndexType , 3 > result_dims = { { outdim0 , outdim1 , outdim2 } } ;
Tensor < DataType , 3 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result ( result_dims ) ;
Tensor < DataType , 3 , DataLayout , IndexType > result_host ( result_dims ) ;
Eigen : : array < IndexType , 3 > dims3 { { 0 , 1 , 2 } } ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
result . setZero ( ) ;
result_host . setZero ( ) ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 3 , DataLayout , IndexType > > gpu_result ( d_result , result_dims ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . convolve ( gpu_kernel , dims3 ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
result_host = input . convolve ( kernel , dims3 ) ;
2017-02-01 23:29:53 +08:00
for ( IndexType i = 0 ; i < outdim0 ; i + + ) {
for ( IndexType j = 0 ; j < outdim1 ; j + + ) {
for ( IndexType k = 0 ; k < outdim2 ; k + + ) {
2017-01-19 19:30:59 +08:00
if ( ! ( Eigen : : internal : : isApprox ( result ( i , j , k ) , result_host ( i , j , k ) , error_threshold ) ) ) {
std : : cout < < std : : setprecision ( 16 ) < < " mismatch detected at index ( " < < i < < " , " < < j < < " , " < < k < < " ) " < < " \t " < < result ( i , j , k ) < < " vs " < < result_host ( i , j , k ) < < std : : endl ;
assert ( false ) ;
}
}
}
}
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_result ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_evals ( const Eigen : : SyclDevice & sycl_device )
{
Eigen : : array < IndexType , 2 > input_dims = { { 3 , 3 } } ;
Eigen : : array < IndexType , 1 > kernel_dims = { { 2 } } ;
Eigen : : array < IndexType , 2 > result_dims = { { 2 , 3 } } ;
Tensor < DataType , 2 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 1 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 2 , DataLayout , IndexType > result ( result_dims ) ;
Eigen : : array < IndexType , 1 > dims3 { { 0 } } ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
result . setZero ( ) ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_result ( d_result , result_dims ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . convolve ( gpu_kernel , dims3 ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
VERIFY_IS_APPROX ( result ( 0 , 0 ) , input ( 0 , 0 ) * kernel ( 0 ) + input ( 1 , 0 ) * kernel ( 1 ) ) ; // index 0
VERIFY_IS_APPROX ( result ( 0 , 1 ) , input ( 0 , 1 ) * kernel ( 0 ) + input ( 1 , 1 ) * kernel ( 1 ) ) ; // index 2
VERIFY_IS_APPROX ( result ( 0 , 2 ) , input ( 0 , 2 ) * kernel ( 0 ) + input ( 1 , 2 ) * kernel ( 1 ) ) ; // index 4
VERIFY_IS_APPROX ( result ( 1 , 0 ) , input ( 1 , 0 ) * kernel ( 0 ) + input ( 2 , 0 ) * kernel ( 1 ) ) ; // index 1
VERIFY_IS_APPROX ( result ( 1 , 1 ) , input ( 1 , 1 ) * kernel ( 0 ) + input ( 2 , 1 ) * kernel ( 1 ) ) ; // index 3
VERIFY_IS_APPROX ( result ( 1 , 2 ) , input ( 1 , 2 ) * kernel ( 0 ) + input ( 2 , 2 ) * kernel ( 1 ) ) ; // index 5
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_result ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_expr ( const Eigen : : SyclDevice & sycl_device )
{
Eigen : : array < IndexType , 2 > input_dims = { { 3 , 3 } } ;
Eigen : : array < IndexType , 2 > kernel_dims = { { 2 , 2 } } ;
Eigen : : array < IndexType , 2 > result_dims = { { 2 , 2 } } ;
Tensor < DataType , 2 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 2 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 2 , DataLayout , IndexType > result ( result_dims ) ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
Eigen : : array < IndexType , 2 > dims ;
dims [ 0 ] = 0 ;
dims [ 1 ] = 1 ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 2 , DataLayout , IndexType > > gpu_result ( d_result , result_dims ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . convolve ( gpu_kernel , dims ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
VERIFY_IS_APPROX ( result ( 0 , 0 ) , input ( 0 , 0 ) * kernel ( 0 , 0 ) + input ( 0 , 1 ) * kernel ( 0 , 1 ) +
input ( 1 , 0 ) * kernel ( 1 , 0 ) + input ( 1 , 1 ) * kernel ( 1 , 1 ) ) ;
VERIFY_IS_APPROX ( result ( 0 , 1 ) , input ( 0 , 1 ) * kernel ( 0 , 0 ) + input ( 0 , 2 ) * kernel ( 0 , 1 ) +
input ( 1 , 1 ) * kernel ( 1 , 0 ) + input ( 1 , 2 ) * kernel ( 1 , 1 ) ) ;
VERIFY_IS_APPROX ( result ( 1 , 0 ) , input ( 1 , 0 ) * kernel ( 0 , 0 ) + input ( 1 , 1 ) * kernel ( 0 , 1 ) +
input ( 2 , 0 ) * kernel ( 1 , 0 ) + input ( 2 , 1 ) * kernel ( 1 , 1 ) ) ;
VERIFY_IS_APPROX ( result ( 1 , 1 ) , input ( 1 , 1 ) * kernel ( 0 , 0 ) + input ( 1 , 2 ) * kernel ( 0 , 1 ) +
input ( 2 , 1 ) * kernel ( 1 , 0 ) + input ( 2 , 2 ) * kernel ( 1 , 1 ) ) ;
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_result ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_modes ( const Eigen : : SyclDevice & sycl_device ) {
Eigen : : array < IndexType , 1 > input_dims = { { 3 } } ;
Eigen : : array < IndexType , 1 > kernel_dims = { { 3 } } ;
Tensor < DataType , 1 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 1 , DataLayout , IndexType > kernel ( kernel_dims ) ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
Eigen : : array < IndexType , 1 > dims ;
dims [ 0 ] = 0 ;
input ( 0 ) = 1.0f ;
input ( 1 ) = 2.0f ;
input ( 2 ) = 3.0f ;
kernel ( 0 ) = 0.5f ;
kernel ( 1 ) = 1.0f ;
kernel ( 2 ) = 0.0f ;
Eigen : : array < std : : pair < IndexType , IndexType > , 1 > padding ;
// Emulate VALID mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding [ 0 ] = std : : make_pair ( 0 , 0 ) ;
Tensor < DataType , 1 , DataLayout , IndexType > valid ( 1 ) ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t valid_bytes = valid . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_valid = static_cast < DataType * > ( sycl_device . allocate ( valid_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_valid ( d_valid , valid . dimensions ( ) ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_valid . device ( sycl_device ) = gpu_input . pad ( padding ) . convolve ( gpu_kernel , dims ) ;
sycl_device . memcpyDeviceToHost ( valid . data ( ) , d_valid , valid_bytes ) ;
VERIFY_IS_EQUAL ( valid . dimension ( 0 ) , 1 ) ;
VERIFY_IS_APPROX ( valid ( 0 ) , 2.5f ) ;
// Emulate SAME mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding [ 0 ] = std : : make_pair ( 1 , 1 ) ;
Tensor < DataType , 1 , DataLayout , IndexType > same ( 3 ) ;
std : : size_t same_bytes = same . size ( ) * sizeof ( DataType ) ;
DataType * d_same = static_cast < DataType * > ( sycl_device . allocate ( same_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_same ( d_same , same . dimensions ( ) ) ;
gpu_same . device ( sycl_device ) = gpu_input . pad ( padding ) . convolve ( gpu_kernel , dims ) ;
sycl_device . memcpyDeviceToHost ( same . data ( ) , d_same , same_bytes ) ;
VERIFY_IS_EQUAL ( same . dimension ( 0 ) , 3 ) ;
VERIFY_IS_APPROX ( same ( 0 ) , 1.0f ) ;
VERIFY_IS_APPROX ( same ( 1 ) , 2.5f ) ;
VERIFY_IS_APPROX ( same ( 2 ) , 4.0f ) ;
// Emulate FULL mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding [ 0 ] = std : : make_pair ( 2 , 2 ) ;
Tensor < DataType , 1 , DataLayout , IndexType > full ( 5 ) ;
std : : size_t full_bytes = full . size ( ) * sizeof ( DataType ) ;
DataType * d_full = static_cast < DataType * > ( sycl_device . allocate ( full_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_full ( d_full , full . dimensions ( ) ) ;
gpu_full . device ( sycl_device ) = gpu_input . pad ( padding ) . convolve ( gpu_kernel , dims ) ;
sycl_device . memcpyDeviceToHost ( full . data ( ) , d_full , full_bytes ) ;
VERIFY_IS_EQUAL ( full . dimension ( 0 ) , 5 ) ;
VERIFY_IS_APPROX ( full ( 0 ) , 0.0f ) ;
VERIFY_IS_APPROX ( full ( 1 ) , 1.0f ) ;
VERIFY_IS_APPROX ( full ( 2 ) , 2.5f ) ;
VERIFY_IS_APPROX ( full ( 3 ) , 4.0f ) ;
VERIFY_IS_APPROX ( full ( 4 ) , 1.5f ) ;
sycl_device . deallocate ( d_input ) ;
sycl_device . deallocate ( d_kernel ) ;
sycl_device . deallocate ( d_valid ) ;
sycl_device . deallocate ( d_same ) ;
sycl_device . deallocate ( d_full ) ;
}
template < typename DataType , int DataLayout , typename IndexType >
static void test_strides ( const Eigen : : SyclDevice & sycl_device ) {
Eigen : : array < IndexType , 1 > input_dims = { { 13 } } ;
Eigen : : array < IndexType , 1 > kernel_dims = { { 3 } } ;
Tensor < DataType , 1 , DataLayout , IndexType > input ( input_dims ) ;
Tensor < DataType , 1 , DataLayout , IndexType > kernel ( kernel_dims ) ;
Tensor < DataType , 1 , DataLayout , IndexType > result ( 2 ) ;
input . setRandom ( ) ;
kernel . setRandom ( ) ;
Eigen : : array < IndexType , 1 > dims ;
dims [ 0 ] = 0 ;
Eigen : : array < IndexType , 1 > stride_of_3 ;
stride_of_3 [ 0 ] = 3 ;
Eigen : : array < IndexType , 1 > stride_of_2 ;
stride_of_2 [ 0 ] = 2 ;
std : : size_t input_bytes = input . size ( ) * sizeof ( DataType ) ;
std : : size_t kernel_bytes = kernel . size ( ) * sizeof ( DataType ) ;
std : : size_t result_bytes = result . size ( ) * sizeof ( DataType ) ;
DataType * d_input = static_cast < DataType * > ( sycl_device . allocate ( input_bytes ) ) ;
DataType * d_kernel = static_cast < DataType * > ( sycl_device . allocate ( kernel_bytes ) ) ;
DataType * d_result = static_cast < DataType * > ( sycl_device . allocate ( result_bytes ) ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_input ( d_input , input_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_kernel ( d_kernel , kernel_dims ) ;
Eigen : : TensorMap < Eigen : : Tensor < DataType , 1 , DataLayout , IndexType > > gpu_result ( d_result , result . dimensions ( ) ) ;
sycl_device . memcpyHostToDevice ( d_input , input . data ( ) , input_bytes ) ;
sycl_device . memcpyHostToDevice ( d_kernel , kernel . data ( ) , kernel_bytes ) ;
gpu_result . device ( sycl_device ) = gpu_input . stride ( stride_of_3 ) . convolve ( gpu_kernel , dims ) . stride ( stride_of_2 ) ;
sycl_device . memcpyDeviceToHost ( result . data ( ) , d_result , result_bytes ) ;
VERIFY_IS_EQUAL ( result . dimension ( 0 ) , 2 ) ;
VERIFY_IS_APPROX ( result ( 0 ) , ( input ( 0 ) * kernel ( 0 ) + input ( 3 ) * kernel ( 1 ) +
input ( 6 ) * kernel ( 2 ) ) ) ;
VERIFY_IS_APPROX ( result ( 1 ) , ( input ( 6 ) * kernel ( 0 ) + input ( 9 ) * kernel ( 1 ) +
input ( 12 ) * kernel ( 2 ) ) ) ;
}
template < typename Dev_selector > void tensorConvolutionPerDevice ( Dev_selector & s ) {
QueueInterface queueInterface ( s ) ;
auto sycl_device = Eigen : : SyclDevice ( & queueInterface ) ;
2017-02-01 23:29:53 +08:00
test_larg_expr1D < float , RowMajor , int64_t > ( sycl_device ) ;
test_larg_expr1D < float , ColMajor , int64_t > ( sycl_device ) ;
test_larg_expr2D < float , RowMajor , int64_t > ( sycl_device ) ;
test_larg_expr2D < float , ColMajor , int64_t > ( sycl_device ) ;
test_larg_expr3D < float , RowMajor , int64_t > ( sycl_device ) ;
test_larg_expr3D < float , ColMajor , int64_t > ( sycl_device ) ;
test_evals < float , ColMajor , int64_t > ( sycl_device ) ;
test_evals < float , RowMajor , int64_t > ( sycl_device ) ;
test_expr < float , ColMajor , int64_t > ( sycl_device ) ;
test_expr < float , RowMajor , int64_t > ( sycl_device ) ;
test_modes < float , ColMajor , int64_t > ( sycl_device ) ;
test_modes < float , RowMajor , int64_t > ( sycl_device ) ;
test_strides < float , ColMajor , int64_t > ( sycl_device ) ;
test_strides < float , RowMajor , int64_t > ( sycl_device ) ;
2017-01-19 19:30:59 +08:00
}
void test_cxx11_tensor_convolution_sycl ( ) {
for ( const auto & device : Eigen : : get_sycl_supported_devices ( ) ) {
CALL_SUBTEST ( tensorConvolutionPerDevice ( device ) ) ;
}
}