2016-11-05 02:18:19 +08:00
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
# define EIGEN_TEST_NO_LONGDOUBLE
# define EIGEN_TEST_NO_COMPLEX
# define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl
# define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
# define EIGEN_USE_SYCL
# include "main.h"
# include <unsupported/Eigen/CXX11/Tensor>
2016-11-19 00:20:42 +08:00
template < typename DataType , int DataLayout >
2016-11-09 01:08:02 +08:00
static void test_full_reductions_sycl ( const Eigen : : SyclDevice & sycl_device ) {
2016-11-05 02:18:19 +08:00
const int num_rows = 452 ;
const int num_cols = 765 ;
array < int , 2 > tensorRange = { { num_rows , num_cols } } ;
2016-11-19 00:20:42 +08:00
Tensor < DataType , 2 , DataLayout > in ( tensorRange ) ;
Tensor < DataType , 0 , DataLayout > full_redux ;
Tensor < DataType , 0 , DataLayout > full_redux_gpu ;
2016-11-09 01:08:02 +08:00
2016-11-05 02:18:19 +08:00
in . setRandom ( ) ;
full_redux = in . sum ( ) ;
2016-11-19 00:20:42 +08:00
DataType * gpu_in_data = static_cast < DataType * > ( sycl_device . allocate ( in . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ) ;
DataType * gpu_out_data = ( DataType * ) sycl_device . allocate ( sizeof ( DataType ) ) ;
2016-11-05 02:18:19 +08:00
2016-11-19 00:20:42 +08:00
TensorMap < Tensor < DataType , 2 , DataLayout > > in_gpu ( gpu_in_data , tensorRange ) ;
TensorMap < Tensor < DataType , 0 , DataLayout > > out_gpu ( gpu_out_data ) ;
2016-11-05 02:18:19 +08:00
2016-11-19 00:20:42 +08:00
sycl_device . memcpyHostToDevice ( gpu_in_data , in . data ( ) , ( in . dimensions ( ) . TotalSize ( ) ) * sizeof ( DataType ) ) ;
2016-11-09 01:08:02 +08:00
out_gpu . device ( sycl_device ) = in_gpu . sum ( ) ;
2016-11-19 00:20:42 +08:00
sycl_device . memcpyDeviceToHost ( full_redux_gpu . data ( ) , gpu_out_data , sizeof ( DataType ) ) ;
2016-11-09 01:08:02 +08:00
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX ( full_redux_gpu ( ) , full_redux ( ) ) ;
2016-11-05 02:18:19 +08:00
2016-11-09 01:08:02 +08:00
sycl_device . deallocate ( gpu_in_data ) ;
sycl_device . deallocate ( gpu_out_data ) ;
}
2016-11-19 00:20:42 +08:00
template < typename DataType , int DataLayout >
2016-11-09 01:08:02 +08:00
static void test_first_dim_reductions_sycl ( const Eigen : : SyclDevice & sycl_device ) {
2016-11-05 02:18:19 +08:00
int dim_x = 145 ;
int dim_y = 1 ;
int dim_z = 67 ;
array < int , 3 > tensorRange = { { dim_x , dim_y , dim_z } } ;
Eigen : : array < int , 1 > red_axis ;
red_axis [ 0 ] = 0 ;
array < int , 2 > reduced_tensorRange = { { dim_y , dim_z } } ;
2016-11-19 00:20:42 +08:00
Tensor < DataType , 3 , DataLayout > in ( tensorRange ) ;
Tensor < DataType , 2 , DataLayout > redux ( reduced_tensorRange ) ;
Tensor < DataType , 2 , DataLayout > redux_gpu ( reduced_tensorRange ) ;
2016-11-09 01:08:02 +08:00
in . setRandom ( ) ;
2016-11-05 02:18:19 +08:00
2016-11-09 01:08:02 +08:00
redux = in . sum ( red_axis ) ;
2016-11-05 02:18:19 +08:00
2016-11-19 00:20:42 +08:00
DataType * gpu_in_data = static_cast < DataType * > ( sycl_device . allocate ( in . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ) ;
DataType * gpu_out_data = static_cast < DataType * > ( sycl_device . allocate ( redux_gpu . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ) ;
2016-11-05 02:18:19 +08:00
2016-11-19 00:20:42 +08:00
TensorMap < Tensor < DataType , 3 , DataLayout > > in_gpu ( gpu_in_data , tensorRange ) ;
TensorMap < Tensor < DataType , 2 , DataLayout > > out_gpu ( gpu_out_data , reduced_tensorRange ) ;
2016-11-05 02:18:19 +08:00
2016-11-19 00:20:42 +08:00
sycl_device . memcpyHostToDevice ( gpu_in_data , in . data ( ) , ( in . dimensions ( ) . TotalSize ( ) ) * sizeof ( DataType ) ) ;
2016-11-09 01:08:02 +08:00
out_gpu . device ( sycl_device ) = in_gpu . sum ( red_axis ) ;
2016-11-19 00:20:42 +08:00
sycl_device . memcpyDeviceToHost ( redux_gpu . data ( ) , gpu_out_data , redux_gpu . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ;
2016-11-09 01:08:02 +08:00
// Check that the CPU and GPU reductions return the same result.
for ( int j = 0 ; j < reduced_tensorRange [ 0 ] ; j + + )
for ( int k = 0 ; k < reduced_tensorRange [ 1 ] ; k + + )
VERIFY_IS_APPROX ( redux_gpu ( j , k ) , redux ( j , k ) ) ;
sycl_device . deallocate ( gpu_in_data ) ;
sycl_device . deallocate ( gpu_out_data ) ;
}
2016-11-19 00:20:42 +08:00
template < typename DataType , int DataLayout >
2016-11-09 01:08:02 +08:00
static void test_last_dim_reductions_sycl ( const Eigen : : SyclDevice & sycl_device ) {
2016-11-05 02:18:19 +08:00
int dim_x = 567 ;
int dim_y = 1 ;
int dim_z = 47 ;
array < int , 3 > tensorRange = { { dim_x , dim_y , dim_z } } ;
Eigen : : array < int , 1 > red_axis ;
red_axis [ 0 ] = 2 ;
array < int , 2 > reduced_tensorRange = { { dim_x , dim_y } } ;
2016-11-19 00:20:42 +08:00
Tensor < DataType , 3 , DataLayout > in ( tensorRange ) ;
Tensor < DataType , 2 , DataLayout > redux ( reduced_tensorRange ) ;
Tensor < DataType , 2 , DataLayout > redux_gpu ( reduced_tensorRange ) ;
2016-11-09 01:08:02 +08:00
in . setRandom ( ) ;
redux = in . sum ( red_axis ) ;
2016-11-19 00:20:42 +08:00
DataType * gpu_in_data = static_cast < DataType * > ( sycl_device . allocate ( in . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ) ;
DataType * gpu_out_data = static_cast < DataType * > ( sycl_device . allocate ( redux_gpu . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ) ;
2016-11-09 01:08:02 +08:00
2016-11-19 00:20:42 +08:00
TensorMap < Tensor < DataType , 3 , DataLayout > > in_gpu ( gpu_in_data , tensorRange ) ;
TensorMap < Tensor < DataType , 2 , DataLayout > > out_gpu ( gpu_out_data , reduced_tensorRange ) ;
2016-11-09 01:08:02 +08:00
2016-11-19 00:20:42 +08:00
sycl_device . memcpyHostToDevice ( gpu_in_data , in . data ( ) , ( in . dimensions ( ) . TotalSize ( ) ) * sizeof ( DataType ) ) ;
2016-11-09 01:08:02 +08:00
out_gpu . device ( sycl_device ) = in_gpu . sum ( red_axis ) ;
2016-11-19 00:20:42 +08:00
sycl_device . memcpyDeviceToHost ( redux_gpu . data ( ) , gpu_out_data , redux_gpu . dimensions ( ) . TotalSize ( ) * sizeof ( DataType ) ) ;
2016-11-05 02:18:19 +08:00
// Check that the CPU and GPU reductions return the same result.
2016-11-09 01:08:02 +08:00
for ( int j = 0 ; j < reduced_tensorRange [ 0 ] ; j + + )
for ( int k = 0 ; k < reduced_tensorRange [ 1 ] ; k + + )
2016-11-05 02:18:19 +08:00
VERIFY_IS_APPROX ( redux_gpu ( j , k ) , redux ( j , k ) ) ;
2016-11-09 01:08:02 +08:00
sycl_device . deallocate ( gpu_in_data ) ;
sycl_device . deallocate ( gpu_out_data ) ;
2016-11-05 02:18:19 +08:00
}
2016-11-19 08:31:14 +08:00
template < typename DataType > void sycl_reduction_test_per_device ( const cl : : sycl : : device & d ) {
std : : cout < < " Running on " < < d . template get_info < cl : : sycl : : info : : device : : name > ( ) < < std : : endl ;
QueueInterface queueInterface ( d ) ;
2016-11-19 00:20:42 +08:00
auto sycl_device = Eigen : : SyclDevice ( & queueInterface ) ;
2016-11-19 08:31:14 +08:00
2016-11-19 00:20:42 +08:00
test_full_reductions_sycl < DataType , RowMajor > ( sycl_device ) ;
test_first_dim_reductions_sycl < DataType , RowMajor > ( sycl_device ) ;
test_last_dim_reductions_sycl < DataType , RowMajor > ( sycl_device ) ;
test_full_reductions_sycl < DataType , ColMajor > ( sycl_device ) ;
test_first_dim_reductions_sycl < DataType , ColMajor > ( sycl_device ) ;
test_last_dim_reductions_sycl < DataType , ColMajor > ( sycl_device ) ;
}
2016-11-05 02:18:19 +08:00
void test_cxx11_tensor_reduction_sycl ( ) {
2016-11-19 08:31:14 +08:00
for ( const auto & device : cl : : sycl : : device : : get_devices ( ) ) {
2016-11-24 00:30:41 +08:00
/// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
auto s = device . template get_info < cl : : sycl : : info : : device : : vendor > ( ) ;
std : : transform ( s . begin ( ) , s . end ( ) , s . begin ( ) , : : tolower ) ;
if ( ! device . is_cpu ( ) | | s . find ( " amd " ) = = std : : string : : npos )
CALL_SUBTEST ( sycl_reduction_test_per_device < float > ( device ) ) ;
2016-11-19 08:31:14 +08:00
}
2016-11-05 02:18:19 +08:00
}