diff --git a/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
index 26cea18a6..989b335b2 100644
--- a/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
@@ -123,8 +123,8 @@ template <typename T> T inverse(T x) { return 1 / x; }
 }
 
 #define TEST_UNARY_BUILTINS(SCALAR)                                          \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, += )                                  \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, = )                                   \
+  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=)                                   \
+  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =)                                    \
   TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan)                                   \
   TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite)                                \
   TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf)
@@ -140,9 +140,133 @@ static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
   TEST_UNARY_BUILTINS(double)
 }
 
+namespace std {
+template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
+template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }
+}
+
+#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC)                              \
+  {                                                                          \
+    /* out = in_1.FUNC(in_2) */                                              \
+    Tensor<SCALAR, 3> in_1(tensorRange);                                     \
+    Tensor<SCALAR, 3> in_2(tensorRange);                                     \
+    Tensor<SCALAR, 3> out(tensorRange);                                      \
+    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                        \
+    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                        \
+    out = out.random() + static_cast<SCALAR>(0.01);                          \
+    Tensor<SCALAR, 3> reference(out);                                        \
+    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                              \
+        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                 \
+    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                              \
+        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                 \
+    SCALAR *gpu_data_out = static_cast<SCALAR *>(                            \
+        sycl_device.allocate(out.size() * sizeof(SCALAR)));                  \
+    TensorMap<Tensor<SCALAR, 3>> gpu_1(gpu_data_1, tensorRange);             \
+    TensorMap<Tensor<SCALAR, 3>> gpu_2(gpu_data_2, tensorRange);             \
+    TensorMap<Tensor<SCALAR, 3>> gpu_out(gpu_data_out, tensorRange);         \
+    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                  \
+                                   (in_1.size()) * sizeof(SCALAR));          \
+    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                  \
+                                   (in_2.size()) * sizeof(SCALAR));          \
+    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2);                         \
+    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    for (int i = 0; i < out.size(); ++i) {                                   \
+      SCALAR ver = reference(i);                                             \
+      ver = std::FUNC(in_1(i), in_2(i));                                     \
+      VERIFY_IS_APPROX(out(i), ver);                                         \
+    }                                                                        \
+    sycl_device.deallocate(gpu_data_1);                                      \
+    sycl_device.deallocate(gpu_data_2);                                      \
+    sycl_device.deallocate(gpu_data_out);                                    \
+  }
+
+#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR)                     \
+  {                                                                          \
+    /* out = in_1 OPERATOR in_2 */                                           \
+    Tensor<SCALAR, 3> in_1(tensorRange);                                     \
+    Tensor<SCALAR, 3> in_2(tensorRange);                                     \
+    Tensor<SCALAR, 3> out(tensorRange);                                      \
+    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                        \
+    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                        \
+    out = out.random() + static_cast<SCALAR>(0.01);                          \
+    Tensor<SCALAR, 3> reference(out);                                        \
+    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                              \
+        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                 \
+    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                              \
+        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                 \
+    SCALAR *gpu_data_out = static_cast<SCALAR *>(                            \
+        sycl_device.allocate(out.size() * sizeof(SCALAR)));                  \
+    TensorMap<Tensor<SCALAR, 3>> gpu_1(gpu_data_1, tensorRange);             \
+    TensorMap<Tensor<SCALAR, 3>> gpu_2(gpu_data_2, tensorRange);             \
+    TensorMap<Tensor<SCALAR, 3>> gpu_out(gpu_data_out, tensorRange);         \
+    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                  \
+                                   (in_1.size()) * sizeof(SCALAR));          \
+    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                  \
+                                   (in_2.size()) * sizeof(SCALAR));          \
+    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2;                      \
+    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    for (int i = 0; i < out.size(); ++i) {                                   \
+      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i));                    \
+    }                                                                        \
+    sycl_device.deallocate(gpu_data_1);                                      \
+    sycl_device.deallocate(gpu_data_2);                                      \
+    sycl_device.deallocate(gpu_data_out);                                    \
+  }
+
+#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR)   \
+  {                                                                          \
+    /* out = in_1 OPERATOR 2 */                                              \
+    Tensor<SCALAR, 3> in_1(tensorRange);                                     \
+    Tensor<SCALAR, 3> out(tensorRange);                                      \
+    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                        \
+    Tensor<SCALAR, 3> reference(out);                                        \
+    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                              \
+        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                 \
+    SCALAR *gpu_data_out = static_cast<SCALAR *>(                            \
+        sycl_device.allocate(out.size() * sizeof(SCALAR)));                  \
+    TensorMap<Tensor<SCALAR, 3>> gpu_1(gpu_data_1, tensorRange);             \
+    TensorMap<Tensor<SCALAR, 3>> gpu_out(gpu_data_out, tensorRange);         \
+    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                  \
+                                   (in_1.size()) * sizeof(SCALAR));          \
+    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2;                          \
+    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                 \
+                                   (out.size()) * sizeof(SCALAR));           \
+    for (int i = 0; i < out.size(); ++i) {                                   \
+      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2);                          \
+    }                                                                        \
+    sycl_device.deallocate(gpu_data_1);                                      \
+    sycl_device.deallocate(gpu_data_out);                                    \
+  }
+
+#define TEST_BINARY_BUILTINS(SCALAR)                                         \
+  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax)                                \
+  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin)                                \
+  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, +)                                  \
+  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, -)                                  \
+  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, *)                                  \
+  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, /)
+
+static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
+  int sizeDim1 = 10;
+  int sizeDim2 = 10;
+  int sizeDim3 = 10;
+  array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+  TEST_BINARY_BUILTINS(float)
+  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %)
+}
+
 void test_cxx11_tensor_builtins_sycl() {
   cl::sycl::gpu_selector s;
   QueueInterface queueInterface(s);
   Eigen::SyclDevice sycl_device(&queueInterface);
   CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
+  CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
 }
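Note on the `namespace std` shims added at the top of the hunk: the host-side reference check in `TEST_BINARY_BUILTINS_FUNC` is spelled `std::FUNC(in_1(i), in_2(i))`, so injecting `cwiseMax`/`cwiseMin` into `std` lets the one macro cover both genuine std functions and Eigen's cwise method names. Below is a minimal standalone sketch of that lookup trick, illustrative only and not part of the patch; `REFERENCE_CHECK` is a hypothetical stand-in for the test macro's check line. (Strictly speaking, adding new declarations to namespace std is undefined behavior per the standard; the test accepts that for convenience.)

// Sketch (assumption: compiled as plain C++11, outside the Eigen test harness).
#include <algorithm>
#include <cassert>

// Shim mirroring the patch: make std::cwiseMax spellable so a macro that
// expands to std::FUNC(a, b) also works for FUNC = cwiseMax.
namespace std {
template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
}

// Hypothetical macro with the same shape as the test's reference check:
// FUNC is pasted directly after the std:: qualifier.
#define REFERENCE_CHECK(FUNC, a, b) std::FUNC(a, b)

int main() {
  assert(REFERENCE_CHECK(cwiseMax, 1.0f, 2.0f) == 2.0f); // resolves via the shim
  assert(REFERENCE_CHECK(max, 1.0f, 2.0f) == 2.0f);      // resolves via <algorithm>
  return 0;
}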