eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

namespace std {
// Host-side scalar references for the coefficient-wise tensor ops that have no
// <cmath> function of the same name. They are declared in namespace std so the
// test macros in this file can spell every reference uniformly as std::FUNC.

/// Reciprocal square root: 1 / sqrt(x).
template <typename T>
T rsqrt(T x) {
  return 1 / std::sqrt(x);
}

/// x raised to the second power.
template <typename T>
T square(T x) {
  return x * x;
}

/// x raised to the third power.
template <typename T>
T cube(T x) {
  return x * x * x;
}

/// Multiplicative inverse: 1 / x.
template <typename T>
T inverse(T x) {
  return 1 / x;
}
}  // namespace std

// TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout)
//
// Expands to two independent scoped checks of the coefficient-wise unary op
// FUNC on 3-D tensors of SCALAR with storage order Layout:
//   1. gpu_out OPERATOR gpu.FUNC()      - distinct input and output buffers
//   2. gpu_out OPERATOR gpu_out.FUNC()  - the output buffer is also the input
// In both cases the device result is copied back to the host and every element
// is compared against `reference(i) OPERATOR std::FUNC(input)` computed on the
// host. The expansion site must have `sycl_device` and `tensorRange` in scope.
#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout)         \
  {                                                                            \
    /* Case 1: out OPERATOR in.FUNC() */                                       \
    using HostTensor = Tensor<SCALAR, 3, Layout, int64_t>;                     \
    HostTensor in(tensorRange);                                                \
    HostTensor out(tensorRange);                                               \
    in = in.random() + static_cast<SCALAR>(0.01);                              \
    out = out.random() + static_cast<SCALAR>(0.01);                            \
    /* Host snapshot of out taken before the device writes to it; needed to */ \
    /* rebuild the expected value when OPERATOR is a compound assignment.   */ \
    HostTensor reference(out);                                                 \
    const size_t in_bytes = in.size() * sizeof(SCALAR);                        \
    const size_t out_bytes = out.size() * sizeof(SCALAR);                      \
    SCALAR *dev_in = static_cast<SCALAR *>(sycl_device.allocate(in_bytes));    \
    SCALAR *dev_out = static_cast<SCALAR *>(sycl_device.allocate(out_bytes));  \
    TensorMap<HostTensor> gpu(dev_in, tensorRange);                            \
    TensorMap<HostTensor> gpu_out(dev_out, tensorRange);                       \
    sycl_device.memcpyHostToDevice(dev_in, in.data(), in_bytes);               \
    sycl_device.memcpyHostToDevice(dev_out, out.data(), out_bytes);            \
    gpu_out.device(sycl_device) OPERATOR gpu.FUNC();                           \
    sycl_device.memcpyDeviceToHost(out.data(), dev_out, out_bytes);            \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      SCALAR ver = reference(i);                                               \
      ver OPERATOR std::FUNC(in(i));                                           \
      VERIFY_IS_APPROX(out(i), ver);                                           \
    }                                                                          \
    sycl_device.deallocate(dev_in);                                            \
    sycl_device.deallocate(dev_out);                                           \
  }                                                                            \
  {                                                                            \
    /* Case 2: out OPERATOR out.FUNC() */                                      \
    using HostTensor = Tensor<SCALAR, 3, Layout, int64_t>;                     \
    HostTensor out(tensorRange);                                               \
    out = out.random() + static_cast<SCALAR>(0.01);                            \
    /* Pre-device copy of out: it is both the FUNC argument and the left   */  \
    /* operand of OPERATOR in the host-side expectation.                   */  \
    HostTensor reference(out);                                                 \
    const size_t out_bytes = out.size() * sizeof(SCALAR);                      \
    SCALAR *dev_out = static_cast<SCALAR *>(sycl_device.allocate(out_bytes));  \
    TensorMap<HostTensor> gpu_out(dev_out, tensorRange);                       \
    sycl_device.memcpyHostToDevice(dev_out, out.data(), out_bytes);            \
    gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC();                       \
    sycl_device.memcpyDeviceToHost(out.data(), dev_out, out_bytes);            \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      SCALAR ver = reference(i);                                               \
      ver OPERATOR std::FUNC(reference(i));                                    \
      VERIFY_IS_APPROX(out(i), ver);                                           \
    }                                                                          \
    sycl_device.deallocate(dev_out);                                           \
  }

// TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR, Layout)
//
// Expands TEST_UNARY_BUILTINS_FOR_SCALAR once for each unary built-in covered
// by this file, forwarding the same SCALAR, OPERATOR and Layout to every
// expansion. Each built-in is listed exactly once (a second, redundant `abs`
// entry used to sit between `log` and `ceil`).
#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout)                \
  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout)               \
  TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout)              \
  TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout)             \
  TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout)            \
  TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout)              \
  TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout)           \
  TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout)              \
  TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout)               \
  TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout)             \
  TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout)               \
  TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout)              \
  TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout)             \
  TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout)             \
  TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout)

// TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout)
//
// Checks a coefficient-wise predicate FUNC that turns a 3-D SCALAR tensor into
// a 3-D bool tensor: evaluates `gpu_out = gpu.FUNC()` on the SYCL device, copies
// the flags back, and requires each one to equal std::FUNC of the matching host
// input element. The expansion site must have `sycl_device` and `tensorRange`
// in scope.
// NOTE(review): the input is only ever `random() + 0.01`; presumably that is
// always finite, so NaN/Inf inputs are likely never exercised - confirm.
#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout)                        \
  {                                                                            \
    /* out = in.FUNC() */                                                      \
    using ValueTensor = Tensor<SCALAR, 3, Layout, int64_t>;                    \
    using FlagTensor = Tensor<bool, 3, Layout, int64_t>;                       \
    ValueTensor in(tensorRange);                                               \
    FlagTensor out(tensorRange);                                               \
    in = in.random() + static_cast<SCALAR>(0.01);                              \
    const size_t in_bytes = in.size() * sizeof(SCALAR);                        \
    const size_t out_bytes = out.size() * sizeof(bool);                        \
    SCALAR *dev_in = static_cast<SCALAR *>(sycl_device.allocate(in_bytes));    \
    bool *dev_out = static_cast<bool *>(sycl_device.allocate(out_bytes));      \
    TensorMap<ValueTensor> gpu(dev_in, tensorRange);                           \
    TensorMap<FlagTensor> gpu_out(dev_out, tensorRange);                       \
    sycl_device.memcpyHostToDevice(dev_in, in.data(), in_bytes);               \
    gpu_out.device(sycl_device) = gpu.FUNC();                                  \
    sycl_device.memcpyDeviceToHost(out.data(), dev_out, out_bytes);            \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      VERIFY_IS_EQUAL(out(i), std::FUNC(in(i)));                               \
    }                                                                          \
    sycl_device.deallocate(dev_in);                                            \
    sycl_device.deallocate(dev_out);                                           \
  }

// TEST_UNARY_BUILTINS(SCALAR, Layout)
//
// Complete unary suite for one SCALAR / Layout pair: every built-in listed in
// TEST_UNARY_BUILTINS_OPERATOR, first with `+=` and then with `=`, followed by
// the bool-returning predicates isnan, isfinite and isinf.
#define TEST_UNARY_BUILTINS(SCALAR, Layout)                                    \
  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout)                             \
  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout)                              \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout)                             \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout)                          \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)

// Runs the whole unary built-in suite on float tensors of shape 10 x 10 x 10,
// once for each storage order. The macro expansions below pick up the
// parameter `sycl_device` and the local `tensorRange` by name, so neither may
// be renamed.
static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
  const int64_t dimSize = 10;
  array<int64_t, 3> tensorRange = {{dimSize, dimSize, dimSize}};

  TEST_UNARY_BUILTINS(float, RowMajor)
  TEST_UNARY_BUILTINS(float, ColMajor)
}

namespace std {
// Host-side references named after the Tensor members cwiseMax / cwiseMin, so
// TEST_BINARY_BUILTINS_FUNC can call them as std::FUNC.

/// Larger of x and y; returns x when neither compares less than the other.
template <typename T>
T cwiseMax(T x, T y) {
  return (x < y) ? y : x;
}

/// Smaller of x and y; returns x when neither compares less than the other.
template <typename T>
T cwiseMin(T x, T y) {
  return (y < x) ? y : x;
}
}  // namespace std

// TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout)
//
// Checks a coefficient-wise binary member function FUNC on 3-D tensors of
// SCALAR with storage order Layout: evaluates `gpu_out = gpu_1.FUNC(gpu_2)` on
// the SYCL device, copies the result back, and compares every element with
// std::FUNC(in_1(i), in_2(i)) computed on the host. FUNC must therefore name
// both a Tensor member and a two-argument function in namespace std.
// The expansion site must have `sycl_device` and `tensorRange` in scope.
#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout)                        \
  {                                                                            \
    /* out = in_1.FUNC(in_2) */                                                \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange);                      \
    /* out is a pure output: it is only filled by the device-to-host copy. */  \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                          \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange);      \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
                                   (in_1.size()) * sizeof(SCALAR));            \
    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                    \
                                   (in_2.size()) * sizeof(SCALAR));            \
    gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2);                           \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
                                   (out.size()) * sizeof(SCALAR));             \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      /* Host-side expected value for element i. */                            \
      SCALAR ver = std::FUNC(in_1(i), in_2(i));                                \
      VERIFY_IS_APPROX(out(i), ver);                                           \
    }                                                                          \
    sycl_device.deallocate(gpu_data_1);                                        \
    sycl_device.deallocate(gpu_data_2);                                        \
    sycl_device.deallocate(gpu_data_out);                                      \
  }

// TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout)
//
// Checks a coefficient-wise binary operator between two 3-D tensors of SCALAR
// with storage order Layout: evaluates `gpu_out = gpu_1 OPERATOR gpu_2` on the
// SYCL device, copies the result back, and compares every element with
// `in_1(i) OPERATOR in_2(i)` computed on the host.
// The expansion site must have `sycl_device` and `tensorRange` in scope.
#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout)               \
  {                                                                            \
    /* out = in_1 OPERATOR in_2 */                                             \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange);                      \
    /* out is a pure output: it is only filled by the device-to-host copy. */  \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                          \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange);      \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
                                   (in_1.size()) * sizeof(SCALAR));            \
    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                    \
                                   (in_2.size()) * sizeof(SCALAR));            \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2;                        \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
                                   (out.size()) * sizeof(SCALAR));             \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i));                      \
    }                                                                          \
    sycl_device.deallocate(gpu_data_1);                                        \
    sycl_device.deallocate(gpu_data_2);                                        \
    sycl_device.deallocate(gpu_data_out);                                      \
  }

// TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout)
//
// Checks a binary operator whose right-hand operand is the literal 2 rather
// than a tensor: evaluates `gpu_out = gpu_1 OPERATOR 2` on the SYCL device,
// copies the result back, and compares every element with `in_1(i) OPERATOR 2`
// computed on the host.
// When SCALAR is an integer type, static_cast<SCALAR>(0.01) truncates to 0, so
// the offset added to the random input has no effect.
// The expansion site must have `sycl_device` and `tensorRange` in scope.
#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout)     \
  {                                                                            \
    /* out = in_1 OPERATOR 2 */                                                \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
    /* out is a pure output: it is only filled by the device-to-host copy. */  \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
                                   (in_1.size()) * sizeof(SCALAR));            \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2;                            \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
                                   (out.size()) * sizeof(SCALAR));             \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2);                            \
    }                                                                          \
    sycl_device.deallocate(gpu_data_1);                                        \
    sycl_device.deallocate(gpu_data_out);                                      \
  }

// TEST_BINARY_BUILTINS(SCALAR, Layout)
//
// Binary suite for one SCALAR / Layout pair: the member functions cwiseMax and
// cwiseMin, followed by the tensor-tensor operators +, -, * and /.
#define TEST_BINARY_BUILTINS(SCALAR, Layout)                                   \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout)                         \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout)                         \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout)                           \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout)                           \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout)                           \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout)

// Runs the binary built-in suite on tensors of shape 10 x 10 x 10 for each
// storage order: the float suite first, then `tensor % 2` on int tensors.
// The macro expansions below pick up the parameter `sycl_device` and the local
// `tensorRange` by name, so neither may be renamed.
static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
  const int64_t dimSize = 10;
  array<int64_t, 3> tensorRange = {{dimSize, dimSize, dimSize}};

  /* Row-major storage. */
  TEST_BINARY_BUILTINS(float, RowMajor)
  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)

  /* Column-major storage. */
  TEST_BINARY_BUILTINS(float, ColMajor)
  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)
}

// Test entry point. For every device returned by
// Eigen::get_sycl_supported_devices() it builds a QueueInterface and a
// SyclDevice on top of it, then runs the unary and the binary suites on that
// device.
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
  const auto supported_devices = Eigen::get_sycl_supported_devices();
  for (const auto &selected : supported_devices) {
    QueueInterface queue(selected);
    Eigen::SyclDevice sycl_device(&queue);
    CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
    CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
  }
}
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00			`// This file is part of Eigen, a lightweight C++ template library`
			`// for linear algebra.`
			`//`
			`// Copyright (C) 2016`
			`// Mehdi Goli Codeplay Software Ltd.`
			`// Ralph Potter Codeplay Software Ltd.`
			`// Luke Iwanski Codeplay Software Ltd.`
			`// Contact: <eigen@codeplay.com>`
			`//`
			`// This Source Code Form is subject to the terms of the Mozilla`
			`// Public License v. 2.0. If a copy of the MPL was not distributed`
			`// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.`

			`#define EIGEN_TEST_NO_LONGDOUBLE`
			`#define EIGEN_TEST_NO_COMPLEX`
Get rid of EIGEN_TEST_FUNC, unit tests must now be declared with EIGEN_DECLARE_TEST(mytest) { /* code */ }. This provide several advantages: - more flexibility in designing unit tests - unit tests can be glued to speed up compilation - unit tests are compiled with same predefined macros, which is a requirement for zapcc 2018-07-17 20:46:15 +08:00
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00			`#define EIGEN_USE_SYCL`

			`#include "main.h"`
			`#include <unsupported/Eigen/CXX11/Tensor>`

			`using Eigen::array;`
			`using Eigen::SyclDevice;`
			`using Eigen::Tensor;`
			`using Eigen::TensorMap;`

Fixed existing test. 2016-11-18 01:46:55 +08:00			`namespace std {`
			`template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); }`
			`template <typename T> T square(T x) { return x * x; }`
			`template <typename T> T cube(T x) { return x * x * x; }`
			`template <typename T> T inverse(T x) { return 1 / x; }`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00			`}`

Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`{ \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`/* out OPERATOR in.FUNC() */ \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \`
			`Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \`
Avoid calling log(0) or 1/0 2016-11-18 03:56:44 +08:00			`in = in.random() + static_cast<SCALAR>(0.01); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`out = out.random() + static_cast<SCALAR>(0.01); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> reference(out); \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`SCALAR gpu_data = static_cast<SCALAR >( \`
			`sycl_device.allocate(in.size() * sizeof(SCALAR))); \`
			`SCALAR gpu_data_out = static_cast<SCALAR >( \`
			`sycl_device.allocate(out.size() * sizeof(SCALAR))); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \`
			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`sycl_device.memcpyHostToDevice(gpu_data, in.data(), \`
			`(in.size()) * sizeof(SCALAR)); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \`
			`(out.size()) * sizeof(SCALAR)); \`
			`gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \`
			`(out.size()) * sizeof(SCALAR)); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`for (int64_t i = 0; i < out.size(); ++i) { \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`SCALAR ver = reference(i); \`
			`ver OPERATOR std::FUNC(in(i)); \`
			`VERIFY_IS_APPROX(out(i), ver); \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`} \`
			`sycl_device.deallocate(gpu_data); \`
			`sycl_device.deallocate(gpu_data_out); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`} \`
			`{ \`
			`/* out OPERATOR out.FUNC() */ \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`out = out.random() + static_cast<SCALAR>(0.01); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> reference(out); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`SCALAR gpu_data_out = static_cast<SCALAR >( \`
			`sycl_device.allocate(out.size() * sizeof(SCALAR))); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \`
			`(out.size()) * sizeof(SCALAR)); \`
			`gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \`
			`sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \`
			`(out.size()) * sizeof(SCALAR)); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`for (int64_t i = 0; i < out.size(); ++i) { \`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00			`SCALAR ver = reference(i); \`
			`ver OPERATOR std::FUNC(reference(i)); \`
			`VERIFY_IS_APPROX(out(i), ver); \`
			`} \`
			`sycl_device.deallocate(gpu_data_out); \`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`}`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout) \`
			`TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout)`
Now testing out (+=, =) in.FUNC() and out (+=, =) out.FUNC() 2016-11-18 19:16:42 +08:00
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \`
Added isnan, isfinite and isinf for SYCL device. Plus test for that. 2016-11-19 00:01:48 +08:00			`{ \`
Modifying TensorDeviceSycl.h to always create buffer of type uint8_t and convert them to the actual type at the execution on the device; adding the queue interface class to separate the lifespan of sycl queue and buffers,created for that queue, from Eigen::SyclDevice; modifying sycl tests to support the evaluation of the results for both row major and column major data layout on all different devices that are supported by Sycl{CPU; GPU; and Host}. 2016-11-19 00:20:42 +08:00			`/* out = in.FUNC() */ \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \`
			`Tensor<bool, 3, Layout, int64_t> out(tensorRange); \`
Added isnan, isfinite and isinf for SYCL device. Plus test for that. 2016-11-19 00:01:48 +08:00			`in = in.random() + static_cast<SCALAR>(0.01); \`
			`SCALAR gpu_data = static_cast<SCALAR >( \`
			`sycl_device.allocate(in.size() * sizeof(SCALAR))); \`
			`bool *gpu_data_out = \`
			`static_cast<bool >(sycl_device.allocate(out.size() sizeof(bool))); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \`
			`TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \`
Added isnan, isfinite and isinf for SYCL device. Plus test for that. 2016-11-19 00:01:48 +08:00			`sycl_device.memcpyHostToDevice(gpu_data, in.data(), \`
			`(in.size()) * sizeof(SCALAR)); \`
			`gpu_out.device(sycl_device) = gpu.FUNC(); \`
			`sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \`
			`(out.size()) * sizeof(bool)); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`for (int64_t i = 0; i < out.size(); ++i) { \`
Added isnan, isfinite and isinf for SYCL device. Plus test for that. 2016-11-19 00:01:48 +08:00			`VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \`
			`} \`
			`sycl_device.deallocate(gpu_data); \`
			`sycl_device.deallocate(gpu_data_out); \`
			`}`

Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define TEST_UNARY_BUILTINS(SCALAR, Layout) \`
			`TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \`
			`TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \`
			`TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \`
			`TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \`
			`TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00
Fixed existing test. 2016-11-18 01:46:55 +08:00			`static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`int64_t sizeDim1 = 10;`
			`int64_t sizeDim2 = 10;`
			`int64_t sizeDim3 = 10;`
			`array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`TEST_UNARY_BUILTINS(float, RowMajor)`
			`TEST_UNARY_BUILTINS(float, ColMajor)`
Fixed existing test. 2016-11-18 01:46:55 +08:00			`}`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`namespace std {`
			`template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }`
			`template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }`
			`}`

Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`{ \`
			`/* out = in_1.FUNC(in_2) */ \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \`
			`Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \`
			`Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`in_1 = in_1.random() + static_cast<SCALAR>(0.01); \`
			`in_2 = in_2.random() + static_cast<SCALAR>(0.01); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`Tensor<SCALAR, 3, Layout, int64_t> reference(out); \`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`SCALAR gpu_data_1 = static_cast<SCALAR >( \`
			`sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \`
			`SCALAR gpu_data_2 = static_cast<SCALAR >( \`
			`sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \`
			`SCALAR gpu_data_out = static_cast<SCALAR >( \`
			`sycl_device.allocate(out.size() * sizeof(SCALAR))); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \`
			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \`
			`TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \`
			`(in_1.size()) * sizeof(SCALAR)); \`
			`sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \`
			`(in_2.size()) * sizeof(SCALAR)); \`
			`gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \`
			`sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \`
			`(out.size()) * sizeof(SCALAR)); \`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`for (int64_t i = 0; i < out.size(); ++i) { \`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`SCALAR ver = reference(i); \`
			`ver = std::FUNC(in_1(i), in_2(i)); \`
			`VERIFY_IS_APPROX(out(i), ver); \`
			`} \`
			`sycl_device.deallocate(gpu_data_1); \`
			`sycl_device.deallocate(gpu_data_2); \`
			`sycl_device.deallocate(gpu_data_out); \`
			`}`

// Checks a coefficient-wise binary operator applied to two rank-3 tensors.
//
//   SCALAR   - coefficient type of the tensors.
//   OPERATOR - infix operator token placed between the two operands.
//   Layout   - storage-order template argument forwarded to Tensor.
//
// The expansion is a braced statement block. It uses `tensorRange` and
// `sycl_device` by name, so both must be visible where the macro is expanded.
// Every coefficient copied back from the device buffer is compared, via
// VERIFY_IS_APPROX, with the same operator applied to the host tensors.
#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout)               \
  {                                                                            \
    /* out = in_1 OPERATOR in_2 */                                             \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange);                      \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                          \
    /* allocate() results are cast to typed device pointers */                 \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1,            \
                                                        tensorRange);          \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2,            \
                                                        tensorRange);          \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out,        \
                                                          tensorRange);        \
    /* upload operands, evaluate through sycl_device, fetch result */          \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
                                   (in_1.size()) * sizeof(SCALAR));            \
    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                    \
                                   (in_2.size()) * sizeof(SCALAR));            \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2;                        \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
                                   (out.size()) * sizeof(SCALAR));             \
    /* compare each coefficient with the host-side computation */              \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i));                      \
    }                                                                          \
    sycl_device.deallocate(gpu_data_1);                                        \
    sycl_device.deallocate(gpu_data_2);                                        \
    sycl_device.deallocate(gpu_data_out);                                      \
  }

// Checks a coefficient-wise binary operator whose right-hand side is the
// literal 2, applied to a rank-3 tensor.
//
//   SCALAR   - coefficient type of the tensors.
//   OPERATOR - infix operator token placed between the tensor and 2.
//   Layout   - storage-order template argument forwarded to Tensor.
//
// The expansion is a braced statement block. It uses `tensorRange` and
// `sycl_device` by name, so both must be visible where the macro is expanded.
// Every coefficient copied back from the device buffer is compared, via
// VERIFY_IS_APPROX, with `in_1(i) OPERATOR 2` computed on the host.
#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \
  {                                                                            \
    /* out = in_1 OPERATOR 2 */                                                \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
    /* allocate() results are cast to typed device pointers */                 \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1,            \
                                                        tensorRange);          \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out,        \
                                                          tensorRange);        \
    /* upload operand, evaluate through sycl_device, fetch result */           \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
                                   (in_1.size()) * sizeof(SCALAR));            \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2;                            \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
                                   (out.size()) * sizeof(SCALAR));             \
    /* compare each coefficient with the host-side computation */              \
    for (int64_t i = 0; i < out.size(); ++i) {                                 \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2);                            \
    }                                                                          \
    sycl_device.deallocate(gpu_data_1);                                        \
    sycl_device.deallocate(gpu_data_out);                                      \
  }

// Expands, in order, the cwiseMax and cwiseMin function checks followed by the
// +, -, * and / operator checks for one SCALAR / Layout combination.
#define TEST_BINARY_BUILTINS(SCALAR, Layout)                                   \
  /* coefficient-wise member functions */                                      \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax, Layout)                          \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin, Layout)                          \
  /* arithmetic operators */                                                   \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, +, Layout)                            \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, -, Layout)                            \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, *, Layout)                            \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, /, Layout)
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00
			`static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {`
Reducing warnings in Sycl backend. 2017-02-01 23:29:53 +08:00			`int64_t sizeDim1 = 10;`
			`int64_t sizeDim2 = 10;`
			`int64_t sizeDim3 = 10;`
			`array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};`
			`TEST_BINARY_BUILTINS(float, RowMajor)`
			`TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)`
			`TEST_BINARY_BUILTINS(float, ColMajor)`
			`TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)`
Added test for cwiseMin, cwiseMax and operator%. 2016-11-19 21:37:27 +08:00			`}`

Get rid of EIGEN_TEST_FUNC, unit tests must now be declared with EIGEN_DECLARE_TEST(mytest) { /* code */ }. This provide several advantages: - more flexibility in designing unit tests - unit tests can be glued to speed up compilation - unit tests are compiled with same predefined macros, which is a requirement for zapcc 2018-07-17 20:46:15 +08:00			`EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {`
Fixing LLVM error on TensorMorphingSycl.h on GPU; fixing int64_t crash for tensor_broadcast_sycl on GPU; adding get_sycl_supported_devices() on syclDevice.h. 2016-11-26 00:19:07 +08:00			`for (const auto& device :Eigen::get_sycl_supported_devices()) {`
			`QueueInterface queueInterface(device);`
			`Eigen::SyclDevice sycl_device(&queueInterface);`
			`CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));`
			`CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));`
Removing unsupported device from test case; cleaning the tensor device sycl. 2016-11-24 00:30:41 +08:00			`}`
Specialised basic math functions for SYCL device. 2016-11-17 19:47:13 +08:00			`}`