diff --git a/Eigen/Core b/Eigen/Core
index 82558155e..55fc886b6 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -43,10 +43,12 @@
   #else
     #define EIGEN_DEVICE_FUNC
   #endif
-
 #else
   #define EIGEN_DEVICE_FUNC
+#endif
+
+#if defined(EIGEN_USE_SYCL)
+  #define EIGEN_DONT_VECTORIZE
 #endif
 
 // When compiling CUDA device code with NVCC, pull in math functions from the
@@ -283,6 +285,15 @@
 #include
 #endif
 
+#if defined(__SYCL_DEVICE_ONLY__)
+  #undef min
+  #undef max
+  #undef isnan
+  #undef isinf
+  #undef isfinite
+  #include <SYCL/sycl.hpp>
+#endif
+
 /** \brief Namespace containing all symbols from the %Eigen library. */
 namespace Eigen {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 8d47fb8a4..142fec998 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -413,7 +413,7 @@ inline NewType cast(const OldType& x)
   static inline Scalar run(const Scalar& x)
   {
     EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
-    using std::round;
+    EIGEN_USING_STD_MATH(round);
     return round(x);
   }
 };
@@ -640,7 +640,7 @@ template<typename Scalar>
 struct random_default_impl<Scalar, false, true>
 {
   static inline Scalar run(const Scalar& x, const Scalar& y)
-  { 
+  {
     typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
     if(y<x)
       return x;
@@ -969,6 +974,11 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
   return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); }
+EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T> EIGEN_DEVICE_FUNC bool (isnan)  (const T &x) { return internal::isnan_impl(x); }
 template<typename T> EIGEN_DEVICE_FUNC bool (isinf)  (const T &x) { return internal::isinf_impl(x); }
 template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
@@ -980,6 +990,11 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
   return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); }
+EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T>
 EIGEN_DEVICE_FUNC
 T (floor)(const T& x)
@@ -988,6 +1003,11 @@ T (floor)(const T& x)
   return floor(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); }
+EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float floor(const float &x) { return ::floorf(x); }
@@ -1004,6 +1024,11 @@ T (ceil)(const T& x)
   return ceil(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); }
+EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float ceil(const float &x) { return ::ceilf(x); }
@@ -1044,6 +1069,11 @@ T sqrt(const T &x)
   return sqrt(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); }
+EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T log(const T &x)
 {
@@ -1051,6 +1081,12 @@ T log(const T &x)
 {
   return log(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); }
+EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float log(const float &x) { return ::logf(x); }
@@ -1066,6 +1102,11 @@ typename NumTraits<T>::Real abs(const T &x)
 {
   return abs(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float abs(const float &x) { return ::fabsf(x); }
@@ -1091,6 +1132,11 @@ T exp(const T &x)
 {
   return exp(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); }
+EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float exp(const float &x) { return ::expf(x); }
@@ -1106,6 +1152,11 @@ T cos(const T &x)
 {
   return cos(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); }
+EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cos(const float &x) { return ::cosf(x); }
@@ -1121,6 +1172,11 @@ T sin(const T &x)
 {
   return sin(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); }
+EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sin(const float &x) { return ::sinf(x); }
@@ -1136,6 +1192,11 @@ T tan(const T &x)
 {
   return tan(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); }
+EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tan(const float &x) { return ::tanf(x); }
@@ -1151,6 +1212,11 @@ T acos(const T &x)
 {
   return acos(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); }
+EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float acos(const float &x) { return ::acosf(x); }
@@ -1166,6 +1232,11 @@ T asin(const T &x)
 {
   return asin(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); }
+EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float asin(const float &x) { return ::asinf(x); }
@@ -1181,6 +1252,11 @@ T atan(const T &x)
 {
   return atan(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); }
+EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float atan(const float &x) { return ::atanf(x); }
@@ -1197,6 +1273,11 @@ T cosh(const T &x)
 {
   return cosh(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); }
+EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cosh(const float &x) { return ::coshf(x); }
@@ -1212,6 +1293,11 @@ T sinh(const T &x)
 {
   return sinh(x);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); }
+EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sinh(const float &x) { return ::sinhf(x); }
@@ -1227,7 +1313,10 @@ T tanh(const T &x)
 {
   return tanh(x);
 }
-#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); }
+EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); }
+#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tanh(float x) { return internal::generic_fast_tanh_float(x); }
 #endif
@@ -1247,6 +1336,11 @@ T fmod(const T& a, const T& b)
 {
   return fmod(a, b);
 }
 
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); }
+EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
@@ -1389,13 +1483,13 @@ template<> struct random_impl<bool>
 template<> struct scalar_fuzzy_impl<bool>
 {
   typedef bool RealScalar;
-  
+
   template<typename OtherScalar> EIGEN_DEVICE_FUNC
   static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
   {
     return !x;
   }
-  
+
   EIGEN_DEVICE_FUNC
   static inline bool isApprox(bool x, bool y, bool)
   {
@@ -1407,10 +1501,10 @@ template<> struct scalar_fuzzy_impl<bool>
   {
     return (!x) || y;
   }
-  
+
 };
-  
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 7ecb4c74d..e41b67c56 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -13,7 +13,7 @@
 #include "../../../Eigen/Core"
 
-#ifdef EIGEN_USE_SYCL
+#if defined(EIGEN_USE_SYCL)
 #undef min
 #undef max
 #undef isnan
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index f988cb465..471826746 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -147,6 +147,7 @@ if(EIGEN_TEST_CXX11)
   ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
   ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
   ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
+  ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
 endif(EIGEN_TEST_SYCL)
 # It should be safe to always run these tests as there is some fallback code for
 # older compiler that don't support cxx11.
diff --git a/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
new file mode 100644
index 000000000..aed4e47e4
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski    Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+namespace std
+{
+  template <typename T> T rsqrt(T x) { return 1/std::sqrt(x); }
+  template <typename T> T square(T x) { return x*x; }
+  template <typename T> T cube(T x) { return x*x*x; }
+  template <typename T> T inverse(T x) { return 1/x; }
+}
+
+#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR) \
+{ \
+  Tensor<SCALAR, 3> in1(tensorRange); \
+  Tensor<SCALAR, 3> out1(tensorRange); \
+  in1 = in1.random(); \
+  SCALAR* gpu_data1 = static_cast<SCALAR*>(sycl_device.allocate(in1.size()*sizeof(SCALAR))); \
+  TensorMap<Tensor<SCALAR, 3>> gpu1(gpu_data1, tensorRange); \
+  sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(SCALAR)); \
+  gpu1.device(sycl_device) = gpu1.FUNC(); \
+  sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(SCALAR)); \
+  for (int i = 0; i < in1.size(); ++i) { \
+    VERIFY_IS_APPROX(out1(i), std::FUNC(in1(i))); \
+  } \
+  sycl_device.deallocate(gpu_data1); \
+}
+
+#define TEST_UNARY_BUILTINS(SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR) \
+TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR)
+
+static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device){
+  int sizeDim1 = 100;
+  int sizeDim2 = 100;
+  int sizeDim3 = 100;
+  array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+
+  TEST_UNARY_BUILTINS(float)
+  TEST_UNARY_BUILTINS(double)
+}
+
+
+void test_cxx11_tensor_builtins_sycl() {
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
+}
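
Every MathFunctions.h hunk above follows the same pattern: next to the generic template<typename T> wrapper, the patch adds plain float/double overloads guarded by __SYCL_DEVICE_ONLY__ that forward to the matching cl::sycl builtin, so on the device overload resolution picks them for exact float/double arguments instead of instantiating the template (and, in the tanh hunk, instead of the fast-math float specialization, which is presumably why that #if becomes an #elif). A minimal sketch of that dispatch mechanism in plain C++ follows; mynum, fake_sycl and MY_DEVICE_ONLY are illustrative stand-ins for Eigen's wrapper namespace, cl::sycl and __SYCL_DEVICE_ONLY__, not real Eigen or SYCL names.

// Sketch: non-template float/double overloads taking precedence over a
// generic math wrapper, mirroring the pattern used in this patch.
#include <cmath>
#include <iostream>

#define MY_DEVICE_ONLY  // pretend we are compiling "device" code

namespace fake_sycl {
// Stand-ins for cl::sycl::sqrt, just to keep the example self-contained.
inline float  sqrt(float x)  { std::cout << "fake_sycl::sqrt(float)  "; return std::sqrt(x); }
inline double sqrt(double x) { std::cout << "fake_sycl::sqrt(double) "; return std::sqrt(x); }
}

namespace mynum {
// Generic wrapper, analogous to the template<typename T> T sqrt(const T&) in MathFunctions.h.
template <typename T>
T sqrt(const T& x) {
  using std::sqrt;
  std::cout << "generic sqrt ";
  return sqrt(x);
}

#ifdef MY_DEVICE_ONLY
// Plain (non-template) overloads are preferred over the template for exact
// float/double arguments, so they take over in the "device" configuration.
inline float  sqrt(float x)  { return fake_sycl::sqrt(x); }
inline double sqrt(double x) { return fake_sycl::sqrt(x); }
#endif
}

int main() {
  std::cout << mynum::sqrt(2.0f) << "\n";  // float overload -> fake_sycl
  std::cout << mynum::sqrt(2.0)  << "\n";  // double overload -> fake_sycl
  return 0;
}

Because the added overloads are non-template and take exact float/double parameters, they win overload resolution only for those two types and leave every other scalar (half, integers, complex) on the generic template path.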