Specialised basic math functions for SYCL device.

2024-12-15 07:10:37 +08:00 · 2016-11-17 11:47:13 +00:00 · 2016-11-17 11:47:13 +00:00 · c5130dedbe
commit c5130dedbe
parent b5c75351e3
5 changed files with 198 additions and 9 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -43,10 +43,12 @@
  #else
    #define EIGEN_DEVICE_FUNC
  #endif
-
 #else
  #define EIGEN_DEVICE_FUNC
+#endif

+#if defined(EIGEN_USE_SYCL)
+  #define EIGEN_DONT_VECTORIZE
 #endif

 // When compiling CUDA device code with NVCC, pull in math functions from the
@ -283,6 +285,15 @@
  #include <intrin.h>
 #endif

+#if defined(__SYCL_DEVICE_ONLY__)
+  #undef min
+  #undef max
+  #undef isnan
+  #undef isinf
+  #undef isfinite
+  #include <SYCL/sycl.hpp>
+#endif
+
 /** \brief Namespace containing all symbols from the %Eigen library. */
 namespace Eigen {

--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@ -413,7 +413,7 @@ inline NewType cast(const OldType& x)
    static inline Scalar run(const Scalar& x)
    {
      EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
-      using std::round;
+      EIGEN_USING_STD_MATH(round);
      return round(x);
    }
  };
@ -954,6 +954,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
  return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   log1p(float x) { return cl::sycl::log1p(x); }
+EIGEN_ALWAYS_INLINE double  log1p(double x) { return cl::sycl::log1p(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float log1p(const float &x) { return ::log1pf(x); }
@ -969,6 +974,11 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala
  return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   pow(float x, float y) { return cl::sycl::pow(x, y); }
+EIGEN_ALWAYS_INLINE double  pow(double x, double y) { return cl::sycl::pow(x, y); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T> EIGEN_DEVICE_FUNC bool (isnan)   (const T &x) { return internal::isnan_impl(x); }
 template<typename T> EIGEN_DEVICE_FUNC bool (isinf)   (const T &x) { return internal::isinf_impl(x); }
 template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
@ -980,6 +990,11 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
  return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   round(float x) { return cl::sycl::round(x); }
+EIGEN_ALWAYS_INLINE double  round(double x) { return cl::sycl::round(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T>
 EIGEN_DEVICE_FUNC
 T (floor)(const T& x)
@ -988,6 +1003,11 @@ T (floor)(const T& x)
  return floor(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   floor(float x) { return cl::sycl::floor(x); }
+EIGEN_ALWAYS_INLINE double  floor(double x) { return cl::sycl::floor(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float floor(const float &x) { return ::floorf(x); }
@ -1004,6 +1024,11 @@ T (ceil)(const T& x)
  return ceil(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   ceil(float x) { return cl::sycl::ceil(x); }
+EIGEN_ALWAYS_INLINE double  ceil(double x) { return cl::sycl::ceil(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float ceil(const float &x) { return ::ceilf(x); }
@ -1044,6 +1069,11 @@ T sqrt(const T &x)
  return sqrt(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   sqrt(float x) { return cl::sycl::sqrt(x); }
+EIGEN_ALWAYS_INLINE double  sqrt(double x) { return cl::sycl::sqrt(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T log(const T &x) {
@ -1051,6 +1081,12 @@ T log(const T &x) {
  return log(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   log(float x) { return cl::sycl::log(x); }
+EIGEN_ALWAYS_INLINE double  log(double x) { return cl::sycl::log(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float log(const float &x) { return ::logf(x); }
@ -1066,6 +1102,11 @@ typename NumTraits<T>::Real abs(const T &x) {
  return abs(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double  abs(double x) { return cl::sycl::fabs(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float abs(const float &x) { return ::fabsf(x); }
@ -1091,6 +1132,11 @@ T exp(const T &x) {
  return exp(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   exp(float x) { return cl::sycl::exp(x); }
+EIGEN_ALWAYS_INLINE double  exp(double x) { return cl::sycl::exp(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float exp(const float &x) { return ::expf(x); }
@ -1106,6 +1152,11 @@ T cos(const T &x) {
  return cos(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   cos(float x) { return cl::sycl::cos(x); }
+EIGEN_ALWAYS_INLINE double  cos(double x) { return cl::sycl::cos(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cos(const float &x) { return ::cosf(x); }
@ -1121,6 +1172,11 @@ T sin(const T &x) {
  return sin(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   sin(float x) { return cl::sycl::sin(x); }
+EIGEN_ALWAYS_INLINE double  sin(double x) { return cl::sycl::sin(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sin(const float &x) { return ::sinf(x); }
@ -1136,6 +1192,11 @@ T tan(const T &x) {
  return tan(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   tan(float x) { return cl::sycl::tan(x); }
+EIGEN_ALWAYS_INLINE double  tan(double x) { return cl::sycl::tan(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tan(const float &x) { return ::tanf(x); }
@ -1151,6 +1212,11 @@ T acos(const T &x) {
  return acos(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   acos(float x) { return cl::sycl::acos(x); }
+EIGEN_ALWAYS_INLINE double  acos(double x) { return cl::sycl::acos(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float acos(const float &x) { return ::acosf(x); }
@ -1166,6 +1232,11 @@ T asin(const T &x) {
  return asin(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   asin(float x) { return cl::sycl::asin(x); }
+EIGEN_ALWAYS_INLINE double  asin(double x) { return cl::sycl::asin(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float asin(const float &x) { return ::asinf(x); }
@ -1181,6 +1252,11 @@ T atan(const T &x) {
  return atan(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   atan(float x) { return cl::sycl::atan(x); }
+EIGEN_ALWAYS_INLINE double  atan(double x) { return cl::sycl::atan(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float atan(const float &x) { return ::atanf(x); }
@ -1197,6 +1273,11 @@ T cosh(const T &x) {
  return cosh(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   cosh(float x) { return cl::sycl::cosh(x); }
+EIGEN_ALWAYS_INLINE double  cosh(double x) { return cl::sycl::cosh(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cosh(const float &x) { return ::coshf(x); }
@ -1212,6 +1293,11 @@ T sinh(const T &x) {
  return sinh(x);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   sinh(float x) { return cl::sycl::sinh(x); }
+EIGEN_ALWAYS_INLINE double  sinh(double x) { return cl::sycl::sinh(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sinh(const float &x) { return ::sinhf(x); }
@ -1227,7 +1313,10 @@ T tanh(const T &x) {
  return tanh(x);
 }

-#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   tanh(float x) { return cl::sycl::tanh(x); }
+EIGEN_ALWAYS_INLINE double  tanh(double x) { return cl::sycl::tanh(x); }
+#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tanh(float x) { return internal::generic_fast_tanh_float(x); }
 #endif
@ -1247,6 +1336,11 @@ T fmod(const T& a, const T& b) {
  return fmod(a, b);
 }

+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   fmod(float x, float y) { return cl::sycl::fmod(x, y); }
+EIGEN_ALWAYS_INLINE double  fmod(double x, double y) { return cl::sycl::fmod(x, y); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
 #ifdef __CUDACC__
 template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@ -13,7 +13,7 @@

 #include "../../../Eigen/Core"

-#ifdef EIGEN_USE_SYCL
+#if defined(EIGEN_USE_SYCL)
 #undef min
 #undef max
 #undef isnan
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@ -147,6 +147,7 @@ if(EIGEN_TEST_CXX11)
    ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
  endif(EIGEN_TEST_SYCL)
  # It should be safe to always run these tests as there is some fallback code for
  # older compiler that don't support cxx11.
--- a/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+namespace std
+{
+  template<typename T> T rsqrt(T x) { return 1/std::sqrt(x); }
+  template<typename T> T square(T x) { return x*x; }
+  template<typename T> T cube(T x) { return x*x*x; }
+  template<typename T> T inverse(T x) { return 1/x; }
+}
+
+#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR)                                            \
+{                                                                                               \
+  Tensor<SCALAR, 3> in1(tensorRange);                                                           \
+  Tensor<SCALAR, 3> out1(tensorRange);                                                          \
+  in1 = in1.random();                                                                           \
+  SCALAR* gpu_data1  = static_cast<SCALAR*>(sycl_device.allocate(in1.size()*sizeof(SCALAR)));   \
+  TensorMap<Tensor<SCALAR, 3>> gpu1(gpu_data1, tensorRange);                                    \
+  sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(SCALAR));            \
+  gpu1.device(sycl_device) = gpu1.FUNC();                                                       \
+  sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(SCALAR));          \
+  for (int i = 0; i < in1.size(); ++i) {                                                        \
+    VERIFY_IS_APPROX(out1(i), std::FUNC(in1(i)));                                               \
+  }                                                                                             \
+  sycl_device.deallocate(gpu_data1);                                                            \
+} 
+
+#define TEST_UNARY_BUILTINS(SCALAR)               \
+TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR)       \
+TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR)      \
+TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR)     \
+TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR)    \
+TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR)      \
+TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR)   \
+TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR)      \
+TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR)       \
+TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR)       \
+TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR)       \
+TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR)      \
+TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR)     \
+TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR)     \
+TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR)
+
+static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device){
+  int sizeDim1 = 100;
+  int sizeDim2 = 100;
+  int sizeDim3 = 100;
+  array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+
+  TEST_UNARY_BUILTINS(float)
+  TEST_UNARY_BUILTINS(double)
+}
+
+
+void test_cxx11_tensor_builtins_sycl() {
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
+}