Added a test to validate the conversion of half floats into floats on Kepler GPUs.

Restricted the testing of the random number generation code to GPU architectures greater than or equal to 3.5.
Benoit Steiner 2016-03-03 10:37:25 -08:00
parent 1032441c6f
commit dac58d7c35
2 changed files with 80 additions and 2 deletions

unsupported/test/CMakeLists.txt

@@ -170,10 +170,15 @@ if(CUDA_FOUND)
   ei_add_test(cxx11_tensor_cuda)
   ei_add_test(cxx11_tensor_contract_cuda)
   ei_add_test(cxx11_tensor_reduction_cuda)
-  ei_add_test(cxx11_tensor_random_cuda)
   ei_add_test(cxx11_tensor_argmax_cuda)
+  ei_add_test(cxx11_tensor_cast_float16_cuda)
 
-  # Half floats are only supported starting with arch 5.3
+  # The random number generation code requires arch 3.5 or greater.
+  if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
+    ei_add_test(cxx11_tensor_random_cuda)
+  endif()
+
+  # Operations other than casting of half floats are only supported starting with arch 5.3
   if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52)
     ei_add_test(cxx11_tensor_of_float16_cuda)
   endif()
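Both guards compare against the two-digit compute-capability encoding (34 = arch 3.4, 52 = arch 5.2), so the tests are admitted from arch 3.5 and arch 5.3 upward respectively. The same gating can also be done at compile time inside device code via the standard __CUDA_ARCH__ macro, which encodes arch 5.3 as 530. A minimal sketch, not part of this commit (add_via_half is a made-up helper name):

#include <cuda_fp16.h>

// Hypothetical helper: use native half arithmetic only where the hardware
// supports it. __hadd and the other fp16 arithmetic intrinsics are available
// from arch 5.3 (__CUDA_ARCH__ >= 530) upward.
__device__ float add_via_half(float a, float b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  // Native fp16 path: convert to half, add in half precision, convert back.
  return __half2float(__hadd(__float2half(a), __float2half(b)));
#else
  // On older architectures half is storage-only, so compute in float.
  return a + b;
#endif
}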

unsupported/test/cxx11_tensor_cast_float16_cuda.cu

@@ -0,0 +1,73 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;

#ifdef EIGEN_HAS_CUDA_FP16
void test_cuda_conversion() {
  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);
  int num_elem = 101;

  Tensor<float, 1> floats(num_elem);
  floats.setRandom();

  // Device buffers for the original floats, the half-float copy, and the
  // floats recovered from the halves.
  float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
  half* d_half = (half*)gpu_device.allocate(num_elem * sizeof(half));
  float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));

  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
      d_float, num_elem);
  Eigen::TensorMap<Eigen::Tensor<half, 1>, Eigen::Aligned> gpu_half(
      d_half, num_elem);
  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
      d_conv, num_elem);

  gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));

  // Round-trip on the device: float -> half -> float.
  gpu_half.device(gpu_device) = gpu_float.cast<half>();
  gpu_conv.device(gpu_device) = gpu_half.cast<float>();

  Tensor<float, 1> initial(num_elem);
  Tensor<float, 1> final(num_elem);
  gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
  gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
  gpu_device.synchronize();

  // The round-trip through fp16 loses precision, so compare approximately.
  for (int i = 0; i < num_elem; ++i) {
    VERIFY_IS_APPROX(initial(i), final(i));
  }

  gpu_device.deallocate(d_float);
  gpu_device.deallocate(d_half);
  gpu_device.deallocate(d_conv);
}
#endif
void test_cxx11_tensor_cast_float16_cuda()
{
#ifdef EIGEN_HAS_CUDA_FP16
  CALL_SUBTEST(test_cuda_conversion());
#else
std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
#endif
}
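For reference, the same float -> half -> float round-trip can also be exercised without a GPU, going through Eigen's host-side half conversions. A minimal sketch under the assumption that the includes above are in scope (test_host_roundtrip is a made-up name, not part of this commit):

// Hypothetical host-only variant of the device check above: cast
// float -> half -> float entirely on the CPU and verify the round-trip
// only loses the precision expected of fp16.
void test_host_roundtrip() {
  int num_elem = 101;
  Tensor<float, 1> floats(num_elem);
  floats.setRandom();

  Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>();
  Tensor<float, 1> conv = halfs.cast<float>();

  for (int i = 0; i < num_elem; ++i) {
    VERIFY_IS_APPROX(floats(i), conv(i));
  }
}

If wired into the test driver, it would be called with CALL_SUBTEST(test_host_roundtrip()); alongside the CUDA subtest.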