Added a test to validate the conversion of half floats into floats on Kepler GPUs.

Restricted the testing of the random number generation code to GPU architectures greater than or equal to 3.5.
Benoit Steiner 2016-03-03 10:37:25 -08:00
parent 1032441c6f
commit dac58d7c35
2 changed files with 80 additions and 2 deletions

unsupported/test/CMakeLists.txt

@@ -170,10 +170,15 @@ if(CUDA_FOUND)
   ei_add_test(cxx11_tensor_cuda)
   ei_add_test(cxx11_tensor_contract_cuda)
   ei_add_test(cxx11_tensor_reduction_cuda)
-  ei_add_test(cxx11_tensor_random_cuda)
   ei_add_test(cxx11_tensor_argmax_cuda)
+  ei_add_test(cxx11_tensor_cast_float16_cuda)
 
-  # Half floats are only supported starting with arch 5.3
+  # The random number generation code requires arch 3.5 or greater.
+  if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
+    ei_add_test(cxx11_tensor_random_cuda)
+  endif()
+
+  # Operations other than casting of half floats are only supported starting with arch 5.3
   if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52)
     ei_add_test(cxx11_tensor_of_float16_cuda)
   endif()
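Both guards compare against the two-digit compute-capability encoding (34 = arch 3.4, 52 = arch 5.2), so the tests are admitted from arch 3.5 and arch 5.3 upward respectively. The same gating can also be done at compile time inside device code via the standard __CUDA_ARCH__ macro, which encodes arch 5.3 as 530. A minimal sketch, not part of this commit (add_via_half is a made-up helper name):

#include <cuda_fp16.h>

// Hypothetical helper: use native half arithmetic only where the hardware
// supports it. __hadd and the other fp16 arithmetic intrinsics are available
// from arch 5.3 (__CUDA_ARCH__ >= 530) upward.
__device__ float add_via_half(float a, float b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  // Native fp16 path: convert to half, add in half precision, convert back.
  return __half2float(__hadd(__float2half(a), __float2half(b)));
#else
  // On older architectures half is storage-only, so compute in float.
  return a + b;
#endif
}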

unsupported/test/cxx11_tensor_cast_float16_cuda.cu

@@ -0,0 +1,73 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;

#ifdef EIGEN_HAS_CUDA_FP16
void test_cuda_conversion() {
  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);
  int num_elem = 101;

  Tensor<float, 1> floats(num_elem);
  floats.setRandom();

  // Device buffers for the original floats, the half-float copy, and the
  // floats recovered from the halves.
  float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
  half* d_half = (half*)gpu_device.allocate(num_elem * sizeof(half));
  float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));

  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
      d_float, num_elem);
  Eigen::TensorMap<Eigen::Tensor<half, 1>, Eigen::Aligned> gpu_half(
      d_half, num_elem);
  Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
      d_conv, num_elem);

  gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));

  // Round-trip on the device: float -> half -> float.
  gpu_half.device(gpu_device) = gpu_float.cast<half>();
  gpu_conv.device(gpu_device) = gpu_half.cast<float>();

  Tensor<float, 1> initial(num_elem);
  Tensor<float, 1> final(num_elem);
  gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
  gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
  gpu_device.synchronize();

  // The round-trip through fp16 loses precision, so compare approximately.
  for (int i = 0; i < num_elem; ++i) {
    VERIFY_IS_APPROX(initial(i), final(i));
  }

  gpu_device.deallocate(d_float);
  gpu_device.deallocate(d_half);
  gpu_device.deallocate(d_conv);
}
#endif
void test_cxx11_tensor_cast_float16_cuda()
{
#ifdef EIGEN_HAS_CUDA_FP16
  CALL_SUBTEST(test_cuda_conversion());
#else
std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
#endif
}
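For reference, the same float -> half -> float round-trip can also be exercised without a GPU, going through Eigen's host-side half conversions. A minimal sketch under the assumption that the includes above are in scope (test_host_roundtrip is a made-up name, not part of this commit):

// Hypothetical host-only variant of the device check above: cast
// float -> half -> float entirely on the CPU and verify the round-trip
// only loses the precision expected of fp16.
void test_host_roundtrip() {
  int num_elem = 101;
  Tensor<float, 1> floats(num_elem);
  floats.setRandom();

  Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>();
  Tensor<float, 1> conv = halfs.cast<float>();

  for (int i = 0; i < num_elem; ++i) {
    VERIFY_IS_APPROX(floats(i), conv(i));
  }
}

If wired into the test driver, it would be called with CALL_SUBTEST(test_host_roundtrip()); alongside the CUDA subtest.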