mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
FP16 types on CUDA are only available starting with CUDA 7.5. Disable them when using an older version of CUDA.
This commit is contained in:
parent
f36c0c2c65
commit
0606a0a39b
@ -200,7 +200,10 @@
|
|||||||
#if defined __CUDACC__
|
#if defined __CUDACC__
|
||||||
#define EIGEN_VECTORIZE_CUDA
|
#define EIGEN_VECTORIZE_CUDA
|
||||||
#include <vector_types.h>
|
#include <vector_types.h>
|
||||||
#include <cuda_fp16.h>
|
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
|
||||||
|
#define EIGEN_HAS_CUDA_FP16
|
||||||
|
#include <cuda_fp16.h>
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
|
#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
|
||||||
|
@ -14,6 +14,8 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
#if defined(EIGEN_HAS_CUDA_FP16)
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct scalar_cast_op<float, half> {
|
struct scalar_cast_op<float, half> {
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||||
@ -92,6 +94,7 @@ template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
using Eigen::Tensor;
|
using Eigen::Tensor;
|
||||||
|
|
||||||
|
#ifdef EIGEN_HAS_CUDA_FP16
|
||||||
void test_cuda_conversion() {
|
void test_cuda_conversion() {
|
||||||
Eigen::CudaStreamDevice stream;
|
Eigen::CudaStreamDevice stream;
|
||||||
Eigen::GpuDevice gpu_device(&stream);
|
Eigen::GpuDevice gpu_device(&stream);
|
||||||
@ -52,9 +53,11 @@ void test_cuda_conversion() {
|
|||||||
gpu_device.deallocate(d_half);
|
gpu_device.deallocate(d_half);
|
||||||
gpu_device.deallocate(d_conv);
|
gpu_device.deallocate(d_conv);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void test_cxx11_tensor_of_float16_cuda()
|
void test_cxx11_tensor_of_float16_cuda()
|
||||||
{
|
{
|
||||||
|
#ifdef EIGEN_HAS_CUDA_FP16
|
||||||
CALL_SUBTEST_1(test_cuda_conversion());
|
CALL_SUBTEST_1(test_cuda_conversion());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user