Don't take the address of a kernel on CUDA devices that don't support this feature.

2025-01-24 14:45:14 +08:00 · 2016-04-19 14:35:11 -07:00 · 2016-04-19 14:35:11 -07:00 · b9ea40c30d
commit b9ea40c30d
parent 884c075058
1 changed file with 6 additions and 3 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -291,14 +291,17 @@ struct GpuDevice {
  int max_blocks_;
 };

-#ifndef __CUDA_ARCH__
+#if !defined(__CUDA_ARCH__)
 #define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...)             \
  (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__);   \
  assert(cudaGetLastError() == cudaSuccess);
-#else
+#elif __CUDA_ARCH__ >= 350
 #define LAUNCH_CUDA_KERNEL(kernel, ...)                                                     \
  { const auto __attribute__((__unused__)) __makeTheKernelInstantiate = &(kernel); }        \
-  eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__);
+  eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__ kernel);
+#else
+#define LAUNCH_CUDA_KERNEL(kernel, ...)                                                     \
+  eigen_assert(false && "Cannot launch a kernel from another kernel" __CUDA_ARCH__ kernel);
 #endif