Use EIGEN_DEVICE_FUNC macro instead of __device__.

This commit is contained in:
Rasmus Munk Larsen 2019-12-03 12:08:22 -08:00
parent 6358599ecb
commit ead81559c8
2 changed files with 5 additions and 5 deletions

View File

@ -18,7 +18,7 @@ namespace Eigen {
template<typename Scalar, typename Index, typename LhsMapper,
typename RhsMapper, typename OutputMapper, bool needs_edge_check>
__device__ EIGEN_STRONG_INLINE void
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem,
const Index m_size, const Index n_size, const Index k_size) {
@ -531,7 +531,7 @@ EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
template<typename Index, typename LhsMapper,
typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
bool CHECK_RHS_BOUNDARY>
__device__ EIGEN_STRONG_INLINE void
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs,
const OutputMapper output, float2 lhs_shmem2[][16],
float2 rhs_shmem2[][8], const Index m_size,
@ -771,7 +771,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
template<typename Index, typename LhsMapper,
typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
bool CHECK_RHS_BOUNDARY>
__device__ EIGEN_STRONG_INLINE void
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
const OutputMapper output, float2 lhs_shmem2[][32],
float2 rhs_shmem2[][8], const Index m_size,

View File

@ -582,7 +582,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable, Tiling> {
#if defined(EIGEN_GPUCC)
template <typename Evaluator, typename StorageIndex, bool Vectorizable>
struct EigenMetaKernelEval {
static __device__ EIGEN_ALWAYS_INLINE
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) {
for (StorageIndex i = firstIdx; i < lastIdx; i += step_size) {
eval.evalScalar(i);
@ -592,7 +592,7 @@ struct EigenMetaKernelEval {
template <typename Evaluator, typename StorageIndex>
struct EigenMetaKernelEval<Evaluator, StorageIndex, true> {
static __device__ EIGEN_ALWAYS_INLINE
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
void run(Evaluator& eval, StorageIndex firstIdx, StorageIndex lastIdx, StorageIndex step_size) {
const StorageIndex PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
const StorageIndex vectorized_size = (lastIdx / PacketSize) * PacketSize;