mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Clean up #if
s in GPU PacketPath.
Removed redundant checks and redundant code for CUDA/HIP. Note: there are several issues here of calling `__device__` functions from `__host__ __device__` functions, in particular `__low2half`. We do not address that here -- only modifying this file enough to get our current tests to compile. Fixed: #1847
This commit is contained in:
parent
f9fac1d5b0
commit
5ec4907434
@ -480,9 +480,7 @@ ptranspose(PacketBlock<double2,2>& kernel) {
|
|||||||
|
|
||||||
// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning
|
// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning
|
||||||
// its corresponding packet_traits<Eigen::half> must be visible on host.
|
// its corresponding packet_traits<Eigen::half> must be visible on host.
|
||||||
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC)) || \
|
#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
|
||||||
(defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIPCC)) || \
|
|
||||||
(defined(EIGEN_HAS_CUDA_FP16) && defined(__clang__) && defined(__CUDA__))
|
|
||||||
|
|
||||||
typedef ulonglong2 Packet4h2;
|
typedef ulonglong2 Packet4h2;
|
||||||
template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
|
template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
|
||||||
@ -515,13 +513,8 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
|
|||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
|
||||||
#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
|
#if defined(EIGEN_HIPCC)
|
||||||
half2 r;
|
return half2half2(from);
|
||||||
r.x = from;
|
|
||||||
r.y = from;
|
|
||||||
return r;
|
|
||||||
#elif defined(EIGEN_HIPCC)
|
|
||||||
return __half2{from,from};
|
|
||||||
#else
|
#else
|
||||||
return __half2half2(from);
|
return __half2half2(from);
|
||||||
#endif
|
#endif
|
||||||
@ -561,50 +554,33 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
|
||||||
const half2& from) {
|
const half2& from) {
|
||||||
#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
|
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||||
to[0] = from.x;
|
|
||||||
to[1] = from.y;
|
|
||||||
#else
|
|
||||||
to[0] = __low2half(from);
|
to[0] = __low2half(from);
|
||||||
to[1] = __high2half(from);
|
to[1] = __high2half(from);
|
||||||
|
#else
|
||||||
|
// Unfortunately __low2half and __high2half are only __device__ functions.
|
||||||
|
to[0] = __float2half(__low2float(from));
|
||||||
|
to[1] = __float2half(__high2float(from));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
|
||||||
const Eigen::half* from) {
|
const Eigen::half* from) {
|
||||||
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
||||||
|
|
||||||
return __ldg((const half2*)from);
|
return __ldg((const half2*)from);
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 350
|
|
||||||
return __ldg((const half2*)from);
|
|
||||||
#else
|
#else
|
||||||
return __halves2half2(*(from+0), *(from+1));
|
return __halves2half2(*(from+0), *(from+1));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
|
||||||
const Eigen::half* from) {
|
const Eigen::half* from) {
|
||||||
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
|
||||||
|
|
||||||
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 350
|
|
||||||
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
|
||||||
#else
|
#else
|
||||||
return __halves2half2(*(from+0), *(from+1));
|
return __halves2half2(*(from+0), *(from+1));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
|
||||||
@ -651,20 +627,12 @@ ptranspose(PacketBlock<half2,2>& kernel) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
||||||
#else
|
#else
|
||||||
float f = __half2float(a) + 1.0f;
|
float f = __half2float(a) + 1.0f;
|
||||||
return __halves2half2(a, __float2half(f));
|
return __halves2half2(a, __float2half(f));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
|
||||||
@ -749,13 +717,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hadd2(a, b);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hadd2(a, b);
|
return __hadd2(a, b);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -766,19 +728,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
|
|||||||
float r2 = a2 + b2;
|
float r2 = a2 + b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hsub2(a, b);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hsub2(a, b);
|
return __hsub2(a, b);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -789,39 +743,23 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
|
|||||||
float r2 = a2 - b2;
|
float r2 = a2 - b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hneg2(a);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hneg2(a);
|
return __hneg2(a);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return __floats2half2_rn(-a1, -a2);
|
return __floats2half2_rn(-a1, -a2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hmul2(a, b);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hmul2(a, b);
|
return __hmul2(a, b);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -832,20 +770,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
|
|||||||
float r2 = a2 * b2;
|
float r2 = a2 * b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
|
||||||
const half2& b,
|
const half2& b,
|
||||||
const half2& c) {
|
const half2& c) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hfma2(a, b, c);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hfma2(a, b, c);
|
return __hfma2(a, b, c);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -858,18 +788,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
|
|||||||
float r2 = a2 * b2 + c2;
|
float r2 = a2 * b2 + c2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||||
|
|
||||||
return __h2div(a, b);
|
return __h2div(a, b);
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
#else // EIGEN_CUDA_ARCH
|
||||||
|
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
float b1 = __low2float(b);
|
float b1 = __low2float(b);
|
||||||
@ -877,7 +802,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
|
|||||||
float r1 = a1 / b1;
|
float r1 = a1 / b1;
|
||||||
float r2 = a2 / b2;
|
float r2 = a2 / b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -904,33 +828,17 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hadd(__low2half(a), __high2half(a));
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hadd(__low2half(a), __high2half(a));
|
return __hadd(__low2half(a), __high2half(a));
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return Eigen::half(__float2half(a1 + a2));
|
return Eigen::half(__float2half(a1 + a2));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
__half first = __low2half(a);
|
|
||||||
__half second = __high2half(a);
|
|
||||||
return __hgt(first, second) ? first : second;
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
__half first = __low2half(a);
|
__half first = __low2half(a);
|
||||||
__half second = __high2half(a);
|
__half second = __high2half(a);
|
||||||
return __hgt(first, second) ? first : second;
|
return __hgt(first, second) ? first : second;
|
||||||
@ -939,20 +847,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
|
|||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return a1 > a2 ? __low2half(a) : __high2half(a);
|
return a1 > a2 ? __low2half(a) : __high2half(a);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
__half first = __low2half(a);
|
|
||||||
__half second = __high2half(a);
|
|
||||||
return __hlt(first, second) ? first : second;
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
__half first = __low2half(a);
|
__half first = __low2half(a);
|
||||||
__half second = __high2half(a);
|
__half second = __high2half(a);
|
||||||
return __hlt(first, second) ? first : second;
|
return __hlt(first, second) ? first : second;
|
||||||
@ -961,26 +859,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
|
|||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return a1 < a2 ? __low2half(a) : __high2half(a);
|
return a1 < a2 ? __low2half(a) : __high2half(a);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hmul(__low2half(a), __high2half(a));
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hmul(__low2half(a), __high2half(a));
|
return __hmul(__low2half(a), __high2half(a));
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return Eigen::half(__float2half(a1 * a2));
|
return Eigen::half(__float2half(a1 * a2));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) {
|
||||||
@ -1108,14 +996,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(
|
|||||||
template <>
|
template <>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
|
||||||
ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
|
ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
|
||||||
|
|
||||||
Packet4h2 r;
|
|
||||||
r = __ldg((const Packet4h2*)from);
|
|
||||||
return r;
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 350
|
|
||||||
Packet4h2 r;
|
Packet4h2 r;
|
||||||
r = __ldg((const Packet4h2*)from);
|
r = __ldg((const Packet4h2*)from);
|
||||||
return r;
|
return r;
|
||||||
@ -1128,8 +1009,6 @@ ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
|
|||||||
r_alias[3] = ploadt_ro_aligned(from + 6);
|
r_alias[3] = ploadt_ro_aligned(from + 6);
|
||||||
return r;
|
return r;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -1685,13 +1564,7 @@ prsqrt<Packet4h2>(const Packet4h2& a) {
|
|||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hadd2(a, b);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hadd2(a, b);
|
return __hadd2(a, b);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -1702,20 +1575,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
|
|||||||
float r2 = a2 + b2;
|
float r2 = a2 + b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
|
||||||
const half2& b) {
|
const half2& b) {
|
||||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||||
|
|
||||||
return __hmul2(a, b);
|
|
||||||
|
|
||||||
#else // EIGEN_CUDA_ARCH
|
|
||||||
|
|
||||||
#if EIGEN_CUDA_ARCH >= 530
|
|
||||||
return __hmul2(a, b);
|
return __hmul2(a, b);
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
@ -1726,8 +1591,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
|
|||||||
float r2 = a2 * b2;
|
float r2 = a2 * b2;
|
||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
Loading…
Reference in New Issue
Block a user