mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Moved some of the fp16 operators outside the Eigen namespace to work around some nvcc limitations.
This commit is contained in:
parent
1e6fe6f046
commit
9ff269a1d3
@ -10,10 +10,6 @@
|
|||||||
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
|
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
#define EIGEN_PACKET_MATH_HALF_CUDA_H
|
#define EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
|
|
||||||
namespace Eigen {
|
|
||||||
|
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
#if defined(EIGEN_HAS_CUDA_FP16)
|
#if defined(EIGEN_HAS_CUDA_FP16)
|
||||||
|
|
||||||
// Make sure this is only available when targeting a GPU: we don't want to
|
// Make sure this is only available when targeting a GPU: we don't want to
|
||||||
@ -41,22 +37,28 @@ __device__ half operator - (const half& a) {
|
|||||||
return __hneg(a);
|
return __hneg(a);
|
||||||
}
|
}
|
||||||
__device__ half operator += (half& a, const half& b) {
|
__device__ half operator += (half& a, const half& b) {
|
||||||
a = __hadd(a, b);
|
a = a + b;
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
__device__ half operator *= (half& a, const half& b) {
|
__device__ half operator *= (half& a, const half& b) {
|
||||||
a = __hmul(a, b);
|
a = a * b;
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
__device__ half operator -= (half& a, const half& b) {
|
__device__ half operator -= (half& a, const half& b) {
|
||||||
a = __hsub(a, b);
|
a = a - b;
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
__device__ half operator /= (half& a, const half& b) {
|
__device__ half operator /= (half& a, const half& b) {
|
||||||
a = a / b;
|
a = a / b;
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
__device__ half __shfl_xor(half a, int) {
|
||||||
|
assert(false && "tbd");
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
template<> struct is_arithmetic<half2> { enum { value = true }; };
|
template<> struct is_arithmetic<half2> { enum { value = true }; };
|
||||||
|
|
||||||
@ -225,13 +227,11 @@ ptranspose(PacketBlock<half2,2>& kernel) {
|
|||||||
// kernel.packet[1].x = tmp;
|
// kernel.packet[1].x = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
|
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
|
Loading…
Reference in New Issue
Block a user