From d485d12c51bc46286f7439377e3ab591f67ddbbf Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Thu, 6 Oct 2016 10:41:03 -0700
Subject: [PATCH] Added missing AVX intrinsics for fp16: in particular, implemented predux which is required by the matrix-vector code.

---
 Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 24 +++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 82dfc12c9..9dd89e07f 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -492,6 +492,30 @@ template<> EIGEN_STRONG_INLINE void pscatter(Eigen::half*
   to[stride*7].x = aux[7].x;
 }
 
+template<> EIGEN_STRONG_INLINE Eigen::half predux(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_max(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_max(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_min(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_min(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_mul(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_mul(af);
+  return Eigen::half(reduced);
+}
+
 EIGEN_STRONG_INLINE void
 ptranspose(PacketBlock& kernel) {
   __m128i a = kernel.packet[0].x;