From 1d79c68ba0507574d893780e60b982f07d210261 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Wed, 14 Apr 2021 13:54:11 -0700 Subject: [PATCH] Fix ldexp for AVX512 (#2215) The wrong shuffle was used: `_mm256_shuffle_epi32` only shuffles within each 128-bit lane, so it cannot interleave the low and high halves of the 256-bit vector. Interleave them with a cross-lane `permute` instruction (`_mm256_permutevar8x32_epi32`) instead. Fixes #2215. --- Eigen/src/Core/arch/AVX512/PacketMath.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index f8741372d..9307c6763 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -929,7 +929,8 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4) // 2^b - Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0)); + const Packet8i permute_idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + Packet8i hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx); Packet8i lo = _mm256_slli_epi64(hi, 52); hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52); Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1)); @@ -937,7 +938,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons // 2^(e - 3b) b = psub(psub(psub(e, b), b), b); // e - 3b - hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0)); + hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx); lo = _mm256_slli_epi64(hi, 52); hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52); c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));