2
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2025-04-24 19:40:45 +08:00

Unbork avx512 predux_mul on MSVC.

This commit is contained in:
Antonio Sánchez 2024-04-26 15:28:03 +00:00
parent 42aa3d17cd
commit dcceb9afec
2 changed files with 27 additions and 0 deletions
Eigen/src/Core/arch/AVX512
test

@ -1562,10 +1562,28 @@ template <>
EIGEN_STRONG_INLINE int predux_mul<Packet16i>(const Packet16i& a) {
  // Multiply together all packed 32-bit integers of `a` using the AVX-512
  // reduction intrinsic and hand back the scalar result.
  const int product = _mm512_reduce_mul_epi32(a);
  return product;
}
#if EIGEN_COMP_MSVC
// Workaround: MSVC's _mm512_reduce_mul_epi64 is broken, at least up to and
// including version 1939. For example,
//   alignas(64) int64_t data[] = { 1,1,-1,-1,1,-1,-1,-1 };
//   int64_t out = _mm512_reduce_mul_epi64(_mm512_load_epi64(data));
// yields the garbage value 4294967295. The failure seems to show up whenever
// the result is supposed to be negative, so the product is reduced by hand.
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
  // 8 -> 4 values: multiply the lower 256 bits by the upper 256 bits.
  Packet4l lower_half = _mm512_extracti64x4_epi64(a, 0);
  Packet4l upper_half = _mm512_extracti64x4_epi64(a, 1);
  Packet4l prod = pmul(lower_half, upper_half);

  // 4 -> 2 values: multiply by a copy whose two 128-bit halves are swapped.
  Packet4l swapped_halves = Packet4l(_mm256_permute2x128_si256(prod, prod, 1));
  prod = pmul(prod, swapped_halves);

  // 2 -> 1 value: 0xE moves the upper 64 bits of each 128-bit lane into the
  // lower 64 bits, so element 0 ends up holding the full product.
  Packet4l upper_qword = Packet4l(_mm256_shuffle_epi32(prod, 0xE));
  prod = pmul(prod, upper_qword);

  return pfirst(prod);
}
#else
// Product of all packed 64-bit integers of `a`, delegated directly to the
// AVX-512 reduction intrinsic.
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
  const int64_t product = _mm512_reduce_mul_epi64(a);
  return product;
}
#endif
template <>
EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {

@ -623,6 +623,15 @@ void packetmath() {
VERIFY(test::areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
}
// Avoid overflows.
if (NumTraits<Scalar>::IsInteger && NumTraits<Scalar>::IsSigned) {
Scalar limit =
static_cast<Scalar>(std::pow(static_cast<double>(numext::real(NumTraits<Scalar>::highest())),
1.0 / static_cast<double>(Eigen::internal::unpacket_traits<Packet>::size)));
for (int i = 0; i < PacketSize; ++i) {
data1[i] = internal::random<Scalar>(-limit, limit);
}
}
ref[0] = Scalar(1);
for (int i = 0; i < PacketSize; ++i) ref[0] = REF_MUL(ref[0], data1[i]);
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");