mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
Fix failing MSVC tests due to compiler bugs.
This commit is contained in:
parent
d6235d76db
commit
394aabb0a3
@ -984,13 +984,20 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i&
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from, uint8_t umask) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX512
|
||||
__mmask16 mask = static_cast<__mmask16>(umask & 0x00FF);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from));
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from));
|
||||
#else
|
||||
Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));
|
||||
const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
|
||||
const Packet8i bit_mask = _mm256_set_epi32(0x7f7f7f7f, 0xbfbfbfbf, 0xdfdfdfdf, 0xefefefef, 0xf7f7f7f7, 0xfbfbfbfb, 0xfdfdfdfd, 0xfefefefe);
|
||||
mask = por<Packet8i>(mask, bit_mask);
|
||||
mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
|
||||
EIGEN_DEBUG_UNALIGNED_STORE return _mm256_maskstore_ps(to, mask, from);
|
||||
#if EIGEN_COMP_MSVC
|
||||
// MSVC sometimes seems to use a bogus mask with maskstore.
|
||||
const __m256i ifrom = _mm256_castps_si256(from);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm_maskmoveu_si128(_mm256_extractf128_si256(ifrom, 0), _mm256_extractf128_si256(mask, 0), reinterpret_cast<char*>(to));
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm_maskmoveu_si128(_mm256_extractf128_si256(ifrom, 1), _mm256_extractf128_si256(mask, 1), reinterpret_cast<char*>(to + 4));
|
||||
#else
|
||||
EIGEN_DEBUG_UNALIGNED_STORE _mm256_maskstore_ps(to, mask, from);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -336,12 +336,19 @@ EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a,
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
|
||||
const __m512i mask = _mm512_set1_epi32(0x80000000);
|
||||
// NOTE: MSVC seems to struggle with _mm512_set1_epi32, leading to random results.
|
||||
// The intel docs give it a relatively high latency as well, so we're probably
|
||||
// better off with using _mm512_set_epi32 directly anyways.
|
||||
const __m512i mask = _mm512_set_epi32(0x80000000,0x80000000,0x80000000,0x80000000,
|
||||
0x80000000,0x80000000,0x80000000,0x80000000,
|
||||
0x80000000,0x80000000,0x80000000,0x80000000,
|
||||
0x80000000,0x80000000,0x80000000,0x80000000);
|
||||
return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
|
||||
const __m512i mask = _mm512_set1_epi64(0x8000000000000000ULL);
|
||||
const __m512i mask = _mm512_set_epi64(0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL,
|
||||
0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL);
|
||||
return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask));
|
||||
}
|
||||
template <>
|
||||
|
22
test/packet_ostream.h
Normal file
22
test/packet_ostream.h
Normal file
@ -0,0 +1,22 @@
|
||||
#ifndef TEST_PACKET_OSTREAM
|
||||
#define TEST_PACKET_OSTREAM
|
||||
|
||||
#include <type_traits>
|
||||
#include <ostream>
|
||||
|
||||
// Include this header to be able to print Packets while debugging.
|
||||
|
||||
template<typename Packet, typename EnableIf = std::enable_if_t<Eigen::internal::unpacket_traits<Packet>::vectorizable> >
|
||||
std::ostream& operator<<(std::ostream& os, const Packet& packet) {
|
||||
using Scalar = typename Eigen::internal::unpacket_traits<Packet>::type;
|
||||
Scalar v[Eigen::internal::unpacket_traits<Packet>::size];
|
||||
Eigen::internal::pstoreu(v, packet);
|
||||
os << "{" << v[0];
|
||||
for (int i=1; i<Eigen::internal::unpacket_traits<Packet>::size; ++i) {
|
||||
os << "," << v[i];
|
||||
}
|
||||
os << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
#endif // TEST_PACKET_OSTREAM
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include "packetmath_test_shared.h"
|
||||
#include "random_without_cast_overflow.h"
|
||||
#include "packet_ostream.h"
|
||||
|
||||
template <typename T>
|
||||
inline T REF_ADD(const T& a, const T& b) {
|
||||
|
Loading…
Reference in New Issue
Block a user