Fix strict aliasing bug causing product_small failure.

Packet loading is skipped due to an aliasing violation, leading to nullopt matrix
multiplication.

Fixes #2327.
This commit is contained in:
Antonio Sanchez 2021-09-17 12:49:01 -07:00 committed by Rasmus Munk Larsen
parent 9882aec279
commit 3c724c44cf
3 changed files with 9 additions and 10 deletions

View File

@ -101,7 +101,9 @@ template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<fl
// Broadcasts one complex<float> value into every complex slot of a Packet4cf.
//
// The real and imaginary parts are read through std::real/std::imag instead of
// reinterpreting &from as a `const double*`. That pointer cast violates strict
// aliasing, which could let the compiler drop the load altogether.
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
{
  const float re = std::real(from);
  const float im = std::imag(from);
  // _mm256_set_ps lists elements from the highest index down, so the (im, re)
  // pairs below place `re` in each even float slot and `im` in each odd one.
  return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
}
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)

View File

@ -99,7 +99,9 @@ template<> EIGEN_STRONG_INLINE Packet8cf ploadu<Packet8cf>(const std::complex<fl
// Broadcasts one complex<float> value into every complex slot of a Packet8cf.
//
// The real and imaginary parts are read through std::real/std::imag instead of
// loading &from through a `const double*`. That pointer cast violates strict
// aliasing, which could let the compiler drop the load altogether.
template<> EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from)
{
  const float re = std::real(from);
  const float im = std::imag(from);
  // _mm512_set_ps lists elements from the highest index down, so the (im, re)
  // pairs below place `re` in each even float slot and `im` in each odd one.
  return Packet8cf(_mm512_set_ps(im, re, im, re, im, re, im, re,
                                 im, re, im, re, im, re, im, re));
}
template<> EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from)

View File

@ -108,14 +108,9 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<fl
// Broadcasts one complex<float> value into both complex slots of a Packet2cf.
//
// The real and imaginary parts are read through std::real/std::imag instead of
// loading &from via reinterpret_cast<double const*>. That cast violates strict
// aliasing, which could let the compiler drop the load altogether. Building the
// register from scalars also removes the need for separate SSE3 / non-SSE3 paths.
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
  const float re = std::real(from);
  const float im = std::imag(from);
  // _mm_set_ps lists elements from the highest index down, so the (im, re)
  // pairs below place `re` in float slots 0 and 2 and `im` in slots 1 and 3.
  return Packet2cf(_mm_set_ps(im, re, im, re));
}
// Reads the single complex value that `from` points to and hands it to
// pset1<Packet2cf>, returning whatever packet pset1 builds from it.
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
{
  const std::complex<float>& value = *from;
  return pset1<Packet2cf>(value);
}