From 9fe040ad29400f152b392fff9dc1493a6b9c14aa Mon Sep 17 00:00:00 2001
From: Hauke Heibel
Date: Sun, 7 Mar 2010 14:05:26 +0100
Subject: [PATCH] Reintroduced the if-clause for MSVC ei_ploadu via _loadu_.

---
 Eigen/src/Core/arch/SSE/PacketMath.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 282a1971c..77f15d982 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -184,17 +184,17 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pload(const float* from) {
 template<> EIGEN_STRONG_INLINE Packet2d ei_pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); }
 
-// #if (!defined __GNUC__) && (!defined __ICC)
-// template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
-// template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
-// template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); }
-// #else
-
+#if defined(_MSC_VER)
+ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
+ template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
+ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); }
+#else
 // Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
 // require pointer casting to incompatible pointer types and leads to invalid code
 // because of the strict aliasing rule. The "dummy" stuff are required to enforce
 // a correct instruction dependency.
 // TODO: do the same for MSVC (ICC is compatible)
+// NOTE: with the code below, MSVC's compiler crashes!
 template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from)
 {
 EIGEN_DEBUG_UNALIGNED_LOAD
@@ -219,6 +219,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from)
 res = _mm_loadh_pd(res, (const double*)(from+2)) ;
 return _mm_castpd_si128(res);
 }
+#endif
 
 template<> EIGEN_STRONG_INLINE void ei_pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
 template<> EIGEN_STRONG_INLINE void ei_pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }