fix #53: performance regression, hopefully I did not resurrect another

perf. issue...
This commit is contained in:
Gael Guennebaud 2009-09-17 23:18:21 +02:00
parent e4f94b8c58
commit 9395326e44

View File

@ -77,15 +77,16 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
#ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves,
// which is inefficient :(
// TODO make sure the new solution using the shuffle/unpacklo is ok
// GCC-only (inside #ifdef __GNUC__) specialization of ei_pset1 for float:
// takes a scalar `from` and returns a Packet4f.
// NOTE(review): this listing appears to interleave two variants of the body
// (an inline-asm one followed by an intrinsic one), as a diff rendering would -
// confirm against the real file which variant is actually in effect.
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
// Start from the scalar loaded through _mm_set_ss.
Packet4f res = _mm_set_ss(from);
// Variant 1: inline asm "shufps $0" with res as both operands ("+x" = read-write operand).
asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
return res;
// Variant 2: intrinsic form, _mm_shuffle_ps(res,res,0).
// As written here, this statement is unreachable because of the return above.
return _mm_shuffle_ps(res,res,0);
//asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
}
// GCC-only (inside #ifdef __GNUC__) specialization of ei_pset1 for double:
// takes a scalar `from` and returns a Packet2d.
// NOTE(review): this listing appears to interleave two variants of the body
// (an inline-asm one followed by an intrinsic one), as a diff rendering would -
// confirm against the real file which variant is actually in effect.
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
// Start from the scalar loaded through _mm_set_sd.
Packet2d res = _mm_set_sd(from);
// Variant 1: inline asm "unpcklpd" with res as both operands ("+x" = read-write operand).
asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
return res;
// Variant 2: intrinsic form, _mm_unpacklo_pd(res,res).
// As written here, this statement is unreachable because of the return above.
return _mm_unpacklo_pd(res,res);
// asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
}
#else
// Fallback used when __GNUC__ is not defined: delegate to the _mm_set1_ps intrinsic.
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from)
{
  const Packet4f broadcast = _mm_set1_ps(from);
  return broadcast;
}