mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-01 18:26:24 +08:00
fix #53: performance regression; hopefully I did not resurrect another
perf. issue...
This commit is contained in:
parent
e4f94b8c58
commit
9395326e44
@ -77,15 +77,16 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
|
||||
#ifdef __GNUC__
|
||||
// Sometimes GCC implements _mm_set1_p* using multiple moves,
|
||||
// that is inefficient :(
|
||||
// TODO make sure the new solution using the shuffle/unpacklo is ok
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
|
||||
Packet4f res = _mm_set_ss(from);
|
||||
asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
|
||||
return res;
|
||||
return _mm_shuffle_ps(res,res,0);
|
||||
//asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
|
||||
Packet2d res = _mm_set_sd(from);
|
||||
asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
|
||||
return res;
|
||||
return _mm_unpacklo_pd(res,res);
|
||||
// asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
|
||||
}
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
|
||||
|
Loading…
Reference in New Issue
Block a user