mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
Add round, ceil and floor for SSE4.1/AVX (Bug #70)
This commit is contained in:
parent
b32948c642
commit
d46e2c10a6
@ -66,7 +66,10 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
@ -83,7 +86,10 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
|
||||
@ -176,6 +182,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, 0); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, 0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
|
||||
|
||||
|
@ -109,7 +109,13 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
#endif
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
@ -126,7 +132,13 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
#endif
|
||||
};
|
||||
};
|
||||
#endif
|
||||
@ -256,6 +268,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
|
||||
|
@ -296,6 +296,10 @@ template<typename Scalar> void packetmath_real()
|
||||
EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
|
||||
EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
|
||||
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRound, std::round, internal::pround);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasCeil, std::ceil, internal::pceil);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasFloor, std::floor, internal::pfloor);
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
{
|
||||
data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
|
||||
|
Loading…
Reference in New Issue
Block a user