mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-07 18:27:40 +08:00
Add dedicated implementations of predux_any for AVX512, NEON, and Altivec/VSX
This commit is contained in:
parent
3f14e0d19e
commit
47810cf5b7
@ -969,6 +969,13 @@ EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
|
||||
return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
|
||||
}
|
||||
|
||||
// AVX512 specialization of predux_any for 16-float packets.
// Reports whether at least one lane of x has any bit set. The packet is
// reinterpreted as 32-bit integers first, so lanes are tested on their raw
// bit pattern rather than compared as floating-point values.
template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)
{
  const Packet16i bits = _mm512_castps_si512(x);
  // One mask bit per lane: set when (lane & lane) is non-zero.
  const __mmask16 nonzero_lanes = _mm512_test_epi32_mask(bits, bits);
  // kortestz is non-zero only for an empty mask, so "any" is its negation.
  return _mm512_kortestz(nonzero_lanes, nonzero_lanes) == 0;
}
|
||||
|
||||
template <int Offset>
|
||||
struct palign_impl<Offset, Packet16f> {
|
||||
static EIGEN_STRONG_INLINE void run(Packet16f& first,
|
||||
|
@ -720,6 +720,11 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
return pfirst(res);
|
||||
}
|
||||
|
||||
// AltiVec specialization of predux_any for 4-float packets.
// Returns true when at least one lane of x compares not-equal to the
// corresponding lane of pzero(x).
// NOTE(review): pzero is defined elsewhere; presumably it yields an all-zero
// packet of the same type as its argument - confirm.
template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
{
  const Packet4f zeros = pzero(x);
  return vec_any_ne(x, zeros);
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
|
@ -551,6 +551,13 @@ template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
|
||||
return vget_lane_s32(max, 0);
|
||||
}
|
||||
|
||||
// NEON specialization of predux_any for 4-float packets.
// Reports whether at least one lane of x has any bit set; the lanes are
// inspected as raw 32-bit unsigned integers, not as floating-point values.
template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
{
  // Split the reinterpreted packet into its two 64-bit halves.
  const uint32x2_t lower = vget_low_u32(vreinterpretq_u32_f32(x));
  const uint32x2_t upper = vget_high_u32(vreinterpretq_u32_f32(x));
  // OR the halves together: 4 lanes -> 2 lanes, still non-zero iff a bit was set.
  const uint32x2_t merged = vorr_u32(lower, upper);
  // Pairwise max folds the two remaining lanes into lane 0; the unsigned
  // result converts to bool on return (non-zero -> true).
  return vget_lane_u32(vpmax_u32(merged, merged), 0);
}
|
||||
|
||||
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
|
||||
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
|
||||
#define PALIGN_NEON(Offset,Type,Command) \
|
||||
|
Loading…
Reference in New Issue
Block a user