mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-13 18:37:27 +08:00
Additional NEON packet-math operations
This commit is contained in:
parent
deb93ed1bf
commit
52d54278be
@ -58,6 +58,7 @@ struct default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 1,
|
||||
HasBlend = 0,
|
||||
HasInsert = 0,
|
||||
HasReduxp = 1,
|
||||
|
||||
HasDiv = 0,
|
||||
|
@ -42,7 +42,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
HasSetLinear = 0,
|
||||
HasInsert = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
@ -64,6 +64,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
AlignedOnScalar = 1,
|
||||
size = 8,
|
||||
HasHalfPacket = 1,
|
||||
HasInsert = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
@ -94,6 +95,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 1,
|
||||
HasInsert = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
@ -131,7 +133,8 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasBlend = 0
|
||||
HasBlend = 0,
|
||||
HasInsert = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
@ -42,7 +42,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0,
|
||||
HasReduxp = 0
|
||||
HasReduxp = 0,
|
||||
HasInsert = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -76,7 +76,8 @@ struct packet_traits<half> : default_packet_traits {
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasBlend = 0
|
||||
HasBlend = 0,
|
||||
HasInsert = 1
|
||||
};
|
||||
};
|
||||
|
||||
@ -90,6 +91,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
size = 16,
|
||||
HasHalfPacket = 1,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
||||
@ -118,6 +120,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
AlignedOnScalar = 1,
|
||||
size = 8,
|
||||
HasHalfPacket = 1,
|
||||
HasInsert = 1,
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
||||
HasSqrt = EIGEN_FAST_MATH,
|
||||
HasRsqrt = EIGEN_FAST_MATH,
|
||||
|
@ -99,6 +99,11 @@ template<> struct unpacket_traits<Packet2cf>
|
||||
};
|
||||
};
|
||||
|
||||
// Builds a one-element complex packet from a real scalar: `a` is written to
// lane 0 and lane 1 is left at zero.
template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
{
  const float32x2_t zeros = vdup_n_f32(0.f);
  return Packet1cf(vset_lane_f32(a, zeros, 0));
}
|
||||
// Builds a two-element complex packet from two real values by zero-extending
// each 32-bit lane of `a` to 64 bits and viewing the result as four floats.
// NOTE(review): presumably relies on a little-endian lane layout so that the
// original value ends up in the real slot and the zero in the imaginary slot
// -- confirm.
template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
{
  const uint32x2_t bits = vreinterpret_u32_f32(a);
  const uint64x2_t widened = vmovl_u32(bits);
  return Packet2cf(vreinterpretq_f32_u64(widened));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
|
||||
{ return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
|
@ -137,6 +137,7 @@ struct packet_traits<float> : default_packet_traits
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -151,6 +152,7 @@ struct packet_traits<float> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
@ -178,6 +180,7 @@ struct packet_traits<int8_t> : default_packet_traits
|
||||
size = 16,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -192,6 +195,7 @@ struct packet_traits<int8_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1
|
||||
};
|
||||
};
|
||||
@ -208,6 +212,7 @@ struct packet_traits<uint8_t> : default_packet_traits
|
||||
size = 16,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -222,6 +227,7 @@ struct packet_traits<uint8_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1,
|
||||
|
||||
HasSqrt = 1
|
||||
@ -240,6 +246,7 @@ struct packet_traits<int16_t> : default_packet_traits
|
||||
size = 8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -254,6 +261,7 @@ struct packet_traits<int16_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1
|
||||
};
|
||||
};
|
||||
@ -270,6 +278,7 @@ struct packet_traits<uint16_t> : default_packet_traits
|
||||
size = 8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -284,6 +293,7 @@ struct packet_traits<uint16_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1,
|
||||
|
||||
HasSqrt = 1
|
||||
@ -302,6 +312,7 @@ struct packet_traits<int32_t> : default_packet_traits
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -316,6 +327,7 @@ struct packet_traits<int32_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1
|
||||
};
|
||||
};
|
||||
@ -332,6 +344,7 @@ struct packet_traits<uint32_t> : default_packet_traits
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCast = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
@ -346,6 +359,7 @@ struct packet_traits<uint32_t> : default_packet_traits
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasInsert = 1,
|
||||
HasReduxp = 1,
|
||||
|
||||
HasSqrt = 1
|
||||
@ -1509,6 +1523,43 @@ template<> EIGEN_STRONG_INLINE Packet2l pandnot<Packet2l>(const Packet2l& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pandnot<Packet2ul>(const Packet2ul& a, const Packet2ul& b)
|
||||
{ return vbicq_u64(a,b); }
|
||||
|
||||
// Bitwise complement of a two-float packet, computed on its 32-bit integer view.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2f pnot<Packet2f>(const Packet2f& a)
{
  const uint32x2_t bits = vreinterpret_u32_f32(a);
  const uint32x2_t flipped = vmvn_u32(bits);
  return vreinterpret_f32_u32(flipped);
}
|
||||
// Bitwise complement of a four-float packet, computed on its 32-bit integer view.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pnot<Packet4f>(const Packet4f& a)
{
  const uint32x4_t bits = vreinterpretq_u32_f32(a);
  const uint32x4_t flipped = vmvnq_u32(bits);
  return vreinterpretq_f32_u32(flipped);
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c pnot<Packet4c>(const Packet4c& a)
|
||||
{ return ~a; }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8c pnot<Packet8c>(const Packet8c& a)
|
||||
{ return vmvn_s8(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16c pnot<Packet16c>(const Packet16c& a)
|
||||
{ return vmvnq_s8(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4uc pnot<Packet4uc>(const Packet4uc& a)
|
||||
{ return ~a; }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8uc pnot<Packet8uc>(const Packet8uc& a)
|
||||
{ return vmvn_u8(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet16uc pnot<Packet16uc>(const Packet16uc& a)
|
||||
{ return vmvnq_u8(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4s pnot<Packet4s>(const Packet4s& a)
|
||||
{ return vmvn_s16(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8s pnot<Packet8s>(const Packet8s& a)
|
||||
{ return vmvnq_s16(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4us pnot<Packet4us>(const Packet4us& a)
|
||||
{ return vmvn_u16(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8us pnot<Packet8us>(const Packet8us& a)
|
||||
{ return vmvnq_u16(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2i pnot<Packet2i>(const Packet2i& a)
|
||||
{ return vmvn_s32(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4i pnot<Packet4i>(const Packet4i& a)
|
||||
{ return vmvnq_s32(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ui pnot<Packet2ui>(const Packet2ui& a)
|
||||
{ return vmvn_u32(a); }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4ui pnot<Packet4ui>(const Packet4ui& a)
|
||||
{ return vmvnq_u32(a); }
|
||||
// Bitwise complement of a packet of two signed 64-bit integers. The inversion
// is carried out on a 32-bit lane view; a bitwise NOT does not depend on the
// lane width, so the result is the same.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2l pnot<Packet2l>(const Packet2l& a)
{
  const int32x4_t as_words = vreinterpretq_s32_s64(a);
  const int32x4_t flipped = vmvnq_s32(as_words);
  return vreinterpretq_s64_s32(flipped);
}
|
||||
// Bitwise complement of a packet of two unsigned 64-bit integers. The
// inversion is carried out on a 32-bit lane view; a bitwise NOT does not
// depend on the lane width, so the result is the same.
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet2ul pnot<Packet2ul>(const Packet2ul& a)
{
  const uint32x4_t as_words = vreinterpretq_u32_u64(a);
  const uint32x4_t flipped = vmvnq_u32(as_words);
  return vreinterpretq_u64_u32(flipped);
}
|
||||
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4c parithmetic_shift_right(Packet4c& a)
|
||||
{ return vget_lane_s32(vreinterpret_s32_s8(vshr_n_s8(vreinterpret_s8_s32(vdup_n_s32(a)), N)), 0); }
|
||||
template<int N> EIGEN_STRONG_INLINE Packet8c parithmetic_shift_right(Packet8c a) { return vshr_n_s8(a,N); }
|
||||
@ -3431,6 +3482,82 @@ ptranspose(PacketBlock<Packet2ul, 2>& kernel)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Bitwise select on two-float packets: the mask is reinterpreted as integer
// bits and handed to vbsl together with `a` and `b` (bits of `a` where the
// mask bit is set, bits of `b` elsewhere).
template<> EIGEN_DEVICE_FUNC inline Packet2f pselect(const Packet2f& mask, const Packet2f& a, const Packet2f& b)
{
  const uint32x2_t mask_bits = vreinterpret_u32_f32(mask);
  return vbsl_f32(mask_bits, a, b);
}
|
||||
// Bitwise select on four-float packets: the mask is reinterpreted as integer
// bits and handed to vbslq together with `a` and `b` (bits of `a` where the
// mask bit is set, bits of `b` elsewhere).
template<> EIGEN_DEVICE_FUNC inline Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b)
{
  const uint32x4_t mask_bits = vreinterpretq_u32_f32(mask);
  return vbslq_f32(mask_bits, a, b);
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8c pselect(const Packet8c& mask, const Packet8c& a, const Packet8c& b)
|
||||
{ return vbsl_s8(vreinterpret_u8_s8(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet16c pselect(const Packet16c& mask, const Packet16c& a, const Packet16c& b)
|
||||
{ return vbslq_s8(vreinterpretq_u8_s8(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8uc pselect(const Packet8uc& mask, const Packet8uc& a, const Packet8uc& b)
|
||||
{ return vbsl_u8(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet16uc pselect(const Packet16uc& mask, const Packet16uc& a, const Packet16uc& b)
|
||||
{ return vbslq_u8(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4s pselect(const Packet4s& mask, const Packet4s& a, const Packet4s& b)
|
||||
{ return vbsl_s16(vreinterpret_u16_s16(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8s pselect(const Packet8s& mask, const Packet8s& a, const Packet8s& b)
|
||||
{ return vbslq_s16(vreinterpretq_u16_s16(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4us pselect(const Packet4us& mask, const Packet4us& a, const Packet4us& b)
|
||||
{ return vbsl_u16(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8us pselect(const Packet8us& mask, const Packet8us& a, const Packet8us& b)
|
||||
{ return vbslq_u16(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2i pselect(const Packet2i& mask, const Packet2i& a, const Packet2i& b)
|
||||
{ return vbsl_s32(vreinterpret_u32_s32(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pselect(const Packet4i& mask, const Packet4i& a, const Packet4i& b)
|
||||
{ return vbslq_s32(vreinterpretq_u32_s32(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2ui pselect(const Packet2ui& mask, const Packet2ui& a, const Packet2ui& b)
|
||||
{ return vbsl_u32(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4ui pselect(const Packet4ui& mask, const Packet4ui& a, const Packet4ui& b)
|
||||
{ return vbslq_u32(mask, a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2l pselect(const Packet2l& mask, const Packet2l& a, const Packet2l& b)
|
||||
{ return vbslq_s64(vreinterpretq_u64_s64(mask), a, b); }
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2ul pselect(const Packet2ul& mask, const Packet2ul& a, const Packet2ul& b)
|
||||
{ return vbslq_u64(mask, a, b); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Packet2f pinsertfirst(const Packet2f& a, float b) { return vset_lane_f32(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4f pinsertfirst(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 0); }
|
||||
// Returns `a` with its lowest byte replaced by `b`; the three upper bytes of
// the packed word are preserved.
EIGEN_DEVICE_FUNC inline Packet4c pinsertfirst(const Packet4c& a, int8_t b)
{
  const uint32_t upper_bytes = static_cast<uint32_t>(a) & 0xffffff00u;
  // Masking drops the sign-extension bits produced by widening a negative `b`.
  const uint32_t low_byte = static_cast<uint32_t>(b) & 0xffu;
  return static_cast<int32_t>(upper_bytes | low_byte);
}
|
||||
EIGEN_DEVICE_FUNC inline Packet8c pinsertfirst(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet16c pinsertfirst(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 0); }
|
||||
// Returns `a` with its lowest byte replaced by `b`; the three upper bytes of
// the packed word are preserved.
EIGEN_DEVICE_FUNC inline Packet4uc pinsertfirst(const Packet4uc& a, uint8_t b)
{
  const uint32_t upper_bytes = a & ~0xffu;
  return upper_bytes | b;
}
|
||||
EIGEN_DEVICE_FUNC inline Packet8uc pinsertfirst(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet16uc pinsertfirst(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4s pinsertfirst(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet8s pinsertfirst(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4us pinsertfirst(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet8us pinsertfirst(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2i pinsertfirst(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4i pinsertfirst(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2ui pinsertfirst(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4ui pinsertfirst(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2l pinsertfirst(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 0); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2ul pinsertfirst(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 0); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Packet2f pinsertlast(const Packet2f& a, float b) { return vset_lane_f32(b, a, 1); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4f pinsertlast(const Packet4f& a, float b) { return vsetq_lane_f32(b, a, 3); }
|
||||
// Returns `a` with its highest byte replaced by `b`; the three lower bytes of
// the packed word are preserved.
EIGEN_DEVICE_FUNC inline Packet4c pinsertlast(const Packet4c& a, int8_t b)
{
  const uint32_t lower_bytes = static_cast<uint32_t>(a) & 0x00ffffffu;
  // The unsigned shift discards any sign-extension bits of `b`.
  const uint32_t top_byte = static_cast<uint32_t>(b) << 24;
  return lower_bytes | top_byte;
}
|
||||
EIGEN_DEVICE_FUNC inline Packet8c pinsertlast(const Packet8c& a, int8_t b) { return vset_lane_s8(b, a, 7); }
|
||||
EIGEN_DEVICE_FUNC inline Packet16c pinsertlast(const Packet16c& a, int8_t b) { return vsetq_lane_s8(b, a, 15); }
|
||||
// Returns `a` with its highest byte replaced by `b`; the three lower bytes of
// the packed word are preserved.
// `b` is widened to uint32_t before the shift: a bare uint8_t would be
// promoted to signed int, and shifting a value >= 0x80 left by 24 does not fit
// in an int.
EIGEN_DEVICE_FUNC inline Packet4uc pinsertlast(const Packet4uc& a, uint8_t b)
{ return (a & ~0xff000000u) | (static_cast<uint32_t>(b) << 24); }
|
||||
EIGEN_DEVICE_FUNC inline Packet8uc pinsertlast(const Packet8uc& a, uint8_t b) { return vset_lane_u8(b, a, 7); }
|
||||
EIGEN_DEVICE_FUNC inline Packet16uc pinsertlast(const Packet16uc& a, uint8_t b) { return vsetq_lane_u8(b, a, 15); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4s pinsertlast(const Packet4s& a, int16_t b) { return vset_lane_s16(b, a, 3); }
|
||||
EIGEN_DEVICE_FUNC inline Packet8s pinsertlast(const Packet8s& a, int16_t b) { return vsetq_lane_s16(b, a, 7); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4us pinsertlast(const Packet4us& a, uint16_t b) { return vset_lane_u16(b, a, 3); }
|
||||
EIGEN_DEVICE_FUNC inline Packet8us pinsertlast(const Packet8us& a, uint16_t b) { return vsetq_lane_u16(b, a, 7); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2i pinsertlast(const Packet2i& a, int32_t b) { return vset_lane_s32(b, a, 1); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4i pinsertlast(const Packet4i& a, int32_t b) { return vsetq_lane_s32(b, a, 3); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2ui pinsertlast(const Packet2ui& a, uint32_t b) { return vset_lane_u32(b, a, 1); }
|
||||
EIGEN_DEVICE_FUNC inline Packet4ui pinsertlast(const Packet4ui& a, uint32_t b) { return vsetq_lane_u32(b, a, 3); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2l pinsertlast(const Packet2l& a, int64_t b) { return vsetq_lane_s64(b, a, 1); }
|
||||
EIGEN_DEVICE_FUNC inline Packet2ul pinsertlast(const Packet2ul& a, uint64_t b) { return vsetq_lane_u64(b, a, 1); }
|
||||
|
||||
/**
|
||||
* Computes the integer square root
|
||||
* @remarks The calculation is performed using an algorithm which iterates through each binary digit of the result
|
||||
@ -3579,7 +3706,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasReduxp = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasFloor = 0,
|
||||
HasFloor = 1,
|
||||
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
@ -3639,6 +3766,18 @@ template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
|
||||
|
||||
// Element-wise floor of a packet of two doubles.
// Uses the round-toward-minus-infinity intrinsic instead of a round-trip
// through int64: the integer detour cannot represent NaN, infinities or
// magnitudes >= 2^63, and it loses the sign of -0.0.
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
{ return vrndmq_f64(a); }
|
||||
|
||||
// Bitwise logical operations are not defined for floating-point vectors, so the lanes are reinterpreted as integers using NEON intrinsics
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); }
|
||||
@ -3755,6 +3894,14 @@ ptranspose(PacketBlock<Packet2d, 2>& kernel)
|
||||
kernel.packet[0] = tmp1;
|
||||
kernel.packet[1] = tmp2;
|
||||
}
|
||||
|
||||
// Bitwise select on two-double packets: the mask is reinterpreted as integer
// bits and handed to vbslq together with `a` and `b` (bits of `a` where the
// mask bit is set, bits of `b` elsewhere).
template<> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b)
{
  const uint64x2_t mask_bits = vreinterpretq_u64_f64(mask);
  return vbslq_f64(mask_bits, a, b);
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Packet2d pinsertfirst(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 0); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Packet2d pinsertlast(const Packet2d& a, double b) { return vsetq_lane_f64(b, a, 1); }
|
||||
|
||||
#endif // EIGEN_ARCH_ARM64
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -14,23 +14,289 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> struct type_casting_traits<float,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<float,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int8_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint8_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int16_t,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint16_t,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int32_t,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::int8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::uint8_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint32_t,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int64_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int64_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int64_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int64_t,numext::uint32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::int64_t,numext::uint64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint64_t,float>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint64_t,numext::int16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint64_t,numext::uint16_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint64_t,numext::int32_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
template<> struct type_casting_traits<numext::uint64_t,numext::int64_t>
|
||||
{ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2f pcast<Packet2i,Packet2f>(const Packet2i& a) { return vcvt_f32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2f pcast<Packet2ui,Packet2f>(const Packet2ui& a) { return vcvt_f32_u32(a); }
|
||||
// Converts two signed 64-bit integers to floats by narrowing each lane to
// 32 bits and then converting.
// NOTE(review): the narrowing step keeps only the low 32 bits of each lane, so
// inputs outside the int32 range presumably do not convert to the nearest
// float -- confirm this is acceptable for callers.
template<> EIGEN_STRONG_INLINE Packet2f pcast<Packet2l,Packet2f>(const Packet2l& a)
{
  const int32x2_t narrowed = vmovn_s64(a);
  return vcvt_f32_s32(narrowed);
}
|
||||
// Converts two unsigned 64-bit integers to floats by narrowing each lane to
// 32 bits and then converting.
// NOTE(review): the narrowing step keeps only the low 32 bits of each lane, so
// inputs above the uint32 range presumably do not convert to the nearest
// float -- confirm this is acceptable for callers.
template<> EIGEN_STRONG_INLINE Packet2f pcast<Packet2ul,Packet2f>(const Packet2ul& a)
{
  const uint32x2_t narrowed = vmovn_u64(a);
  return vcvt_f32_u32(narrowed);
}
|
||||
// Converts four packed signed bytes to four floats: the packed word is
// broadcast into a vector, viewed as bytes, widened 8 -> 16 -> 32 bits (low
// four lanes only) and finally converted to float.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4c,Packet4f>(const Packet4c& a)
{
  const int8x8_t bytes = vreinterpret_s8_s32(vdup_n_s32(a));
  const int16x4_t halves = vget_low_s16(vmovl_s8(bytes));
  const int32x4_t words = vmovl_s16(halves);
  return vcvtq_f32_s32(words);
}
|
||||
// Converts four packed unsigned bytes to four floats: the packed word is
// broadcast into a vector, viewed as bytes, zero-extended 8 -> 16 -> 32 bits
// (low four lanes only) and finally converted to float.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4uc,Packet4f>(const Packet4uc& a)
{
  const uint8x8_t bytes = vreinterpret_u8_u32(vdup_n_u32(a));
  const uint16x4_t halves = vget_low_u16(vmovl_u8(bytes));
  const uint32x4_t words = vmovl_u16(halves);
  // Each lane came from a single byte, so viewing it as signed cannot change its value.
  return vcvtq_f32_s32(vreinterpretq_s32_u32(words));
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4s,Packet4f>(const Packet4s& a)
|
||||
{ return vcvtq_f32_s32(vmovl_s16(a)); }
|
||||
// Converts four unsigned 16-bit integers to four floats by zero-extending
// each lane to 32 bits and converting.
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4us,Packet4f>(const Packet4us& a)
{
  const uint32x4_t words = vmovl_u16(a);
  // Each lane came from a 16-bit value, so viewing it as signed cannot change its value.
  return vcvtq_f32_s32(vreinterpretq_s32_u32(words));
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i,Packet4f>(const Packet4i& a) { return vcvtq_f32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui,Packet4f>(const Packet4ui& a) { return vcvtq_f32_u32(a); }
|
||||
// Converts four floats to four signed bytes packed into one 32-bit word:
// float -> int32, narrowed to 16 and then 8 bits, with the first four bytes
// extracted as a single 32-bit lane.
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4f,Packet4c>(const Packet4f& a)
{
  const int32x4_t words = vcvtq_s32_f32(a);
  const int16x4_t halves = vmovn_s32(words);
  // vmovn_s16 needs a 128-bit input; the four values are duplicated to fill
  // it and only the low four result bytes are read back.
  const int8x8_t bytes = vmovn_s16(vcombine_s16(halves, halves));
  return vget_lane_s32(vreinterpret_s32_s8(bytes), 0);
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4uc,Packet4c>(const Packet4uc& a)
|
||||
{ return static_cast<Packet4c>(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4s,Packet4c>(const Packet4s& a)
|
||||
{ return vget_lane_s32(vreinterpret_s32_s8(vmovn_s16(vcombine_s16(a, a))), 0); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4us,Packet4c>(const Packet4us& a)
|
||||
{
|
||||
const int16x4_t b = vreinterpret_s16_u16(a);
|
||||
return vget_lane_s32(vreinterpret_s32_s8(vmovn_s16(vcombine_s16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4i,Packet4c>(const Packet4i& a)
|
||||
{
|
||||
const int16x4_t b = vmovn_s32(a);
|
||||
return vget_lane_s32(vreinterpret_s32_s8(vmovn_s16(vcombine_s16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4c pcast<Packet4ui,Packet4c>(const Packet4ui& a)
|
||||
{
|
||||
const int16x4_t b = vmovn_s32(vreinterpretq_s32_u32(a));
|
||||
return vget_lane_s32(vreinterpret_s32_s8(vmovn_s16(vcombine_s16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet8c pcast<Packet8uc,Packet8c>(const Packet8uc& a) { return vreinterpret_s8_u8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8c pcast<Packet8s,Packet8c>(const Packet8s& a) { return vmovn_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8c pcast<Packet8us,Packet8c>(const Packet8us& a)
|
||||
{ return vreinterpret_s8_u8(vmovn_u16(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16c pcast<Packet16uc,Packet16c>(const Packet16uc& a)
|
||||
{ return vreinterpretq_s8_u8(a); }
|
||||
// Converts four floats to four unsigned bytes packed into one 32-bit word:
// float -> int32 (viewed as unsigned), narrowed to 16 and then 8 bits, with
// the first four bytes extracted as a single 32-bit lane.
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4f,Packet4uc>(const Packet4f& a)
{
  const uint32x4_t words = vreinterpretq_u32_s32(vcvtq_s32_f32(a));
  const uint16x4_t halves = vmovn_u32(words);
  // vmovn_u16 needs a 128-bit input; the four values are duplicated to fill
  // it and only the low four result bytes are read back.
  const uint8x8_t bytes = vmovn_u16(vcombine_u16(halves, halves));
  return vget_lane_u32(vreinterpret_u32_u8(bytes), 0);
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4i,Packet4uc>(const Packet4i& a)
|
||||
{
|
||||
const uint16x4_t b = vmovn_u32(vreinterpretq_u32_s32(a));
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4ui,Packet4uc>(const Packet4ui& a)
|
||||
{
|
||||
const uint16x4_t b = vmovn_u32(a);
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4c,Packet4uc>(const Packet4c& a)
|
||||
{ return static_cast<Packet4uc>(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4s,Packet4uc>(const Packet4s& a)
|
||||
{
|
||||
const uint16x4_t b = vreinterpret_u16_s16(a);
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(b, b))), 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc pcast<Packet4us,Packet4uc>(const Packet4us& a)
|
||||
{ return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(a, a))), 0); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8uc pcast<Packet8c,Packet8uc>(const Packet8c& a) { return vreinterpret_u8_s8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8uc pcast<Packet8s,Packet8uc>(const Packet8s& a)
|
||||
{ return vreinterpret_u8_s8(vmovn_s16(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8uc pcast<Packet8us,Packet8uc>(const Packet8us& a) { return vmovn_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc pcast<Packet16c,Packet16uc>(const Packet16c& a)
|
||||
{ return vreinterpretq_u8_s8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4f,Packet4s>(const Packet4f& a)
|
||||
{ return vmovn_s32(vcvtq_s32_f32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4c,Packet4s>(const Packet4c& a)
|
||||
{ return vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a)))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4uc,Packet4s>(const Packet4uc& a)
|
||||
{ return vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4us,Packet4s>(const Packet4us& a)
|
||||
{ return vreinterpret_s16_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4i,Packet4s>(const Packet4i& a) { return vmovn_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s pcast<Packet4ui,Packet4s>(const Packet4ui& a)
|
||||
{ return vmovn_s32(vreinterpretq_s32_u32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcast<Packet8uc,Packet8s>(const Packet8uc& a)
|
||||
{ return vreinterpretq_s16_u16(vmovl_u8(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcast<Packet8c,Packet8s>(const Packet8c& a) { return vmovl_s8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcast<Packet8us,Packet8s>(const Packet8us& a)
|
||||
{ return vreinterpretq_s16_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4f,Packet4us>(const Packet4f& a)
|
||||
{ return vmovn_u32(vreinterpretq_u32_s32(vcvtq_s32_f32(a))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4c,Packet4us>(const Packet4c& a)
|
||||
{ return vget_low_u16(vreinterpretq_u16_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4uc,Packet4us>(const Packet4uc& a)
|
||||
{ return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a)))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4s,Packet4us>(const Packet4s& a)
|
||||
{ return vreinterpret_u16_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4i,Packet4us>(const Packet4i& a)
|
||||
{ return vmovn_u32(vreinterpretq_u32_s32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us pcast<Packet4ui,Packet4us>(const Packet4ui& a) { return vmovn_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcast<Packet8c,Packet8us>(const Packet8c& a)
|
||||
{ return vreinterpretq_u16_s16(vmovl_s8(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcast<Packet8uc,Packet8us>(const Packet8uc& a) { return vmovl_u8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcast<Packet8s,Packet8us>(const Packet8s& a)
|
||||
{ return vreinterpretq_u16_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i pcast<Packet2f,Packet2i>(const Packet2f& a) { return vcvt_s32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i pcast<Packet2ui,Packet2i>(const Packet2ui& a)
|
||||
{ return vreinterpret_s32_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i pcast<Packet2l,Packet2i>(const Packet2l& a)
|
||||
{ return vmovn_s64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i pcast<Packet2ul,Packet2i>(const Packet2ul& a)
|
||||
{ return vmovn_s64(vreinterpretq_s64_u64(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f,Packet4i>(const Packet4f& a) { return vcvtq_s32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4c,Packet4i>(const Packet4c& a)
|
||||
{ return vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4uc,Packet4i>(const Packet4uc& a)
|
||||
{ return vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a)))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4s,Packet4i>(const Packet4s& a) { return vmovl_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4us,Packet4i>(const Packet4us& a)
|
||||
{ return vreinterpretq_s32_u32(vmovl_u16(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4ui,Packet4i>(const Packet4ui& a)
|
||||
{ return vreinterpretq_s32_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui pcast<Packet2f,Packet2ui>(const Packet2f& a) { return vcvt_u32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui pcast<Packet2i,Packet2ui>(const Packet2i& a)
|
||||
{ return vreinterpret_u32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui pcast<Packet2l,Packet2ui>(const Packet2l& a)
|
||||
{ return vmovn_u64(vreinterpretq_u64_s64(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui pcast<Packet2ul,Packet2ui>(const Packet2ul& a)
|
||||
{ return vmovn_u64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f,Packet4ui>(const Packet4f& a) { return vcvtq_u32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4c,Packet4ui>(const Packet4c& a)
|
||||
{ return vreinterpretq_u32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(vdup_n_s32(a)))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4uc,Packet4ui>(const Packet4uc& a)
|
||||
{ return vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(a))))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4s,Packet4ui>(const Packet4s& a)
|
||||
{ return vreinterpretq_u32_s32(vmovl_s16(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4us,Packet4ui>(const Packet4us& a) { return vmovl_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4i,Packet4ui>(const Packet4i& a)
|
||||
{ return vreinterpretq_u32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2f,Packet2l>(const Packet2f& a)
|
||||
{ return vmovl_s32(vcvt_s32_f32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2i,Packet2l>(const Packet2i& a)
|
||||
{ return vmovl_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2ui,Packet2l>(const Packet2ui& a)
|
||||
{ return vreinterpretq_s64_u64(vmovl_u32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2ul,Packet2l>(const Packet2ul& a)
|
||||
{ return vreinterpretq_s64_u64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pcast<Packet2f,Packet2ul>(const Packet2f& a)
|
||||
{ return vmovl_u32(vcvt_u32_f32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pcast<Packet2i,Packet2ul>(const Packet2i& a)
|
||||
{ return vreinterpretq_u64_s64(vmovl_s32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pcast<Packet2ui,Packet2ul>(const Packet2ui& a)
|
||||
{ return vmovl_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pcast<Packet2l,Packet2ul>(const Packet2l& a)
|
||||
{ return vreinterpretq_u64_s64(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f,Packet2i>(const Packet2i& a)
|
||||
{ return vreinterpret_f32_s32(a); }
|
||||
@ -40,14 +306,70 @@ template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Pa
|
||||
{ return vreinterpretq_f32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4ui>(const Packet4ui& a)
|
||||
{ return vreinterpretq_f32_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c,Packet4uc>(const Packet4uc& a)
|
||||
{ return static_cast<Packet4c>(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c,Packet8uc>(const Packet8uc& a)
|
||||
{ return vreinterpret_s8_u8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c,Packet16uc>(const Packet16uc& a)
|
||||
{ return vreinterpretq_s8_u8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc,Packet4c>(const Packet4c& a)
|
||||
{ return static_cast<Packet4uc>(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc,Packet8c>(const Packet8c& a)
|
||||
{ return vreinterpret_u8_s8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc,Packet16c>(const Packet16c& a)
|
||||
{ return vreinterpretq_u8_s8(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s,Packet4us>(const Packet4us& a)
|
||||
{ return vreinterpret_s16_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s,Packet8us>(const Packet8us& a)
|
||||
{ return vreinterpretq_s16_u16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us,Packet4s>(const Packet4s& a)
|
||||
{ return vreinterpret_u16_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us,Packet8s>(const Packet8s& a)
|
||||
{ return vreinterpretq_u16_s16(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i,Packet2f>(const Packet2f& a)
|
||||
{ return vreinterpret_s32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i,Packet2ui>(const Packet2ui& a)
|
||||
{ return vreinterpret_s32_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a)
|
||||
{ return vreinterpretq_s32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4ui>(const Packet4ui& a)
|
||||
{ return vreinterpretq_s32_u32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui,Packet2f>(const Packet2f& a)
|
||||
{ return vreinterpret_u32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui,Packet2i>(const Packet2i& a)
|
||||
{ return vreinterpret_u32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui,Packet4f>(const Packet4f& a)
|
||||
{ return vreinterpretq_u32_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui,Packet4i>(const Packet4i& a)
|
||||
{ return vreinterpretq_u32_s32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l,Packet2ul>(const Packet2ul& a)
|
||||
{ return vreinterpretq_s64_u64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul,Packet2l>(const Packet2l& a)
|
||||
{ return vreinterpretq_u64_s64(a); }
|
||||
|
||||
#if EIGEN_ARCH_ARM64
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2f pcast<Packet2d,Packet2f>(const Packet2d& a) { return vcvt_f32_f64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2f,Packet2d>(const Packet2f& a) { return vcvt_f64_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2i,Packet2d>(const Packet2i& a) { return vcvtq_f64_s64(vmovl_s32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2ui,Packet2d>(const Packet2ui& a) { return vcvtq_f64_u64(vmovl_u32(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2l,Packet2d>(const Packet2l& a) { return vcvtq_f64_s64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2ul,Packet2d>(const Packet2ul& a) { return vcvtq_f64_u64(a); }
|
||||
// Casts two doubles to two 32-bit signed integers, truncating toward zero.
// The conversion goes double -> int64 -> int32 instead of double -> float -> int32:
// rounding to float first keeps only 24 bits of mantissa, so e.g. 0.99999999999
// becomes 1.0f (giving 1 instead of 0) and 16777217.0 becomes 16777216.0f, which
// disagrees with a scalar static_cast<int32_t>(double).
// The final step narrows with vmovn_s64, as the Packet2l -> Packet2i cast does.
template<> EIGEN_STRONG_INLINE Packet2i pcast<Packet2d,Packet2i>(const Packet2d& a)
{ return vmovn_s64(vcvtq_s64_f64(a)); }
|
||||
// Casts two doubles to two 32-bit unsigned integers, truncating toward zero.
// The conversion goes double -> uint64 -> uint32 instead of double -> float -> uint32:
// rounding to float first keeps only 24 bits of mantissa, so e.g. 0.99999999999
// becomes 1.0f (giving 1 instead of 0) and 16777217.0 becomes 16777216.0f, which
// disagrees with a scalar static_cast<uint32_t>(double).
// The final step narrows with vmovn_u64, as the Packet2ul -> Packet2ui cast does.
template<> EIGEN_STRONG_INLINE Packet2ui pcast<Packet2d,Packet2ui>(const Packet2d& a)
{ return vmovn_u64(vcvtq_u64_f64(a)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2d,Packet2l>(const Packet2d& a) { return vcvtq_s64_f64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul pcast<Packet2d,Packet2ul>(const Packet2d& a) { return vcvtq_u64_f64(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d,Packet2l>(const Packet2l& a)
|
||||
{ return vreinterpretq_f64_s64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d,Packet2ul>(const Packet2ul& a)
|
||||
{ return vreinterpretq_f64_u64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l,Packet2d>(const Packet2d& a)
|
||||
{ return vreinterpretq_s64_f64(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul,Packet2d>(const Packet2d& a)
|
||||
{ return vreinterpretq_u64_f64(a); }
|
||||
|
||||
#endif // EIGEN_ARCH_ARM64
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
@ -45,7 +45,8 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
HasInsert = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
@ -120,6 +120,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = EIGEN_FAST_MATH,
|
||||
HasBlend = 1,
|
||||
HasInsert = 1,
|
||||
HasFloor = 1
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
@ -144,7 +145,8 @@ struct packet_traits<double> : default_packet_traits {
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
HasBlend = 1,
|
||||
HasInsert = 1
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
,
|
||||
|
@ -16,6 +16,58 @@
|
||||
#define REF_DIV(a,b) ((a)/(b))
|
||||
#define REF_ABS_DIFF(a,b) ((a)>(b)?(a)-(b):(b)-(a))
|
||||
|
||||
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket, bool CanCast = false>
|
||||
struct test_cast_helper;
|
||||
|
||||
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket>
|
||||
struct test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, false> {
|
||||
static void run() {}
|
||||
};
|
||||
|
||||
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket>
|
||||
struct test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, true> {
|
||||
static void run() {
|
||||
static const int PacketSize = internal::unpacket_traits<FromPacket>::size;
|
||||
EIGEN_ALIGN_MAX FromScalar data1[PacketSize];
|
||||
EIGEN_ALIGN_MAX ToScalar data2[PacketSize];
|
||||
EIGEN_ALIGN_MAX ToScalar ref[PacketSize];
|
||||
|
||||
// Construct a packet of scalars that will not overflow when casting
|
||||
for (int i=0; i<PacketSize; ++i) {
|
||||
const FromScalar from_scalar = Array<FromScalar,1,1>::Random().value();
|
||||
const ToScalar to_scalar = Array<ToScalar,1,1>::Random().value();
|
||||
const FromScalar c = sizeof(ToScalar) > sizeof(FromScalar) ? static_cast<FromScalar>(to_scalar) : from_scalar;
|
||||
data1[i] = (NumTraits<FromScalar>::IsSigned && !NumTraits<ToScalar>::IsSigned) ? numext::abs(c) : c;
|
||||
}
|
||||
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[i] = static_cast<const ToScalar>(data1[i]);
|
||||
internal::pstore(data2, internal::pcast<FromPacket, ToPacket>(internal::pload<FromPacket>(data1)));
|
||||
|
||||
VERIFY(areApprox(ref, data2, PacketSize) && "internal::pcast<>");
|
||||
}
|
||||
};
|
||||
|
||||
template<typename FromPacket, typename ToScalar>
|
||||
void test_cast() {
|
||||
typedef typename internal::packet_traits<ToScalar>::type Full;
|
||||
typedef typename internal::unpacket_traits<Full>::half Half;
|
||||
typedef typename internal::unpacket_traits<typename internal::unpacket_traits<Full>::half>::half Quarter;
|
||||
|
||||
static const int PacketSize = internal::unpacket_traits<FromPacket>::size;
|
||||
static const bool CanCast =
|
||||
PacketSize == internal::unpacket_traits<Full>::size ||
|
||||
PacketSize == internal::unpacket_traits<Half>::size ||
|
||||
PacketSize == internal::unpacket_traits<Quarter>::size;
|
||||
|
||||
typedef typename internal::unpacket_traits<FromPacket>::type FromScalar;
|
||||
typedef typename internal::conditional<internal::unpacket_traits<Quarter>::size == PacketSize, Quarter,
|
||||
typename internal::conditional<internal::unpacket_traits<Half>::size == PacketSize, Half, Full>::type>::type
|
||||
ToPacket;
|
||||
|
||||
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
|
||||
}
|
||||
|
||||
template<typename Scalar,typename Packet> void packetmath()
|
||||
{
|
||||
typedef internal::packet_traits<Scalar> PacketTraits;
|
||||
@ -263,7 +315,7 @@ template<typename Scalar,typename Packet> void packetmath()
|
||||
}
|
||||
}
|
||||
|
||||
if (PacketTraits::HasBlend || g_vectorize_sse) {
|
||||
if (PacketTraits::HasInsert || g_vectorize_sse) {
|
||||
// pinsertfirst
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[i] = data1[i];
|
||||
@ -273,7 +325,7 @@ template<typename Scalar,typename Packet> void packetmath()
|
||||
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertfirst");
|
||||
}
|
||||
|
||||
if (PacketTraits::HasBlend || g_vectorize_sse) {
|
||||
if (PacketTraits::HasInsert || g_vectorize_sse) {
|
||||
// pinsertlast
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[i] = data1[i];
|
||||
@ -547,6 +599,19 @@ template<typename Scalar,typename Packet> void packetmath_notcomplex()
|
||||
|
||||
Array<Scalar,Dynamic,1>::Map(data1, PacketSize*4).setRandom();
|
||||
|
||||
if (PacketTraits::HasCast) {
|
||||
test_cast<Packet, float>();
|
||||
test_cast<Packet, double>();
|
||||
test_cast<Packet, int8_t>();
|
||||
test_cast<Packet, uint8_t>();
|
||||
test_cast<Packet, int16_t>();
|
||||
test_cast<Packet, uint16_t>();
|
||||
test_cast<Packet, int32_t>();
|
||||
test_cast<Packet, uint32_t>();
|
||||
test_cast<Packet, int64_t>();
|
||||
test_cast<Packet, uint64_t>();
|
||||
}
|
||||
|
||||
ref[0] = data1[0];
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[0] = (std::min)(ref[0],data1[i]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user