mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
bug #1631: fix compilation with ARM NEON and clang, and clean up the weird pshiftright_and_cast and pcast_and_shiftleft functions.
This commit is contained in:
parent
a1a5fbbd21
commit
b131a4db24
@ -163,11 +163,11 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/TypeCasting.h"
|
||||
#include "src/Core/arch/SSE/Complex.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/PacketMath.h"
|
||||
#include "src/Core/arch/AVX/TypeCasting.h"
|
||||
#include "src/Core/arch/AVX/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/Complex.h"
|
||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||
#include "src/Core/arch/AVX/MathFunctions.h"
|
||||
#elif defined EIGEN_VECTORIZE_SSE
|
||||
#include "src/Core/arch/SSE/PacketMath.h"
|
||||
#include "src/Core/arch/SSE/TypeCasting.h"
|
||||
|
@ -219,7 +219,13 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; }
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pandnot(const Packet& a, const Packet& b) {
  // Generic fallback: keep the bits of a that are NOT set in b, i.e. a & ~b.
  // The bitwise complement (~) is required here: the logical negation (!)
  // collapses b to 0 or 1 and would clear almost every bit of a. This matches
  // the vectorized specializations (_mm_andnot_si128(b,a), vbicq_s32(a,b)).
  return a & (~b);
}
|
||||
|
||||
/** \internal \returns \a a shifted by N bits */
|
||||
/** \internal \returns \a a shifted by N bits to the right */
|
||||
template<int N> EIGEN_DEVICE_FUNC inline int
pshiftright(const int& a)
{
  // Scalar fallback: built-in right shift by the compile-time amount N.
  const int shifted = a >> N;
  return shifted;
}
|
||||
template<int N> EIGEN_DEVICE_FUNC inline long int
pshiftright(const long int& a)
{
  // Scalar fallback for long: built-in right shift by the compile-time amount N.
  const long int shifted = a >> N;
  return shifted;
}
|
||||
|
||||
/** \internal \returns \a a shifted by N bits to the left */
|
||||
template<int N> EIGEN_DEVICE_FUNC inline int
pshiftleft(const int& a)
{
  // Scalar fallback: built-in left shift by the compile-time amount N.
  const int shifted = a << N;
  return shifted;
}
|
||||
template<int N> EIGEN_DEVICE_FUNC inline long int
|
||||
@ -654,41 +660,17 @@ pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
* Some generic implementations to be used by implementors
|
||||
***************************************************************************/
|
||||
|
||||
/** \internal shift the bits by n and cast the result to the initial type, i.e.:
|
||||
* return float(reinterpret_cast<uint>(a) >> n)
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pshiftright_and_cast(Packet a, int n);
|
||||
|
||||
/** Default implementation of pfrexp for float.
|
||||
* It is expected to be called by implementers of template<> pfrexp,
|
||||
* and the above pshiftright_and_cast function must be implemented.
|
||||
* It is expected to be called by implementers of template<> pfrexp.
|
||||
*/
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||
pfrexp_float(const Packet& a, Packet& exponent) {
|
||||
const Packet cst_126f = pset1<Packet>(126.0f);
|
||||
const Packet cst_half = pset1<Packet>(0.5f);
|
||||
const Packet cst_inv_mant_mask = pset1frombits<Packet>(~0x7f800000u);
|
||||
exponent = psub(pshiftright_and_cast(a,23), cst_126f);
|
||||
return por(pand(a, cst_inv_mant_mask), cst_half);
|
||||
}
|
||||
|
||||
/** \internal cast to an integer, shift the bits left by n, and reinterpret the result as the initial type, i.e.:
|
||||
* return reinterpret_cast<float>(int(a) << n)
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pcast_and_shiftleft(Packet a, int n);
|
||||
pfrexp_float(const Packet& a, Packet& exponent);
|
||||
|
||||
/** Default implementation of pldexp for float.
|
||||
* It is expected to be called by implementers of template<> pldexp,
|
||||
* and the above pcast_and_shiftleft function must be implemented.
|
||||
* It is expected to be called by implementers of template<> pldexp.
|
||||
*/
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||
pldexp_float(Packet a, Packet exponent) {
|
||||
const Packet cst_127 = pset1<Packet>(127.f);
|
||||
// return a * 2^exponent
|
||||
return pmul(a, pcast_and_shiftleft(padd(exponent, cst_127), 23));
|
||||
}
|
||||
pldexp_float(Packet a, Packet exponent);
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
@ -256,7 +256,17 @@ template<> EIGEN_STRONG_INLINE Packet8f pselect<Packet8f>(const Packet8f& mask,
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pselect<Packet4d>(const Packet4d& mask, const Packet4d& a, const Packet4d& b)
|
||||
{ return _mm256_blendv_pd(b,a,mask); }
|
||||
|
||||
template<int N> EIGEN_STRONG_INLINE Packet8i pshiftleft(const Packet8i& a) {
|
||||
template<int N> EIGEN_STRONG_INLINE Packet8i pshiftright(Packet8i a) {
#ifdef EIGEN_VECTORIZE_AVX2
  // With AVX2 the 256-bit shift on 32-bit lanes is a single intrinsic.
  return _mm256_srli_epi32(a, N);
#else
  // Without AVX2: shift each 128-bit half separately, then reassemble
  // the two halves into one 256-bit packet.
  const __m128i low_half  = _mm256_extractf128_si256(a, 0);
  const __m128i high_half = _mm256_extractf128_si256(a, 1);
  const __m128i low_shifted  = _mm_srli_epi32(low_half, N);
  const __m128i high_shifted = _mm_srli_epi32(high_half, N);
  return _mm256_insertf128_si256(_mm256_castsi128_si256(low_shifted), (high_shifted), 1);
#endif
}
|
||||
|
||||
template<int N> EIGEN_STRONG_INLINE Packet8i pshiftleft(Packet8i a) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_slli_epi32(a, N);
|
||||
#else
|
||||
@ -409,33 +419,10 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
|
||||
return _mm256_and_pd(a,mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pshiftright_and_cast<Packet8f>(Packet8f v, int n)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
|
||||
#else
|
||||
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
|
||||
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
|
||||
return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Packet8f& exponent) {
|
||||
return pfrexp_float(a,exponent);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast_and_shiftleft<Packet8f>(Packet8f v, int n)
|
||||
{
|
||||
Packet8i vi = _mm256_cvttps_epi32(v);
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_castsi256_ps(_mm256_slli_epi32(vi, n));
|
||||
#else
|
||||
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(vi, 0), n);
|
||||
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(vi, 1), n);
|
||||
return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pldexp<Packet8f>(const Packet8f& a, const Packet8f& exponent) {
|
||||
return pldexp_float(a,exponent);
|
||||
}
|
||||
|
@ -187,8 +187,19 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
};
|
||||
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f>
|
||||
{
|
||||
typedef float type;
|
||||
typedef Packet4f half;
|
||||
typedef Packet4i integer_packet;
|
||||
enum {size=4, alignment=Aligned16};
|
||||
};
|
||||
template<> struct unpacket_traits<Packet4i>
|
||||
{
|
||||
typedef int type;
|
||||
typedef Packet4i half;
|
||||
enum {size=4, alignment=Aligned16};
|
||||
};
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
|
||||
{
|
||||
@ -567,21 +578,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) {
|
||||
return vec_ctf(vec_sr(reinterpret_cast<Packet4i>(a),
|
||||
reinterpret_cast<Packet4ui>(pset1<Packet4i>(n))),0);
|
||||
}
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a)
|
||||
{ return vec_sr(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a)
|
||||
{ return vec_sl(a,reinterpret_cast<Packet4ui>(pset1<Packet4i>(N))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
|
||||
return pfrexp_float(a,exponent);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft<Packet4f>(Packet4f v, int n)
|
||||
{
|
||||
Packet4i vi = vec_cts(v,0);
|
||||
return reinterpret_cast<Packet4f>(vec_sl(vi, reinterpret_cast<Packet4ui>(pset1<Packet4i>(n))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
|
||||
return pldexp_float(a,exponent);
|
||||
}
|
||||
@ -807,6 +812,43 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return vec_cts(a,0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return vec_ctf(a,0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {
|
||||
return reinterpret_cast<Packet4i>(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {
|
||||
return reinterpret_cast<Packet4f>(a);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---------- double ----------
|
||||
#ifdef __VSX__
|
||||
typedef __vector double Packet2d;
|
||||
|
@ -16,6 +16,26 @@
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
pfrexp_float(const Packet& a, Packet& exponent) {
  typedef typename unpacket_traits<Packet>::integer_packet PacketI;

  // Exponent: reinterpret the float bits as integers, shift right by 23 bits,
  // convert the result back to float and subtract 126.
  const PacketI raw_bits = preinterpret<PacketI>(a);
  const Packet shifted_as_float = pcast<PacketI,Packet>(pshiftright<23>(raw_bits));
  exponent = psub(shifted_as_float, pset1<Packet>(126.0f));

  // Returned value: clear the bits selected by 0x7f800000 in a, then OR in
  // the bit pattern of 0.5f.
  const Packet keep_mask = pset1frombits<Packet>(~0x7f800000u);
  return por(pand(a, keep_mask), pset1<Packet>(0.5f));
}
|
||||
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
pldexp_float(Packet a, Packet exponent)
{
  typedef typename unpacket_traits<Packet>::integer_packet PacketI;

  // Computes a * 2^exponent. The power of two is assembled as a bit pattern:
  // add 127 to the exponent, convert to integers, shift left by 23 bits and
  // reinterpret the result as floats.
  const Packet biased = padd(exponent, pset1<Packet>(127.f));
  const PacketI shifted_bits = pshiftleft<23>(pcast<Packet,PacketI>(biased));
  return pmul(a, preinterpret<Packet>(shifted_bits));
}
|
||||
|
||||
// Natural logarithm
|
||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
||||
|
@ -140,8 +140,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f>
|
||||
{
|
||||
typedef float type;
|
||||
typedef Packet4f half;
|
||||
typedef Packet4i integer_packet;
|
||||
enum {size=4, alignment=Aligned16};
|
||||
};
|
||||
template<> struct unpacket_traits<Packet4i>
|
||||
{
|
||||
typedef int32_t type;
|
||||
typedef Packet4i half;
|
||||
enum {size=4, alignment=Aligned16};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
|
||||
@ -294,6 +305,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
|
||||
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return vshrq_n_s32(a,N); }
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return vshlq_n_s32(a,N); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
@ -384,20 +398,10 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) {
|
||||
return vcvtq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a),n));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
|
||||
return pfrexp_float(a,exponent);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft<Packet4f>(Packet4f v, int n)
|
||||
{
|
||||
Packet4i vi = vcvtq_s32_f32(v);
|
||||
return vreinterpretq_f32_s32(vshlq_n_s32(vi, n));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
|
||||
return pldexp_float(a,exponent);
|
||||
}
|
||||
|
@ -41,6 +41,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i
|
||||
return vcvtq_f32_s32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {
|
||||
return vreinterpretq_s32_f32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {
|
||||
return vreinterpretq_f32_s32(a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -370,7 +370,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }
|
||||
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftleft(const Packet4i& a) { return _mm_slli_epi32(a,N); }
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return _mm_srli_epi32(a,N); }
|
||||
template<int N> EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return _mm_slli_epi32(a,N); }
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
|
||||
@ -569,20 +570,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) {
|
||||
return _mm_cvtepi32_ps(_mm_srli_epi32(_mm_castps_si128(a),n));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
|
||||
return pfrexp_float(a,exponent);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft<Packet4f>(Packet4f v, int n)
|
||||
{
|
||||
Packet4i vi = _mm_cvttps_epi32(v);
|
||||
return _mm_castsi128_ps(_mm_slli_epi32(vi, n));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
|
||||
return pldexp_float(a,exponent);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user