diff --git a/Eigen/Core b/Eigen/Core index 61cc646aa..4bbde063a 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -163,11 +163,11 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/TypeCasting.h" #include "src/Core/arch/SSE/Complex.h" - #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX/TypeCasting.h" - #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/MathFunctions.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/TypeCasting.h" diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index c24268443..cc044de22 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -219,7 +219,13 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; } template EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (!b); } -/** \internal \returns \a a shifted by N bits */ +/** \internal \returns \a a shifted by N bits to the right */ +template EIGEN_DEVICE_FUNC inline int +pshiftright(const int& a) { return a >> N; } +template EIGEN_DEVICE_FUNC inline long int +pshiftright(const long int& a) { return a >> N; } + +/** \internal \returns \a a shifted by N bits to the left */ template EIGEN_DEVICE_FUNC inline int pshiftleft(const int& a) { return a << N; } template EIGEN_DEVICE_FUNC inline long int @@ -654,41 +660,17 @@ pinsertlast(const Packet& a, typename unpacket_traits::type b) * Some generic implementations to be used by implementors ***************************************************************************/ -/** \internal shift the bits by n and cast the result to the initial type, i.e.: - * return float(reinterpret_cast(a) >> n) - */ -template EIGEN_DEVICE_FUNC inline Packet -pshiftright_and_cast(Packet a, int n); - /** Default implementation of pfrexp for float. - * It is expected to be called by implementers of template<> pfrexp, - * and the above pshiftright_and_cast function must be implemented. + * It is expected to be called by implementers of template<> pfrexp. */ template EIGEN_STRONG_INLINE Packet -pfrexp_float(const Packet& a, Packet& exponent) { - const Packet cst_126f = pset1(126.0f); - const Packet cst_half = pset1(0.5f); - const Packet cst_inv_mant_mask = pset1frombits(~0x7f800000u); - exponent = psub(pshiftright_and_cast(a,23), cst_126f); - return por(pand(a, cst_inv_mant_mask), cst_half); -} - -/** \internal shift the bits by n and cast the result to the initial type, i.e.: - * return reinterpret_cast(int(a) >> n) - */ -template EIGEN_DEVICE_FUNC inline Packet -pcast_and_shiftleft(Packet a, int n); +pfrexp_float(const Packet& a, Packet& exponent); /** Default implementation of pldexp for float. - * It is expected to be called by implementers of template<> pldexp, - * and the above pcast_and_shiftleft function must be implemented. + * It is expected to be called by implementers of template<> pldexp. */ template EIGEN_STRONG_INLINE Packet -pldexp_float(Packet a, Packet exponent) { - const Packet cst_127 = pset1(127.f); - // return a * 2^exponent - return pmul(a, pcast_and_shiftleft(padd(exponent, cst_127), 23)); -} +pldexp_float(Packet a, Packet exponent); } // end namespace internal diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 969f68d79..4c1abe43f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -256,7 +256,17 @@ template<> EIGEN_STRONG_INLINE Packet8f pselect(const Packet8f& mask, template<> EIGEN_STRONG_INLINE Packet4d pselect(const Packet4d& mask, const Packet4d& a, const Packet4d& b) { return _mm256_blendv_pd(b,a,mask); } -template EIGEN_STRONG_INLINE Packet8i pshiftleft(const Packet8i& a) { +template EIGEN_STRONG_INLINE Packet8i pshiftright(Packet8i a) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_srli_epi32(a, N); +#else + __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(a, 0), N); + __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(a, 1), N); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} + +template EIGEN_STRONG_INLINE Packet8i pshiftleft(Packet8i a) { #ifdef EIGEN_VECTORIZE_AVX2 return _mm256_slli_epi32(a, N); #else @@ -409,33 +419,10 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) return _mm256_and_pd(a,mask); } -template<> EIGEN_STRONG_INLINE Packet8f pshiftright_and_cast(Packet8f v, int n) -{ -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n)); -#else - __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n); - __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n); - return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); -#endif -} - template<> EIGEN_STRONG_INLINE Packet8f pfrexp(const Packet8f& a, Packet8f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet8f pcast_and_shiftleft(Packet8f v, int n) -{ - Packet8i vi = _mm256_cvttps_epi32(v); -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_castsi256_ps(_mm256_slli_epi32(vi, n)); -#else - __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(vi, 0), n); - __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(vi, 1), n); - return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); -#endif -} - template<> EIGEN_STRONG_INLINE Packet8f pldexp(const Packet8f& a, const Packet8f& exponent) { return pldexp_float(a,exponent); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index d64550d8a..446065fb7 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -187,8 +187,19 @@ template<> struct packet_traits : default_packet_traits }; -template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits +{ + typedef float type; + typedef Packet4f half; + typedef Packet4i integer_packet; + enum {size=4, alignment=Aligned16}; +}; +template<> struct unpacket_traits +{ + typedef int type; + typedef Packet4i half; + enum {size=4, alignment=Aligned16}; +}; inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) { @@ -567,21 +578,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return vec_ctf(vec_sr(reinterpret_cast(a), - reinterpret_cast(pset1(n))),0); -} +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) +{ return vec_sr(a,reinterpret_cast(pset1(N))); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) +{ return vec_sl(a,reinterpret_cast(pset1(N))); } template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = vec_cts(v,0); - return reinterpret_cast(vec_sl(vi, reinterpret_cast(pset1(n)))); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); } @@ -807,6 +812,43 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons } +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return vec_cts(a,0); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return vec_ctf(a,0); +} + +template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { + return reinterpret_cast(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet4i& a) { + return reinterpret_cast(a); +} + + + //---------- double ---------- #ifdef __VSX__ typedef __vector double Packet2d; diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 067d1dbe0..465f9bc3e 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -16,6 +16,26 @@ namespace Eigen { namespace internal { +template EIGEN_STRONG_INLINE Packet +pfrexp_float(const Packet& a, Packet& exponent) { + typedef typename unpacket_traits::integer_packet PacketI; + const Packet cst_126f = pset1(126.0f); + const Packet cst_half = pset1(0.5f); + const Packet cst_inv_mant_mask = pset1frombits(~0x7f800000u); + exponent = psub(pcast(pshiftright<23>(preinterpret(a))), cst_126f); + return por(pand(a, cst_inv_mant_mask), cst_half); +} + +template EIGEN_STRONG_INLINE Packet +pldexp_float(Packet a, Packet exponent) +{ + typedef typename unpacket_traits::integer_packet PacketI; + const Packet cst_127 = pset1(127.f); + // return a * 2^exponent + PacketI ei = pcast(padd(exponent, cst_127)); + return pmul(a, preinterpret(pshiftleft<23>(ei))); +} + // Natural logarithm // Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2) // and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index dc432f0d2..0a50153c7 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -140,8 +140,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits +{ + typedef float type; + typedef Packet4f half; + typedef Packet4i integer_packet; + enum {size=4, alignment=Aligned16}; +}; +template<> struct unpacket_traits +{ + typedef int32_t type; + typedef Packet4i half; + enum {size=4, alignment=Aligned16}; +}; template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int32_t& from) { return vdupq_n_s32(from); } @@ -294,6 +305,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return vshrq_n_s32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return vshlq_n_s32(a,N); } + template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pload(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } @@ -384,20 +398,10 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return vcvtq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a),n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = vcvtq_s32_f32(v); - return vreinterpretq_f32_s32(vshlq_n_s32(vi, n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); } diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h index 95d1fd0e4..20dbe1332 100644 --- a/Eigen/src/Core/arch/NEON/TypeCasting.h +++ b/Eigen/src/Core/arch/NEON/TypeCasting.h @@ -41,6 +41,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i return vcvtq_f32_s32(a); } +template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { + return vreinterpretq_s32_f32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet4i& a) { + return vreinterpretq_f32_s32(a); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index fbc69ef1f..800eb4d86 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -370,7 +370,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); } -template EIGEN_STRONG_INLINE Packet4i pshiftleft(const Packet4i& a) { return _mm_slli_epi32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return _mm_srli_epi32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return _mm_slli_epi32(a,N); } #ifdef EIGEN_VECTORIZE_SSE4_1 template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return _mm_round_ps(a, 0); } @@ -569,20 +570,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) #endif } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return _mm_cvtepi32_ps(_mm_srli_epi32(_mm_castps_si128(a),n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = _mm_cvttps_epi32(v); - return _mm_castsi128_ps(_mm_slli_epi32(vi, n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); }