mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
Uses _mm512_abs_pd for Packet8d pabs
This commit is contained in:
parent
0cc9b5eb40
commit
f019b97aca
@ -139,6 +139,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
size = 8,
|
size = 8,
|
||||||
HasHalfPacket = 1,
|
HasHalfPacket = 1,
|
||||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
||||||
|
HasAbs = 1,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
HasSqrt = EIGEN_FAST_MATH,
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
HasRsqrt = EIGEN_FAST_MATH,
|
||||||
@ -888,9 +889,7 @@ template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
|
|||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
|
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
|
||||||
// _mm512_abs_ps intrinsic not found, so hack around it
|
return _mm512_abs_pd(a);
|
||||||
return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
|
|
||||||
_mm512_set1_epi64(0x7fffffffffffffff)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
@ -900,7 +899,7 @@ EIGEN_STRONG_INLINE Packet16f pfrexp<Packet16f>(const Packet16f& a, Packet16f& e
|
|||||||
|
|
||||||
// Extract exponent without existence of Packet8l.
|
// Extract exponent without existence of Packet8l.
|
||||||
template<>
|
template<>
|
||||||
EIGEN_STRONG_INLINE
|
EIGEN_STRONG_INLINE
|
||||||
Packet8d pfrexp_generic_get_biased_exponent(const Packet8d& a) {
|
Packet8d pfrexp_generic_get_biased_exponent(const Packet8d& a) {
|
||||||
const Packet8d cst_exp_mask = pset1frombits<Packet8d>(static_cast<uint64_t>(0x7ff0000000000000ull));
|
const Packet8d cst_exp_mask = pset1frombits<Packet8d>(static_cast<uint64_t>(0x7ff0000000000000ull));
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||||
@ -923,18 +922,18 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons
|
|||||||
// Clamp exponent to [-2099, 2099]
|
// Clamp exponent to [-2099, 2099]
|
||||||
const Packet8d max_exponent = pset1<Packet8d>(2099.0);
|
const Packet8d max_exponent = pset1<Packet8d>(2099.0);
|
||||||
const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||||
|
|
||||||
// Split 2^e into four factors and multiply.
|
// Split 2^e into four factors and multiply.
|
||||||
const Packet8i bias = pset1<Packet8i>(1023);
|
const Packet8i bias = pset1<Packet8i>(1023);
|
||||||
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
|
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
|
||||||
|
|
||||||
// 2^b
|
// 2^b
|
||||||
Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
Packet8i lo = _mm256_slli_epi64(hi, 52);
|
Packet8i lo = _mm256_slli_epi64(hi, 52);
|
||||||
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
|
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
|
||||||
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
|
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
|
||||||
Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
|
||||||
// 2^(e - 3b)
|
// 2^(e - 3b)
|
||||||
b = psub(psub(psub(e, b), b), b); // e - 3b
|
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||||
hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
Loading…
Reference in New Issue
Block a user