mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
Unify Altivec/VSX's plog with generic implementation, and enable it!
This commit is contained in:
parent
c24e98e6a8
commit
c2f35b1b47
@ -9,13 +9,15 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
/* The sin, cos, and exp functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
|
||||
#include "../Default/GenericPacketMathFunctions.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
@ -94,62 +96,7 @@ static Packet2ul p2ul_52 = { 52, 52 };
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
/* isvalid_mask is 0 if x < 0 or x is NaN. */
|
||||
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
|
||||
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
|
||||
|
||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
||||
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
|
||||
reinterpret_cast<Packet4ui>(p4i_23));
|
||||
|
||||
/* keep only the fractional part */
|
||||
x = pand(x, p4f_inv_mant_mask);
|
||||
x = por(x, p4f_half);
|
||||
|
||||
emm0 = psub(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
|
||||
Packet4f tmp = pand(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
Packet4f x3 = pmul(x2,x);
|
||||
|
||||
Packet4f y, y1, y2;
|
||||
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
||||
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
||||
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
||||
y = pmadd(y , x, p4f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
y1 = pmul(e, p4f_cephes_log_q1);
|
||||
tmp = pmul(x2, p4f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p4f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
// negative arg will be NAN, 0 will be -INF
|
||||
x = vec_sel(x, p4f_minus_inf, iszero_mask);
|
||||
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
|
||||
return x;
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
|
@ -148,7 +148,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
#ifdef __VSX__
|
||||
HasSqrt = 1,
|
||||
@ -285,6 +285,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
||||
Packet4i v = {from, from, from, from};
|
||||
return v;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) {
|
||||
return reinterpret_cast<Packet4f>(pset1<Packet4i>(from));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
@ -414,6 +419,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
|
||||
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
|
||||
return vec_nor(c,c);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||
|
||||
@ -426,6 +439,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
|
||||
return vec_sel(b, a, mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
|
||||
@ -550,6 +567,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) {
|
||||
return vec_ctf(vec_sr(reinterpret_cast<Packet4i>(a),
|
||||
reinterpret_cast<Packet4ui>(pset1<Packet4i>(n))),0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
|
||||
return pfrexp_float(a,exponent);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f b, sum;
|
||||
|
Loading…
Reference in New Issue
Block a user