mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
Add an additional step of Newton-Raphson for psqrt<double>
on Arm, which otherwise has an error of ~1000 ulps.
This commit is contained in:
parent
cf0b5b0344
commit
6cee8d347e
@ -3896,7 +3896,8 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){
|
|||||||
// Do a single step of Newton's iteration.
|
// Do a single step of Newton's iteration.
|
||||||
//the number 1.5f was set reference to Quake3's fast inverse square root
|
//the number 1.5f was set reference to Quake3's fast inverse square root
|
||||||
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
|
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
|
||||||
// Do one more Newton's iteration to get more accurate result.
|
// Do two more Newton's iteration to get a result accurate to 1 ulp.
|
||||||
|
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
|
||||||
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
|
x = vmulq_f64(x, psub(pset1<Packet2d>(1.5), pmul(half, pmul(x, x))));
|
||||||
// Flush results for denormals to zero.
|
// Flush results for denormals to zero.
|
||||||
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(pmul(_x, x)), denormal_mask));
|
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(pmul(_x, x)), denormal_mask));
|
||||||
|
Loading…
Reference in New Issue
Block a user