From 6cee8d347e8a7e8e1a689a3b7de5fe413f3e1103 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 15 Dec 2020 04:06:41 +0000 Subject: [PATCH] Add an additional step of Newton-Raphson for `psqrt` on Arm, which otherwise has an error of ~1000 ulps. --- Eigen/src/Core/arch/NEON/PacketMath.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 90ffee767..5883eca38 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3896,7 +3896,8 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ // Do a single step of Newton's iteration. //the number 1.5f was set reference to Quake3's fast inverse square root x = vmulq_f64(x, psub(pset1(1.5), pmul(half, pmul(x, x)))); - // Do one more Newton's iteration to get more accurate result. + // Do two more Newton's iteration to get a result accurate to 1 ulp. + x = vmulq_f64(x, psub(pset1(1.5), pmul(half, pmul(x, x)))); x = vmulq_f64(x, psub(pset1(1.5), pmul(half, pmul(x, x)))); // Flush results for denormals to zero. return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(pmul(_x, x)), denormal_mask));