mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-19 13:40:59 +08:00
PowerPC: remove branch prediction from rint implementation
The branch prediction hints actually hurt performance in this case. The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a general floating-point function, the expected input is not bounded, so it is better to let the hardware handle the branches.
This commit is contained in:
parent
6142896d53
commit
60c414c346
@ -43,7 +43,7 @@
|
||||
# See pow-inputs for an example.
|
||||
|
||||
subdir := benchtests
|
||||
bench := exp pow
|
||||
bench := exp pow rint
|
||||
|
||||
exp-ITER = 100000
|
||||
exp-ARGLIST = double
|
||||
@ -55,5 +55,10 @@ pow-ARGLIST = double:double
|
||||
pow-RET = double
|
||||
LDFLAGS-bench-pow = -lm
|
||||
|
||||
rint-ITER = 250000000
|
||||
rint-ARGLIST = double
|
||||
rint-RET = double
|
||||
LDFLAGS-bench-rint = -lm
|
||||
|
||||
include ../Makeconfig
|
||||
include ../Rules
|
||||
|
4
benchtests/rint-inputs
Normal file
4
benchtests/rint-inputs
Normal file
@ -0,0 +1,4 @@
|
||||
78.5
|
||||
-78.5
|
||||
4503599627370497.0
|
||||
-4503599627370497.0
|
@ -45,14 +45,14 @@ ENTRY (__rint)
|
||||
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||
bnllr- cr7
|
||||
bng- cr6,.L4
|
||||
bnllr cr7
|
||||
bng cr6,.L4
|
||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||
fabs fp1,fp1 /* if (x == 0.0) */
|
||||
blr /* x = 0.0; */
|
||||
.L4:
|
||||
bnllr- cr6 /* if (x < 0.0) */
|
||||
bnllr cr6 /* if (x < 0.0) */
|
||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||
|
@ -41,14 +41,14 @@ ENTRY (__rintf)
|
||||
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||
bnllr- cr7
|
||||
bng- cr6,.L4
|
||||
bnllr cr7
|
||||
bng cr6,.L4
|
||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||
fabs fp1,fp1 /* if (x == 0.0) */
|
||||
blr /* x = 0.0; */
|
||||
.L4:
|
||||
bnllr- cr6 /* if (x < 0.0) */
|
||||
bnllr cr6 /* if (x < 0.0) */
|
||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||
|
@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
|
||||
fsub fp12,fp13,fp13 /* generate 0.0 */
|
||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
|
||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||
bnllr- cr7
|
||||
bng- cr6,.L4
|
||||
bnllr cr7
|
||||
bng cr6,.L4
|
||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||
fabs fp1,fp1 /* if (x == 0.0) */
|
||||
blr /* x = 0.0; */
|
||||
.L4:
|
||||
bnllr- cr6 /* if (x < 0.0) */
|
||||
bnllr cr6 /* if (x < 0.0) */
|
||||
fsub fp1,fp1,fp13 /* x-= TWO52; */
|
||||
fadd fp1,fp1,fp13 /* x+= TWO52; */
|
||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||
|
@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
|
||||
fsubs fp12,fp13,fp13 /* generate 0.0 */
|
||||
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
|
||||
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
|
||||
bnllr- cr7
|
||||
bng- cr6,.L4
|
||||
bnllr cr7
|
||||
bng cr6,.L4
|
||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||
fabs fp1,fp1 /* if (x == 0.0) */
|
||||
blr /* x = 0.0; */
|
||||
.L4:
|
||||
bnllr- cr6 /* if (x < 0.0) */
|
||||
bnllr cr6 /* if (x < 0.0) */
|
||||
fsubs fp1,fp1,fp13 /* x-= TWO23; */
|
||||
fadds fp1,fp1,fp13 /* x+= TWO23; */
|
||||
fnabs fp1,fp1 /* if (x == 0.0) */
|
||||
|
Loading…
x
Reference in New Issue
Block a user