glibc/sysdeps/aarch64/libm-test-ulps

1421 lines
18 KiB
Plaintext
Raw Normal View History

2012-11-10 01:53:51 +08:00
# Begin of automatic generation
# Maximal error of functions:
Function: "acos":
float: 1
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "acos_downward":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
Function: "acos_towardzero":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
Function: "acos_upward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
ldouble: 1
2013-12-12 00:19:34 +08:00
2014-01-02 01:30:01 +08:00
Function: "acosh":
double: 2
float: 2
ldouble: 2
2014-01-02 01:30:01 +08:00
2014-06-03 20:25:34 +08:00
Function: "acosh_downward":
double: 2
float: 2
ldouble: 3
2014-06-03 20:25:34 +08:00
Function: "acosh_towardzero":
double: 2
float: 2
ldouble: 2
2014-06-03 20:25:34 +08:00
Function: "acosh_upward":
double: 2
float: 2
ldouble: 2
Function: "asin":
float: 1
2014-06-03 20:25:34 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "asin_downward":
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "asin_towardzero":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
2013-12-12 00:19:34 +08:00
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: "asin_upward":
2014-01-02 01:30:01 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "asinh":
2013-12-12 00:19:34 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "asinh_downward":
double: 3
float: 3
ldouble: 4
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "asinh_towardzero":
double: 2
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 2
Function: "asinh_upward":
double: 3
float: 3
ldouble: 4
Function: "atan":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
2014-01-02 01:30:01 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "atan2":
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: "atan2_downward":
double: 1
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "atan2_towardzero":
double: 1
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "atan2_upward":
double: 1
float: 1
ldouble: 2
Function: "atan_downward":
double: 1
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "atan_towardzero":
double: 1
float: 1
ldouble: 1
Function: "atan_upward":
double: 1
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: "atanh":
double: 2
float: 2
ldouble: 3
Function: "atanh_downward":
double: 3
float: 3
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: "atanh_towardzero":
double: 2
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "atanh_upward":
double: 3
float: 3
ldouble: 4
Function: "cabs":
double: 1
ldouble: 1
Function: "cabs_downward":
double: 1
ldouble: 1
Function: "cabs_towardzero":
double: 1
ldouble: 1
Function: "cabs_upward":
2014-04-17 05:37:32 +08:00
double: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: Real part of "cacos":
double: 1
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "cacos":
double: 2
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-06-03 20:25:34 +08:00
Function: Real part of "cacos_downward":
double: 3
2014-06-03 20:25:34 +08:00
float: 2
ldouble: 3
2014-06-03 20:25:34 +08:00
Function: Imaginary part of "cacos_downward":
double: 5
float: 3
ldouble: 6
2014-06-03 20:25:34 +08:00
Function: Real part of "cacos_towardzero":
double: 3
2014-06-03 20:25:34 +08:00
float: 2
ldouble: 3
2014-06-03 20:25:34 +08:00
Function: Imaginary part of "cacos_towardzero":
double: 5
float: 3
ldouble: 5
Function: Real part of "cacos_upward":
double: 2
float: 2
ldouble: 3
Function: Imaginary part of "cacos_upward":
double: 5
float: 5
ldouble: 7
2014-06-03 20:25:34 +08:00
2012-11-10 01:53:51 +08:00
Function: Real part of "cacosh":
double: 2
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "cacosh":
double: 1
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Real part of "cacosh_downward":
double: 5
float: 3
ldouble: 5
Function: Imaginary part of "cacosh_downward":
double: 3
float: 3
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Real part of "cacosh_towardzero":
double: 5
float: 3
ldouble: 5
Function: Imaginary part of "cacosh_towardzero":
double: 3
2014-04-17 05:37:32 +08:00
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "cacosh_upward":
double: 4
float: 4
ldouble: 6
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "cacosh_upward":
double: 3
2014-04-17 05:37:32 +08:00
float: 2
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: "carg":
double: 1
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "carg_downward":
double: 1
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "carg_towardzero":
double: 1
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "carg_upward":
double: 1
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: Real part of "casin":
double: 1
float: 1
2013-05-21 16:01:19 +08:00
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "casin":
double: 2
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Real part of "casin_downward":
double: 3
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Imaginary part of "casin_downward":
double: 5
float: 3
ldouble: 6
2014-04-17 05:37:32 +08:00
Function: Real part of "casin_towardzero":
double: 3
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 3
Function: Imaginary part of "casin_towardzero":
double: 5
float: 3
ldouble: 5
Function: Real part of "casin_upward":
double: 3
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Imaginary part of "casin_upward":
double: 5
float: 5
ldouble: 7
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: Real part of "casinh":
double: 2
2013-05-21 16:01:19 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "casinh":
double: 1
2013-05-21 16:01:19 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Real part of "casinh_downward":
double: 5
float: 3
ldouble: 6
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "casinh_downward":
double: 3
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Real part of "casinh_towardzero":
double: 5
float: 3
ldouble: 5
Function: Imaginary part of "casinh_towardzero":
double: 3
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 3
Function: Real part of "casinh_upward":
double: 5
float: 5
ldouble: 7
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "casinh_upward":
double: 3
2014-04-17 05:37:32 +08:00
float: 2
ldouble: 3
2013-05-21 16:01:19 +08:00
Function: Real part of "catan":
double: 1
2013-05-21 16:01:19 +08:00
float: 1
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "catan":
double: 1
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "catan_downward":
double: 1
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 2
Function: Imaginary part of "catan_downward":
double: 2
float: 2
ldouble: 3
Function: Real part of "catan_towardzero":
double: 1
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 2
Function: Imaginary part of "catan_towardzero":
double: 2
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Real part of "catan_upward":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "catan_upward":
double: 3
float: 3
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Real part of "catanh":
double: 1
2013-05-21 16:01:19 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
Function: Imaginary part of "catanh":
double: 1
2013-05-21 16:01:19 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "catanh_downward":
double: 2
float: 2
ldouble: 3
Function: Imaginary part of "catanh_downward":
double: 1
float: 2
ldouble: 2
Function: Real part of "catanh_towardzero":
double: 2
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Imaginary part of "catanh_towardzero":
double: 1
float: 2
ldouble: 2
Function: Real part of "catanh_upward":
double: 4
float: 4
2014-04-17 05:37:32 +08:00
ldouble: 4
Function: Imaginary part of "catanh_upward":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: "cbrt":
double: 3
2013-12-12 00:19:34 +08:00
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: "cbrt_downward":
double: 4
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 1
Function: "cbrt_towardzero":
double: 3
float: 1
2014-04-17 05:37:32 +08:00
ldouble: 1
Function: "cbrt_upward":
double: 5
2014-04-17 05:37:32 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
Function: Real part of "ccos":
double: 1
float: 1
ldouble: 1
Function: Imaginary part of "ccos":
double: 1
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "ccos_downward":
double: 1
float: 1
ldouble: 2
Function: Imaginary part of "ccos_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Real part of "ccos_towardzero":
double: 1
float: 2
ldouble: 2
Function: Imaginary part of "ccos_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Real part of "ccos_upward":
double: 1
float: 2
ldouble: 3
Function: Imaginary part of "ccos_upward":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Real part of "ccosh":
double: 1
float: 1
ldouble: 1
Function: Imaginary part of "ccosh":
double: 1
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "ccosh_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Imaginary part of "ccosh_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Real part of "ccosh_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Imaginary part of "ccosh_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Real part of "ccosh_upward":
double: 1
float: 2
ldouble: 3
Function: Imaginary part of "ccosh_upward":
double: 2
float: 2
ldouble: 2
Function: Real part of "cexp":
double: 2
float: 1
ldouble: 1
Function: Imaginary part of "cexp":
double: 1
float: 2
ldouble: 1
Function: Real part of "cexp_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
float: 2
ldouble: 2
Function: Imaginary part of "cexp_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
float: 3
ldouble: 2
Function: Real part of "cexp_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
float: 2
ldouble: 2
Function: Imaginary part of "cexp_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
float: 3
ldouble: 2
Function: Real part of "cexp_upward":
double: 1
float: 2
ldouble: 3
Function: Imaginary part of "cexp_upward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "clog":
double: 3
float: 3
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog":
double: 1
float: 1
ldouble: 1
Function: Real part of "clog10":
double: 3
float: 4
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog10":
double: 2
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 2
Function: Real part of "clog10_downward":
double: 5
float: 5
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog10_downward":
double: 2
float: 4
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "clog10_towardzero":
double: 5
float: 5
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog10_towardzero":
double: 2
float: 4
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "clog10_upward":
double: 6
float: 5
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog10_upward":
2012-11-10 01:53:51 +08:00
double: 2
float: 4
2014-04-17 05:37:32 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Real part of "clog_downward":
double: 4
float: 3
ldouble: 3
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog_downward":
2012-11-10 01:53:51 +08:00
double: 1
2014-04-17 05:37:32 +08:00
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Real part of "clog_towardzero":
double: 4
float: 4
ldouble: 3
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog_towardzero":
2012-11-10 01:53:51 +08:00
double: 1
float: 3
2014-04-17 05:37:32 +08:00
ldouble: 2
Function: Real part of "clog_upward":
double: 4
float: 3
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "clog_upward":
double: 1
float: 2
2012-11-10 01:53:51 +08:00
ldouble: 2
Function: "cos":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
Function: "cos_downward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: "cos_towardzero":
2013-12-12 00:19:34 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "cos_upward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
2013-12-12 00:19:34 +08:00
ldouble: 2
Function: "cosh":
double: 1
2014-01-02 01:30:01 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
Function: "cosh_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
2014-01-02 01:30:01 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "cosh_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
2014-01-02 01:30:01 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "cosh_upward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 2
2014-01-02 01:30:01 +08:00
float: 2
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Real part of "cpow":
double: 2
float: 5
ldouble: 4
Function: Imaginary part of "cpow":
float: 2
ldouble: 1
2014-07-12 01:56:42 +08:00
Function: Real part of "cpow_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 5
2014-07-12 01:56:42 +08:00
float: 8
ldouble: 6
Function: Imaginary part of "cpow_downward":
double: 1
2014-07-12 01:56:42 +08:00
float: 2
ldouble: 2
Function: Real part of "cpow_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 5
2014-07-12 01:56:42 +08:00
float: 8
ldouble: 6
Function: Imaginary part of "cpow_towardzero":
double: 1
2014-07-12 01:56:42 +08:00
float: 2
ldouble: 2
Function: Real part of "cpow_upward":
double: 4
float: 1
ldouble: 3
Function: Imaginary part of "cpow_upward":
double: 1
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Real part of "csin":
double: 1
float: 1
ldouble: 1
Function: Imaginary part of "csin":
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "csin_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Imaginary part of "csin_downward":
double: 1
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Real part of "csin_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Imaginary part of "csin_towardzero":
double: 1
float: 1
ldouble: 2
Function: Real part of "csin_upward":
2014-04-17 05:37:32 +08:00
double: 2
float: 2
ldouble: 2
Function: Imaginary part of "csin_upward":
double: 1
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Real part of "csinh":
float: 1
ldouble: 1
Function: Imaginary part of "csinh":
double: 1
float: 1
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: Real part of "csinh_downward":
double: 2
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "csinh_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
Function: Real part of "csinh_towardzero":
double: 2
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "csinh_towardzero":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: Real part of "csinh_upward":
double: 1
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 3
Function: Imaginary part of "csinh_upward":
double: 2
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Real part of "csqrt":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "csqrt":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: Real part of "csqrt_downward":
double: 5
float: 4
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "csqrt_downward":
double: 4
float: 3
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "csqrt_towardzero":
double: 4
float: 3
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "csqrt_towardzero":
double: 4
float: 3
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: Real part of "csqrt_upward":
double: 5
float: 4
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: Imaginary part of "csqrt_upward":
double: 3
float: 3
ldouble: 3
2014-04-17 05:37:32 +08:00
2012-11-10 01:53:51 +08:00
Function: Real part of "ctan":
double: 1
float: 1
2014-01-02 01:30:01 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "ctan":
double: 2
float: 2
2014-01-02 01:30:01 +08:00
ldouble: 3
Function: Real part of "ctan_downward":
double: 6
float: 5
2012-11-10 01:53:51 +08:00
ldouble: 4
Function: Imaginary part of "ctan_downward":
2014-01-02 01:30:01 +08:00
double: 2
float: 2
2012-11-10 01:53:51 +08:00
ldouble: 5
Function: Real part of "ctan_towardzero":
2014-01-02 01:30:01 +08:00
double: 5
float: 3
2014-01-02 01:30:01 +08:00
ldouble: 4
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "ctan_towardzero":
double: 2
2014-01-02 01:30:01 +08:00
float: 2
2012-11-10 01:53:51 +08:00
ldouble: 5
Function: Real part of "ctan_upward":
double: 2
float: 4
2014-01-02 01:30:01 +08:00
ldouble: 5
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "ctan_upward":
2014-01-02 01:30:01 +08:00
double: 2
float: 3
2014-07-12 01:56:42 +08:00
ldouble: 5
2012-11-10 01:53:51 +08:00
Function: Real part of "ctanh":
2014-01-02 01:30:01 +08:00
double: 2
2012-11-10 01:53:51 +08:00
float: 2
2014-01-02 01:30:01 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "ctanh":
2014-01-02 01:30:01 +08:00
double: 2
2012-11-10 01:53:51 +08:00
float: 1
2014-01-02 01:30:01 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: Real part of "ctanh_downward":
2014-01-02 01:30:01 +08:00
double: 4
float: 2
2012-11-10 01:53:51 +08:00
ldouble: 5
Function: Imaginary part of "ctanh_downward":
2014-01-02 01:30:01 +08:00
double: 6
float: 5
2012-11-10 01:53:51 +08:00
ldouble: 4
Function: Real part of "ctanh_towardzero":
2014-01-02 01:30:01 +08:00
double: 2
float: 2
2012-11-10 01:53:51 +08:00
ldouble: 5
Function: Imaginary part of "ctanh_towardzero":
2014-01-02 01:30:01 +08:00
double: 5
2012-11-10 01:53:51 +08:00
float: 2
ldouble: 3
2014-01-02 01:30:01 +08:00
Function: Real part of "ctanh_upward":
double: 2
float: 3
2014-07-12 01:56:42 +08:00
ldouble: 5
2014-01-02 01:30:01 +08:00
2012-11-10 01:53:51 +08:00
Function: Imaginary part of "ctanh_upward":
double: 2
2014-01-02 01:30:01 +08:00
float: 3
ldouble: 5
2012-11-10 01:53:51 +08:00
Function: "erf":
double: 1
float: 1
2013-12-12 00:19:34 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "erf_downward":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "erf_towardzero":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
ldouble: 1
Function: "erf_upward":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 2
Function: "erfc":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "erfc_downward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 4
float: 4
ldouble: 5
2014-04-17 05:37:32 +08:00
Function: "erfc_towardzero":
double: 3
2014-04-17 05:37:32 +08:00
float: 3
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: "erfc_upward":
Add new exp and exp2 implementations Optimized exp and exp2 implementations using a lookup table for fractional powers of 2. There are several variants, see e_exp_data.c, they can be selected by modifying math_config.h allowing different tradeoffs. The default selection should be acceptable as generic libm code. Worst case error is 0.509 ULP for exp and 0.507 ULP for exp2, on aarch64 the rodata size is 2160 bytes, shared between exp and exp2. On aarch64 .text + .rodata size decreased by 24912 bytes. The non-nearest rounding error is less than 1 ULP even on targets without efficient round implementation (although the error rate is higher in that case). Targets with single instruction, rounding mode independent, to nearest integer rounding and conversion can use them by setting TOINT_INTRINSICS and adding the necessary code to their math_private.h. The __exp1 code uses the same algorithm, so the error bound of pow increased a bit. New double precision error handling code was added following the style of the single precision error handling code. Improvements on Cortex-A72 compared to current glibc master: exp thruput: 1.61x in [-9.9 9.9] exp latency: 1.53x in [-9.9 9.9] exp thruput: 1.13x in [0.5 1] exp latency: 1.30x in [0.5 1] exp2 thruput: 2.03x in [-9.9 9.9] exp2 latency: 1.64x in [-9.9 9.9] For small (< 1) inputs the current exp code uses a separate algorithm so the speed up there is less. Was tested on aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and arm-linux-gnueabihf (!TOINT_INTRINSICS, no fma contraction) and x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction) and powerpc64le-linux-gnu (!TOINT_INTRINSICS, fma contraction) targets, only non-nearest rounding ulp errors increase and they are within acceptable bounds (ulp updates are in separate patches). * NEWS: Mention exp and exp2 improvements. * math/Makefile (libm-support): Remove t_exp. (type-double-routines): Add math_err and e_exp_data. * sysdeps/aarch64/libm-test-ulps: Update. * sysdeps/arm/libm-test-ulps: Update. * sysdeps/i386/fpu/e_exp_data.c: New file. * sysdeps/i386/fpu/math_err.c: New file. * sysdeps/i386/fpu/t_exp.c: Remove. * sysdeps/ia64/fpu/e_exp_data.c: New file. * sysdeps/ia64/fpu/math_err.c: New file. * sysdeps/ia64/fpu/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/e_exp.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp2.c: Rewrite. * sysdeps/ieee754/dbl-64/e_exp_data.c: New file. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Update error bound. * sysdeps/ieee754/dbl-64/eexp.tbl: Remove. * sysdeps/ieee754/dbl-64/math_config.h: New file. * sysdeps/ieee754/dbl-64/math_err.c: New file. * sysdeps/ieee754/dbl-64/t_exp.c: Remove. * sysdeps/ieee754/dbl-64/t_exp2.h: Remove. * sysdeps/ieee754/dbl-64/uexp.h: Remove. * sysdeps/ieee754/dbl-64/uexp.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_exp_data.c: New file. * sysdeps/m68k/m680x0/fpu/math_err.c: New file. * sysdeps/m68k/m680x0/fpu/t_exp.c: Remove. * sysdeps/powerpc/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Update.
2018-02-13 02:16:03 +08:00
double: 4
float: 4
ldouble: 5
Function: "exp":
float: 1
ldouble: 1
Function: "exp10":
2014-04-17 05:37:32 +08:00
double: 2
ldouble: 2
Function: "exp10_downward":
double: 2
float: 1
ldouble: 3
Function: "exp10_towardzero":
double: 2
float: 1
ldouble: 3
Function: "exp10_upward":
double: 2
float: 1
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "exp2":
2012-11-10 01:53:51 +08:00
double: 1
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
Function: "exp2_downward":
2014-01-02 01:30:01 +08:00
double: 1
2014-07-12 01:56:42 +08:00
float: 1
ldouble: 1
2014-01-02 01:30:01 +08:00
Function: "exp2_towardzero":
2014-01-02 01:30:01 +08:00
double: 1
2014-07-12 01:56:42 +08:00
float: 1
ldouble: 1
2014-01-02 01:30:01 +08:00
Function: "exp2_upward":
2014-01-02 01:30:01 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "exp_downward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
2012-11-10 01:53:51 +08:00
Function: "exp_towardzero":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
2012-11-10 01:53:51 +08:00
Function: "exp_upward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
2012-11-10 01:53:51 +08:00
Function: "expm1":
double: 1
float: 1
ldouble: 1
2014-01-02 01:30:01 +08:00
Function: "expm1_downward":
2013-07-05 16:30:52 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "expm1_towardzero":
2014-01-02 01:30:01 +08:00
double: 1
float: 2
ldouble: 4
2014-01-02 01:30:01 +08:00
2014-04-17 05:37:32 +08:00
Function: "expm1_upward":
2014-01-02 01:30:01 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 3
2014-01-02 01:30:01 +08:00
2014-04-17 05:37:32 +08:00
Function: "gamma":
double: 3
float: 4
ldouble: 5
Function: "gamma_downward":
double: 4
float: 4
ldouble: 8
2014-04-17 05:37:32 +08:00
Function: "gamma_towardzero":
double: 4
float: 3
ldouble: 5
2014-04-17 05:37:32 +08:00
Function: "gamma_upward":
double: 4
float: 5
ldouble: 8
2014-04-17 05:37:32 +08:00
Function: "hypot":
double: 1
2014-01-02 01:30:01 +08:00
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: "hypot_downward":
double: 1
ldouble: 1
Function: "hypot_towardzero":
double: 1
ldouble: 1
Function: "hypot_upward":
2014-01-02 01:30:01 +08:00
double: 1
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "j0":
double: 2
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "j0_downward":
double: 2
float: 4
2014-04-17 05:37:32 +08:00
ldouble: 4
Function: "j0_towardzero":
double: 2
float: 1
ldouble: 2
Function: "j0_upward":
double: 3
float: 2
ldouble: 5
2012-11-10 01:53:51 +08:00
Function: "j1":
double: 1
float: 2
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: "j1_downward":
double: 3
float: 2
ldouble: 4
Function: "j1_towardzero":
double: 3
float: 2
ldouble: 4
Function: "j1_upward":
double: 3
float: 4
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: "jn":
double: 4
2014-01-02 01:30:01 +08:00
float: 4
ldouble: 7
2012-11-10 01:53:51 +08:00
Function: "jn_downward":
double: 4
float: 5
ldouble: 8
Function: "jn_towardzero":
double: 4
float: 5
ldouble: 8
Function: "jn_upward":
double: 5
float: 4
ldouble: 7
2012-11-10 01:53:51 +08:00
Function: "lgamma":
double: 3
float: 4
ldouble: 5
Function: "lgamma_downward":
double: 4
float: 4
ldouble: 8
2014-04-17 05:37:32 +08:00
Function: "lgamma_towardzero":
double: 4
float: 3
ldouble: 5
2014-04-17 05:37:32 +08:00
Function: "lgamma_upward":
double: 4
float: 5
ldouble: 8
2014-04-17 05:37:32 +08:00
2013-12-12 00:19:34 +08:00
Function: "log":
float: 1
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "log10":
double: 2
2012-11-10 01:53:51 +08:00
float: 2
ldouble: 1
2014-06-03 20:25:34 +08:00
Function: "log10_downward":
double: 2
float: 3
2014-06-03 20:25:34 +08:00
ldouble: 1
Function: "log10_towardzero":
double: 2
float: 2
2014-06-03 20:25:34 +08:00
ldouble: 1
Function: "log10_upward":
double: 2
float: 2
2014-06-03 20:25:34 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "log1p":
double: 1
2013-12-12 00:19:34 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "log1p_downward":
double: 1
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "log1p_towardzero":
double: 2
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "log1p_upward":
double: 2
float: 2
ldouble: 2
Function: "log2":
2014-04-17 05:37:32 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-07-12 01:56:42 +08:00
Function: "log2_downward":
double: 3
float: 3
ldouble: 3
Function: "log2_towardzero":
2014-07-12 01:56:42 +08:00
double: 2
float: 2
ldouble: 1
Function: "log2_upward":
double: 3
float: 3
2014-07-12 01:56:42 +08:00
ldouble: 1
Function: "log_downward":
2014-07-12 01:56:42 +08:00
float: 2
ldouble: 1
Function: "log_towardzero":
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "log_upward":
double: 1
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "pow":
Remove slow paths from pow Remove the slow paths from pow. Like several other double precision math functions, pow is exactly rounded. This is not required from math functions and causes major overheads as it requires multiple fallbacks using higher precision arithmetic if a result is close to 0.5ULP. Ridiculous slowdowns of up to 100000x have been reported when the highest precision path triggers. All GLIBC math tests pass on AArch64 and x64 (with ULP of pow set to 1). The worst case error is ~0.506ULP. A simple test over a few hundred million values shows pow is 10% faster on average. This fixes BZ #13932. [BZ #13932] * sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove. * benchtests/pow-inputs: Update comment for slow path cases. * manual/probes.texi (slowpow_p10): Delete removed probe. (slowpow_p10): Likewise. * math/Makefile: Remove halfulp.c and slowpow.c. * sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1. * sysdeps/generic/math_private.h (__exp1): Remove error argument. (__halfulp): Remove. (__slowpow): Remove. * sysdeps/i386/fpu/halfulp.c: Delete file. * sysdeps/i386/fpu/slowpow.c: Likewise. * sysdeps/ia64/fpu/halfulp.c: Likewise. * sysdeps/ia64/fpu/slowpow.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument, improve comments and add error analysis. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis. (power1): Remove function: (log1): Remove error argument, add error analysis. (my_log2): Remove function. * sysdeps/ieee754/dbl-64/halfulp.c: Delete file. * sysdeps/ieee754/dbl-64/slowpow.c: Likewise. * sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise. * sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise. * sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c. * sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1. * sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c, slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c. * sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define. * sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise. * sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file. * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
2018-02-12 18:42:42 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "pow_downward":
2014-07-12 01:56:42 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "pow_towardzero":
2014-07-12 01:56:42 +08:00
double: 1
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "pow_upward":
2014-07-12 01:56:42 +08:00
double: 1
float: 1
2014-07-12 01:56:42 +08:00
ldouble: 2
2012-11-10 01:53:51 +08:00
2013-12-12 00:19:34 +08:00
Function: "sin":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
2013-12-12 00:19:34 +08:00
Function: "sin_downward":
double: 1
float: 1
ldouble: 3
2013-12-12 00:19:34 +08:00
2014-04-17 05:37:32 +08:00
Function: "sin_towardzero":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "sin_upward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "sincos":
double: 1
2014-04-17 05:37:32 +08:00
float: 1
2012-11-10 01:53:51 +08:00
ldouble: 1
2014-04-17 05:37:32 +08:00
Function: "sincos_downward":
2013-12-12 00:19:34 +08:00
double: 1
float: 1
ldouble: 3
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "sincos_towardzero":
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "sincos_upward":
double: 1
float: 1
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "sinh":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
Function: "sinh_downward":
double: 3
float: 3
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: "sinh_towardzero":
double: 2
float: 2
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: "sinh_upward":
double: 3
float: 3
ldouble: 4
Function: "tan":
2013-12-12 00:19:34 +08:00
float: 1
ldouble: 1
2013-12-12 00:19:34 +08:00
2012-11-10 01:53:51 +08:00
Function: "tan_downward":
2013-12-12 00:19:34 +08:00
double: 1
float: 2
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "tan_towardzero":
2013-12-12 00:19:34 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
ldouble: 1
Function: "tan_upward":
2013-12-12 00:19:34 +08:00
double: 1
2012-11-10 01:53:51 +08:00
float: 1
2014-04-17 05:37:32 +08:00
ldouble: 1
2012-11-10 01:53:51 +08:00
Function: "tanh":
double: 2
float: 2
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "tanh_downward":
double: 3
float: 3
ldouble: 4
2014-04-17 05:37:32 +08:00
Function: "tanh_towardzero":
double: 2
float: 2
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "tanh_upward":
double: 3
2013-05-21 16:01:19 +08:00
float: 3
ldouble: 3
Function: "tgamma":
double: 5
float: 4
ldouble: 4
Function: "tgamma_downward":
double: 5
float: 5
ldouble: 5
Function: "tgamma_towardzero":
double: 5
float: 4
ldouble: 5
Function: "tgamma_upward":
double: 4
float: 4
2013-05-21 16:01:19 +08:00
ldouble: 4
2012-11-10 01:53:51 +08:00
Function: "y0":
double: 2
float: 1
ldouble: 3
2014-04-17 05:37:32 +08:00
Function: "y0_downward":
double: 3
float: 4
2014-04-17 05:37:32 +08:00
ldouble: 4
Function: "y0_towardzero":
double: 3
float: 3
ldouble: 3
Function: "y0_upward":
double: 2
float: 5
2014-04-17 05:37:32 +08:00
ldouble: 3
2012-11-10 01:53:51 +08:00
Function: "y1":
double: 3
float: 2
2013-05-21 16:01:19 +08:00
ldouble: 2
2012-11-10 01:53:51 +08:00
2014-04-17 05:37:32 +08:00
Function: "y1_downward":
double: 3
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 4
Function: "y1_towardzero":
double: 3
float: 2
ldouble: 2
2014-04-17 05:37:32 +08:00
Function: "y1_upward":
double: 5
float: 2
2014-04-17 05:37:32 +08:00
ldouble: 5
2012-11-10 01:53:51 +08:00
Function: "yn":
double: 3
float: 3
2014-04-17 05:37:32 +08:00
ldouble: 5
2012-11-10 01:53:51 +08:00
2014-07-12 01:56:42 +08:00
Function: "yn_downward":
double: 3
float: 4
2014-07-12 01:56:42 +08:00
ldouble: 5
Function: "yn_towardzero":
double: 3
float: 3
ldouble: 5
Function: "yn_upward":
double: 4
float: 5
2014-07-12 01:56:42 +08:00
ldouble: 5
2012-11-10 01:53:51 +08:00
# end of automatic generation