From f1e005022ebd246e1541386cd2f3286f008d2d98 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 19 Dec 2017 18:11:37 +0000 Subject: [PATCH] Revert exp reimplementation (causes test failures). Revert: 2017-12-19 Joseph Myers * sysdeps/x86_64/fpu/libm-test-ulps: Update. 2017-12-19 Patrick McGehearty * sysdeps/ieee754/dbl-64/e_exp.c: Include and . Include "eexp.tbl". (half): New constant. (one): Likewise. (__ieee754_exp): Rewrite. (__slowexp): Remove prototype. * sysdeps/ieee754/dbl-64/eexp.tbl: New file. * sysdeps/ieee754/dbl-64/slowexp.c: Remove file. * sysdeps/i386/fpu/slowexp.c: Likewise. * sysdeps/ia64/fpu/slowexp.c: Likewise. * sysdeps/m68k/m680x0/fpu/slowexp.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-avx.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-fma.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: Likewise. * sysdeps/generic/math_private.h (__slowexp): Remove prototype. * sysdeps/ieee754/dbl-64/e_pow.c: Remove mention of slowexp.c in comment. * sysdeps/powerpc/power4/fpu/Makefile [$(subdir) = math] (CPPFLAGS-slowexp.c): Remove variable. * sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines): Remove slowexp-fma, slowexp-fma4 and slowexp-avx. (CFLAGS-slowexp-fma.c): Remove variable. (CFLAGS-slowexp-fma4.c): Likewise. (CFLAGS-slowexp-avx.c): Likewise. * sysdeps/x86_64/fpu/multiarch/e_exp-avx.c (__slowexp): Do not define as macro. * sysdeps/x86_64/fpu/multiarch/e_exp-fma.c (__slowexp): Likewise. * sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c (__slowexp): Likewise. * math/Makefile (type-double-routines): Remove slowexp. * manual/probes.texi (slowexp_p6): Remove. (slowexp_p32): Likewise. --- ChangeLog | 42 +++ manual/probes.texi | 14 + math/Makefile | 2 +- sysdeps/generic/math_private.h | 1 + sysdeps/i386/fpu/slowexp.c | 1 + sysdeps/ia64/fpu/slowexp.c | 1 + sysdeps/ieee754/dbl-64/e_exp.c | 382 +++++++++----------- sysdeps/ieee754/dbl-64/e_pow.c | 2 +- sysdeps/ieee754/dbl-64/eexp.tbl | 255 ------------- sysdeps/ieee754/dbl-64/slowexp.c | 86 +++++ sysdeps/m68k/m680x0/fpu/slowexp.c | 1 + sysdeps/powerpc/power4/fpu/Makefile | 1 + sysdeps/x86_64/fpu/libm-test-ulps | 18 +- sysdeps/x86_64/fpu/multiarch/Makefile | 9 +- sysdeps/x86_64/fpu/multiarch/e_exp-avx.c | 1 + sysdeps/x86_64/fpu/multiarch/e_exp-fma.c | 1 + sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c | 1 + sysdeps/x86_64/fpu/multiarch/slowexp-avx.c | 9 + sysdeps/x86_64/fpu/multiarch/slowexp-fma.c | 9 + sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c | 9 + 20 files changed, 368 insertions(+), 477 deletions(-) create mode 100644 sysdeps/i386/fpu/slowexp.c create mode 100644 sysdeps/ia64/fpu/slowexp.c delete mode 100644 sysdeps/ieee754/dbl-64/eexp.tbl create mode 100644 sysdeps/ieee754/dbl-64/slowexp.c create mode 100644 sysdeps/m68k/m680x0/fpu/slowexp.c create mode 100644 sysdeps/x86_64/fpu/multiarch/slowexp-avx.c create mode 100644 sysdeps/x86_64/fpu/multiarch/slowexp-fma.c create mode 100644 sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c diff --git a/ChangeLog b/ChangeLog index a28c50c869..4f836e3f67 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,45 @@ +2017-12-19 Joseph Myers + + Revert: + + 2017-12-19 Joseph Myers + + * sysdeps/x86_64/fpu/libm-test-ulps: Update. + + 2017-12-19 Patrick McGehearty + + * sysdeps/ieee754/dbl-64/e_exp.c: Include and + . Include "eexp.tbl". + (half): New constant. + (one): Likewise. + (__ieee754_exp): Rewrite. + (__slowexp): Remove prototype. + * sysdeps/ieee754/dbl-64/eexp.tbl: New file. + * sysdeps/ieee754/dbl-64/slowexp.c: Remove file. + * sysdeps/i386/fpu/slowexp.c: Likewise. + * sysdeps/ia64/fpu/slowexp.c: Likewise. + * sysdeps/m68k/m680x0/fpu/slowexp.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowexp-avx.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowexp-fma.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: Likewise. + * sysdeps/generic/math_private.h (__slowexp): Remove prototype. + * sysdeps/ieee754/dbl-64/e_pow.c: Remove mention of slowexp.c in + comment. + * sysdeps/powerpc/power4/fpu/Makefile [$(subdir) = math] + (CPPFLAGS-slowexp.c): Remove variable. + * sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines): + Remove slowexp-fma, slowexp-fma4 and slowexp-avx. + (CFLAGS-slowexp-fma.c): Remove variable. + (CFLAGS-slowexp-fma4.c): Likewise. + (CFLAGS-slowexp-avx.c): Likewise. + * sysdeps/x86_64/fpu/multiarch/e_exp-avx.c (__slowexp): Do not + define as macro. + * sysdeps/x86_64/fpu/multiarch/e_exp-fma.c (__slowexp): Likewise. + * sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c (__slowexp): Likewise. + * math/Makefile (type-double-routines): Remove slowexp. + * manual/probes.texi (slowexp_p6): Remove. + (slowexp_p32): Likewise. + 2017-12-19 Adhemerval Zanella * lib/glob.c (glob): Use a 'char *', not a 'void *', in pointer diff --git a/manual/probes.texi b/manual/probes.texi index f8ae64be33..8ab67562d7 100644 --- a/manual/probes.texi +++ b/manual/probes.texi @@ -258,6 +258,20 @@ Unless explicitly mentioned otherwise, a precision of 1 implies 24 bits of precision in the mantissa of the multiple precision number. Hence, a precision level of 32 implies 768 bits of precision in the mantissa. +@deftp Probe slowexp_p6 (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{exp} function is called with an +input that results in multiple precision computation with precision +6. Argument @var{$arg1} is the input value and @var{$arg2} is the +computed output. +@end deftp + +@deftp Probe slowexp_p32 (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{exp} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input value and @var{$arg2} is the +computed output. +@end deftp + @deftp Probe slowpow_p10 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) This probe is triggered when the @code{pow} function is called with inputs that result in multiple precision computation with precision diff --git a/math/Makefile b/math/Makefile index c8cd7cff85..cba9ce9405 100644 --- a/math/Makefile +++ b/math/Makefile @@ -114,7 +114,7 @@ type-ldouble-yes := ldouble # double support type-double-suffix := type-double-routines := branred doasin dosincos halfulp mpa mpatan2 \ - mpatan mpexp mplog mpsqrt mptan sincos32 \ + mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \ slowpow sincostab k_rem_pio2 # float support diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 689dc54767..f29898c19c 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -262,6 +262,7 @@ extern double __sin32 (double __x, double __res, double __res1); extern double __cos32 (double __x, double __res, double __res1); extern double __mpsin (double __x, double __dx, bool __range_reduce); extern double __mpcos (double __x, double __dx, bool __range_reduce); +extern double __slowexp (double __x); extern double __slowpow (double __x, double __y, double __z); extern void __docos (double __x, double __dx, double __v[]); diff --git a/sysdeps/i386/fpu/slowexp.c b/sysdeps/i386/fpu/slowexp.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/i386/fpu/slowexp.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ia64/fpu/slowexp.c b/sysdeps/ia64/fpu/slowexp.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/ia64/fpu/slowexp.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index 6a7122f585..6757a14ce1 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -1,4 +1,3 @@ -/* EXP function - Compute double precision exponential */ /* * IBM Accurate Mathematical Library * written by International Business Machines Corp. @@ -24,7 +23,7 @@ /* exp1 */ /* */ /* FILES NEEDED:dla.h endian.h mpa.h mydefs.h uexp.h */ -/* mpa.c mpexp.x */ +/* mpa.c mpexp.x slowexp.c */ /* */ /* An ultimate exp routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of e^x */ @@ -33,238 +32,207 @@ /* */ /***************************************************************************/ -/* IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */ -/* exp(x) - Hybrid algorithm of Peter Tang's Table driven method (for large - arguments) and an accurate table (for small arguments). - Written by K.C. Ng, November 1988. - Revised by Patrick McGehearty, Nov 2017 to use j/64 instead of j/32 - Method (large arguments): - 1. Argument Reduction: given the input x, find r and integer k - and j such that - x = (k+j/64)*(ln2) + r, |r| <= (1/128)*ln2 - - 2. exp(x) = 2^k * (2^(j/64) + 2^(j/64)*expm1(r)) - a. expm1(r) is approximated by a polynomial: - expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6 - Here t1 = 1/2 exactly. - b. 2^(j/64) is represented to twice double precision - as TBL[2j]+TBL[2j+1]. - - Note: If divide were fast enough, we could use another approximation - in 2.a: - expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2) - (for the same t1 and t2 as above) - - Special cases: - exp(INF) is INF, exp(NaN) is NaN; - exp(-INF)= 0; - for finite argument, only exp(0)=1 is exact. - - Accuracy: - According to an error analysis, the error is always less than - an ulp (unit in the last place). The largest errors observed - are less than 0.55 ulp for normal results and less than 0.75 ulp - for subnormal results. - - Misc. info. - For IEEE double - if x > 7.09782712893383973096e+02 then exp(x) overflow - if x < -7.45133219101941108420e+02 then exp(x) underflow. */ - #include -#include -#include -#include #include "endian.h" #include "uexp.h" -#include "uexp.tbl" #include "mydefs.h" #include "MathLib.h" +#include "uexp.tbl" +#include #include #include -extern double __ieee754_exp (double); - -#include "eexp.tbl" - -static const double - half = 0.5, - one = 1.0; +#ifndef SECTION +# define SECTION +#endif +double __slowexp (double); +/* An ultimate exp routine. Given an IEEE double machine number x it computes + the correctly rounded (to nearest) value of e^x. */ double -__ieee754_exp (double x_arg) +SECTION +__ieee754_exp (double x) { - double z, t; + double bexp, t, eps, del, base, y, al, bet, res, rem, cor; + mynumber junk1, junk2, binexp = {{0, 0}}; + int4 i, j, m, n, ex; double retval; - int hx, ix, k, j, m; - int fe_val; - union + { - int i_part[2]; - double x; - } xx; - union - { - int y_part[2]; - double y; - } yy; - xx.x = x_arg; + SET_RESTORE_ROUND (FE_TONEAREST); - ix = xx.i_part[HIGH_HALF]; - hx = ix & ~0x80000000; + junk1.x = x; + m = junk1.i[HIGH_HALF]; + n = m & hugeint; - if (hx < 0x3ff0a2b2) - { /* |x| < 3/2 ln 2 */ - if (hx < 0x3f862e42) - { /* |x| < 1/64 ln 2 */ - if (hx < 0x3ed00000) - { /* |x| < 2^-18 */ - if (hx < 0x3e300000) - { - retval = one + xx.x; - return retval; - } - retval = one + xx.x * (one + half * xx.x); - return retval; - } - /* Use FE_TONEAREST rounding mode for computing yy.y. - Avoid set/reset of rounding mode if in FE_TONEAREST mode. */ - fe_val = get_rounding_mode (); - if (fe_val == FE_TONEAREST) - { - t = xx.x * xx.x; - yy.y = xx.x + (t * (half + xx.x * t2) - + (t * t) * (t3 + xx.x * t4 + t * t5)); - retval = one + yy.y; - } - else - { - libc_fesetround (FE_TONEAREST); - t = xx.x * xx.x; - yy.y = xx.x + (t * (half + xx.x * t2) - + (t * t) * (t3 + xx.x * t4 + t * t5)); - retval = one + yy.y; - libc_fesetround (fe_val); - } - return retval; - } + if (n > smallint && n < bigint) + { + y = x * log2e.x + three51.x; + bexp = y - three51.x; /* multiply the result by 2**bexp */ - /* Find the multiple of 2^-6 nearest x. */ - k = hx >> 20; - j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k); - j = (j - 1) & ~1; - if (ix < 0) - j += 134; - /* Use FE_TONEAREST rounding mode for computing yy.y. - Avoid set/reset of rounding mode if in FE_TONEAREST mode. */ - fe_val = get_rounding_mode (); - if (fe_val == FE_TONEAREST) - { - z = xx.x - TBL2[j]; - t = z * z; - yy.y = z + (t * (half + (z * t2)) - + (t * t) * (t3 + z * t4 + t * t5)); - retval = TBL2[j + 1] + TBL2[j + 1] * yy.y; - } - else - { - libc_fesetround (FE_TONEAREST); - z = xx.x - TBL2[j]; - t = z * z; - yy.y = z + (t * (half + (z * t2)) - + (t * t) * (t3 + z * t4 + t * t5)); - retval = TBL2[j + 1] + TBL2[j + 1] * yy.y; - libc_fesetround (fe_val); - } - return retval; - } + junk1.x = y; - if (hx >= 0x40862e42) - { /* x is large, infinite, or nan. */ - if (hx >= 0x7ff00000) - { - if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0) - return zero; /* exp(-inf) = 0. */ - return (xx.x * xx.x); /* exp(nan/inf) is nan or inf. */ - } - if (xx.x > threshold1) - { /* Set overflow error condition. */ - retval = hhuge * hhuge; - return retval; - } - if (-xx.x > threshold2) - { /* Set underflow error condition. */ - double force_underflow = tiny * tiny; - math_force_eval (force_underflow); - retval = force_underflow; - return retval; - } - } + eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */ + t = x - bexp * ln_two1.x; - /* Use FE_TONEAREST rounding mode for computing yy.y. - Avoid set/reset of rounding mode if already in FE_TONEAREST mode. */ - fe_val = get_rounding_mode (); - if (fe_val == FE_TONEAREST) - { - t = invln2_64 * xx.x; - if (ix < 0) - t -= half; - else - t += half; - k = (int) t; - j = (k & 0x3f) << 1; - m = k >> 6; - z = (xx.x - k * ln2_64hi) - k * ln2_64lo; + y = t + three33.x; + base = y - three33.x; /* t rounded to a multiple of 2**-18 */ + junk2.x = y; + del = (t - base) - eps; /* x = bexp*ln(2) + base + del */ + eps = del + del * del * (p3.x * del + p2.x); - /* z is now in primary range. */ - t = z * z; - yy.y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5)); - yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y); - } - else - { - libc_fesetround (FE_TONEAREST); - t = invln2_64 * xx.x; - if (ix < 0) - t -= half; - else - t += half; - k = (int) t; - j = (k & 0x3f) << 1; - m = k >> 6; - z = (xx.x - k * ln2_64hi) - k * ln2_64lo; + binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20; - /* z is now in primary range. */ - t = z * z; - yy.y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5)); - yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y); - libc_fesetround (fe_val); - } + i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356; + j = (junk2.i[LOW_HALF] & 511) << 1; - if (m < -1021) - { - yy.y_part[HIGH_HALF] += (m + 54) << 20; - retval = twom54 * yy.y; - if (retval < DBL_MIN) - { - double force_underflow = tiny * tiny; - math_force_eval (force_underflow); - } - return retval; - } - yy.y_part[HIGH_HALF] += m << 20; - return yy.y; + al = coar.x[i] * fine.x[j]; + bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j]) + + coar.x[i + 1] * fine.x[j + 1]); + + rem = (bet + bet * eps) + al * eps; + res = al + rem; + cor = (al - res) + rem; + if (res == (res + cor * err_0)) + { + retval = res * binexp.x; + goto ret; + } + else + { + retval = __slowexp (x); + goto ret; + } /*if error is over bound */ + } + + if (n <= smallint) + { + retval = 1.0; + goto ret; + } + + if (n >= badint) + { + if (n > infint) + { + retval = x + x; + goto ret; + } /* x is NaN */ + if (n < infint) + { + if (x > 0) + goto ret_huge; + else + goto ret_tiny; + } + /* x is finite, cause either overflow or underflow */ + if (junk1.i[LOW_HALF] != 0) + { + retval = x + x; + goto ret; + } /* x is NaN */ + retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */ + goto ret; + } + + y = x * log2e.x + three51.x; + bexp = y - three51.x; + junk1.x = y; + eps = bexp * ln_two2.x; + t = x - bexp * ln_two1.x; + y = t + three33.x; + base = y - three33.x; + junk2.x = y; + del = (t - base) - eps; + eps = del + del * del * (p3.x * del + p2.x); + i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356; + j = (junk2.i[LOW_HALF] & 511) << 1; + al = coar.x[i] * fine.x[j]; + bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j]) + + coar.x[i + 1] * fine.x[j + 1]); + rem = (bet + bet * eps) + al * eps; + res = al + rem; + cor = (al - res) + rem; + if (m >> 31) + { + ex = junk1.i[LOW_HALF]; + if (res < 1.0) + { + res += res; + cor += cor; + ex -= 1; + } + if (ex >= -1022) + { + binexp.i[HIGH_HALF] = (1023 + ex) << 20; + if (res == (res + cor * err_0)) + { + retval = res * binexp.x; + goto ret; + } + else + { + retval = __slowexp (x); + goto check_uflow_ret; + } /*if error is over bound */ + } + ex = -(1022 + ex); + binexp.i[HIGH_HALF] = (1023 - ex) << 20; + res *= binexp.x; + cor *= binexp.x; + eps = 1.0000000001 + err_0 * binexp.x; + t = 1.0 + res; + y = ((1.0 - t) + res) + cor; + res = t + y; + cor = (t - res) + y; + if (res == (res + eps * cor)) + { + binexp.i[HIGH_HALF] = 0x00100000; + retval = (res - 1.0) * binexp.x; + goto check_uflow_ret; + } + else + { + retval = __slowexp (x); + goto check_uflow_ret; + } /* if error is over bound */ + check_uflow_ret: + if (retval < DBL_MIN) + { + double force_underflow = tiny * tiny; + math_force_eval (force_underflow); + } + if (retval == 0) + goto ret_tiny; + goto ret; + } + else + { + binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20; + if (res == (res + cor * err_0)) + retval = res * binexp.x * t256.x; + else + retval = __slowexp (x); + if (isinf (retval)) + goto ret_huge; + else + goto ret; + } + } +ret: + return retval; + + ret_huge: + return hhuge * hhuge; + + ret_tiny: + return tiny * tiny; } #ifndef __ieee754_exp strong_alias (__ieee754_exp, __exp_finite) #endif -#ifndef SECTION -# define SECTION -#endif - /* Compute e^(x+xx). The routine also receives bound of error of previous calculation. If after computing exp the error exceeds the allowed bounds, the routine returns a non-positive number. Otherwise it returns the diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index 2eb8dbfd5f..9f6439ee42 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -25,7 +25,7 @@ /* log1 */ /* checkint */ /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h */ -/* halfulp.c mpexp.c mplog.c slowpow.c mpa.c */ +/* halfulp.c mpexp.c mplog.c slowexp.c slowpow.c mpa.c */ /* uexp.c upow.c */ /* root.tbl uexp.tbl upow.tbl */ /* An ultimate power routine. Given two IEEE double machine numbers y,x */ diff --git a/sysdeps/ieee754/dbl-64/eexp.tbl b/sysdeps/ieee754/dbl-64/eexp.tbl deleted file mode 100644 index 5941b9522b..0000000000 --- a/sysdeps/ieee754/dbl-64/eexp.tbl +++ /dev/null @@ -1,255 +0,0 @@ -/* EXP function tables - for use in computing double precision exponential - Copyright (C) 2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - - -/* - TBL[2*j] is 2**(j/64), rounded to nearest. - TBL[2*j+1] is 2**(j/64) - TBL[2*j], rounded to nearest. - These values are used to approximate exp(x) using the formula - given in the comments for e_exp.c. */ - -static const double TBL[128] = { - 0x1.0000000000000p+0, 0x0.0000000000000p+0, - 0x1.02c9a3e778061p+0, -0x1.19083535b085dp-56, - 0x1.059b0d3158574p+0, 0x1.d73e2a475b465p-55, - 0x1.0874518759bc8p+0, 0x1.186be4bb284ffp-57, - 0x1.0b5586cf9890fp+0, 0x1.8a62e4adc610bp-54, - 0x1.0e3ec32d3d1a2p+0, 0x1.03a1727c57b52p-59, - 0x1.11301d0125b51p+0, -0x1.6c51039449b3ap-54, - 0x1.1429aaea92de0p+0, -0x1.32fbf9af1369ep-54, - 0x1.172b83c7d517bp+0, -0x1.19041b9d78a76p-55, - 0x1.1a35beb6fcb75p+0, 0x1.e5b4c7b4968e4p-55, - 0x1.1d4873168b9aap+0, 0x1.e016e00a2643cp-54, - 0x1.2063b88628cd6p+0, 0x1.dc775814a8495p-55, - 0x1.2387a6e756238p+0, 0x1.9b07eb6c70573p-54, - 0x1.26b4565e27cddp+0, 0x1.2bd339940e9d9p-55, - 0x1.29e9df51fdee1p+0, 0x1.612e8afad1255p-55, - 0x1.2d285a6e4030bp+0, 0x1.0024754db41d5p-54, - 0x1.306fe0a31b715p+0, 0x1.6f46ad23182e4p-55, - 0x1.33c08b26416ffp+0, 0x1.32721843659a6p-54, - 0x1.371a7373aa9cbp+0, -0x1.63aeabf42eae2p-54, - 0x1.3a7db34e59ff7p+0, -0x1.5e436d661f5e3p-56, - 0x1.3dea64c123422p+0, 0x1.ada0911f09ebcp-55, - 0x1.4160a21f72e2ap+0, -0x1.ef3691c309278p-58, - 0x1.44e086061892dp+0, 0x1.89b7a04ef80d0p-59, - 0x1.486a2b5c13cd0p+0, 0x1.3c1a3b69062f0p-56, - 0x1.4bfdad5362a27p+0, 0x1.d4397afec42e2p-56, - 0x1.4f9b2769d2ca7p+0, -0x1.4b309d25957e3p-54, - 0x1.5342b569d4f82p+0, -0x1.07abe1db13cadp-55, - 0x1.56f4736b527dap+0, 0x1.9bb2c011d93adp-54, - 0x1.5ab07dd485429p+0, 0x1.6324c054647adp-54, - 0x1.5e76f15ad2148p+0, 0x1.ba6f93080e65ep-54, - 0x1.6247eb03a5585p+0, -0x1.383c17e40b497p-54, - 0x1.6623882552225p+0, -0x1.bb60987591c34p-54, - 0x1.6a09e667f3bcdp+0, -0x1.bdd3413b26456p-54, - 0x1.6dfb23c651a2fp+0, -0x1.bbe3a683c88abp-57, - 0x1.71f75e8ec5f74p+0, -0x1.16e4786887a99p-55, - 0x1.75feb564267c9p+0, -0x1.0245957316dd3p-54, - 0x1.7a11473eb0187p+0, -0x1.41577ee04992fp-55, - 0x1.7e2f336cf4e62p+0, 0x1.05d02ba15797ep-56, - 0x1.82589994cce13p+0, -0x1.d4c1dd41532d8p-54, - 0x1.868d99b4492edp+0, -0x1.fc6f89bd4f6bap-54, - 0x1.8ace5422aa0dbp+0, 0x1.6e9f156864b27p-54, - 0x1.8f1ae99157736p+0, 0x1.5cc13a2e3976cp-55, - 0x1.93737b0cdc5e5p+0, -0x1.75fc781b57ebcp-57, - 0x1.97d829fde4e50p+0, -0x1.d185b7c1b85d1p-54, - 0x1.9c49182a3f090p+0, 0x1.c7c46b071f2bep-56, - 0x1.a0c667b5de565p+0, -0x1.359495d1cd533p-54, - 0x1.a5503b23e255dp+0, -0x1.d2f6edb8d41e1p-54, - 0x1.a9e6b5579fdbfp+0, 0x1.0fac90ef7fd31p-54, - 0x1.ae89f995ad3adp+0, 0x1.7a1cd345dcc81p-54, - 0x1.b33a2b84f15fbp+0, -0x1.2805e3084d708p-57, - 0x1.b7f76f2fb5e47p+0, -0x1.5584f7e54ac3bp-56, - 0x1.bcc1e904bc1d2p+0, 0x1.23dd07a2d9e84p-55, - 0x1.c199bdd85529cp+0, 0x1.11065895048ddp-55, - 0x1.c67f12e57d14bp+0, 0x1.2884dff483cadp-54, - 0x1.cb720dcef9069p+0, 0x1.503cbd1e949dbp-56, - 0x1.d072d4a07897cp+0, -0x1.cbc3743797a9cp-54, - 0x1.d5818dcfba487p+0, 0x1.2ed02d75b3707p-55, - 0x1.da9e603db3285p+0, 0x1.c2300696db532p-54, - 0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54, - 0x1.e502ee78b3ff6p+0, 0x1.39e8980a9cc8fp-55, - 0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54, - 0x1.efa1bee615a27p+0, 0x1.dc7f486a4b6b0p-54, - 0x1.f50765b6e4540p+0, 0x1.9d3e12dd8a18bp-54, - 0x1.fa7c1819e90d8p+0, 0x1.74853f3a5931ep-55}; - -/* For i = 0, ..., 66, - TBL2[2*i] is a double precision number near (i+1)*2^-6, and - TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less - than 2^-60. - - For i = 67, ..., 133, - TBL2[2*i] is a double precision number near -(i+1)*2^-6, and - TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less - than 2^-60. */ - -static const double TBL2[268] = { - 0x1.ffffffffffc82p-7, 0x1.04080ab55de32p+0, - 0x1.fffffffffffdbp-6, 0x1.08205601127ecp+0, - 0x1.80000000000a0p-5, 0x1.0c49236829e91p+0, - 0x1.fffffffffff79p-5, 0x1.1082b577d34e9p+0, - 0x1.3fffffffffffcp-4, 0x1.14cd4fc989cd6p+0, - 0x1.8000000000060p-4, 0x1.192937074e0d4p+0, - 0x1.c000000000061p-4, 0x1.1d96b0eff0e80p+0, - 0x1.fffffffffffd6p-4, 0x1.2216045b6f5cap+0, - 0x1.1ffffffffff58p-3, 0x1.26a7793f6014cp+0, - 0x1.3ffffffffff75p-3, 0x1.2b4b58b372c65p+0, - 0x1.5ffffffffff00p-3, 0x1.3001ecf601ad1p+0, - 0x1.8000000000020p-3, 0x1.34cb8170b583ap+0, - 0x1.9ffffffffa629p-3, 0x1.39a862bd3b344p+0, - 0x1.c00000000000fp-3, 0x1.3e98deaa11dcep+0, - 0x1.e00000000007fp-3, 0x1.439d443f5f16dp+0, - 0x1.0000000000072p-2, 0x1.48b5e3c3e81abp+0, - 0x1.0fffffffffecap-2, 0x1.4de30ec211dfbp+0, - 0x1.1ffffffffff8fp-2, 0x1.5325180cfacd2p+0, - 0x1.300000000003bp-2, 0x1.587c53c5a7b04p+0, - 0x1.4000000000034p-2, 0x1.5de9176046007p+0, - 0x1.4ffffffffff89p-2, 0x1.636bb9a98322fp+0, - 0x1.5ffffffffffe7p-2, 0x1.690492cbf942ap+0, - 0x1.6ffffffffff78p-2, 0x1.6eb3fc55b1e45p+0, - 0x1.7ffffffffff65p-2, 0x1.747a513dbef32p+0, - 0x1.8ffffffffffd5p-2, 0x1.7a57ede9ea22ep+0, - 0x1.9ffffffffff6ep-2, 0x1.804d30347b50fp+0, - 0x1.affffffffffc3p-2, 0x1.865a7772164aep+0, - 0x1.c000000000053p-2, 0x1.8c802477b0030p+0, - 0x1.d00000000004dp-2, 0x1.92be99a09bf1ep+0, - 0x1.e000000000096p-2, 0x1.99163ad4b1e08p+0, - 0x1.efffffffffefap-2, 0x1.9f876d8e8c4fcp+0, - 0x1.fffffffffffd0p-2, 0x1.a61298e1e0688p+0, - 0x1.0800000000002p-1, 0x1.acb82581eee56p+0, - 0x1.100000000001fp-1, 0x1.b3787dc80f979p+0, - 0x1.17ffffffffff8p-1, 0x1.ba540dba56e4fp+0, - 0x1.1fffffffffffap-1, 0x1.c14b431256441p+0, - 0x1.27fffffffffc4p-1, 0x1.c85e8d43f7c9bp+0, - 0x1.2fffffffffffdp-1, 0x1.cf8e5d84758a6p+0, - 0x1.380000000001fp-1, 0x1.d6db26d16cd84p+0, - 0x1.3ffffffffffd8p-1, 0x1.de455df80e39bp+0, - 0x1.4800000000052p-1, 0x1.e5cd799c6a59cp+0, - 0x1.4ffffffffffc8p-1, 0x1.ed73f240dc10cp+0, - 0x1.5800000000013p-1, 0x1.f539424d90f71p+0, - 0x1.5ffffffffffbcp-1, 0x1.fd1de6182f885p+0, - 0x1.680000000002dp-1, 0x1.02912df5ce741p+1, - 0x1.7000000000040p-1, 0x1.06a39207f0a2ap+1, - 0x1.780000000004fp-1, 0x1.0ac660691652ap+1, - 0x1.7ffffffffff6fp-1, 0x1.0ef9db467dcabp+1, - 0x1.87fffffffffe5p-1, 0x1.133e45d82e943p+1, - 0x1.9000000000035p-1, 0x1.1793e4652cc6dp+1, - 0x1.97fffffffffb3p-1, 0x1.1bfafc47bda48p+1, - 0x1.a000000000000p-1, 0x1.2073d3f1bd518p+1, - 0x1.a80000000004ap-1, 0x1.24feb2f105ce2p+1, - 0x1.affffffffffedp-1, 0x1.299be1f3e7f11p+1, - 0x1.b7ffffffffffbp-1, 0x1.2e4baacdb6611p+1, - 0x1.c00000000001dp-1, 0x1.330e587b62b39p+1, - 0x1.c800000000079p-1, 0x1.37e437282d538p+1, - 0x1.cffffffffff51p-1, 0x1.3ccd943268248p+1, - 0x1.d7fffffffff74p-1, 0x1.41cabe304cadcp+1, - 0x1.e000000000011p-1, 0x1.46dc04f4e5343p+1, - 0x1.e80000000001ep-1, 0x1.4c01b9950a124p+1, - 0x1.effffffffff9ep-1, 0x1.513c2e6c73196p+1, - 0x1.f7fffffffffedp-1, 0x1.568bb722dd586p+1, - 0x1.0000000000034p+0, 0x1.5bf0a8b1457b0p+1, - 0x1.03fffffffffe2p+0, 0x1.616b5967376dfp+1, - 0x1.07fffffffff4bp+0, 0x1.66fc20f0337a9p+1, - 0x1.0bffffffffffdp+0, 0x1.6ca35859290f5p+1, - -0x1.fffffffffffe4p-7, 0x1.f80feabfeefa5p-1, - -0x1.ffffffffffb0bp-6, 0x1.f03f56a88b5fep-1, - -0x1.7ffffffffffa7p-5, 0x1.e88dc6afecfc5p-1, - -0x1.ffffffffffea8p-5, 0x1.e0fabfbc702b8p-1, - -0x1.3ffffffffffb3p-4, 0x1.d985c89d041acp-1, - -0x1.7ffffffffffe3p-4, 0x1.d22e6a0197c06p-1, - -0x1.bffffffffff9ap-4, 0x1.caf42e73a4c89p-1, - -0x1.fffffffffff98p-4, 0x1.c3d6a24ed822dp-1, - -0x1.1ffffffffffe9p-3, 0x1.bcd553b9d7b67p-1, - -0x1.3ffffffffffe0p-3, 0x1.b5efd29f24c2dp-1, - -0x1.5fffffffff553p-3, 0x1.af25b0a61a9f4p-1, - -0x1.7ffffffffff8bp-3, 0x1.a876812c08794p-1, - -0x1.9fffffffffe51p-3, 0x1.a1e1d93d68828p-1, - -0x1.bffffffffff6ep-3, 0x1.9b674f8f2f3f5p-1, - -0x1.dffffffffff7fp-3, 0x1.95067c7837a0cp-1, - -0x1.fffffffffff7ap-3, 0x1.8ebef9eac8225p-1, - -0x1.0fffffffffffep-2, 0x1.8890636e31f55p-1, - -0x1.1ffffffffff41p-2, 0x1.827a56188975ep-1, - -0x1.2ffffffffffbap-2, 0x1.7c7c708877656p-1, - -0x1.3fffffffffff8p-2, 0x1.769652df22f81p-1, - -0x1.4ffffffffff90p-2, 0x1.70c79eba33c2fp-1, - -0x1.5ffffffffffdbp-2, 0x1.6b0ff72deb8aap-1, - -0x1.6ffffffffff9ap-2, 0x1.656f00bf5798ep-1, - -0x1.7ffffffffff9fp-2, 0x1.5fe4615e98eb0p-1, - -0x1.8ffffffffffeep-2, 0x1.5a6fc061433cep-1, - -0x1.9fffffffffc4ap-2, 0x1.5510c67cd26cdp-1, - -0x1.affffffffff30p-2, 0x1.4fc71dc13566bp-1, - -0x1.bfffffffffff0p-2, 0x1.4a9271936fd0ep-1, - -0x1.cfffffffffff3p-2, 0x1.45726ea84fb8cp-1, - -0x1.dfffffffffff3p-2, 0x1.4066c2ff3912bp-1, - -0x1.effffffffff80p-2, 0x1.3b6f1ddd05ab9p-1, - -0x1.fffffffffffdfp-2, 0x1.368b2fc6f9614p-1, - -0x1.0800000000000p-1, 0x1.31baaa7dca843p-1, - -0x1.0ffffffffffa4p-1, 0x1.2cfd40f8bdce4p-1, - -0x1.17fffffffff0ap-1, 0x1.2852a760d5ce7p-1, - -0x1.2000000000000p-1, 0x1.23ba930c1568bp-1, - -0x1.27fffffffffbbp-1, 0x1.1f34ba78d568dp-1, - -0x1.2fffffffffe32p-1, 0x1.1ac0d5492c1dbp-1, - -0x1.37ffffffff042p-1, 0x1.165e9c3e67ef2p-1, - -0x1.3ffffffffff77p-1, 0x1.120dc93499431p-1, - -0x1.47fffffffff6bp-1, 0x1.0dce171e34ecep-1, - -0x1.4fffffffffff1p-1, 0x1.099f41ffbe588p-1, - -0x1.57ffffffffe02p-1, 0x1.058106eb8a7aep-1, - -0x1.5ffffffffffe5p-1, 0x1.017323fd9002ep-1, - -0x1.67fffffffffb0p-1, 0x1.faeab0ae9386cp-2, - -0x1.6ffffffffffb2p-1, 0x1.f30ec837503d7p-2, - -0x1.77fffffffff7fp-1, 0x1.eb5210d627133p-2, - -0x1.7ffffffffffe8p-1, 0x1.e3b40ebefcd95p-2, - -0x1.87fffffffffc8p-1, 0x1.dc3448110dae2p-2, - -0x1.8fffffffffb30p-1, 0x1.d4d244cf4ef06p-2, - -0x1.97fffffffffefp-1, 0x1.cd8d8ed8ee395p-2, - -0x1.9ffffffffffa7p-1, 0x1.c665b1e1f1e5cp-2, - -0x1.a7fffffffffdcp-1, 0x1.bf5a3b6bf18d6p-2, - -0x1.affffffffff95p-1, 0x1.b86ababeef93bp-2, - -0x1.b7fffffffffcbp-1, 0x1.b196c0e24d256p-2, - -0x1.bffffffffff32p-1, 0x1.aadde095dadf7p-2, - -0x1.c7fffffffff6ap-1, 0x1.a43fae4b047c9p-2, - -0x1.cffffffffffb6p-1, 0x1.9dbbc01e182a4p-2, - -0x1.d7fffffffffcap-1, 0x1.9751adcfa81ecp-2, - -0x1.dffffffffffcdp-1, 0x1.910110be0699ep-2, - -0x1.e7ffffffffffbp-1, 0x1.8ac983dedbc69p-2, - -0x1.effffffffff88p-1, 0x1.84aaa3b8d51a9p-2, - -0x1.f7fffffffffbbp-1, 0x1.7ea40e5d6d92ep-2, - -0x1.fffffffffffdbp-1, 0x1.78b56362cef53p-2, - -0x1.03fffffffff00p+0, 0x1.72de43ddcb1f2p-2, - -0x1.07ffffffffe6fp+0, 0x1.6d1e525bed085p-2, - -0x1.0bfffffffffd6p+0, 0x1.677532dda1c57p-2}; - -static const double -/* invln2_64 = 64/ln2 - used to scale x to primary range. */ - invln2_64 = 0x1.71547652b82fep+6, -/* ln2_64hi = high 32 bits of log(2.)/64. */ - ln2_64hi = 0x1.62e42fee00000p-7, -/* ln2_64lo = remainder bits for log(2.)/64 - ln2_64hi. */ - ln2_64lo = 0x1.a39ef35793c76p-39, -/* t2-t5 terms used for polynomial computation. */ - t2 = 0x1.5555555555555p-3, /* 1.6666666666666665741e-1 */ - t3 = 0x1.5555555555555p-5, /* 4.1666666666666664354e-2 */ - t4 = 0x1.1111111111111p-7, /* 8.3333333333333332177e-3 */ - t5 = 0x1.6c16c16c16c17p-10, /* 1.3888888888888719040e-3 */ -/* Maximum value for x to not overflow. */ - threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */ -/* Maximum value for -x to not underflow to zero in FE_TONEAREST mode. */ - threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */ -/* Scaling factor used when result near zero. */ - twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */ diff --git a/sysdeps/ieee754/dbl-64/slowexp.c b/sysdeps/ieee754/dbl-64/slowexp.c new file mode 100644 index 0000000000..e8fa2e263b --- /dev/null +++ b/sysdeps/ieee754/dbl-64/slowexp.c @@ -0,0 +1,86 @@ +/* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. + * Copyright (C) 2001-2017 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see . + */ +/**************************************************************************/ +/* MODULE_NAME:slowexp.c */ +/* */ +/* FUNCTION:slowexp */ +/* */ +/* FILES NEEDED:mpa.h */ +/* mpa.c mpexp.c */ +/* */ +/*Converting from double precision to Multi-precision and calculating */ +/* e^x */ +/**************************************************************************/ +#include + +#include + +#ifndef USE_LONG_DOUBLE_FOR_MP +# include "mpa.h" +void __mpexp (mp_no *x, mp_no *y, int p); +#endif + +#ifndef SECTION +# define SECTION +#endif + +/*Converting from double precision to Multi-precision and calculating e^x */ +double +SECTION +__slowexp (double x) +{ +#ifndef USE_LONG_DOUBLE_FOR_MP + double w, z, res, eps = 3.0e-26; + int p; + mp_no mpx, mpy, mpz, mpw, mpeps, mpcor; + + /* Use the multiple precision __MPEXP function to compute the exponential + First at 144 bits and if it is not accurate enough, at 768 bits. */ + p = 6; + __dbl_mp (x, &mpx, p); + __mpexp (&mpx, &mpy, p); + __dbl_mp (eps, &mpeps, p); + __mul (&mpeps, &mpy, &mpcor, p); + __add (&mpy, &mpcor, &mpw, p); + __sub (&mpy, &mpcor, &mpz, p); + __mp_dbl (&mpw, &w, p); + __mp_dbl (&mpz, &z, p); + if (w == z) + { + /* Track how often we get to the slow exp code plus + its input/output values. */ + LIBC_PROBE (slowexp_p6, 2, &x, &w); + return w; + } + else + { + p = 32; + __dbl_mp (x, &mpx, p); + __mpexp (&mpx, &mpy, p); + __mp_dbl (&mpy, &res, p); + + /* Track how often we get to the uber-slow exp code plus + its input/output values. */ + LIBC_PROBE (slowexp_p32, 2, &x, &res); + return res; + } +#else + return (double) __ieee754_expl((long double)x); +#endif +} diff --git a/sysdeps/m68k/m680x0/fpu/slowexp.c b/sysdeps/m68k/m680x0/fpu/slowexp.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/m68k/m680x0/fpu/slowexp.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/powerpc/power4/fpu/Makefile b/sysdeps/powerpc/power4/fpu/Makefile index ded9976b9d..e17d32f30e 100644 --- a/sysdeps/powerpc/power4/fpu/Makefile +++ b/sysdeps/powerpc/power4/fpu/Makefile @@ -3,4 +3,5 @@ ifeq ($(subdir),math) CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1 endif diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 9a3ce73361..85552bd695 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1094,10 +1094,10 @@ ildouble: 2 ldouble: 2 Function: Imaginary part of "cexp_upward": -double: 3 +double: 1 float: 2 float128: 3 -idouble: 3 +idouble: 1 ifloat: 2 ifloat128: 3 ildouble: 3 @@ -1902,9 +1902,7 @@ ildouble: 5 ldouble: 5 Function: "exp": -double: 1 float128: 1 -idouble: 1 ifloat128: 1 ildouble: 1 ldouble: 1 @@ -2756,30 +2754,30 @@ ildouble: 5 ldouble: 5 Function: "tgamma_downward": -double: 6 +double: 5 float: 5 float128: 5 -idouble: 6 +idouble: 5 ifloat: 5 ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_towardzero": -double: 7 +double: 5 float: 5 float128: 5 -idouble: 7 +idouble: 5 ifloat: 5 ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_upward": -double: 6 +double: 5 float: 5 float128: 4 -idouble: 6 +idouble: 5 ifloat: 5 ifloat128: 4 ildouble: 5 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index bec45e073c..0825340c0c 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -10,7 +10,7 @@ libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ - mplog-fma mpa-fma slowpow-fma \ + mplog-fma mpa-fma slowexp-fma slowpow-fma \ sincos32-fma doasin-fma dosincos-fma \ halfulp-fma mpexp-fma \ mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma @@ -32,6 +32,7 @@ CFLAGS-mpsqrt-fma.c = -mfma -mavx2 CFLAGS-mptan-fma.c = -mfma -mavx2 CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-sincos32-fma.c = -mfma -mavx2 +CFLAGS-slowexp-fma.c = -mfma -mavx2 CFLAGS-slowpow-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 @@ -51,7 +52,7 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ - mplog-fma4 mpa-fma4 slowpow-fma4 \ + mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ sincos32-fma4 doasin-fma4 dosincos-fma4 \ halfulp-fma4 mpexp-fma4 \ mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 @@ -73,13 +74,14 @@ CFLAGS-mpsqrt-fma4.c = -mfma4 CFLAGS-mptan-fma4.c = -mfma4 CFLAGS-s_atan-fma4.c = -mfma4 CFLAGS-sincos32-fma4.c = -mfma4 +CFLAGS-slowexp-fma4.c = -mfma4 CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ e_atan2-avx s_sin-avx s_tan-avx \ - mplog-avx mpa-avx \ + mplog-avx mpa-avx slowexp-avx \ mpexp-avx CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX @@ -90,6 +92,7 @@ CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX +CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX endif diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c index afd917442a..ee5dd6d2dc 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c @@ -1,5 +1,6 @@ #define __ieee754_exp __ieee754_exp_avx #define __exp1 __exp1_avx +#define __slowexp __slowexp_avx #define SECTION __attribute__ ((section (".text.avx"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c index 765b1b9dd3..6e0fdb7941 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c @@ -1,5 +1,6 @@ #define __ieee754_exp __ieee754_exp_fma #define __exp1 __exp1_fma +#define __slowexp __slowexp_fma #define SECTION __attribute__ ((section (".text.fma"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c index 9ac7acad28..ae6eb67603 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c @@ -1,5 +1,6 @@ #define __ieee754_exp __ieee754_exp_fma4 #define __exp1 __exp1_fma4 +#define __slowexp __slowexp_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c new file mode 100644 index 0000000000..d01c6d71a4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c @@ -0,0 +1,9 @@ +#define __slowexp __slowexp_avx +#define __add __add_avx +#define __dbl_mp __dbl_mp_avx +#define __mpexp __mpexp_avx +#define __mul __mul_avx +#define __sub __sub_avx +#define SECTION __attribute__ ((section (".text.avx"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma.c new file mode 100644 index 0000000000..6fffca1a93 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma.c @@ -0,0 +1,9 @@ +#define __slowexp __slowexp_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __mpexp __mpexp_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c new file mode 100644 index 0000000000..3bcde84233 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c @@ -0,0 +1,9 @@ +#define __slowexp __slowexp_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mpexp __mpexp_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 +#define SECTION __attribute__ ((section (".text.fma4"))) + +#include