Trigonometric optimizations for POWER

This commit is contained in:
Adhemerval Zanella 2011-09-07 22:10:26 -04:00 committed by Ulrich Drepper
parent 24f579d86e
commit 77a2a8b4a1
10 changed files with 1083 additions and 0 deletions

View File

@ -1,3 +1,16 @@
2011-08-01 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/e_hypot.c: New file.
* sysdeps/powerpc/fpu/e_hypotf.c: New file.
* sysdeps/powerpc/fpu/e_rem_pio2f.c: New file.
* sysdeps/powerpc/fpu/k_rem_pio2f.c: New file.
* sysdeps/powerpc/fpu/k_cosf.c: New file.
* sysdeps/powerpc/fpu/k_sinf.c: New file.
* sysdeps/powerpc/fpu/s_cosf.c: New file.
* sysdeps/powerpc/fpu/s_sinf.c: New file.
* sysdeps/powerpc/fpu/s_scalbnf.c: New file.
* sysdeps/powerpc/fpu/s_float_bitwise.h: New file.
2011-08-15 Alan Modra <amodra@gmail.com>
[BZ #13092]

View File

@ -0,0 +1,117 @@
/* Pythagorean addition using doubles
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "math.h"
#include "math_private.h"
/* Thresholds and scale factors used by __ieee754_hypot.  Each literal is
   (to the precision written) the power of two named by its identifier.  */
static const double two60 = 1.152921504606847e+18;	/* 2^60 */
static const double two500 = 3.2733906078961419e+150;	/* 2^500 */
static const double two600 = 4.149515568880993e+180;	/* 2^600 */
static const double two1022 = 4.49423283715579e+307;	/* 2^1022 */
static const double twoM500 = 3.054936363499605e-151;	/* 2^-500 */
/* 2^-600, the reciprocal of two600.  It is used to scale down operands
   larger than two500 so that their squares cannot overflow; a larger
   factor (the previous value was roughly 2^-423) lets x * x overflow for
   big finite inputs.  */
static const double twoM600 = 2.4099198651028841e-181;
/* Approximately the smallest positive normal double (2^-1022).  */
static const double pdnum = 2.225073858507201e-308;
/* __ieee754_hypot(x,y)
*
* This is an FP-only version without any FP->INT conversion.
* It is similar to the default C version: it makes the appropriate
* overflow and underflow checks, and scales the operands when that
* is needed.
*/
/* TEST_INF_NAN (x, y) returns from the *enclosing function* when either
   operand is not finite: INFINITY if either operand is an infinity,
   otherwise NAN if either operand is a NaN.  It falls through when both
   operands are finite.  Both variants expect non-negative operands (the
   caller applies fabs first).  */
#ifdef _ARCH_PWR7
/* On POWER7 the isinf and isnan optimizations are fast.  */
# define TEST_INF_NAN(x, y)						\
  do {									\
    if (isinf (x) || isinf (y))						\
      return INFINITY;							\
    if (isnan (x) || isnan (y))						\
      return NAN;							\
  } while (0)
#else
/* For POWER6 and below, isinf/isnan trigger LHS (load-hit-store) stalls
   and PLT calls, both of which are costly (especially on POWER6), so the
   classification is done on the integer image of the operands.  */

/* Store the most significant 32-bit word of d1 in i1 and of d2 in i2.  */
# define GET_TW0_HIGH_WORD(d1,d2,i1,i2)					\
  do {									\
    ieee_double_shape_type gh_u1;					\
    ieee_double_shape_type gh_u2;					\
    gh_u1.value = (d1);							\
    gh_u2.value = (d2);							\
    (i1) = gh_u1.parts.msw;						\
    (i2) = gh_u2.parts.msw;						\
  } while (0)

/* Store the least significant 32-bit word of d1 in i1 and of d2 in i2.  */
# define GET_TWO_LOW_WORD(d1,d2,i1,i2)					\
  do {									\
    ieee_double_shape_type gl_u1;					\
    ieee_double_shape_type gl_u2;					\
    gl_u1.value = (d1);							\
    gl_u2.value = (d2);							\
    (i1) = gl_u1.parts.lsw;						\
    (i2) = gl_u2.parts.lsw;						\
  } while (0)

/* A high word >= 0x7ff00000 means "infinity or NaN".  Only the pattern
   0x7ff00000:00000000 is an infinity, so the low word is examined as well
   (only on this rare path) to avoid reporting a NaN whose payload lives
   entirely in the low word as an infinity.  */
# define TEST_INF_NAN(x, y)						\
  do {									\
    int32_t hx, hy;							\
    GET_TW0_HIGH_WORD (x, y, hx, hy);					\
    if (hx >= 0x7ff00000 || hy >= 0x7ff00000)				\
      {									\
	uint32_t lx, ly;						\
	GET_TWO_LOW_WORD (x, y, lx, ly);				\
	if ((hx == 0x7ff00000 && lx == 0)				\
	    || (hy == 0x7ff00000 && ly == 0))				\
	  return INFINITY;						\
	return NAN;							\
      }									\
  } while (0)
#endif
/* Compute sqrt (x*x + y*y) in double precision.

   Infinities and NaNs are dispatched by TEST_INF_NAN, which returns
   directly from this function.  For finite operands the larger magnitude
   is called BIG and the smaller SMALL; when SMALL is zero or negligible
   with respect to BIG (ratio above two60) the result is simply their sum.
   Otherwise both operands are multiplied by a scale factor chosen to keep
   the squares inside the representable range, and the square root is
   divided by the same factor.  */
double
__ieee754_hypot (double x, double y)
{
  double big, small, scale;

  x = fabs (x);
  y = fabs (y);

  TEST_INF_NAN (x, y);

  /* Order the magnitudes.  */
  if (y > x)
    {
      big = y;
      small = x;
    }
  else
    {
      big = x;
      small = y;
    }

  /* The smaller operand does not contribute to the result.  */
  if (small == 0.0 || (big / small) > two60)
    return big + small;

  if (big > two500)
    /* Large operands: scale down to avoid overflow in the squares.  */
    scale = twoM600;
  else if (small < twoM500)
    /* Small operands: scale up to avoid underflow in the squares; the
       tiniest inputs (at or below pdnum) get the larger factor.  */
    scale = (small <= pdnum) ? two1022 : two600;
  else
    /* Mid-range operands need no scaling at all.  */
    return sqrt (big * big + small * small);

  big *= scale;
  small *= scale;
  return sqrt (big * big + small * small) / scale;
}

View File

@ -0,0 +1,118 @@
/* Pythagorean addition using floats
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "math.h"
#include "math_private.h"
/* Thresholds and scale factors used by __ieee754_hypotf.  The scale
   factors must be exact powers of two, otherwise multiplying and dividing
   by them introduces extra rounding errors into the result.  */
static const float two30 = 1.0737418e09;	/* 2^30 */
static const float two50 = 1.1258999e15;	/* rounds to exactly 2^50 */
static const float two60 = 1.1529215e18;	/* rounds to exactly 2^60 */
static const float two126 = 8.5070592e+37;	/* 2^126 */
static const float twoM50 = 8.8817842e-16;	/* 2^-50 */
/* Despite its name this is 2^-67, not 2^-60: it scales operands of up to
   about 2^128 down far enough that their squares still fit in a float,
   which 2^-60 would not.  The literal rounds to exactly 2^-67.  */
static const float twoM60 = 6.7762636e-21;
/* Just below the smallest positive normal float.  */
static const float pdnum = 1.1754939e-38;
/* __ieee754_hypotf(x,y)
*
* This is an FP-only version without any FP->INT conversion.
* It is similar to the default C version: it makes the appropriate
* overflow and underflow checks, and scales the operands when that
* is needed.
*/
/* TEST_INF_NAN (x, y) returns from the *enclosing function* when either
   operand is not finite: INFINITY if either operand is an infinity,
   otherwise NAN if either operand is a NaN.  It falls through when both
   operands are finite.  Both variants expect non-negative operands (the
   caller applies fabsf first).  */
#ifdef _ARCH_PWR7
/* On POWER7 the isinf and isnan optimizations are fast.  */
# define TEST_INF_NAN(x, y)						\
  do {									\
    if (isinff (x) || isinff (y))					\
      return INFINITY;							\
    if (isnanf (x) || isnanf (y))					\
      return NAN;							\
  } while (0)
#else
/* For POWER6 and below, isinf/isnan trigger LHS (load-hit-store) stalls
   and PLT calls, both of which are costly (especially on POWER6), so the
   classification is done on the integer image of the operands.  */

/* Store the 32-bit image of f1 in i1 and of f2 in i2.  */
# define GET_TWO_FLOAT_WORD(f1,f2,i1,i2)				\
  do {									\
    ieee_float_shape_type gf_u1;					\
    ieee_float_shape_type gf_u2;					\
    gf_u1.value = (f1);							\
    gf_u2.value = (f2);							\
    (i1) = gf_u1.word;							\
    (i2) = gf_u2.word;							\
  } while (0)

/* After ordering the two words so that hx holds the larger one, a value
   of hx >= 0x7f800000 means "infinity or NaN"; exactly 0x7f800000 is an
   infinity.  */
# define TEST_INF_NAN(x, y)						\
  do {									\
    int32_t hx, hy;							\
    GET_TWO_FLOAT_WORD (x, y, hx, hy);					\
    if (hy > hx)							\
      {									\
	uint32_t ht = hx; hx = hy; hy = ht;				\
      }									\
    if (hx >= 0x7f800000)						\
      {									\
	if (hx == 0x7f800000 || hy == 0x7f800000)			\
	  return INFINITY;						\
	return NAN;							\
      }									\
  } while (0)
#endif
/* Compute sqrtf (x*x + y*y) in single precision.

   Infinities and NaNs are dispatched by TEST_INF_NAN, which returns
   directly from this function.  For finite operands the larger magnitude
   is called HI and the smaller LO; when LO is zero or negligible with
   respect to HI (ratio above two30) the result is simply their sum.
   Otherwise both operands are multiplied by a scale factor chosen to keep
   the squares inside the representable range, and the square root is
   divided by the same factor.  */
float
__ieee754_hypotf (float x, float y)
{
  float hi, lo, scale;

  x = fabsf (x);
  y = fabsf (y);

  TEST_INF_NAN (x, y);

  /* Order the magnitudes.  */
  if (y > x)
    {
      hi = y;
      lo = x;
    }
  else
    {
      hi = x;
      lo = y;
    }

  /* The smaller operand does not contribute to the result.  */
  if (lo == 0.0 || (hi / lo) > two30)
    return hi + lo;

  if (hi > two50)
    /* Large operands: scale down to avoid overflow in the squares.  */
    scale = twoM60;
  else if (lo < twoM50)
    /* Small operands: scale up to avoid underflow in the squares; the
       tiniest inputs (at or below pdnum) get the larger factor.  */
    scale = (lo <= pdnum) ? two126 : two60;
  else
    /* Mid-range operands need no scaling at all.  */
    return sqrtf (hi * hi + lo * lo);

  hi *= scale;
  lo *= scale;
  return sqrtf (hi * hi + lo * lo) / scale;
}

View File

@ -0,0 +1,190 @@
/* e_rem_pio2f.c -- float version of e_rem_pio2.c
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <math.h>
#include "math_private.h"
#include "s_float_bitwise.h"
/* defined in sysdeps/powerpc/fpu/k_rem_pio2f.c */
/* Large-argument reduction kernel: takes nx chunks of the scaled argument
   in x, a float e0 carrying the scale in its exponent, and stores the
   reduced value as y[0] + y[1].  */
int __fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx);
/* __ieee754_rem_pio2f(x,y)
*
* return the remainder of x rem pi/2 in y[0]+y[1]
*/
/* npio2_hw[n - 1] is approximately n * pi/2 for n = 1..32.  The entries
   are compared against |x| after its low 8 bits have been masked off
   (see the __float_and_test24 call in __ieee754_rem_pio2f), so they are
   presumably stored with those bits already cleared.  */
static const float npio2_hw[] = {
1.57077026e+00, 3.14154053e+00, 4.71228027e+00, 6.28308105e+00,
7.85388184e+00, 9.42456055e+00, 1.09953613e+01, 1.25661621e+01,
1.41369629e+01, 1.57077637e+01, 1.72783203e+01, 1.88491211e+01,
2.04199219e+01, 2.19907227e+01, 2.35615234e+01, 2.51323242e+01,
2.67031250e+01, 2.82739258e+01, 2.98447266e+01, 3.14155273e+01,
3.29863281e+01, 3.45566406e+01, 3.61279297e+01, 3.76982422e+01,
3.92695312e+01, 4.08398438e+01, 4.24111328e+01, 4.39814453e+01,
4.55527344e+01, 4.71230469e+01, 4.86943359e+01, 5.02646484e+01
};
static const float zero = 0.0000000000e+00;
/* 2^8: radix used to split a large argument into 8-bit chunks.  */
static const float two8 = 2.5600000000e+02;
static const float half = 5.0000000000e-01;
/* Approximately 2/pi.  */
static const float invpio2 = 6.3661980629e-01;
/* pi/2 split into successively smaller pieces: pio2_N is a leading part
   and pio2_Nt the tail that remains after it, so that
   pio2_1 + pio2_1t ~ pi/2, pio2_2 + pio2_2t ~ pio2_1t and
   pio2_3 + pio2_3t ~ pio2_2t.  */
static const float pio2_1 = 1.5707855225e+00;
static const float pio2_1t = 1.0804334124e-05;
static const float pio2_2 = 1.0804273188e-05;
static const float pio2_2t = 6.0770999344e-11;
static const float pio2_3 = 6.0770943833e-11;
static const float pio2_3t = 6.1232342629e-17;
/* Range boundaries used by __ieee754_rem_pio2f.  */
/* Approximately pi/4: at or below this no reduction is done.  */
static const float pio4 = 7.8539801e-01;
/* Approximately 3*pi/4: below this a single +-pi/2 is removed.  */
static const float pio3_4 = 2.3561945e+00;
/* Approximately pi/2; compared against |x| with its low 4 bits masked.  */
static const float pio2_24b = 1.5707951e+00;
/* Approximately 2^7 * pi/2: upper bound of the medium-size path.  */
static const float pio2_2e7 = 2.0106054e+02;
/* Reduce x modulo pi/2.

   The remainder is stored as the two-float sum y[0] + y[1].  The return
   value is the multiple of pi/2 that was removed, negated when x < 0; on
   the large-argument path it is the value produced by
   __fp_kernel_rem_pio2f, likewise negated when x < 0.  NaN and infinite
   inputs store x - x in both y[0] and y[1] and return 0.

   The order of the floating-point operations below is significant: the
   head/tail subtractions are what recover the bits lost to rounding.  */
int32_t
__ieee754_rem_pio2f (float x, float *y)
{
float ax, z, n, r, w, t, e0;
float tx[3];
int32_t i, nx;
ax = __builtin_fabsf (x);
/* |x| <= ~pi/4: already reduced.  */
if (ax <= pio4)
{
y[0] = x;
y[1] = 0;
return 0;
}
/* |x| < ~3pi/4: remove exactly one pi/2 (n = +1 or -1).  */
if (ax < pio3_4)
{
if (x > 0)
{
z = x - pio2_1;
/* When |x| (low 4 bits masked) is not pio2_24b, subtracting pi/2 as
   pio2_1 + pio2_1t is used; otherwise |x| is very close to pi/2 and a
   further piece (pio2_2 + pio2_2t) is subtracted instead.  */
if (!__float_and_test28 (ax, pio2_24b))
{
y[0] = z - pio2_1t;
y[1] = (z - y[0]) - pio2_1t;
}
else
{
z -= pio2_2;
y[0] = z - pio2_2t;
y[1] = (z - y[0]) - pio2_2t;
}
return 1;
}
else
{
/* Same as above for negative x, adding instead of subtracting.  */
z = x + pio2_1;
if (!__float_and_test28 (ax, pio2_24b))
{
y[0] = z + pio2_1t;
y[1] = (z - y[0]) + pio2_1t;
}
else
{
z += pio2_2;
y[0] = z + pio2_2t;
y[1] = (z - y[0]) + pio2_2t;
}
return -1;
}
}
/* Medium size: |x| <= ~2^7 * pi/2.  */
if (ax <= pio2_2e7)
{
/* n = nearest integer to |x| * 2/pi, kept both as float and as int.  */
n = floorf (ax * invpio2 + half);
i = (int32_t) n;
r = ax - n * pio2_1;
w = n * pio2_1t; /* 1st round good to 40 bit */
/* Quick path: |x| (low 8 bits masked) differs from the tabulated
   multiple of pi/2, so one round is used as is.  i >= 1 here because
   |x| >= pio3_4.  */
if (i < 32 && !__float_and_test24 (ax, npio2_hw[i - 1]))
{
y[0] = r - w;
}
else
{
/* These float i, j shadow the integer i inside this block only; they
   hold the exponent-only parts (powers of two) of |x| and of y[0], so
   j / i measures how many leading bits cancelled in r - w.  */
float i, j;
j = __float_and8 (ax);
y[0] = r - w;
i = __float_and8 (y[0]);
/* Ratio outside [2^-8, 2^8].  */
if (j / i > 256.0 || j / i < 3.9062500e-3)
{ /* 2nd iterations needed, good to 57 */
t = r;
w = n * pio2_2;
r = t - w;
w = n * pio2_2t - ((t - r) - w);
y[0] = r - w;
i = __float_and8 (y[0]);
/* Ratio outside [2^-25, 2^25].  */
if (j / i > 33554432 || j / i < 2.9802322e-8)
{ /* 3rd iteration needed, 74 bits acc */
t = r;
w = n * pio2_3;
r = t - w;
w = n * pio2_3t - ((t - r) - w);
y[0] = r - w;
}
}
}
/* Tail of the result; from here on i is the integer multiple again.  */
y[1] = (r - y[0]) - w;
if (x < 0)
{
y[0] = -y[0];
y[1] = -y[1];
return -i;
}
else
{
return i;
}
}
/* all other (large) arguments */
/* NaN compares false in every test above, so it also ends up here.  */
if (isnanf (x) || isinff (x))
{
y[0] = y[1] = x - x;
return 0;
}
/* set z = scalbn(|x|,ilogb(x)-7) */
/* e0 is the exponent-only part of |x| / 128, i.e. a power of two; it is
   passed on to the kernel, which reads the scale from its exponent.  */
e0 = __float_and8 (ax / 128.0);
z = ax / e0;
/* Split z into three chunks, each the integer part in radix 2^8.  */
tx[0] = floorf (z);
z = (z - tx[0]) * two8;
tx[1] = floorf (z);
z = (z - tx[1]) * two8;
tx[2] = floorf (z);
/* Drop trailing zero chunks.  */
nx = 3;
while (tx[nx - 1] == zero)
nx--;
i = __fp_kernel_rem_pio2f (tx, y, e0, nx);
if (x < 0)
{
y[0] = -y[0];
y[1] = -y[1];
return -i;
}
return i;
}

View File

@ -0,0 +1,65 @@
/* k_cosf.c -- float version of k_cos.c
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "math.h"
#include "math_private.h"
/* Range thresholds.  */
static const float twom27 = 7.4505806e-09;	/* 2**-27 */
static const float dot3 = 3.0000001e-01;
static const float dot78125 = 7.8125000e-01;

/* Polynomial coefficients: the kernel evaluates
   z * (C1 + z * (C2 + ... + z * C6)) with z = x * x.  */
static const float one = 1.0000000000e+00;
static const float C1 = 4.1666667908e-02;
static const float C2 = -1.3888889225e-03;
static const float C3 = 2.4801587642e-05;
static const float C4 = -2.7557314297e-07;
static const float C5 = 2.0875723372e-09;
static const float C6 = -1.1359647598e-11;

/* Cosine kernel: X is the reduced argument and Y its tail, entering the
   result only through the correction term x * y.

   Three cases are distinguished on |x|:
     - x == 0 returns exactly one;
     - |x| < 0.3 evaluates  1 - (z/2 - (z*r - x*y));
     - otherwise a constant QUARTER (|x|/4, or 0.28125 above 0.78125) is
       split off so that the result is formed as
       (1 - quarter) - ((z/2 - quarter) - (z*r - x*y)).  */
float
__kernel_cosf (float x, float y)
{
  float x2, poly, quarter, half_x2, head;
  const float ax = __builtin_fabsf (x);

  /* |x| < 2**-27: only an exact zero is short-circuited.  */
  if (ax < twom27 && x == 0.0)
    return one;

  x2 = x * x;
  poly = x2 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4 + x2 * (C5 + x2 * C6)))));

  /* |x| < 0.3 */
  if (ax < dot3)
    return one - ((float) 0.5 * x2 - (x2 * poly - x * y));

  if (ax > dot78125)
    quarter = (float) 0.28125;	/* |x| > 0.78125 */
  else
    quarter = ax / 4.0;

  half_x2 = (float) 0.5 * x2 - quarter;
  head = one - quarter;
  return head - (half_x2 - (x2 * poly - x * y));
}

View File

@ -0,0 +1,274 @@
/* k_rem_pio2f.c -- float version of k_rem_pio2.c
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <math.h>
#include "math_private.h"
#include "s_float_bitwise.h"
/* Digits of 2/pi in radix 2^8, most significant first, stored as floats
   (2/pi = 0x0.A2F9836E4E44... and 0xA2 = 162, 0xF9 = 249, 0x83 = 131).  */
static const float two_over_pi[] = {
1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
1.40000000e+02, 1.07000000e+02
};
/* pi/2 broken into pieces of decreasing magnitude whose sum approximates
   pi/2; the hexadecimal value beside each entry is its float bit
   pattern.  */
static const float PIo2[] = {
1.5703125000e+00, /* 0x3fc90000 */
4.5776367188e-04, /* 0x39f00000 */
2.5987625122e-05, /* 0x37da0000 */
7.5437128544e-08, /* 0x33a20000 */
6.0026650317e-11, /* 0x2e840000 */
7.3896444519e-13, /* 0x2b500000 */
5.3845816694e-15, /* 0x27c20000 */
5.6378512969e-18, /* 0x22d00000 */
8.3009228831e-20, /* 0x1fc40000 */
3.2756352257e-22, /* 0x1bc60000 */
6.3331015649e-25, /* 0x17440000 */
};
static const float zero = 0.0000000000e+00;
static const float one = 1.0000000000;
/* 2^-8 and 2^8: used to move between adjacent radix-2^8 digits.  */
static const float twon8 = 3.9062500000e-03;
static const float two8 = 2.5600000000e+02;
/* Kernel of the large-argument reduction modulo pi/2.

   x   input: nx values holding the scaled argument in pieces
   y   output: the reduced argument is stored as y[0] + y[1]
   e0  float whose biased exponent field gives the scale of x; the
       unbiased exponent is recovered as __float_get_exp (e0) - 127
   nx  number of valid entries in x

   The product of x with the relevant digits of 2/pi is formed in q[],
   distilled into 8-bit integer digits in iq[], the integer part is
   split off as n, and the remaining fraction is multiplied by the
   pieces of pi/2 in PIo2[].  Returns n & 7.

   NOTE(review): f, q, fq and iq hold 20 entries and the recomputation
   loop grows jz without a bounds check; presumably the inputs keep the
   indices in range - confirm.  */
int32_t
__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
{
int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
float z, fw, f[20], fq[20], q[20];
/* initialize jk */
jp = jk = 9;
/* determine jx,jv,q0, note that 3>q0 */
jx = nx - 1;
exp = __float_get_exp (e0) - 127;
jv = (exp - 3) / 8;
if (jv < 0)
jv = 0;
q0 = exp - 8 * (jv + 1);
/* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
j = jv - jx;
m = jx + jk;
for (i = 0; i <= m; i++, j++)
f[i] = (j < 0) ? zero : two_over_pi[j];
/* compute q[0],q[1],...q[jk] */
for (i = 0; i <= jk; i++)
{
for (j = 0, fw = 0.0; j <= jx; j++)
fw += x[j] * f[jx + i - j];
q[i] = fw;
}
jz = jk;
/* Re-entered with a larger jz when the fraction turns out to be zero and
   more digits of 2/pi are required.  */
recompute:
/* distill q[] into iq[] reversingly */
for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
{
/* fw = z with its lowest radix-2^8 digit removed; iq[i] = that digit.  */
fw = truncf (twon8 * z);
iq[i] = (int32_t) (z - two8 * fw);
z = q[j - 1] + fw;
}
/* compute n */
z = __scalbnf (z, q0); /* actual value of z */
z -= 8.0 * floorf (z * 0.125); /* trim off integer >= 8 */
n = (int32_t) z;
z -= truncf (z);
/* ih becomes non-zero when the fraction is at least one half.  */
ih = 0;
if (q0 > 0)
{ /* need iq[jz-1] to determine n */
i = (iq[jz - 1] >> (8 - q0));
n += i;
iq[jz - 1] -= i << (8 - q0);
ih = iq[jz - 1] >> (7 - q0);
}
else if (q0 == 0)
/* NOTE(review): the distill loop above produces 8-bit digits, in which
   case this shift by 8 always yields 0; confirm whether a shift by 7
   (the top bit of the digit) was intended.  */
ih = iq[jz - 1] >> 8;
else if (z >= 0.5)
ih = 2;
if (ih > 0)
{ /* q > 0.5 */
n += 1;
carry = 0;
for (i = 0; i < jz; i++)
{ /* compute 1-q */
j = iq[i];
if (carry == 0)
{
if (j != 0)
{
carry = 1;
iq[i] = 0x100 - j;
}
}
else
iq[i] = 0xff - j;
}
if (q0 > 0)
{ /* rare case: chance is 1 in 12 */
switch (q0)
{
case 1:
iq[jz - 1] &= 0x7f;
break;
case 2:
iq[jz - 1] &= 0x3f;
break;
}
}
if (ih == 2)
{
z = one - z;
if (carry != 0)
z -= __scalbnf (one, q0);
}
}
/* check if recomputation is needed */
if (z == zero)
{
j = 0;
for (i = jz - 1; i >= jk; i--)
j |= iq[i];
if (j == 0)
{ /* need recomputation */
for (k = 1; iq[jk - k] == 0; k++); /* k = no. of terms needed */
for (i = jz + 1; i <= jz + k; i++)
{ /* add q[jz+1] to q[jz+k] */
f[jx + i] = two_over_pi[jv + i];
for (j = 0, fw = 0.0; j <= jx; j++)
fw += x[j] * f[jx + i - j];
q[i] = fw;
}
jz += k;
goto recompute;
}
}
/* chop off zero terms */
if (z == 0.0)
{
jz -= 1;
q0 -= 8;
while (iq[jz] == 0)
{
jz--;
q0 -= 8;
}
}
else
{ /* break z into 8-bit if necessary */
z = __scalbnf (z, -q0);
if (z >= two8)
{
fw = truncf (twon8 * z);
iq[jz] = (int32_t) (z - two8 * fw);
jz += 1;
q0 += 8;
iq[jz] = (int32_t) fw;
}
else
iq[jz] = (int32_t) z;
}
/* convert integer "bit" chunk to floating-point value */
fw = __scalbnf (one, q0);
for (i = jz; i >= 0; i--)
{
q[i] = fw * (float) iq[i];
fw *= twon8;
}
/* compute PIo2[0,...,jp]*q[jz,...,0] */
for (i = jz; i >= 0; i--)
{
for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
fw += PIo2[k] * q[i + k];
fq[jz - i] = fw;
}
/* compress fq[] into y[] */
/* y[0] is the sum of all terms (added smallest index last); y[1] is what
   is left of that sum after removing y[0].  Both are negated when the
   fraction was complemented above (ih != 0).  */
fw = 0.0;
for (i = jz; i >= 0; i--)
fw += fq[i];
y[0] = (ih == 0) ? fw : -fw;
fw = fq[0] - fw;
for (i = 1; i <= jz; i++)
fw += fq[i];
y[1] = (ih == 0) ? fw : -fw;
return n & 7;
}

View File

@ -0,0 +1,53 @@
/* k_sinf.c -- float version of k_sin.c
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "math.h"
#include "math_private.h"
static const float twom27 = 7.4505806000e-09;	/* 2**-27 */
static const float half = 5.0000000000e-01;

/* Polynomial coefficients: the kernel evaluates
   x + x^3 * (S1 + z * (S2 + z * (S3 + ... + z * S6))) with z = x * x.  */
static const float S1 = -1.6666667163e-01;
static const float S2 = 8.3333337680e-03;
static const float S3 = -1.9841270114e-04;
static const float S4 = 2.7557314297e-06;
static const float S5 = -2.5050759689e-08;
static const float S6 = 1.5896910177e-10;

/* Sine kernel: X is the reduced argument and Y its tail.  IY tells
   whether the tail is to be taken into account: with IY == 0 the tail is
   ignored, otherwise it is folded into the result.  An exact zero is
   returned unchanged (sign included).  */
float
__kernel_sinf (float x, float y, int iy)
{
  float x2, x3, tail;

  /* |x| < 2**-27: only an exact zero is short-circuited.  */
  if (__builtin_fabsf (x) < twom27 && x == 0.0)
    return x;

  x2 = x * x;
  x3 = x2 * x;
  tail = S2 + x2 * (S3 + x2 * (S4 + x2 * (S5 + x2 * S6)));

  if (iy != 0)
    return x - ((x2 * (half * y - x3 * tail) - y) - x3 * S1);

  return x + x3 * (S1 + x2 * tail);
}

View File

@ -0,0 +1,71 @@
/* s_cosf.c -- float version of s_cos.c.
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <errno.h>
#include "math.h"
#include "math_private.h"
static const float one = 1.0;
/* Approximately pi/4: at or below this no argument reduction is done.  */
static const float pio4 = 7.8539801e-1;

/* Single-precision cosine.

   Small arguments go straight to the cosine kernel.  NaN yields NaN,
   an infinity yields NaN and sets errno to EDOM.  Everything else is
   reduced modulo pi/2 and dispatched on the low two bits of the
   quadrant number returned by the reduction.  */
float
__cosf (float x)
{
  float rem[2];
  const float no_tail = 0.0;
  const float ax = __builtin_fabsf (x);
  int32_t quadrant;

  /* |x| ~< pi/4 */
  if (ax <= pio4)
    return __kernel_cosf (x, no_tail);

  /* cos(Inf or NaN) is NaN */
  if (isnanf (ax))
    return x - x;

  if (isinff (ax))
    {
      __set_errno (EDOM);
      return x - x;
    }

  /* argument reduction needed */
  quadrant = __ieee754_rem_pio2f (x, rem);
  switch (quadrant & 3)
    {
    case 0:
      return __kernel_cosf (rem[0], rem[1]);
    case 1:
      return -__kernel_sinf (rem[0], rem[1], 1);
    case 2:
      return -__kernel_cosf (rem[0], rem[1]);
    default:
      return __kernel_sinf (rem[0], rem[1], 1);
    }
}
weak_alias (__cosf, cosf)

View File

@ -0,0 +1,112 @@
/* Bitwise manipulation over float. Function prototypes.
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef _FLOAT_BITWISE_
#define _FLOAT_BITWISE_ 1
#include "math_private.h"
/* Returns (int) ((num & 0x7FFFFFF0) == value): clears the sign bit and the
   low 4 bits of the float image of NUM and compares the resulting float
   with VALUE.  */
static inline
int __float_and_test28 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
/* The mask is applied with the VSX xxland instruction to the register
   holding NUM.  NOTE(review): the mask word differs from the integer
   mask below, presumably because the value is held in double format
   inside the register - confirm.  */
vector int mask = (vector int) {
0x7ffffffe, 0x00000000, 0x00000000, 0x0000000
};
__asm__ (
/* The 'f' constraint is used on mask because we just need
* to compare floats, not the full vector.  */
"xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
);
#else
/* Generic path: mask the 32-bit integer image of NUM.  */
int32_t inum;
GET_FLOAT_WORD(inum, num);
inum = (inum & 0x7ffffff0);
SET_FLOAT_WORD(ret, inum);
#endif
return (ret == value);
}
/* Returns (int) ((num & 0x7FFFFF00) == value): clears the sign bit and the
   low 8 bits of the float image of NUM and compares the resulting float
   with VALUE.  */
static inline
int __float_and_test24 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
/* Same operation done with the VSX xxland instruction.  NOTE(review): the
   mask word differs from the integer mask below, presumably because the
   value is held in double format inside the register - confirm.  */
vector int mask = (vector int) {
0x7fffffe0, 0x00000000, 0x00000000, 0x0000000
};
__asm__ (
"xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
);
#else
/* Generic path: mask the 32-bit integer image of NUM.  */
int32_t inum;
GET_FLOAT_WORD(inum, num);
inum = (inum & 0x7fffff00);
SET_FLOAT_WORD(ret, inum);
#endif
return (ret == value);
}
/* Returns (float) (num & 0x7F800000): keeps only the exponent field of
   NUM, dropping the sign and the whole mantissa.  For a normal NUM the
   result is therefore a power of two not larger than |NUM|.  */
static inline
float __float_and8 (float num)
{
float ret;
#ifdef _ARCH_PWR7
/* Same operation done with the VSX xxland instruction.  NOTE(review): the
   mask word differs from the integer mask below, presumably because the
   value is held in double format inside the register - confirm.  */
vector int mask = (vector int) {
0x7ff00000, 0x00000000, 0x00000000, 0x00000000
};
__asm__ (
"xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
);
#else
/* Generic path: mask the 32-bit integer image of NUM.  */
int32_t inum;
GET_FLOAT_WORD(inum, num);
inum = (inum & 0x7f800000);
SET_FLOAT_WORD(ret, inum);
#endif
return ret;
}
/* Returns ((int32_t) (num & 0x7F800000) >> 23): the biased exponent field
   of NUM as an integer.  Callers subtract the bias themselves.  */
static inline
int32_t __float_get_exp (float num)
{
int32_t inum;
#ifdef _ARCH_PWR7
/* Isolate the exponent with the VSX xxland instruction first, then read
   the float image of the result.  NOTE(review): the mask word differs
   from the integer mask below, presumably because the value is held in
   double format inside the register - confirm.  */
float ret;
vector int mask = (vector int) {
0x7ff00000, 0x00000000, 0x00000000, 0x00000000
};
__asm__ (
"xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
);
GET_FLOAT_WORD(inum, ret);
#else
/* Generic path: mask the 32-bit integer image of NUM.  */
GET_FLOAT_WORD(inum, num);
inum = inum & 0x7f800000;
#endif
/* Move the exponent field down to bit 0.  */
return inum >> 23;
}
#endif /* s_float_bitwise.h */

View File

@ -0,0 +1,70 @@
/* s_sinf.c -- float version of s_sin.c.
Copyright (C) 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <errno.h>
#include "math.h"
#include "math_private.h"
/* Approximately pi/4: at or below this no argument reduction is done.  */
static const float pio4 = 7.8539801e-1;

/* Single-precision sine.

   Small arguments go straight to the sine kernel (tail ignored).  NaN
   yields NaN, an infinity yields NaN and sets errno to EDOM.
   Everything else is reduced modulo pi/2 and dispatched on the low two
   bits of the quadrant number returned by the reduction.  */
float
__sinf (float x)
{
  float rem[2];
  const float no_tail = 0.0;
  const float ax = __builtin_fabsf (x);
  int32_t quadrant;

  /* |x| ~< pi/4 */
  if (ax <= pio4)
    return __kernel_sinf (x, no_tail, 0);

  /* sin(Inf or NaN) is NaN */
  if (isnanf (ax))
    return x - x;

  if (isinff (ax))
    {
      __set_errno (EDOM);
      return x - x;
    }

  /* argument reduction needed */
  quadrant = __ieee754_rem_pio2f (x, rem);
  switch (quadrant & 3)
    {
    case 0:
      return __kernel_sinf (rem[0], rem[1], 1);
    case 1:
      return __kernel_cosf (rem[0], rem[1]);
    case 2:
      return -__kernel_sinf (rem[0], rem[1], 1);
    default:
      return -__kernel_cosf (rem[0], rem[1]);
    }
}
weak_alias (__sinf, sinf)