Make mantissa type of mp_no configurable

The mantissa of mp_no is intended to take only integral values.  This
is a relatively good choice for powerpc due to its 4 fpus, but not for
other architectures, which suffer due to this choice.  This change
makes the default mantissa a long integer and allows powerpc to
override it.  Additionally, some operations have been optimized for
integer manipulation, resulting in a significant improvement in
performance.
This commit is contained in:
Siddhesh Poyarekar 2013-03-26 19:28:50 +05:30
parent fce14d4e9c
commit 6f2e90e78f
5 changed files with 176 additions and 69 deletions

View File

@ -1,3 +1,22 @@
2013-03-26 Siddhesh Poyarekar <siddhesh@redhat.com>
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T and
MANTISSA_STORE_T to store computations on mantissa. Use
macros for rounding and division.
(denorm): Likewise.
(__dbl_mp): Likewise.
(add_magnitudes): Likewise.
(sub_magnitudes): Likewise.
(__mul): Likewise.
(__sqr): Likewise.
* sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define
powers of two in terms of TWOPOW macro.
(mp_no): Make type of mantissa as MANTISSA_T.
[!RADIXI]: Define RADIXI.
[!TWO52]: Define TWO52.
* sysdeps/powerpc/power4/fpu/mpa-arch.h: New file.
2013-03-25 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/s_llround.c: Fix libm ABI issue with missing

View File

@ -0,0 +1,47 @@
/* Overridable constants and operations.
Copyright (C) 2013 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>. */
#include <stdint.h>
typedef long mantissa_t;
typedef int64_t mantissa_store_t;
#define TWOPOW(i) (1L << i)
#define RADIX_EXP 24
#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
/* Divide D by RADIX and put the remainder in R. D must be a non-negative
integral value. */
#define DIV_RADIX(d, r) \
({ \
r = d & (RADIX - 1); \
d >>= RADIX_EXP; \
})
/* Put the integer component of a double X in R and retain the fraction in
X. This is used in extracting mantissa digits for MP_NO by using the
integer portion of the current value of the number as the current mantissa
digit and then scaling by RADIX to get the next mantissa digit in the same
manner. */
#define INTEGER_OF(x, i) \
({ \
i = (mantissa_t) x; \
x -= i; \
})
/* Align IN down to F. The code assumes that F is a power of two. */
#define ALIGN_DOWN_TO(in, f) ((in) & -(f))

View File

@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
{
#define R RADIXI
long i;
double a, c, u, v, z[5];
double c;
mantissa_t a, u, v, z[5];
if (p < 5)
{
if (p == 1)
@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
for (i = 2; i < 5; i++)
{
z[i] = X[i] * a;
u = (z[i] + CUTTER) - CUTTER;
if (u > z[i])
u -= RADIX;
z[i] -= u;
z[i - 1] += u * RADIXI;
mantissa_store_t d, r;
d = X[i] * (mantissa_store_t) a;
DIV_RADIX (d, r);
z[i] = r;
z[i - 1] += d;
}
u = (z[3] + TWO71) - TWO71;
if (u > z[3])
u -= TWO19;
u = ALIGN_DOWN_TO (z[3], TWO19);
v = z[3] - u;
if (v == TWO18)
@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
{
long i, k;
long p2 = p;
double c, u, z[5];
double c;
mantissa_t u, z[5];
#define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
z[3] = X[k];
}
u = (z[3] + TWO57) - TWO57;
if (u > z[3])
u -= TWO5;
u = ALIGN_DOWN_TO (z[3], TWO5);
if (u == z[3])
{
@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
{
long i, n;
long p2 = p;
double u;
/* Sign. */
if (x == ZERO)
@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
n = MIN (p2, 4);
for (i = 1; i <= n; i++)
{
u = (x + TWO52) - TWO52;
if (u > x)
u -= ONE;
Y[i] = u;
x -= u;
INTEGER_OF (x, Y[i]);
x *= RADIX;
}
for (; i <= p2; i++)
@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
double zk;
mantissa_t zk;
EZ = EX;
@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
double zk;
mantissa_t zk;
EZ = EX;
i = p2;
@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k, ip, ip2;
long p2 = p;
double u, zk;
mantissa_store_t zk;
const mp_no *a;
double *diag;
mantissa_store_t *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@ -680,11 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
/* Precompute sums of diagonal elements so that we can directly use them
later. See the next comment to know we why need them. */
diag = alloca (k * sizeof (double));
double d = ZERO;
diag = alloca (k * sizeof (mantissa_store_t));
mantissa_store_t d = ZERO;
for (i = 1; i <= ip; i++)
{
d += X[i] * Y[i];
d += X[i] * (mantissa_store_t) Y[i];
diag[i] = d;
}
while (i < k)
@ -697,18 +689,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
if (k % 2 == 0)
/* We want to add this only once, but since we subtract it in the sum
of products above, we add twice. */
zk += 2 * X[lim] * Y[lim];
zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
for (i = k - p2, j = p2; i < j; i++, j--)
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
zk -= diag[k - 1];
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
Z[k--] = zk - u;
zk = u * RADIXI;
DIV_RADIX (zk, Z[k]);
k--;
}
/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
@ -731,18 +720,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
if (k % 2 == 0)
/* We want to add this only once, but since we subtract it in the sum
of products above, we add twice. */
zk += 2 * X[lim] * Y[lim];
zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
for (i = 1, j = k - 1; i < j; i++, j--)
zk += (X[i] + X[j]) * (Y[i] + Y[j]);
zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
zk -= diag[k - 1];
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
Z[k--] = zk - u;
zk = u * RADIXI;
DIV_RADIX (zk, Z[k]);
k--;
}
Z[k] = zk;
@ -774,7 +760,7 @@ SECTION
__sqr (const mp_no *x, mp_no *y, int p)
{
long i, j, k, ip;
double u, yk;
mantissa_store_t yk;
/* Is z=0? */
if (__glibc_unlikely (X[0] == ZERO))
@ -798,11 +784,11 @@ __sqr (const mp_no *x, mp_no *y, int p)
while (k > p)
{
double yk2 = 0.0;
mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
yk += X[lim] * X[lim];
yk += X[lim] * (mantissa_store_t) X[lim];
/* In __mul, this loop (and the one within the next while loop) run
between a range to calculate the mantissa as follows:
@ -814,36 +800,30 @@ __sqr (const mp_no *x, mp_no *y, int p)
result. For cases where the range size is even, the mid-point needs
to be added separately (above). */
for (i = k - p, j = p; i < j; i++, j--)
yk2 += X[i] * X[j];
yk2 += X[i] * (mantissa_store_t) X[j];
yk += 2.0 * yk2;
u = (yk + CUTTER) - CUTTER;
if (u > yk)
u -= RADIX;
Y[k--] = yk - u;
yk = u * RADIXI;
DIV_RADIX (yk, Y[k]);
k--;
}
while (k > 1)
{
double yk2 = 0.0;
mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
yk += X[lim] * X[lim];
yk += X[lim] * (mantissa_store_t) X[lim];
/* Likewise for this loop. */
for (i = 1, j = k - 1; i < j; i++, j--)
yk2 += X[i] * X[j];
yk2 += X[i] * (mantissa_store_t) X[j];
yk += 2.0 * yk2;
u = (yk + CUTTER) - CUTTER;
if (u > yk)
u -= RADIX;
Y[k--] = yk - u;
yk = u * RADIXI;
DIV_RADIX (yk, Y[k]);
k--;
}
Y[k] = yk;

View File

@ -35,6 +35,7 @@
/* Common types and definition */
/************************************************************************/
#include <mpa-arch.h>
/* The mp_no structure holds the details of a multi-precision floating point
number.
@ -61,7 +62,7 @@
typedef struct
{
int e;
double d[40];
mantissa_t d[40];
} mp_no;
typedef union
@ -82,9 +83,13 @@ extern const mp_no mptwo;
#define ABS(x) ((x) < 0 ? -(x) : (x))
#define RADIX 0x1.0p24 /* 2^24 */
#ifndef RADIXI
# define RADIXI 0x1.0p-24 /* 2^-24 */
#define CUTTER 0x1.0p76 /* 2^76 */
#endif
#ifndef TWO52
# define TWO52 0x1.0p52 /* 2^52 */
#endif
#define ZERO 0.0 /* 0 */
#define MZERO -0.0 /* 0 with the sign bit set */
@ -92,13 +97,13 @@ extern const mp_no mptwo;
#define MONE -1.0 /* -1 */
#define TWO 2.0 /* 2 */
#define TWO5 0x1.0p5 /* 2^5 */
#define TWO8 0x1.0p8 /* 2^52 */
#define TWO10 0x1.0p10 /* 2^10 */
#define TWO18 0x1.0p18 /* 2^18 */
#define TWO19 0x1.0p19 /* 2^19 */
#define TWO23 0x1.0p23 /* 2^23 */
#define TWO52 0x1.0p52 /* 2^52 */
#define TWO5 TWOPOW (5) /* 2^5 */
#define TWO8 TWOPOW (8) /* 2^52 */
#define TWO10 TWOPOW (10) /* 2^10 */
#define TWO18 TWOPOW (18) /* 2^18 */
#define TWO19 TWOPOW (19) /* 2^19 */
#define TWO23 TWOPOW (23) /* 2^23 */
#define TWO57 0x1.0p57 /* 2^57 */
#define TWO71 0x1.0p71 /* 2^71 */
#define TWOM1032 0x1.0p-1032 /* 2^-1032 */

View File

@ -0,0 +1,56 @@
/* Overridable constants and operations.
Copyright (C) 2013 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>. */
typedef double mantissa_t;
typedef double mantissa_store_t;
#define TWOPOW(i) (0x1.0p##i)
#define RADIX TWOPOW (24) /* 2^24 */
#define CUTTER TWOPOW (76) /* 2^76 */
#define RADIXI 0x1.0p-24 /* 2^-24 */
#define TWO52 TWOPOW (52) /* 2^52 */
/* Divide D by RADIX and put the remainder in R. */
#define DIV_RADIX(d,r) \
({ \
double u = ((d) + CUTTER) - CUTTER; \
if (u > (d)) \
u -= RADIX; \
r = (d) - u; \
(d) = u * RADIXI; \
})
/* Put the integer component of a double X in R and retain the fraction in
X. */
#define INTEGER_OF(x, r) \
({ \
double u = ((x) + TWO52) - TWO52; \
if (u > (x)) \
u -= ONE; \
(r) = u; \
(x) -= u; \
})
/* Align IN down to a multiple of F, where F is a power of two. */
#define ALIGN_DOWN_TO(in, f) \
({ \
double factor = f * TWO52; \
double u = (in + factor) - factor; \
if (u > in) \
u -= f; \
u; \
})