Fix fma underflows with small x * y (bug 14793).

This commit is contained in:
Joseph Myers 2012-11-06 14:12:54 +00:00
parent d7fcee3a58
commit 82477c28f4
6 changed files with 223 additions and 55 deletions

View File

@ -1,3 +1,16 @@
2012-11-06 Joseph Myers <joseph@codesourcery.com>
[BZ #14793]
* sysdeps/ieee754/dbl-64/s_fma.c (__fma): In case of large z
exponent and small x and y exponents, scale x or y up. Increase
by 2 the exponent used in scaling up.
* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
* math/libm-test.inc (fma_test): Add more tests.
(fma_test_towardzero): Likewise.
(fma_test_downward): Likewise.
(fma_test_upward): Likewise.
2012-11-05 Joseph Myers <joseph@codesourcery.com>
[BZ #14805]

2
NEWS
View File

@ -18,7 +18,7 @@ Version 2.17
14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
14767, 14783, 14784, 14785, 14796, 14797, 14801, 14805.
14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
Optimized versions of memcpy, memset, and memcmp added for System z10 and

View File

@ -4662,6 +4662,14 @@ fma_test (void)
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@ -4712,6 +4720,14 @@ fma_test (void)
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@ -4748,6 +4764,14 @@ fma_test (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@ -4791,6 +4815,14 @@ fma_test (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
END (fma);
@ -4884,6 +4916,14 @@ fma_test_towardzero (void)
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
@ -4914,6 +4954,14 @@ fma_test_towardzero (void)
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@ -4944,6 +4992,14 @@ fma_test_towardzero (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
@ -4974,6 +5030,14 @@ fma_test_towardzero (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
}
@ -5070,6 +5134,14 @@ fma_test_downward (void)
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1.000002p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1.000002p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
@ -5100,6 +5172,14 @@ fma_test_downward (void)
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1.0000000000001p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1.0000000000001p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@ -5130,6 +5210,14 @@ fma_test_downward (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1.0000000000000002p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1.0000000000000002p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
@ -5160,6 +5248,14 @@ fma_test_downward (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1.0000000000000000000000000001p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1.0000000000000000000000000001p16319L);
#endif
}
@ -5256,6 +5352,14 @@ fma_test_upward (void)
TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24);
TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1.000002p127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1.000002p103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103);
TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103);
TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
@ -5286,6 +5390,14 @@ fma_test_upward (void)
TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106);
TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1.0000000000001p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1.0000000000001p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970);
TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
@ -5316,6 +5428,14 @@ fma_test_upward (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L);
TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1.0000000000000002p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1.0000000000000002p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L);
TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
@ -5346,6 +5466,14 @@ fma_test_upward (void)
TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L);
TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1.0000000000000000000000000001p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1.0000000000000000000000000001p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L);
TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L);
#endif
}

View File

@ -114,8 +114,17 @@ __fma (double x, double y, double z)
{
/* Similarly.
If z exponent is very large and x and y exponents are
very small, it doesn't matter if we don't adjust it. */
if (u.ieee.exponent > v.ieee.exponent)
very small, adjust them up to avoid spurious underflows,
rather than down. */
if (u.ieee.exponent + v.ieee.exponent
<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG)
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
}
else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > DBL_MANT_DIG)
u.ieee.exponent -= DBL_MANT_DIG;
@ -145,15 +154,15 @@ __fma (double x, double y, double z)
<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * DBL_MANT_DIG;
u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * DBL_MANT_DIG;
if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
w.ieee.exponent += 2 * DBL_MANT_DIG;
w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
else
w.d *= 0x1p106;
w.d *= 0x1p108;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@ -238,19 +247,19 @@ __fma (double x, double y, double z)
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
return v.d * 0x1p-106;
return v.d * 0x1p-108;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
if (v.ieee.exponent > 106)
return (a1 + u.d) * 0x1p-106;
/* If v.d * 0x1p-106 with round to zero is a subnormal above
or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
if (v.ieee.exponent > 108)
return (a1 + u.d) * 0x1p-108;
/* If v.d * 0x1p-108 with round to zero is a subnormal above
or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
down just by 1 bit, which means v.ieee.mantissa1 |= j would
change the round bit, not sticky or guard bit.
v.d * 0x1p-106 never normalizes by shifting up,
v.d * 0x1p-108 never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
if (v.ieee.exponent == 106)
if (v.ieee.exponent == 108)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@ -260,8 +269,8 @@ __fma (double x, double y, double z)
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
if (w.ieee.exponent == 107)
return w.d * 0x1p-106;
if (w.ieee.exponent == 109)
return w.d * 0x1p-108;
}
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
@ -270,12 +279,12 @@ __fma (double x, double y, double z)
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
v.d *= 0x1p-106;
v.d *= 0x1p-108;
w.d *= 0x1p-2;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-106;
return v.d * 0x1p-108;
}
}
#ifndef __fma

View File

@ -118,8 +118,17 @@ __fmal (long double x, long double y, long double z)
{
/* Similarly.
If z exponent is very large and x and y exponents are
very small, it doesn't matter if we don't adjust it. */
if (u.ieee.exponent > v.ieee.exponent)
very small, adjust them up to avoid spurious underflows,
rather than down. */
if (u.ieee.exponent + v.ieee.exponent
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG)
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
}
else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > LDBL_MANT_DIG)
u.ieee.exponent -= LDBL_MANT_DIG;
@ -149,15 +158,15 @@ __fmal (long double x, long double y, long double z)
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * LDBL_MANT_DIG;
u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * LDBL_MANT_DIG;
if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
w.ieee.exponent += 2 * LDBL_MANT_DIG;
w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
w.d *= 0x1p226L;
w.d *= 0x1p228L;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@ -242,19 +251,19 @@ __fmal (long double x, long double y, long double z)
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
return v.d * 0x1p-226L;
return v.d * 0x1p-228L;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
if (v.ieee.exponent > 226)
return (a1 + u.d) * 0x1p-226L;
/* If v.d * 0x1p-226L with round to zero is a subnormal above
or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa
if (v.ieee.exponent > 228)
return (a1 + u.d) * 0x1p-228L;
/* If v.d * 0x1p-228L with round to zero is a subnormal above
or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
down just by 1 bit, which means v.ieee.mantissa3 |= j would
change the round bit, not sticky or guard bit.
v.d * 0x1p-226L never normalizes by shifting up,
v.d * 0x1p-228L never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
if (v.ieee.exponent == 226)
if (v.ieee.exponent == 228)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@ -264,8 +273,8 @@ __fmal (long double x, long double y, long double z)
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
if (w.ieee.exponent == 227)
return w.d * 0x1p-226L;
if (w.ieee.exponent == 229)
return w.d * 0x1p-228L;
}
/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa3 & 1 is the round bit and j is our sticky
@ -274,12 +283,12 @@ __fmal (long double x, long double y, long double z)
w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa3 &= ~3U;
v.d *= 0x1p-226L;
v.d *= 0x1p-228L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa3 |= j;
return v.d * 0x1p-226L;
return v.d * 0x1p-228L;
}
}
weak_alias (__fmal, fmal)

View File

@ -116,8 +116,17 @@ __fmal (long double x, long double y, long double z)
{
/* Similarly.
If z exponent is very large and x and y exponents are
very small, it doesn't matter if we don't adjust it. */
if (u.ieee.exponent > v.ieee.exponent)
very small, adjust them up to avoid spurious underflows,
rather than down. */
if (u.ieee.exponent + v.ieee.exponent
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG)
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
}
else if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > LDBL_MANT_DIG)
u.ieee.exponent -= LDBL_MANT_DIG;
@ -147,15 +156,15 @@ __fmal (long double x, long double y, long double z)
<= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
{
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent += 2 * LDBL_MANT_DIG;
u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
v.ieee.exponent += 2 * LDBL_MANT_DIG;
if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
{
if (w.ieee.exponent)
w.ieee.exponent += 2 * LDBL_MANT_DIG;
w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
else
w.d *= 0x1p128L;
w.d *= 0x1p130L;
adjust = -1;
}
/* Otherwise x * y should just affect inexact
@ -240,19 +249,19 @@ __fmal (long double x, long double y, long double z)
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
return v.d * 0x1p-128L;
return v.d * 0x1p-130L;
/* If result rounded to zero is not subnormal, no double
rounding will occur. */
if (v.ieee.exponent > 128)
return (a1 + u.d) * 0x1p-128L;
/* If v.d * 0x1p-128L with round to zero is a subnormal above
or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
if (v.ieee.exponent > 130)
return (a1 + u.d) * 0x1p-130L;
/* If v.d * 0x1p-130L with round to zero is a subnormal above
or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa
down just by 1 bit, which means v.ieee.mantissa1 |= j would
change the round bit, not sticky or guard bit.
v.d * 0x1p-128L never normalizes by shifting up,
v.d * 0x1p-130L never normalizes by shifting up,
so round bit plus sticky bit should be already enough
for proper rounding. */
if (v.ieee.exponent == 128)
if (v.ieee.exponent == 130)
{
/* If the exponent would be in the normal range when
rounding to normal precision with unbounded exponent
@ -262,8 +271,8 @@ __fmal (long double x, long double y, long double z)
if (TININESS_AFTER_ROUNDING)
{
w.d = a1 + u.d;
if (w.ieee.exponent == 129)
return w.d * 0x1p-128L;
if (w.ieee.exponent == 131)
return w.d * 0x1p-130L;
}
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
@ -272,12 +281,12 @@ __fmal (long double x, long double y, long double z)
w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
w.ieee.negative = v.ieee.negative;
v.ieee.mantissa1 &= ~3U;
v.d *= 0x1p-128L;
v.d *= 0x1p-130L;
w.d *= 0x1p-2L;
return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-128L;
return v.d * 0x1p-130L;
}
}
weak_alias (__fmal, fmal)