fp: support bfloat16 constants

Support generating bfloat16 constants. This is a bit awkward, as "DW" already generates IEEE half precision constants; therefore there is no longer a single floating-point format for each size. This requires some replumbing. Fortunately bfloat16 fits in 64 bits, so support generating them with a macro that uses __?bfloat16?__() to convert to integers first before passing them to DW. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2025-04-12 18:40:23 +08:00 · 2020-07-16 23:11:03 -07:00 · 2020-07-16 23:11:03 -07:00 · d081f0db5d
commit d081f0db5d
parent e830e92b77
10 changed files with 140 additions and 60 deletions
--- a/asm/eval.c
+++ b/asm/eval.c
@ -694,21 +694,13 @@ static expr *expr5(void)
 static expr *eval_floatize(enum floatize type)
 {
    uint8_t result[16], *p;     /* Up to 128 bits */
-    static const struct {
-        int bytes, start, len;
-    } formats[] = {
-        {  1, 0, 1 },           /* FLOAT_8 */
-        {  2, 0, 2 },           /* FLOAT_16 */
-        {  4, 0, 4 },           /* FLOAT_32 */
-        {  8, 0, 8 },           /* FLOAT_64 */
-        { 10, 0, 8 },           /* FLOAT_80M */
-        { 10, 8, 2 },           /* FLOAT_80E */
-        { 16, 0, 8 },           /* FLOAT_128L */
-        { 16, 8, 8 },           /* FLOAT_128H */
-    };
    int sign = 1;
    int64_t val;
+    size_t len;
    int i;
+    const struct ieee_format *fmt;
+
+    fmt = &fp_formats[type];

    scan();
    if (tt != '(') {
@ -724,7 +716,7 @@ static expr *eval_floatize(enum floatize type)
        nasm_nonfatal("expecting floating-point number");
        return NULL;
    }
-    if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes))
+    if (!float_const(tokval->t_charptr, sign, result, type))
        return NULL;
    scan();
    if (tt != ')') {
@ -732,9 +724,12 @@ static expr *eval_floatize(enum floatize type)
        return NULL;
    }

-    p = result+formats[type].start+formats[type].len;
+    len = fmt->bytes - fmt->offset;
+    if (len > 8)
+        len = 8;                /* Max 64 bits */
+    p = result + len;
    val = 0;
-    for (i = formats[type].len; i; i--) {
+    for (i = len; i; i--) {
        p--;
        val = (val << 8) + *p;
    }
--- a/asm/floats.c
+++ b/asm/floats.c
@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *
- *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@ -629,13 +629,6 @@ static void ieee_shr(fp_limb *mant, int i)
   - the sign bit plus exponent fit in 16 bits.
   - the exponent bias is 2^(n-1)-1 for an n-bit exponent */

-struct ieee_format {
-    int bytes;
-    int mantissa;               /* Fractional bits in the mantissa */
-    int explicit;               /* Explicit integer */
-    int exponent;               /* Bits in the exponent */
-};
-
 /*
 * The 16- and 128-bit formats are expected to be in IEEE 754r.
 * AMD SSE5 uses the 16-bit format.
@ -646,13 +639,31 @@ struct ieee_format {
 *
 * The 8-bit format appears to be the consensus 8-bit floating-point
 * format.  It is apparently used in graphics applications.
+ *
+ * The b16 format is a 16-bit format with smaller mantissa and larger
+ * exponent field.  It is effectively a truncated version of the standard
+ * IEEE 32-bit (single) format, but is explicitly supported here in
+ * order to support proper rounding.
+ *
+ * This array must correspond to enum floatize in include/nasm.h.
+ * Note that there are some formats which have more than one enum;
+ * both need to be listed here with the appropriate offset into the
+ * floating-point byte array (used for the floatize operators.)
+ *
+ * FLOAT_ERR is a value that represents both "invalid format" and the
+ * size of this array.
 */
-static const struct ieee_format ieee_8   = {  1,   3, 0,  4 };
-static const struct ieee_format ieee_16  = {  2,  10, 0,  5 };
-static const struct ieee_format ieee_32  = {  4,  23, 0,  8 };
-static const struct ieee_format ieee_64  = {  8,  52, 0, 11 };
-static const struct ieee_format ieee_80  = { 10,  63, 1, 15 };
-static const struct ieee_format ieee_128 = { 16, 112, 0, 15 };
+const struct ieee_format fp_formats[FLOAT_ERR] = {
+    {  1,   3, 0,  4, 0 },         /* FLOAT_8 */
+    {  2,  10, 0,  5, 0 },         /* FLOAT_16 */
+    {  2,   7, 0,  8, 0 },         /* FLOAT_B16 */
+    {  4,  23, 0,  8, 0 },         /* FLOAT_32 */
+    {  8,  52, 0, 11, 0 },         /* FLOAT_64 */
+    { 10,  63, 1, 15, 0 },         /* FLOAT_80M */
+    { 10,  63, 1, 15, 8 },         /* FLOAT_80E */
+    { 16, 112, 0, 15, 0 },         /* FLOAT_128L */
+    { 16, 112, 0, 15, 8 }          /* FLOAT_128H */
+};

 /* Types of values we can generate */
 enum floats {
@ -672,7 +683,7 @@ static int to_packed_bcd(const char *str, const char *p,
    char c;
    int tv = -1;

-    if (fmt != &ieee_80) {
+    if (fmt->bytes != 10) {
        nasm_nonfatal("packed BCD requires an 80-bit format");
        return 0;
    }
@ -711,9 +722,9 @@ static int to_packed_bcd(const char *str, const char *p,
    return 1;                   /* success */
 }

-static int to_float(const char *str, int s, uint8_t *result,
-                    const struct ieee_format *fmt)
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt)
 {
+    const struct ieee_format *fmt = &fp_formats[ffmt];
    fp_limb mant[MANT_LIMBS];
    int32_t exponent = 0;
    const int32_t expmax = 1 << (fmt->exponent - 1);
@ -902,25 +913,20 @@ static int to_float(const char *str, int s, uint8_t *result,
    return 1;                   /* success */
 }

-int float_const(const char *number, int sign, uint8_t *result, int bytes)
+/*
+ * Get the default floating point format for this specific field size.
+ * Used for the Dx pseudoops.
+ */
+enum floatize float_deffmt(int bytes)
 {
-    switch (bytes) {
-    case 1:
-        return to_float(number, sign, result, &ieee_8);
-    case 2:
-        return to_float(number, sign, result, &ieee_16);
-    case 4:
-        return to_float(number, sign, result, &ieee_32);
-    case 8:
-        return to_float(number, sign, result, &ieee_64);
-    case 10:
-        return to_float(number, sign, result, &ieee_80);
-    case 16:
-        return to_float(number, sign, result, &ieee_128);
-    default:
-        nasm_panic("strange value %d passed to float_const", bytes);
-        return 0;
+    enum floatize type;
+
+    for (type = 0; type < FLOAT_ERR; type++) {
+        if (fp_formats[type].bytes == bytes)
+            break;
    }
+
+    return type;                /* FLOAT_ERR if invalid */
 }

 /* Set floating-point options */
--- a/asm/floats.h
+++ b/asm/floats.h
@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *   
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@ -48,7 +48,20 @@ enum float_round {
    FLOAT_RC_UP
 };

-int float_const(const char *string, int sign, uint8_t *result, int bytes);
+/* Note: enum floatize and FLOAT_ERR are defined in nasm.h */
+
+/* Floating-point format description */
+struct ieee_format {
+    int bytes;                  /* Total bytes */
+    int mantissa;               /* Fractional bits in the mantissa */
+    int explicit;               /* Explicit integer */
+    int exponent;               /* Bits in the exponent */
+    int offset;                 /* Offset into byte array for floatize op */
+};
+extern const struct ieee_format fp_formats[FLOAT_ERR];
+
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt);
+enum floatize float_deffmt(int bytes);
 int float_option(const char *option);

 #endif /* NASM_FLOATS_H */
--- a/asm/parser.c
+++ b/asm/parser.c
@ -531,10 +531,12 @@ static int parse_eops(extop **result, bool critical, int elem)
                goto is_float;
            }
        } else if (i == TOKEN_FLOAT) {
+            enum floatize fmt;
        is_float:
            eop->type = EOT_DB_FLOAT;

-            if (eop->elem > 16) {
+            fmt = float_deffmt(eop->elem);
+            if (fmt == FLOAT_ERR) {
                nasm_nonfatal("no %d-bit floating-point format supported",
                              eop->elem << 3);
                eop->val.string.len = 0;
@ -552,8 +554,7 @@ static int parse_eops(extop **result, bool critical, int elem)
                eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
                eop->val.string.data = (char *)eop + sizeof(extop);
                if (!float_const(tokval.t_charptr, sign,
-                                 (uint8_t *)eop->val.string.data,
-                                 eop->val.string.len))
+                                 (uint8_t *)eop->val.string.data, fmt))
                    eop->val.string.len = 0;
            }
            if (!eop->val.string.len)
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@ -113,6 +113,9 @@ __?float80e?__
 __?float128l?__
 __?float128h?__

+% TOKEN_FLOATIZE, 0, 0, FLOAT_B{__?bfloat*?__}
+__?bfloat16?__
+
 % TOKEN_STRFUNC, 0, 0, STRFUNC_{__?*?__}
 __?utf16?__
 __?utf16le?__
--- a/doc/changes.src
+++ b/doc/changes.src
@ -12,6 +12,9 @@ since 2007.
 \b Add instructions from the Intel Instruction Set Extensions and
 Future Features Programming Reference, June 2020.

+\b Support for \c{bfloat16} floating-point constants. See \k{fltconst}
+and \k{pkg_fp}.
+
 \b Properly display warnings in preprocess-only mode.

 \b Fix copy-and-paste of examples from the PDF documentation.
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@ -1692,9 +1692,9 @@ context.
 \i{Floating-point} constants are acceptable only as arguments to
 \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as
 arguments to the special operators \i\c{__?float8?__},
-\i\c{__?float16?__}, \i\c{__?float32?__}, \i\c{__?float64?__},
-\i\c{__?float80m?__}, \i\c{__?float80e?__}, \i\c{__?float128l?__}, and
-\i\c{__?float128h?__}.
+\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__},
+\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__},
+\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}.

 Floating-point constants are expressed in the traditional form:
 digits, then a period, then optionally more digits, then optionally an
@ -1733,6 +1733,13 @@ appears to be the most frequently used 8-bit floating-point format,
 although it is not covered by any formal standard.  This is sometimes
 called a "\i{minifloat}."

+The \i\c{bfloat16} format is effectively a compressed version of the
+32-bit single precision format, with a reduced mantissa. It is
+effectively the same as truncating the 32-bit format to the upper 16
+bits, except for rounding. There is no \c{D}\e{x} directive that
+corresponds to \c{bfloat16} as it obviously has the same size as the
+IEEE standard 16-bit half precision format; see, however, \k{pkg_fp}.
+
 The special operators are used to produce floating-point numbers in
 other contexts.  They produce the binary representation of a specific
 floating-point number as an integer, and can use anywhere integer
@ -4633,6 +4640,7 @@ This packages contains the following floating-point convenience macros:
 \c
 \c %define float8(x)       __?float8?__(x)
 \c %define float16(x)      __?float16?__(x)
+\c %define bfloat16(x)     __?bfloat16?__(x)
 \c %define float32(x)      __?float32?__(x)
 \c %define float64(x)      __?float64?__(x)
 \c %define float80m(x)     __?float80m?__(x)
@ -4640,6 +4648,12 @@ This packages contains the following floating-point convenience macros:
 \c %define float128l(x)    __?float128l?__(x)
 \c %define float128h(x)    __?float128h?__(x)

+It also defines a multi-line macro \i\c{bf16} that can be used
+in a similar way to the \c{D}\e{x} directives for the other
+floating-point numbers:
+
+\c      bf16 -3.1415, NaN, 2000.0, +Inf
+

 \H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions}

--- a/include/nasm.h
+++ b/include/nasm.h
@ -196,15 +196,18 @@ enum token_type { /* token types, other than chars */
    TOKEN_OPMASK        /* translated token for opmask registers */
 };

+/* Must match the fp_formats[] array in asm/floats.c */
 enum floatize {
    FLOAT_8,
    FLOAT_16,
+    FLOAT_B16,
    FLOAT_32,
    FLOAT_64,
    FLOAT_80M,
    FLOAT_80E,
    FLOAT_128L,
-    FLOAT_128H
+    FLOAT_128H,
+    FLOAT_ERR                   /* Invalid format, MUST BE LAST */
 };

 /* Must match the list in string_transform(), in strfunc.c */
--- a/macros/fp.mac
+++ b/macros/fp.mac
@ -1,6 +1,6 @@
 ;; --------------------------------------------------------------------------
 ;;   
-;;   Copyright 2010 The NASM Authors - All Rights Reserved
+;;   Copyright 2010-2020 The NASM Authors - All Rights Reserved
 ;;   See the file AUTHORS included with the NASM distribution for
 ;;   the specific copyright holders.
 ;;
@ -46,9 +46,17 @@ USE: fp

 %define float8(x)	__?float8?__(x)
 %define float16(x)	__?float16?__(x)
+%define bfloat16(x)     __?bfloat16?__(x)
 %define float32(x)	__?float32?__(x)
 %define float64(x)	__?float64?__(x)
 %define float80m(x)	__?float80m?__(x)
 %define float80e(x)	__?float80e?__(x)
 %define float128l(x)	__?float128l?__(x)
 %define float128h(x)	__?float128h?__(x)
+
+%imacro bf16 1-*.nolist
+  %rep %0
+    dw __?bfloat16?__(%1)
+    %rotate 1
+  %endrep
+%endmacro
--- a/test/float.asm
+++ b/test/float.asm
@ -5,6 +5,8 @@
 ; Test of floating-point formats
 ;

+%use fp
+
 ; 8-bit
 	db 1.0
 	db +1.0
@ -65,6 +67,37 @@
 	dw __SNaN__
 	dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5

+; 16-bit bfloat
+	bf16 1.0
+	bf16 +1.0
+	bf16 -1.0
+	bf16 1.5
+	bf16 +1.5
+	bf16 -1.5
+	bf16 0.0
+	bf16 +0.0
+	bf16 -0.0
+	bf16 1.83203125
+	bf16 +1.83203125
+	bf16 -1.83203125
+	bf16 1.83203125e15
+	bf16 +1.83203125e15
+	bf16 -1.83203125e15
+	bf16 1.83203125e-15
+	bf16 +1.83203125e-15
+	bf16 -1.83203125e-15
+	bf16 1.83203125e-40		; Denormal!
+	bf16 +1.83203125e-40		; Denormal!
+	bf16 -1.83203125e-40		; Denormal!
+	bf16 __Infinity__
+	bf16 +__Infinity__
+	bf16 -__Infinity__
+	bf16 __NaN__
+	bf16 __QNaN__
+	bf16 __SNaN__
+	bf16 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	bf16 -3.1415, NaN, 2000.0, +Inf
+
 ; 32-bit
 	dd 1.0
 	dd +1.0
@ -94,6 +127,7 @@
 	dd __QNaN__
 	dd __SNaN__
 	dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	dd -3.1415, NaN, 2000.0, +Inf

 ; 64-bit
 	dq 1.0
@ -124,7 +158,7 @@
 	dq __QNaN__
 	dq __SNaN__
 	dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
-	
+
 ; 80-bit
 	dt 1.0
 	dt +1.0