From d081f0db5d491ee473fdb97b109dd9810b68d9b7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 23:11:03 -0700
Subject: [PATCH] fp: support bfloat16 constants

Support generating bfloat16 constants. This is a bit awkward, as "DW"
already generates IEEE half precision constants; therefore there is no
longer a single floating-point format for each size. This requires
some replumbing.

Fortunately bfloat16 fits in 64 bits, so support generating them with
a macro that uses __?bfloat16?__() to convert to integers first before
passing them to DW.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 asm/eval.c      | 25 +++++++----------
 asm/floats.c    | 74 ++++++++++++++++++++++++++-----------------------
 asm/floats.h    | 17 ++++++++++--
 asm/parser.c    |  7 +++--
 asm/tokens.dat  |  3 ++
 doc/changes.src |  3 ++
 doc/nasmdoc.src | 20 +++++++++++--
 include/nasm.h  |  5 +++-
 macros/fp.mac   | 10 ++++++-
 test/float.asm  | 36 +++++++++++++++++++++++-
 10 files changed, 140 insertions(+), 60 deletions(-)

diff --git a/asm/eval.c b/asm/eval.c
index cd3c526d..5d6ee1e7 100644
--- a/asm/eval.c
+++ b/asm/eval.c
@@ -694,21 +694,13 @@ static expr *expr5(void)
 static expr *eval_floatize(enum floatize type)
 {
     uint8_t result[16], *p;     /* Up to 128 bits */
-    static const struct {
-        int bytes, start, len;
-    } formats[] = {
-        {  1, 0, 1 },           /* FLOAT_8 */
-        {  2, 0, 2 },           /* FLOAT_16 */
-        {  4, 0, 4 },           /* FLOAT_32 */
-        {  8, 0, 8 },           /* FLOAT_64 */
-        { 10, 0, 8 },           /* FLOAT_80M */
-        { 10, 8, 2 },           /* FLOAT_80E */
-        { 16, 0, 8 },           /* FLOAT_128L */
-        { 16, 8, 8 },           /* FLOAT_128H */
-    };
     int sign = 1;
     int64_t val;
+    size_t len;
     int i;
+    const struct ieee_format *fmt;
+
+    fmt = &fp_formats[type];
 
     scan();
     if (tt != '(') {
@@ -724,7 +716,7 @@ static expr *eval_floatize(enum floatize type)
         nasm_nonfatal("expecting floating-point number");
         return NULL;
     }
-    if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes))
+    if (!float_const(tokval->t_charptr, sign, result, type))
         return NULL;
     scan();
     if (tt != ')') {
@@ -732,9 +724,12 @@ static expr *eval_floatize(enum floatize type)
         return NULL;
     }
 
-    p = result+formats[type].start+formats[type].len;
+    len = fmt->bytes - fmt->offset;
+    if (len > 8)
+        len = 8;                /* Max 64 bits */
+    p = result + fmt->offset + len; /* One past the selected slice */
     val = 0;
-    for (i = formats[type].len; i; i--) {
+    for (i = len; i; i--) {
         p--;
         val = (val << 8) + *p;
     }
diff --git a/asm/floats.c b/asm/floats.c
index adc6afbf..27180bdc 100644
--- a/asm/floats.c
+++ b/asm/floats.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -629,13 +629,6 @@ static void ieee_shr(fp_limb *mant, int i)
    - the sign bit plus exponent fit in 16 bits.
    - the exponent bias is 2^(n-1)-1 for an n-bit exponent */
 
-struct ieee_format {
-    int bytes;
-    int mantissa;               /* Fractional bits in the mantissa */
-    int explicit;               /* Explicit integer */
-    int exponent;               /* Bits in the exponent */
-};
-
 /*
  * The 16- and 128-bit formats are expected to be in IEEE 754r.
  * AMD SSE5 uses the 16-bit format.
@@ -646,13 +639,31 @@ struct ieee_format {
  *
  * The 8-bit format appears to be the consensus 8-bit floating-point
  * format.  It is apparently used in graphics applications.
+ *
+ * The b16 format is a 16-bit format with smaller mantissa and larger
+ * exponent field.  It is effectively a truncated version of the standard
+ * IEEE 32-bit (single) format, but is explicitly supported here in
+ * order to support proper rounding.
+ *
+ * This array must correspond to enum floatize in include/nasm.h.
+ * Note that there are some formats which have more than one enum;
+ * both need to be listed here with the appropriate offset into the
+ * floating-point byte array (used for the floatize operators.)
+ *
+ * FLOAT_ERR is a value that represents both "invalid format" and the
+ * size of this array.
  */
-static const struct ieee_format ieee_8   = {  1,   3, 0,  4 };
-static const struct ieee_format ieee_16  = {  2,  10, 0,  5 };
-static const struct ieee_format ieee_32  = {  4,  23, 0,  8 };
-static const struct ieee_format ieee_64  = {  8,  52, 0, 11 };
-static const struct ieee_format ieee_80  = { 10,  63, 1, 15 };
-static const struct ieee_format ieee_128 = { 16, 112, 0, 15 };
+const struct ieee_format fp_formats[FLOAT_ERR] = {
+    {  1,   3, 0,  4, 0 },         /* FLOAT_8 */
+    {  2,  10, 0,  5, 0 },         /* FLOAT_16 */
+    {  2,   7, 0,  8, 0 },         /* FLOAT_B16 */
+    {  4,  23, 0,  8, 0 },         /* FLOAT_32 */
+    {  8,  52, 0, 11, 0 },         /* FLOAT_64 */
+    { 10,  63, 1, 15, 0 },         /* FLOAT_80M */
+    { 10,  63, 1, 15, 8 },         /* FLOAT_80E */
+    { 16, 112, 0, 15, 0 },         /* FLOAT_128L */
+    { 16, 112, 0, 15, 8 }          /* FLOAT_128H */
+};
 
 /* Types of values we can generate */
 enum floats {
@@ -672,7 +683,7 @@ static int to_packed_bcd(const char *str, const char *p,
     char c;
     int tv = -1;
 
-    if (fmt != &ieee_80) {
+    if (fmt->bytes != 10) {
         nasm_nonfatal("packed BCD requires an 80-bit format");
         return 0;
     }
@@ -711,9 +722,9 @@ static int to_packed_bcd(const char *str, const char *p,
     return 1;                   /* success */
 }
 
-static int to_float(const char *str, int s, uint8_t *result,
-                    const struct ieee_format *fmt)
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt)
 {
+    const struct ieee_format *fmt = &fp_formats[ffmt];
     fp_limb mant[MANT_LIMBS];
     int32_t exponent = 0;
     const int32_t expmax = 1 << (fmt->exponent - 1);
@@ -902,25 +913,20 @@ static int to_float(const char *str, int s, uint8_t *result,
     return 1;                   /* success */
 }
 
-int float_const(const char *number, int sign, uint8_t *result, int bytes)
+/*
+ * Get the default floating point format for this specific field size.
+ * Used for the Dx pseudoops.
+ */
+enum floatize float_deffmt(int bytes)
 {
-    switch (bytes) {
-    case 1:
-        return to_float(number, sign, result, &ieee_8);
-    case 2:
-        return to_float(number, sign, result, &ieee_16);
-    case 4:
-        return to_float(number, sign, result, &ieee_32);
-    case 8:
-        return to_float(number, sign, result, &ieee_64);
-    case 10:
-        return to_float(number, sign, result, &ieee_80);
-    case 16:
-        return to_float(number, sign, result, &ieee_128);
-    default:
-        nasm_panic("strange value %d passed to float_const", bytes);
-        return 0;
+    enum floatize type;
+
+    for (type = 0; type < FLOAT_ERR; type++) {
+        if (fp_formats[type].bytes == bytes)
+            break;
     }
+
+    return type;                /* FLOAT_ERR if invalid */
 }
 
 /* Set floating-point options */
diff --git a/asm/floats.h b/asm/floats.h
index 4f80acac..c4635136 100644
--- a/asm/floats.h
+++ b/asm/floats.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *   
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -48,7 +48,20 @@ enum float_round {
     FLOAT_RC_UP
 };
 
-int float_const(const char *string, int sign, uint8_t *result, int bytes);
+/* Note: enum floatize and FLOAT_ERR are defined in nasm.h */
+
+/* Floating-point format description */
+struct ieee_format {
+    int bytes;                  /* Total bytes */
+    int mantissa;               /* Fractional bits in the mantissa */
+    int explicit;               /* Explicit integer */
+    int exponent;               /* Bits in the exponent */
+    int offset;                 /* Offset into byte array for floatize op */
+};
+extern const struct ieee_format fp_formats[FLOAT_ERR];
+
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt);
+enum floatize float_deffmt(int bytes);
 int float_option(const char *option);
 
 #endif /* NASM_FLOATS_H */
diff --git a/asm/parser.c b/asm/parser.c
index 47b46ecd..dbd2240c 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -531,10 +531,12 @@ static int parse_eops(extop **result, bool critical, int elem)
                 goto is_float;
             }
         } else if (i == TOKEN_FLOAT) {
+            enum floatize fmt;
         is_float:
             eop->type = EOT_DB_FLOAT;
 
-            if (eop->elem > 16) {
+            fmt = float_deffmt(eop->elem);
+            if (fmt == FLOAT_ERR) {
                 nasm_nonfatal("no %d-bit floating-point format supported",
                               eop->elem << 3);
                 eop->val.string.len = 0;
@@ -552,8 +554,7 @@ static int parse_eops(extop **result, bool critical, int elem)
                 eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
                 eop->val.string.data = (char *)eop + sizeof(extop);
                 if (!float_const(tokval.t_charptr, sign,
-                                 (uint8_t *)eop->val.string.data,
-                                 eop->val.string.len))
+                                 (uint8_t *)eop->val.string.data, fmt))
                     eop->val.string.len = 0;
             }
             if (!eop->val.string.len)
diff --git a/asm/tokens.dat b/asm/tokens.dat
index ab37dcc1..356b39a2 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -113,6 +113,9 @@ __?float80e?__
 __?float128l?__
 __?float128h?__
 
+% TOKEN_FLOATIZE, 0, 0, FLOAT_B{__?bfloat*?__}
+__?bfloat16?__
+
 % TOKEN_STRFUNC, 0, 0, STRFUNC_{__?*?__}
 __?utf16?__
 __?utf16le?__
diff --git a/doc/changes.src b/doc/changes.src
index cf95224a..c1459231 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -12,6 +12,9 @@ since 2007.
 \b Add instructions from the Intel Instruction Set Extensions and
 Future Features Programming Reference, June 2020.
 
+\b Support for \c{bfloat16} floating-point constants. See \k{fltconst}
+and \k{pkg_fp}.
+
 \b Properly display warnings in preprocess-only mode.
 
 \b Fix copy-and-paste of examples from the PDF documentation.
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index cfa92fd1..e3d503c5 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -1692,9 +1692,9 @@ context.
 \i{Floating-point} constants are acceptable only as arguments to
 \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as
 arguments to the special operators \i\c{__?float8?__},
-\i\c{__?float16?__}, \i\c{__?float32?__}, \i\c{__?float64?__},
-\i\c{__?float80m?__}, \i\c{__?float80e?__}, \i\c{__?float128l?__}, and
-\i\c{__?float128h?__}.
+\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__},
+\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__},
+\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}.
 
 Floating-point constants are expressed in the traditional form:
 digits, then a period, then optionally more digits, then optionally an
@@ -1733,6 +1733,13 @@ appears to be the most frequently used 8-bit floating-point format,
 although it is not covered by any formal standard.  This is sometimes
 called a "\i{minifloat}."
 
+The \i\c{bfloat16} format is effectively a compressed version of the
+32-bit single precision format, with a reduced mantissa. It is
+effectively the same as truncating the 32-bit format to the upper 16
+bits, except for rounding. There is no \c{D}\e{x} directive that
+corresponds to \c{bfloat16}, as it has the same size as the IEEE
+standard 16-bit half precision format; see, however, \k{pkg_fp}.
+
 The special operators are used to produce floating-point numbers in
 other contexts.  They produce the binary representation of a specific
 floating-point number as an integer, and can use anywhere integer
@@ -4633,6 +4640,7 @@ This packages contains the following floating-point convenience macros:
 \c
 \c %define float8(x)       __?float8?__(x)
 \c %define float16(x)      __?float16?__(x)
+\c %define bfloat16(x)     __?bfloat16?__(x)
 \c %define float32(x)      __?float32?__(x)
 \c %define float64(x)      __?float64?__(x)
 \c %define float80m(x)     __?float80m?__(x)
@@ -4640,6 +4648,12 @@ This packages contains the following floating-point convenience macros:
 \c %define float128l(x)    __?float128l?__(x)
 \c %define float128h(x)    __?float128h?__(x)
 
+It also defines a multi-line macro \i\c{bf16} that can be used
+in a similar way to the \c{D}\e{x} directives for the other
+floating-point numbers:
+
+\c      bf16 -3.1415, NaN, 2000.0, +Inf
+
 
 \H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions}
 
diff --git a/include/nasm.h b/include/nasm.h
index 6cffaf5d..950ac45b 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -196,15 +196,18 @@ enum token_type { /* token types, other than chars */
     TOKEN_OPMASK        /* translated token for opmask registers */
 };
 
+/* Must match the fp_formats[] array in asm/floats.c */
 enum floatize {
     FLOAT_8,
     FLOAT_16,
+    FLOAT_B16,
     FLOAT_32,
     FLOAT_64,
     FLOAT_80M,
     FLOAT_80E,
     FLOAT_128L,
-    FLOAT_128H
+    FLOAT_128H,
+    FLOAT_ERR                   /* Invalid format, MUST BE LAST */
 };
 
 /* Must match the list in string_transform(), in strfunc.c */
diff --git a/macros/fp.mac b/macros/fp.mac
index eb297014..3a094a5c 100644
--- a/macros/fp.mac
+++ b/macros/fp.mac
@@ -1,6 +1,6 @@
 ;; --------------------------------------------------------------------------
 ;;   
-;;   Copyright 2010 The NASM Authors - All Rights Reserved
+;;   Copyright 2010-2020 The NASM Authors - All Rights Reserved
 ;;   See the file AUTHORS included with the NASM distribution for
 ;;   the specific copyright holders.
 ;;
@@ -46,9 +46,17 @@ USE: fp
 
 %define float8(x)	__?float8?__(x)
 %define float16(x)	__?float16?__(x)
+%define bfloat16(x)     __?bfloat16?__(x)
 %define float32(x)	__?float32?__(x)
 %define float64(x)	__?float64?__(x)
 %define float80m(x)	__?float80m?__(x)
 %define float80e(x)	__?float80e?__(x)
 %define float128l(x)	__?float128l?__(x)
 %define float128h(x)	__?float128h?__(x)
+
+%imacro bf16 1-*.nolist
+  %rep %0
+    dw __?bfloat16?__(%1)
+    %rotate 1
+  %endrep
+%endmacro
diff --git a/test/float.asm b/test/float.asm
index 88519b2e..1dd92a96 100644
--- a/test/float.asm
+++ b/test/float.asm
@@ -5,6 +5,8 @@
 ; Test of floating-point formats
 ;
 
+%use fp
+
 ; 8-bit
 	db 1.0
 	db +1.0
@@ -65,6 +67,37 @@
 	dw __SNaN__
 	dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
 
+; 16-bit bfloat
+	bf16 1.0
+	bf16 +1.0
+	bf16 -1.0
+	bf16 1.5
+	bf16 +1.5
+	bf16 -1.5
+	bf16 0.0
+	bf16 +0.0
+	bf16 -0.0
+	bf16 1.83203125
+	bf16 +1.83203125
+	bf16 -1.83203125
+	bf16 1.83203125e15
+	bf16 +1.83203125e15
+	bf16 -1.83203125e15
+	bf16 1.83203125e-15
+	bf16 +1.83203125e-15
+	bf16 -1.83203125e-15
+	bf16 1.83203125e-40		; Denormal!
+	bf16 +1.83203125e-40		; Denormal!
+	bf16 -1.83203125e-40		; Denormal!
+	bf16 __Infinity__
+	bf16 +__Infinity__
+	bf16 -__Infinity__
+	bf16 __NaN__
+	bf16 __QNaN__
+	bf16 __SNaN__
+	bf16 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	bf16 -3.1415, NaN, 2000.0, +Inf
+
 ; 32-bit
 	dd 1.0
 	dd +1.0
@@ -94,6 +127,7 @@
 	dd __QNaN__
 	dd __SNaN__
 	dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	dd -3.1415, NaN, 2000.0, +Inf
 
 ; 64-bit
 	dq 1.0
@@ -124,7 +158,7 @@
 	dq __QNaN__
 	dq __SNaN__
 	dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
-	
+
 ; 80-bit
 	dt 1.0
 	dt +1.0