From 487f352b6222c79fd6c719d2c00ab6087c6b6b3c Mon Sep 17 00:00:00 2001
From: Jin Kyu Song <jin.kyu.song@intel.com>
Date: Wed, 27 Nov 2013 14:10:40 -0800
Subject: [PATCH] stdscan: Rework curly brace parsing routines

As recommended by the community, comma-separated decorators ({k1,z})
and nested braces ({{k1},{z}}) are no longer supported. Only the standard
syntax ({k1}{z}) is accepted from now on.

This rework makes the source code neater and easier to maintain. Most of the
code that handled corner cases has been removed.

Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com>
---
 nasm.h     |   6 +++
 parser.c   |   2 +-
 stdscan.c  | 110 ++++++++++++++++++++++-------------------------------
 tokhash.pl |   1 +
 4 files changed, 54 insertions(+), 65 deletions(-)

diff --git a/nasm.h b/nasm.h
index cb786f80..87e82d26 100644
--- a/nasm.h
+++ b/nasm.h
@@ -65,6 +65,7 @@
 #endif
 
 #define IDLEN_MAX 4096
+#define DECOLEN_MAX 32
 
 /*
  * Name pollution problems: <time.h> on Digital UNIX pulls in some
@@ -421,6 +422,8 @@ extern struct preproc_ops preproc_nop;
  * identifier. E.g. a period may only appear at the start of an identifier
  * (for local labels), whereas a number may appear anywhere *but* at the
  * start.
+ * isbrcchar matches any character that may be placed inside curly braces as a
+ * decorator. E.g. {rn-sae}, {1to8}, {k1}{z}
  */
 
 #define isidstart(c) (nasm_isalpha(c)   ||  \
@@ -435,6 +438,9 @@ extern struct preproc_ops preproc_nop;
                      (c) == '#'         ||  \
                      (c) == '~')
 
+#define isbrcchar(c) (isidchar(c)       ||  \
+                      (c) == '-')
+
 /* Ditto for numeric constants. */
 
 #define isnumstart(c)  (nasm_isdigit(c) || (c) == '$')
diff --git a/parser.c b/parser.c
index 343f35e5..f73c7b5a 100644
--- a/parser.c
+++ b/parser.c
@@ -200,7 +200,7 @@ static void process_size_override(insn *result, operand *op)
  * when two or more decorators follow a register operand,
  * consecutive decorators are parsed here.
  * opmask and zeroing decorators can be placed in any order.
- * e.g. zmm1 {k2}{z} or zmm2 {z,k3}
+ * e.g. zmm1 {k2}{z} or zmm2 {z}{k3}
  * decorator(s) are placed at the end of an operand.
  */
 static bool parse_braces(decoflags_t *decoflags)
diff --git a/stdscan.c b/stdscan.c
index b5e389da..ea7537dd 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -53,8 +53,6 @@
 static char *stdscan_bufptr = NULL;
 static char **stdscan_tempstorage = NULL;
 static int stdscan_tempsize = 0, stdscan_templen = 0;
-static int brace = 0;               /* nested brace counter */
-static bool brace_opened = false;   /* if brace is just opened */
 #define STDSCAN_TEMP_DELTA 256
 
 void stdscan_set(char *str)
@@ -110,7 +108,6 @@ static char *stdscan_copy(char *p, int len)
 /*
  * a token is enclosed with braces. proper token type will be assigned
  * accordingly with the token flag.
- * a closing brace is treated as an ending character of corresponding token.
  */
 static int stdscan_handle_brace(struct tokenval *tv)
 {
@@ -126,18 +123,6 @@ static int stdscan_handle_brace(struct tokenval *tv)
         }
     }
 
-    stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
-
-    if (stdscan_bufptr[0] == '}') {
-        stdscan_bufptr ++;      /* skip the closing brace */
-        brace --;
-    } else if (stdscan_bufptr[0] != ',') {
-        /* treat {foo,bar} as {foo}{bar}
-         * by regarding ',' as a mere separator between decorators
-         */
-        nasm_error(ERR_NONFATAL, "closing brace expected");
-        tv->t_type = TOKEN_INVALID;
-    }
     return tv->t_type;
 }
 
@@ -148,23 +133,16 @@ int stdscan(void *private_data, struct tokenval *tv)
     (void)private_data;         /* Don't warn that this parameter is unused */
 
     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
-    if (!*stdscan_bufptr) {
-        /* nested brace shouldn't affect following lines */
-        brace = 0;
+    if (!*stdscan_bufptr)
         return tv->t_type = TOKEN_EOS;
-    }
 
     /* we have a token; either an id, a number or a char */
     if (isidstart(*stdscan_bufptr) ||
-        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1])) ||
-        (brace && isidchar(*stdscan_bufptr))) {     /* because of {1to8} */
+        (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
         /* now we've got an identifier */
         bool is_sym = false;
         int token_type;
 
-        /* opening brace is followed by any letter */
-        brace_opened = false;
-
         if (*stdscan_bufptr == '$') {
             is_sym = true;
             stdscan_bufptr++;
@@ -172,8 +150,7 @@ int stdscan(void *private_data, struct tokenval *tv)
 
         r = stdscan_bufptr++;
         /* read the entire buffer to advance the buffer pointer but... */
-        /* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens. */
-        while (isidchar(*stdscan_bufptr) || (brace && *stdscan_bufptr == '-'))
+        while (isidchar(*stdscan_bufptr))
             stdscan_bufptr++;
 
         /* ... copy only up to IDLEN_MAX-1 characters */
@@ -190,16 +167,11 @@ int stdscan(void *private_data, struct tokenval *tv)
          * is it actually a register or instruction name, or what? */
         token_type = nasm_token_hash(ourcopy, tv);
 
-        if (likely(!brace)) {
-            if (likely(!(tv->t_flag & TFLAG_BRC))) {
-                /* most of the tokens fall into this case */
-                return token_type;
-            } else {
-                return tv->t_type = TOKEN_ID;
-            }
+        if (likely(!(tv->t_flag & TFLAG_BRC))) {
+            /* most of the tokens fall into this case */
+            return token_type;
         } else {
-            /* handle tokens inside braces */
-            return stdscan_handle_brace(tv);
+            return tv->t_type = TOKEN_ID;
         }
     } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
         /*
@@ -285,6 +257,45 @@ int stdscan(void *private_data, struct tokenval *tv)
             return tv->t_type = TOKEN_ERRSTR;
         stdscan_bufptr++;       /* Skip final quote */
         return tv->t_type = TOKEN_STR;
+    } else if (*stdscan_bufptr == '{') {
+        /* now we've got a decorator */
+        int token_len;
+
+        stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+
+        r = ++stdscan_bufptr;
+        /*
+         * read the entire buffer to advance the buffer pointer
+         * {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
+         */
+        while (isbrcchar(*stdscan_bufptr))
+            stdscan_bufptr++;
+
+        token_len = stdscan_bufptr - r;
+
+        /* ... copy only up to DECOLEN_MAX-1 characters */
+        tv->t_charptr = stdscan_copy(r, token_len < DECOLEN_MAX ?
+                                        token_len : DECOLEN_MAX - 1);
+
+        stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+        /* if brace is not closed properly or token is too long  */
+        if ((*stdscan_bufptr != '}') || (token_len > MAX_KEYWORD)) {
+            nasm_error(ERR_NONFATAL,
+                       "invalid decorator token inside braces");
+            return tv->t_type = TOKEN_INVALID;
+        }
+
+        stdscan_bufptr++;       /* skip closing brace */
+
+        for (s = tv->t_charptr, r = ourcopy; *s; s++)
+            *r++ = nasm_tolower(*s);
+        *r = '\0';
+
+        /* right, so we have a decorator sitting in temp storage. */
+        nasm_token_hash(ourcopy, tv);
+
+        /* handle tokens inside braces */
+        return stdscan_handle_brace(tv);
     } else if (*stdscan_bufptr == ';') {
         /* a comment has happened - stay */
         return tv->t_type = TOKEN_EOS;
@@ -324,35 +335,6 @@ int stdscan(void *private_data, struct tokenval *tv)
     } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
         stdscan_bufptr += 2;
         return tv->t_type = TOKEN_DBL_OR;
-    } else if (stdscan_bufptr[0] == '{') {
-        stdscan_bufptr ++;      /* skip the opening brace */
-        brace ++;               /* in case of nested braces */
-        brace_opened = true;    /* brace is just opened */
-        return stdscan(private_data, tv);
-    } else if (stdscan_bufptr[0] == ',' && brace) {
-        /*
-         * a comma inside braces should be treated just as a separator.
-         * this is almost same as an opening brace except increasing counter.
-         */
-        stdscan_bufptr ++;
-        brace_opened = true;    /* brace is just opened */
-        return stdscan(private_data, tv);
-    } else if (stdscan_bufptr[0] == '}') {
-        stdscan_bufptr ++;      /* skip the closing brace */
-        if (brace) {
-            /* unhandled nested closing brace */
-            brace --;
-            /* if brace is closed without any content in it */
-            if (brace_opened) {
-                brace_opened = false;
-                nasm_error(ERR_NONFATAL, "nothing inside braces");
-            }
-            return stdscan(private_data, tv);
-        } else {
-            /* redundant closing brace */
-            return tv->t_type = TOKEN_INVALID;
-        }
-        return stdscan(private_data, tv);
     } else                      /* just an ordinary char */
         return tv->t_type = (uint8_t)(*stdscan_bufptr++);
 }
diff --git a/tokhash.pl b/tokhash.pl
index 4ea387d9..60bd258e 100755
--- a/tokhash.pl
+++ b/tokhash.pl
@@ -260,6 +260,7 @@ if ($output eq 'h') {
     print  "    uint16_t ix;\n";
     print  "    const struct tokendata *data;\n";
     print  "\n";
+    printf "    tv->t_flag = 0;\n";
     printf "    crc = crc64(UINT64_C(0x%08x%08x), token);\n",
 	$$sv[0], $$sv[1];
     print  "    k1 = (uint32_t)crc;\n";