From 516fbba03e75cd706ea383ecb8c5ecd83eceeaf6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 21 Nov 2022 10:47:08 -0800 Subject: [PATCH] preproc: add conditional-string smacro parameters; simplify functions Add the option of having strings only conditionally quoted (&&) -- do not quote an already quoted string again -- as opposed to always quoting a string. This makes a lot of the string functions way simpler to implement, and removes the need to share ad hoc parsing code with directives. Signed-off-by: H. Peter Anvin --- asm/preproc.c | 168 ++++++++++++++++++++++++++---------------------- doc/nasmdoc.src | 29 ++++++--- 2 files changed, 110 insertions(+), 87 deletions(-) diff --git a/asm/preproc.c b/asm/preproc.c index c59ba4fb..a613af2e 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -206,13 +206,14 @@ typedef Token *(*ExpandSMacro)(const SMacro *s, Token **params, int nparams); * if SPARM_GREEDY is set. */ enum sparmflags { - SPARM_PLAIN = 0, - SPARM_EVAL = 1, /* Evaluate as a numeric expression (=) */ - SPARM_STR = 2, /* Convert to quoted string ($) */ - SPARM_NOSTRIP = 4, /* Don't strip braces (!) */ - SPARM_GREEDY = 8, /* Greedy final parameter (+) */ - SPARM_VARADIC = 16, /* Any number of separate arguments */ - SPARM_OPTIONAL = 32 /* Optional argument */ + SPARM_PLAIN = 0, + SPARM_EVAL = 1, /* Evaluate as a numeric expression (=) */ + SPARM_STR = 2, /* Convert to quoted string ($) */ + SPARM_NOSTRIP = 4, /* Don't strip braces (!) */ + SPARM_GREEDY = 8, /* Greedy final parameter (+) */ + SPARM_VARADIC = 16, /* Any number of separate arguments */ + SPARM_OPTIONAL = 32, /* Optional argument */ + SPARM_CONDQUOTE = 64 /* With SPARM_STR, don't re-quote a string */ }; struct smac_param { @@ -2875,11 +2876,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m) if (m->nparam) { /* * Space for ( and either , or ) around each - * parameter, plus up to 4 flags. + * parameter, plus up to 5 flags. 
*/ int i; - size += 1 + 4 * m->nparam; + size += 1 + 5 * m->nparam; for (i = 0; i < m->nparam; i++) size += m->params[i].name.len; } @@ -2910,8 +2911,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m) if (flags & SPARM_NOSTRIP) *--p = '!'; - if (flags & SPARM_STR) + if (flags & SPARM_STR) { *--p = '&'; + if (flags & SPARM_CONDQUOTE) + *--p = '&'; + } if (flags & SPARM_EVAL) *--p = '='; *--p = ','; @@ -3019,6 +3023,9 @@ static int parse_smacro_template(Token ***tpp, SMacro *tmpl) case '&': flags |= SPARM_STR; break; + case TOKEN_DBL_AND: + flags |= SPARM_STR|SPARM_CONDQUOTE; + break; case '!': flags |= SPARM_NOSTRIP; break; @@ -3691,15 +3698,16 @@ err: return res; } + /* * Implement substring extraction as used by the %substr directive * and function. */ +static Token *pp_substr_common(Token *t, int64_t start, int64_t count); + static Token *pp_substr(Token *tline, const char *dname) { int64_t start, count; - const char *txt; - size_t len; struct ppscan pps; Token *t; Token *res = NULL; @@ -3730,7 +3738,7 @@ static Token *pp_substr(Token *tline, const char *dname) nasm_nonfatal("non-constant value given to `%s'", dname); goto err; } - start = evalresult->value - 1; + start = evalresult->value; pps.tptr = skip_white(pps.tptr); if (!pps.tptr) { @@ -3747,10 +3755,24 @@ static Token *pp_substr(Token *tline, const char *dname) count = evalresult->value; } + res = pp_substr_common(t, start, count); + +err: + free_tlist(tline); + return res; +} + +static Token *pp_substr_common(Token *t, int64_t start, int64_t count) +{ + size_t len; + const char *txt; + unquote_token(t); len = t->len; /* make start and count being in range */ + start -= 1; /* First character is 1 */ + if (start < 0) start = 0; if (count < 0) @@ -3761,10 +3783,7 @@ static Token *pp_substr(Token *tline, const char *dname) start = -1, count = 0; /* empty string */ txt = (start < 0) ? 
"" : tok_text(t) + start; - res = make_tok_qstr_len(NULL, txt, count); -err: - free_tlist(tline); - return res; + return make_tok_qstr_len(NULL, txt, count); } /** @@ -5893,14 +5912,19 @@ static SMacro *expand_one_smacro(Token ***tpp) if (flags & SPARM_STR) { /* Convert expansion to a quoted string */ - char *arg; Token *qs; qs = expand_smacro_noreset(params[i]); - arg = detoken(qs, false); - free_tlist(qs); - params[i] = make_tok_qstr(NULL, arg); - nasm_free(arg); + if ((flags & SPARM_CONDQUOTE) && + tok_is(qs, TOKEN_STR) && !qs->next) { + /* A single quoted string token */ + params[i] = qs; + } else { + char *arg = detoken(qs, false); + free_tlist(qs); + params[i] = make_tok_qstr(NULL, arg); + nasm_free(arg); + } } } } @@ -7051,78 +7075,61 @@ stdmac_join(const SMacro *s, Token **params, int nparams) static Token * stdmac_strcat(const SMacro *s, Token **params, int nparams) { - Token *tline; - (void)nparams; + int i; + size_t len = 0; + char *str, *p; - tline = params[0]; - params[0] = NULL; /* Don't free this later */ - return pp_strcat(expand_smacro_noreset(tline), s->name); + (void)s; + + for (i = 0; i < nparams; i++) { + unquote_token(params[i]); + len += params[i]->len; + } + + nasm_newn(str, len+1); + p = str; + + for (i = 0; i < nparams; i++) { + p = mempcpy(p, tok_text(params[i]), params[i]->len); + } + + return make_tok_qstr_len(NULL, str, len); } /* %substr() function */ static Token * stdmac_substr(const SMacro *s, Token **params, int nparams) { - Token *tline; + int64_t start, count; + (void)nparams; + (void)s; - tline = params[0]; - params[0] = NULL; /* Don't free this later */ - return pp_substr(expand_smacro_noreset(tline), s->name); -} + start = get_tok_num(params[1], NULL); + count = get_tok_num(params[2], NULL); -/* Expand a the argument and enforce it being a single quoted string */ -static Token *expand_to_string(Token **tp, const char *dname) -{ - Token *tlist, *t; - - tlist = *tp; - *tp = NULL; /* Don't free this later */ - t = 
zap_white(expand_smacro_noreset(tlist)); - - if (!tok_is(t, TOKEN_STR)) { - nasm_nonfatal("`%s' requires string as parameter", dname); - return NULL; - } - - t->next = zap_white(t->next); - if (t->next) { - nasm_nonfatal("`%s' requires exactly one string as parameter", dname); - return NULL; - } - - return t; + return pp_substr_common(params[0], start, count); } /* %strlen() function */ static Token * stdmac_strlen(const SMacro *s, Token **params, int nparams) { - Token *t; - (void)nparams; + (void)s; - t = expand_to_string(¶ms[0], s->name); - if (!t) - return NULL; - - unquote_token(t); - return make_tok_num(NULL, t->len); + unquote_token(params[0]); + return make_tok_num(NULL, params[0]->len); } /* %tok() function */ static Token * stdmac_tok(const SMacro *s, Token **params, int nparams) { - Token *t; - (void)nparams; + (void)s; - t = expand_to_string(¶ms[0], s->name); - if (!t) - return NULL; - - return reverse_tokens(tokenize(unquote_token_cstr(t))); + return reverse_tokens(tokenize(unquote_token_cstr(params[0]))); } /* %cond() or %sel() */ @@ -7272,12 +7279,6 @@ struct magic_macros { ExpandSMacro func; }; -struct num_macros { - const char name[6]; - uint8_t base; - char prefix; -}; - static void pp_add_magic_stdmac(void) { static const struct magic_macros magic_macros[] = { @@ -7289,10 +7290,9 @@ static void pp_add_magic_stdmac(void) { "%count", false, 1, SPARM_VARADIC, stdmac_count }, { "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join }, { "%str", false, 1, SPARM_GREEDY|SPARM_STR, stdmac_join }, - { "%strcat", false, 1, SPARM_GREEDY, stdmac_strcat }, - { "%strlen", false, 1, 0, stdmac_strlen }, - { "%substr", false, 1, SPARM_GREEDY, stdmac_substr }, - { "%tok", false, 1, 0, stdmac_tok }, + { "%strcat", false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_strcat }, + { "%strlen", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_strlen }, + { "%tok", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_tok }, { NULL, false, 0, 0, NULL } }; const struct 
magic_macros *m; @@ -7361,6 +7361,18 @@ static void pp_add_magic_stdmac(void) tmpl.params[2].def = make_tok_num(NULL, 10); define_smacro("%num", false, NULL, &tmpl); + /* %substr() function */ + nasm_zero(tmpl); + tmpl.nparam = 3; + tmpl.expand = stdmac_substr; + tmpl.recursive = true; + nasm_newn(tmpl.params, tmpl.nparam); + tmpl.params[0].flags = SPARM_STR|SPARM_CONDQUOTE; + tmpl.params[1].flags = SPARM_EVAL; + tmpl.params[2].flags = SPARM_EVAL|SPARM_OPTIONAL; + tmpl.params[2].def = make_tok_num(NULL, -1); + define_smacro("%substr", false, NULL, &tmpl); + /* %is...() macro functions */ nasm_zero(tmpl); tmpl.nparam = 1; diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 0883be4b..bd933db5 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -2424,17 +2424,22 @@ A single pair of parentheses is a subcase of a single, unused argument: This is similar to the behavior of the C preprocessor. -\b If declared with an \c{=}, NASM will evaluate the argument as an -expression after expansion. +\b If declared with an \c{=}, NASM will expand the argument and then +evaluate it as a numeric expression. -\b If an argument declared with an \c{&}, a macro parameter will be -turned into a quoted string after expansion. +\b If declared with an \c{&}, NASM will expand the argument and then +turn it into a quoted string; if the argument already \e{is} a quoted +string, it will be quoted again. + +\b If declared with \c{&&}, NASM will expand the argument and then +turn it into a quoted string, but if the argument already is a quoted +string, it will \e{not} be re-quoted. \b If declared with a \c{+}, it is a greedy or variadic parameter; it -includes any subsequent commas and parameters. +will include any subsequent commas and parameters. \b If declared with an \c{!}, NASM will not strip whitespace and -braces (useful in conjunction with \c{&}). +braces (potentially useful in conjunction with \c{&} or \c{&&}). 
For example: @@ -2849,7 +2854,9 @@ means "until N-1 characters before the end of string", i.e. \c{-1} means until end of string, \c{-2} until one character before, etc. The corresponding preprocessor function is \c{%substr()}, see -\k{f_substr}. +\k{f_substr}, however please note that the default value for the +length parameter, if omitted, is \c{-1} rather than \c{1} for +\c{%substr()}. \H{ppfunc} \i{Preprocessor Functions} @@ -3016,13 +3023,17 @@ in the same way the \i\c{%strlen} directive would, see \k{strlen}. The \c{%substr()} function extracts a substring of a quoted string, in the same way the \i\c{%substr} directive would, see \k{substr}. Note that unlike the \c{%substr} directive, a comma is required after the -string argument. +that unlike the \c{%substr} directive, commas are required between all +parameters, including after the string argument, and that the +default for the length argument, if omitted, is \c{-1} (i.e. the +remainder of the string) rather than \c{1}. \c ; The following lines are all equivalent \c %define mychar 'yzw' \c %substr mychar 'xyzw' 2,-1 +\c %xdefine mychar %substr('xyzw',2,3) \c %xdefine mychar %substr('xyzw',2,-1) +\c %xdefine mychar %substr('xyzw',2) \S{f_tok} \i\c{%tok()} function