preproc: add conditional-string smacro parameters; simplify functions

Add the option of having strings only conditionally quoted (&&) -- do
not quote an already quoted string again -- as opposed to always
quoting a string.

This makes a lot of the string functions way simpler to implement, and
removes the need to share ad hoc parsing code with directives.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2022-11-21 10:47:08 -08:00
parent 30ff4f92c0
commit 516fbba03e
2 changed files with 110 additions and 87 deletions

View File

@ -206,13 +206,14 @@ typedef Token *(*ExpandSMacro)(const SMacro *s, Token **params, int nparams);
* if SPARM_GREEDY is set.
*/
enum sparmflags {
SPARM_PLAIN = 0,
SPARM_EVAL = 1, /* Evaluate as a numeric expression (=) */
SPARM_STR = 2, /* Convert to quoted string (&) */
SPARM_NOSTRIP = 4, /* Don't strip braces (!) */
SPARM_GREEDY = 8, /* Greedy final parameter (+) */
SPARM_VARADIC = 16, /* Any number of separate arguments */
SPARM_OPTIONAL = 32 /* Optional argument */
SPARM_PLAIN = 0,
SPARM_EVAL = 1, /* Evaluate as a numeric expression (=) */
SPARM_STR = 2, /* Convert to quoted string (&) */
SPARM_NOSTRIP = 4, /* Don't strip braces (!) */
SPARM_GREEDY = 8, /* Greedy final parameter (+) */
SPARM_VARADIC = 16, /* Any number of separate arguments */
SPARM_OPTIONAL = 32, /* Optional argument */
SPARM_CONDQUOTE = 64 /* With SPARM_STR, don't re-quote a string (&&) */
};
struct smac_param {
@ -2875,11 +2876,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m)
if (m->nparam) {
/*
* Space for ( and either , or ) around each
* parameter, plus up to 4 flags.
* parameter, plus up to 5 flags.
*/
int i;
size += 1 + 4 * m->nparam;
size += 1 + 5 * m->nparam;
for (i = 0; i < m->nparam; i++)
size += m->params[i].name.len;
}
@ -2910,8 +2911,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m)
if (flags & SPARM_NOSTRIP)
*--p = '!';
if (flags & SPARM_STR)
if (flags & SPARM_STR) {
*--p = '&';
if (flags & SPARM_CONDQUOTE)
*--p = '&';
}
if (flags & SPARM_EVAL)
*--p = '=';
*--p = ',';
@ -3019,6 +3023,9 @@ static int parse_smacro_template(Token ***tpp, SMacro *tmpl)
case '&':
flags |= SPARM_STR;
break;
case TOKEN_DBL_AND:
flags |= SPARM_STR|SPARM_CONDQUOTE;
break;
case '!':
flags |= SPARM_NOSTRIP;
break;
@ -3691,15 +3698,16 @@ err:
return res;
}
/*
* Implement substring extraction as used by the %substr directive
* and function.
*/
static Token *pp_substr_common(Token *t, int64_t start, int64_t count);
static Token *pp_substr(Token *tline, const char *dname)
{
int64_t start, count;
const char *txt;
size_t len;
struct ppscan pps;
Token *t;
Token *res = NULL;
@ -3730,7 +3738,7 @@ static Token *pp_substr(Token *tline, const char *dname)
nasm_nonfatal("non-constant value given to `%s'", dname);
goto err;
}
start = evalresult->value - 1;
start = evalresult->value;
pps.tptr = skip_white(pps.tptr);
if (!pps.tptr) {
@ -3747,10 +3755,24 @@ static Token *pp_substr(Token *tline, const char *dname)
count = evalresult->value;
}
res = pp_substr_common(t, start, count);
err:
free_tlist(tline);
return res;
}
static Token *pp_substr_common(Token *t, int64_t start, int64_t count)
{
size_t len;
const char *txt;
unquote_token(t);
len = t->len;
/* make start and count being in range */
start -= 1; /* First character is 1 */
if (start < 0)
start = 0;
if (count < 0)
@ -3761,10 +3783,7 @@ static Token *pp_substr(Token *tline, const char *dname)
start = -1, count = 0; /* empty string */
txt = (start < 0) ? "" : tok_text(t) + start;
res = make_tok_qstr_len(NULL, txt, count);
err:
free_tlist(tline);
return res;
return make_tok_qstr_len(NULL, txt, count);
}
/**
@ -5893,14 +5912,19 @@ static SMacro *expand_one_smacro(Token ***tpp)
if (flags & SPARM_STR) {
/* Convert expansion to a quoted string */
char *arg;
Token *qs;
qs = expand_smacro_noreset(params[i]);
arg = detoken(qs, false);
free_tlist(qs);
params[i] = make_tok_qstr(NULL, arg);
nasm_free(arg);
if ((flags & SPARM_CONDQUOTE) &&
tok_is(qs, TOKEN_STR) && !qs->next) {
/* A single quoted string token */
params[i] = qs;
} else {
char *arg = detoken(qs, false);
free_tlist(qs);
params[i] = make_tok_qstr(NULL, arg);
nasm_free(arg);
}
}
}
}
@ -7051,78 +7075,61 @@ stdmac_join(const SMacro *s, Token **params, int nparams)
static Token *
stdmac_strcat(const SMacro *s, Token **params, int nparams)
{
Token *tline;
(void)nparams;
int i;
size_t len = 0;
char *str, *p;
tline = params[0];
params[0] = NULL; /* Don't free this later */
return pp_strcat(expand_smacro_noreset(tline), s->name);
(void)s;
/* Unquote every argument in place and add up the total length */
for (i = 0; i < nparams; i++) {
unquote_token(params[i]);
len += params[i]->len;
}
/* Scratch buffer for the concatenated text, one spare byte at the end */
nasm_newn(str, len+1);
p = str;
/* Append the text of each argument; mempcpy() returns the new end */
for (i = 0; i < nparams; i++) {
p = mempcpy(p, tok_text(params[i]), params[i]->len);
}
/*
 * NOTE(review): str is never freed in this function. Elsewhere the
 * buffer handed to make_tok_qstr() is released with nasm_free()
 * afterwards; confirm whether make_tok_qstr_len() takes ownership of
 * str, otherwise this leaks.
 */
return make_tok_qstr_len(NULL, str, len);
}
/* %substr() function */
static Token *
stdmac_substr(const SMacro *s, Token **params, int nparams)
{
Token *tline;
int64_t start, count;
(void)nparams;
(void)s;
tline = params[0];
params[0] = NULL; /* Don't free this later */
return pp_substr(expand_smacro_noreset(tline), s->name);
}
start = get_tok_num(params[1], NULL);
count = get_tok_num(params[2], NULL);
/* Expand the argument and enforce it being a single quoted string */
static Token *expand_to_string(Token **tp, const char *dname)
{
Token *tlist, *t;
tlist = *tp;
*tp = NULL; /* Don't free this later */
t = zap_white(expand_smacro_noreset(tlist));
if (!tok_is(t, TOKEN_STR)) {
nasm_nonfatal("`%s' requires string as parameter", dname);
return NULL;
}
t->next = zap_white(t->next);
if (t->next) {
nasm_nonfatal("`%s' requires exactly one string as parameter", dname);
return NULL;
}
return t;
return pp_substr_common(params[0], start, count);
}
/* %strlen() function */
static Token *
stdmac_strlen(const SMacro *s, Token **params, int nparams)
{
Token *t;
(void)nparams;
(void)s;
t = expand_to_string(&params[0], s->name);
if (!t)
return NULL;
unquote_token(t);
return make_tok_num(NULL, t->len);
unquote_token(params[0]);
return make_tok_num(NULL, params[0]->len);
}
/* %tok() function */
static Token *
stdmac_tok(const SMacro *s, Token **params, int nparams)
{
Token *t;
(void)nparams;
(void)s;
t = expand_to_string(&params[0], s->name);
if (!t)
return NULL;
return reverse_tokens(tokenize(unquote_token_cstr(t)));
return reverse_tokens(tokenize(unquote_token_cstr(params[0])));
}
/* %cond() or %sel() */
@ -7272,12 +7279,6 @@ struct magic_macros {
ExpandSMacro func;
};
struct num_macros {
const char name[6];
uint8_t base;
char prefix;
};
static void pp_add_magic_stdmac(void)
{
static const struct magic_macros magic_macros[] = {
@ -7289,10 +7290,9 @@ static void pp_add_magic_stdmac(void)
{ "%count", false, 1, SPARM_VARADIC, stdmac_count },
{ "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join },
{ "%str", false, 1, SPARM_GREEDY|SPARM_STR, stdmac_join },
{ "%strcat", false, 1, SPARM_GREEDY, stdmac_strcat },
{ "%strlen", false, 1, 0, stdmac_strlen },
{ "%substr", false, 1, SPARM_GREEDY, stdmac_substr },
{ "%tok", false, 1, 0, stdmac_tok },
{ "%strcat", false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_strcat },
{ "%strlen", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_strlen },
{ "%tok", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_tok },
{ NULL, false, 0, 0, NULL }
};
const struct magic_macros *m;
@ -7361,6 +7361,18 @@ static void pp_add_magic_stdmac(void)
tmpl.params[2].def = make_tok_num(NULL, 10);
define_smacro("%num", false, NULL, &tmpl);
/* %substr() function */
nasm_zero(tmpl);
tmpl.nparam = 3;
tmpl.expand = stdmac_substr;
tmpl.recursive = true;
nasm_newn(tmpl.params, tmpl.nparam);
tmpl.params[0].flags = SPARM_STR|SPARM_CONDQUOTE;
tmpl.params[1].flags = SPARM_EVAL;
tmpl.params[2].flags = SPARM_EVAL|SPARM_OPTIONAL;
tmpl.params[2].def = make_tok_num(NULL, -1);
define_smacro("%substr", false, NULL, &tmpl);
/* %is...() macro functions */
nasm_zero(tmpl);
tmpl.nparam = 1;

View File

@ -2424,17 +2424,22 @@ A single pair of parentheses is a subcase of a single, unused argument:
This is similar to the behavior of the C preprocessor.
\b If declared with an \c{=}, NASM will evaluate the argument as an
expression after expansion.
\b If declared with an \c{=}, NASM will expand the argument and then
evaluate it as a numeric expression.
\b If an argument declared with an \c{&}, a macro parameter will be
turned into a quoted string after expansion.
\b If declared with an \c{&}, NASM will expand the argument and then
turn it into a quoted string; if the argument already \e{is} a quoted
string, it will be quoted again.
\b If declared with \c{&&}, NASM will expand the argument and then
turn it into a quoted string, but if the argument already is a quoted
string, it will \e{not} be re-quoted.
\b If declared with a \c{+}, it is a greedy or variadic parameter; it
includes any subsequent commas and parameters.
will include any subsequent commas and parameters.
\b If declared with an \c{!}, NASM will not strip whitespace and
braces (useful in conjunction with \c{&}).
braces (potentially useful in conjunction with \c{&} or \c{&&}).
For example:
@ -2849,7 +2854,9 @@ means "until N-1 characters before the end of string", i.e. \c{-1}
means until end of string, \c{-2} until one character before, etc.
The corresponding preprocessor function is \c{%substr()}, see
\k{f_substr}.
\k{f_substr}; note, however, that the default value for the length
parameter of \c{%substr()}, if omitted, is \c{-1} rather than
\c{1}.
\H{ppfunc} \i{Preprocessor Functions}
@ -3016,13 +3023,17 @@ in the same way the \i\c{%strlen} directive would, see \k{strlen}.
The \c{%substr()} function extracts a substring of a quoted string, in
the same way the \i\c{%substr} directive would, see \k{substr}. Note
that unlike the \c{%substr} directive, a comma is required after the
string argument.
that unlike the \c{%substr} directive, commas are required between all
parameters, and that the default for the length argument, if omitted,
is \c{-1} (i.e. the remainder of the string) rather than \c{1}.
\c ; The following lines are all equivalent
\c %define mychar 'yzw'
\c %substr mychar 'xyzw' 2,-1
\c %xdefine mychar %substr('xyzw',2,3)
\c %xdefine mychar %substr('xyzw',2,-1)
\c %xdefine mychar %substr('xyzw',2)
\S{f_tok} \i\c{%tok()} function