mirror of
https://github.com/netwide-assembler/nasm.git
synced 2024-11-27 08:10:07 +08:00
preproc: when parsing a # marker, use C-style string unquoting
To handle escape codes in filename strings after # markers correctly, we need nasm_unquote() to be aware that it is using C escapes; otherwise things like "foo`bar" will break. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
parent
4c3798b7e6
commit
1d151a8558
@ -721,30 +721,37 @@ static inline bool tok_isnt(const Token *x, char c)
|
||||
* Unquote a token if it is a string, and set its type to
|
||||
* TOK_INTERNAL_STRING.
|
||||
*/
|
||||
static const char *unquote_token(Token *t)
|
||||
|
||||
/*
|
||||
* Common version for any kind of quoted string; see asm/quote.c for
|
||||
* information about the arguments.
|
||||
*/
|
||||
static const char *unquote_token_anystr(Token *t, uint32_t badctl, char qstart)
|
||||
{
|
||||
size_t nlen, olen;
|
||||
char *p;
|
||||
|
||||
if (t->type != TOK_STRING)
|
||||
return tok_text(t);
|
||||
|
||||
olen = t->len;
|
||||
p = (olen > INLINE_TEXT) ? t->text.p.ptr : t->text.a;
|
||||
t->len = nlen = nasm_unquote_anystr(p, NULL, badctl, qstart);
|
||||
t->type = TOK_INTERNAL_STRING;
|
||||
|
||||
if (t->len > INLINE_TEXT) {
|
||||
char *p = t->text.p.ptr;
|
||||
if (olen <= INLINE_TEXT || nlen > INLINE_TEXT)
|
||||
return p;
|
||||
|
||||
t->len = nasm_unquote(p, NULL);
|
||||
nasm_zero(t->text.a);
|
||||
memcpy(t->text.a, p, nlen);
|
||||
nasm_free(p);
|
||||
return t->text.a;
|
||||
}
|
||||
|
||||
if (t->len <= INLINE_TEXT) {
|
||||
nasm_zero(t->text.a);
|
||||
memcpy(t->text.a, p, t->len);
|
||||
nasm_free(p);
|
||||
return t->text.a;
|
||||
} else {
|
||||
return p;
|
||||
}
|
||||
} else {
|
||||
t->len = nasm_unquote(t->text.a, NULL);
|
||||
return t->text.a;
|
||||
}
|
||||
/* Unquote any string, can produce any arbitrary binary output */
|
||||
static const char *unquote_token(Token *t)
|
||||
{
|
||||
return unquote_token_anystr(t, 0, STR_NASM);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -753,28 +760,7 @@ static const char *unquote_token(Token *t)
|
||||
*/
|
||||
static const char *unquote_token_cstr(Token *t)
|
||||
{
|
||||
if (t->type != TOK_STRING)
|
||||
return tok_text(t);
|
||||
|
||||
t->type = TOK_INTERNAL_STRING;
|
||||
|
||||
if (t->len > INLINE_TEXT) {
|
||||
char *p = t->text.p.ptr;
|
||||
|
||||
t->len = nasm_unquote_cstr(p, NULL);
|
||||
|
||||
if (t->len <= INLINE_TEXT) {
|
||||
nasm_zero(t->text.a);
|
||||
memcpy(t->text.a, p, t->len);
|
||||
nasm_free(p);
|
||||
return t->text.a;
|
||||
} else {
|
||||
return p;
|
||||
}
|
||||
} else {
|
||||
t->len = nasm_unquote_cstr(t->text.a, NULL);
|
||||
return t->text.a;
|
||||
}
|
||||
return unquote_token_anystr(t, BADCTL, STR_NASM);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3389,14 +3375,19 @@ static int line_directive(Token *origline, Token *tline)
|
||||
tline = skip_white(tline);
|
||||
if (tline) {
|
||||
if (tline->type == TOK_STRING) {
|
||||
const char *fname;
|
||||
/*
|
||||
* If this is a quoted string, ignore anything after
|
||||
* it; this allows for compatibility with gcc's
|
||||
* additional flags options.
|
||||
*/
|
||||
src_set_fname(unquote_token(tline));
|
||||
|
||||
fname = unquote_token_anystr(tline, BADCTL,
|
||||
dname[0] == '#' ? STR_C : STR_NASM);
|
||||
src_set_fname(fname);
|
||||
} else {
|
||||
char *fname = detoken(tline, false);
|
||||
char *fname;
|
||||
fname = detoken(tline, false);
|
||||
src_set_fname(fname);
|
||||
nasm_free(fname);
|
||||
}
|
||||
|
75
asm/quote.c
75
asm/quote.c
@ -1,6 +1,6 @@
|
||||
/* ----------------------------------------------------------------------- *
|
||||
*
|
||||
* Copyright 1996-2019 The NASM Authors - All Rights Reserved
|
||||
* Copyright 1996-2020 The NASM Authors - All Rights Reserved
|
||||
* See the file AUTHORS included with the NASM distribution for
|
||||
* the specific copyright holders.
|
||||
*
|
||||
@ -291,10 +291,17 @@ char *nasm_quote_cstr(const char *str, size_t *lenp)
|
||||
* corresponding to bits set in badctl; in that case, the output
|
||||
* string, but not *ep, is truncated before the first invalid
|
||||
* character.
|
||||
*
|
||||
* badctl is a bitmask of control characters (0-31) which are forbidden
|
||||
* from appearing in the final output.
|
||||
*
|
||||
* The qstart character can be either '`' (NASM style) or '\"' (C style),
|
||||
* to indicate the lead marker of a quoted string. If it is '\"', then
|
||||
* '`' is not a special character at all.
|
||||
*/
|
||||
|
||||
static size_t nasm_unquote_common(char *str, char **ep,
|
||||
const uint32_t badctl)
|
||||
size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
|
||||
const char qstart)
|
||||
{
|
||||
unsigned char bq;
|
||||
const unsigned char *p;
|
||||
@ -319,15 +326,7 @@ static size_t nasm_unquote_common(char *str, char **ep,
|
||||
if (!bq)
|
||||
return 0;
|
||||
|
||||
switch (bq) {
|
||||
case '\'':
|
||||
case '\"':
|
||||
/* '...' or "..." string */
|
||||
while ((c = *p++) && (c != bq))
|
||||
EMIT(c);
|
||||
break;
|
||||
|
||||
case '`':
|
||||
if (bq == (unsigned char)qstart) {
|
||||
/* `...` string */
|
||||
state = st_start;
|
||||
|
||||
@ -335,18 +334,13 @@ static size_t nasm_unquote_common(char *str, char **ep,
|
||||
c = *p++;
|
||||
switch (state) {
|
||||
case st_start:
|
||||
switch (c) {
|
||||
case '\\':
|
||||
if (c == '\\') {
|
||||
state = st_backslash;
|
||||
break;
|
||||
case '`':
|
||||
case '\0':
|
||||
} else if ((c == '\0') | (c == bq)) {
|
||||
state = st_done;
|
||||
break;
|
||||
default:
|
||||
} else {
|
||||
EMIT(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case st_backslash:
|
||||
@ -450,14 +444,19 @@ static size_t nasm_unquote_common(char *str, char **ep,
|
||||
default:
|
||||
panic();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
}
|
||||
} else if (bq == '\'' || bq == '\"') {
|
||||
/*
|
||||
* '...' or "..." string, NASM legacy style (no escapes of
|
||||
* any kind, including collapsing double quote marks.)
|
||||
* A "..." string obviously can't get here if qstart == '\"'.
|
||||
*/
|
||||
while ((c = *p++) && (c != bq))
|
||||
EMIT(c);
|
||||
} else {
|
||||
/* Not a quoted string, just return the input... */
|
||||
while ((c = *p++))
|
||||
EMIT(c);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Zero-terminate the output */
|
||||
@ -472,24 +471,30 @@ static size_t nasm_unquote_common(char *str, char **ep,
|
||||
}
|
||||
#undef EMIT
|
||||
|
||||
/*
|
||||
* Unquote any arbitrary string; may produce any bytes, including embedded
|
||||
* control- and NUL characters.
|
||||
*/
|
||||
size_t nasm_unquote(char *str, char **ep)
|
||||
{
|
||||
return nasm_unquote_common(str, ep, 0);
|
||||
return nasm_unquote_anystr(str, ep, 0, STR_NASM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unquote a string intended to be used as a C string; most control
|
||||
* characters are rejected, including whitespace characters that
|
||||
* would imply line endings and so on.
|
||||
*/
|
||||
size_t nasm_unquote_cstr(char *str, char **ep)
|
||||
{
|
||||
/*
|
||||
* These are the only control characters permitted: BEL BS TAB ESC
|
||||
*/
|
||||
const uint32_t okctl = (1 << '\a') | (1 << '\b') | (1 << '\t') | (1 << 27);
|
||||
|
||||
return nasm_unquote_common(str, ep, ~okctl);
|
||||
return nasm_unquote_anystr(str, ep, BADCTL, STR_NASM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the end of a quoted string; returns the pointer to the terminating
|
||||
* character (either the ending quote or the null character, if unterminated.)
|
||||
* If the input is not a quoted string, return NULL.
|
||||
* This applies to NASM style strings only.
|
||||
*/
|
||||
char *nasm_skip_string(const char *str)
|
||||
{
|
||||
@ -537,7 +542,9 @@ char *nasm_skip_string(const char *str)
|
||||
* Note: for the purpose of finding the end of the string,
|
||||
* all successor states to st_backslash are functionally
|
||||
* equivalent to st_start, since either a backslash or
|
||||
* a backquote will force a return to the st_start state.
|
||||
* a backquote will force a return to the st_start state,
|
||||
* and any possible multi-character state will terminate
|
||||
* for any non-alphanumeric character.
|
||||
*/
|
||||
state = c ? st_start : st_done;
|
||||
break;
|
||||
|
15
asm/quote.h
15
asm/quote.h
@ -38,9 +38,24 @@
|
||||
|
||||
char *nasm_quote(const char *str, size_t *len);
|
||||
char *nasm_quote_cstr(const char *str, size_t *len);
|
||||
size_t nasm_unquote_anystr(char *str, char **endptr,
|
||||
uint32_t badctl, char qstart);
|
||||
size_t nasm_unquote(char *str, char **endptr);
|
||||
size_t nasm_unquote_cstr(char *str, char **endptr);
|
||||
char *nasm_skip_string(const char *str);
|
||||
|
||||
/* Arguments used with nasm_unquote_anystr() */
|
||||
|
||||
/*
|
||||
* These are the only control characters permitted when we produce a C string:
|
||||
* BEL BS TAB ESC
|
||||
*/
|
||||
#define OKCTL ((1U << '\a') | (1U << '\b') | (1U << '\t') | (1U << 27))
|
||||
#define BADCTL (~(uint32_t)OKCTL)
|
||||
|
||||
/* Initial quotation mark */
|
||||
#define STR_C '\"'
|
||||
#define STR_NASM '`'
|
||||
|
||||
#endif /* NASM_QUOTE_H */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user