parser: tidy up handling of prefixes; allow braced prefix after insn

Clean up the handling of prefixes in general. Allow a set of braced
prefixes to follow the instruction; this is required for things like
{dfv=} but might also be a nicer syntax for things like {rex}.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2024-07-24 13:09:36 -07:00
parent a556ea3edf
commit e03b9325e2
7 changed files with 185 additions and 158 deletions

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2023 The NASM Authors - All Rights Reserved
* Copyright 1996-2024 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -621,6 +621,35 @@ fail:
return -1;
}
/*
 * Try to record the current token (tokval) as an instruction prefix
 * in result->prefixes[].
 *
 * A TOKEN_PREFIX token names its own slot in t_inttwo; a segment
 * register token is stored in the PPS_SEG slot as a segment override.
 * If the slot is already occupied, a warning is issued when the same
 * prefix is repeated and a non-fatal error when a different prefix is
 * given; in either case the new value replaces the old one.
 *
 * Returns true if the token was a prefix and has been stored, false
 * if the token is not a prefix (in which case result is untouched).
 */
static bool add_prefix(insn *result)
{
    enum prefix_pos pos;

    if (tokval.t_type == TOKEN_PREFIX) {
        pos = tokval.t_inttwo;
    } else if (tokval.t_type == TOKEN_REG && IS_SREG(tokval.t_integer)) {
        /* Only segment registers can act as a prefix */
        pos = PPS_SEG;
    } else {
        return false;
    }

    /* A nonzero slot means some prefix was already seen for it */
    if (result->prefixes[pos]) {
        if (result->prefixes[pos] != tokval.t_integer)
            nasm_nonfatal("instruction has conflicting prefixes");
        else
            nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
    }

    result->prefixes[pos] = tokval.t_integer;
    return true;
}
insn *parse_line(char *buffer, insn *result)
{
bool insn_is_label = false;
@ -630,39 +659,26 @@ insn *parse_line(char *buffer, insn *result)
bool first;
bool recover;
bool far_jmp_ok;
bool have_prefixes;
int i;
nasm_static_assert(P_none == 0);
restart_parse:
first = true;
result->forw_ref = false;
stdscan_reset();
stdscan_set(buffer);
i = stdscan(NULL, &tokval);
memset(result->prefixes, P_none, sizeof(result->prefixes));
result->times = 1; /* No TIMES either yet */
result->label = NULL; /* Assume no label */
result->eops = NULL; /* must do this, whatever happens */
result->operands = 0; /* must initialize this */
result->evex_rm = 0; /* Ensure EVEX rounding mode is reset */
result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */
nasm_static_assert(P_none == 0);
/* Ignore blank lines */
if (i == TOKEN_EOS)
goto fail;
nasm_zero(*result);
result->opcode = I_none; /* No opcode */
result->times = 1; /* No TIMES either yet */
result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */
if (i != TOKEN_ID &&
i != TOKEN_INSN &&
i != TOKEN_PREFIX &&
(i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
nasm_nonfatal("label or instruction expected at start of line");
goto fail;
}
if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
if (i == TOKEN_ID || insn_is_label) {
/* there's a label here */
first = false;
result->label = tokval.t_charptr;
@ -678,7 +694,7 @@ restart_parse:
*! of a typo, but is technically correct NASM syntax (see \k{syntax}.)
*/
nasm_warn(WARN_LABEL_ORPHAN ,
"label alone on a line without a colon might be in error");
"label alone on a line without a colon might be in error");
}
if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
/*
@ -694,83 +710,64 @@ restart_parse:
}
}
/* Just a label here */
if (i == TOKEN_EOS)
goto fail;
have_prefixes = false;
/* Process things that go before the opcode */
while (i) {
int slot = PPS_SEG;
if (i == TOKEN_TIMES) {
/* TIMES is a very special prefix */
expr *value;
if (i == TOKEN_PREFIX) {
slot = tokval.t_inttwo;
if (slot == PPS_TIMES) {
/* TIMES is a very special prefix */
expr *value;
i = stdscan(NULL, &tokval);
value = evaluate(stdscan, NULL, &tokval, NULL,
pass_stable(), NULL);
i = tokval.t_type;
if (!value) /* Error in evaluator */
goto fail;
if (!is_simple(value)) {
nasm_nonfatal("non-constant argument supplied to TIMES");
result->times = 1;
} else {
result->times = value->value;
if (value->value < 0) {
nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
result->times = 0;
}
i = stdscan(NULL, &tokval);
value = evaluate(stdscan, NULL, &tokval, NULL,
pass_stable(), NULL);
i = tokval.t_type;
if (!value) /* Error in evaluator */
goto fail;
if (!is_simple(value)) {
nasm_nonfatal("non-constant argument supplied to TIMES");
result->times = 1;
} else {
result->times = value->value;
if (value->value < 0) {
nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
result->times = 0;
}
first = false;
continue;
}
} else if (i == TOKEN_REG && IS_SREG(tokval.t_integer)) {
slot = PPS_SEG;
first = false;
} else {
break; /* Not a prefix */
if (!add_prefix(result))
break;
have_prefixes = true;
i = stdscan(NULL, &tokval);
}
if (result->prefixes[slot]) {
if (result->prefixes[slot] == tokval.t_integer)
nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
else
nasm_nonfatal("instruction has conflicting prefixes");
}
result->prefixes[slot] = tokval.t_integer;
i = stdscan(NULL, &tokval);
first = false;
}
if (i != TOKEN_INSN) {
int j;
enum prefixes pfx;
for (j = 0; j < MAXPREFIX; j++) {
if ((pfx = result->prefixes[j]) != P_none)
break;
}
if (i == 0 && pfx != P_none) {
/*
* Instruction prefixes are present, but no actual
* instruction. This is allowed: at this point we
* invent a notional instruction of RESB 0.
*/
result->opcode = I_RESB;
result->operands = 1;
nasm_zero(result->oprs);
result->oprs[0].type = IMMEDIATE;
result->oprs[0].offset = 0L;
result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
return result;
} else {
nasm_nonfatal("parser: instruction expected");
goto fail;
if (!i) {
if (have_prefixes) {
/*
* Instruction prefixes are present, but no actual
* instruction. This is allowed: at this point we
* invent a notional instruction of RESB 0.
*
* Note that this can be combined with TIMES, so do
* not clear *result here: result->times must be kept.
*
*/
result->opcode = I_RESB;
result->operands = 1;
result->oprs[0].type = IMMEDIATE;
result->oprs[0].offset = 0;
result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
}
} else if (!first) {
nasm_nonfatal("instruction expected");
} else if (!result->label) {
nasm_nonfatal("label or instruction expected at start of line");
}
return result;
}
result->opcode = tokval.t_integer;
@ -842,7 +839,7 @@ restart_parse:
}
/*
* Now we begin to parse the operands. There may be up to four
* Now we begin to parse the operands. There may be up to MAX_OPERANDS
* of these, separated by commas, and terminated by a zero token.
*/
far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
@ -859,13 +856,27 @@ restart_parse:
init_operand(op);
i = stdscan(NULL, &tokval);
if (i == TOKEN_EOS)
break; /* end of operands: get out of here */
else if (first && i == ':') {
if (first && i == ':') {
insn_is_label = true;
goto restart_parse;
}
first = false;
if (opnum == 0) {
/*
* Allow braced prefix tokens like {evex} or {dfv} after
* the opcode mnemonic proper, but before the first
* operand. This is currently not allowed for non-braced
* prefix tokens.
*/
while ((tokval.t_flag & TFLAG_BRC) && add_prefix(result))
i = stdscan(NULL, &tokval);
}
if (i == TOKEN_EOS)
break;
op->type = 0; /* so far, no override */
/* size specifiers */
while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {

View File

@ -110,7 +110,7 @@ static int stdscan_handle_brace(struct tokenval *tv)
{
if (!(tv->t_flag & TFLAG_BRC_ANY)) {
/* invalid token is put inside braces */
nasm_nonfatal("`%s' is not a valid decorator with braces", tv->t_charptr);
nasm_nonfatal("`{%s}' is not a valid token", tv->t_charptr);
tv->t_type = TOKEN_INVALID;
} else if (tv->t_flag & TFLAG_BRC_OPT) {
if (is_reg_class(OPMASKREG, tv->t_integer)) {
@ -122,6 +122,48 @@ static int stdscan_handle_brace(struct tokenval *tv)
return tv->t_type;
}
/*
 * Scan a brace-enclosed token such as {evex} or {rn-sae}.
 *
 * On entry stdscan_bufptr points at the opening '{'. On success the
 * buffer pointer is left just past the closing '}', the token text is
 * looked up in the token hash and the result is post-processed by
 * stdscan_handle_brace(). If the closing brace is missing, or the
 * token text is longer than MAX_KEYWORD, an error is reported and
 * TOKEN_INVALID is returned.
 */
static int stdscan_parse_braces(struct tokenval *tv)
{
    char *name;
    int name_len;

    /* Step over the '{' and any blanks that follow it */
    stdscan_bufptr++;
    name = stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);

    /*
     * Consume every character that may appear in a braced token;
     * this includes '-', as in {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}.
     */
    for (; nasm_isbrcchar(*stdscan_bufptr); stdscan_bufptr++)
        ;
    name_len = stdscan_bufptr - name;

    /* Only keep a copy of the text if it can possibly be a keyword */
    if (!(name_len > MAX_KEYWORD))
        tv->t_charptr = stdscan_copy(name, name_len);

    /* Blanks are permitted before the closing brace */
    stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
    if (*stdscan_bufptr == '}') {
        stdscan_bufptr++;       /* skip closing brace */
    } else {
        nasm_nonfatal("unterminated braces at end of line");
        tv->t_type = TOKEN_INVALID;
        return tv->t_type;
    }

    /* Overlong text is rejected only after the brace has been consumed */
    if (name_len > MAX_KEYWORD) {
        nasm_nonfatal("`{%.*s}' is not a valid token", name_len, name);
        tv->t_type = TOKEN_INVALID;
        return tv->t_type;
    }

    /* Classify the text, then apply the brace-specific checks */
    nasm_token_hash(tv->t_charptr, tv);
    return stdscan_handle_brace(tv);
}
static int stdscan_token(struct tokenval *tv);
int stdscan(void *private_data, struct tokenval *tv)
@ -278,37 +320,8 @@ static int stdscan_token(struct tokenval *tv)
stdscan_bufptr++; /* Skip final quote */
return tv->t_type = TOKEN_STR;
} else if (*stdscan_bufptr == '{') {
return stdscan_parse_braces(tv);
/* now we've got a decorator */
int token_len;
stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
r = ++stdscan_bufptr;
/*
* read the entire buffer to advance the buffer pointer
* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
*/
while (nasm_isbrcchar(*stdscan_bufptr))
stdscan_bufptr++;
token_len = stdscan_bufptr - r;
/* ... copy only up to DECOLEN_MAX-1 characters */
tv->t_charptr = stdscan_copy(r, token_len < DECOLEN_MAX ?
token_len : DECOLEN_MAX - 1);
stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
/* if brace is not closed properly or token is too long */
if ((*stdscan_bufptr != '}') || (token_len > MAX_KEYWORD)) {
nasm_nonfatal("invalid decorator token inside braces");
return tv->t_type = TOKEN_INVALID;
}
stdscan_bufptr++; /* skip closing brace */
/* handle tokens inside braces */
nasm_token_hash(tv->t_charptr, tv);
return stdscan_handle_brace(tv);
} else if (*stdscan_bufptr == ';') {
/* a comment has happened - stay */
return tv->t_type = TOKEN_EOS;

View File

@ -72,9 +72,6 @@ xrelease
bnd
nobnd
% TOKEN_PREFIX, PPS_TIMES, 0, P_*
times
% TOKEN_PREFIX, PPS_WAIT, 0, P_*
wait
@ -150,6 +147,7 @@ __?ilog2c?__
% TOKEN_*, 0, 0, 0
seg
wrt
times
% TOKEN_{__?*?__}, 0, 0, 0
__?masm_ptr?__

View File

@ -228,6 +228,7 @@ enum token_type { /* token types, other than chars */
TOKEN_SEG, /* SEG */
TOKEN_WRT, /* WRT */
TOKEN_TIMES, /* TIMES */
TOKEN_FLOATIZE, /* __?floatX?__ */
TOKEN_STRFUNC, /* __utf16*__, __utf32*__ */
TOKEN_IFUNC, /* __ilog2*__ */
@ -268,6 +269,18 @@ enum token_type { /* token types, other than chars */
TOKEN_MAX = INT_MAX /* Keep compiler from reducing the range */
};
/*
 * Flag bits describing how a token may be used; each flag is a
 * distinct bit so that they can be combined and tested with masks.
 */
enum token_flags {
    TFLAG_BRC     = 0x01,   /* only valid inside braces: {1to8}, {rd-sae}, ... */
    TFLAG_BRC_OPT = 0x02,   /* braces are optional: opmasks such as {k1} */
    TFLAG_BRDCAST = 0x04,   /* broadcasting decorator */
    TFLAG_WARN    = 0x08,   /* warning only, treat as ID */
    TFLAG_DUP     = 0x10,   /* valid ID but also has context-specific use */

    /* Mask matching any token that may appear inside braces */
    TFLAG_BRC_ANY = TFLAG_BRC | TFLAG_BRC_OPT
};
/* Must match the fp_formats[] array in asm/floats.c */
enum floatize {
FLOAT_8,
@ -311,13 +324,13 @@ size_t string_transform(char *, size_t, char **, enum strfunc);
* `t_type' field in the structure.
*/
struct tokenval {
char *t_charptr;
int64_t t_integer;
int64_t t_inttwo;
enum token_type t_type;
int8_t t_flag;
char *t_charptr;
const char *t_start; /* Pointer to token in input buffer */
int t_len; /* Length of token in input buffer */
enum token_type t_type;
enum token_flags t_flag;
};
typedef int (*scanner)(void *private_data, struct tokenval *tv);
@ -525,16 +538,6 @@ static inline bool is_register(int reg)
return reg >= EXPR_REG_START && reg < REG_ENUM_LIMIT;
}
/*
* token flags
*/
#define TFLAG_BRC (1 << 0) /* valid only with braces. {1to8}, {rd-sae}, ...*/
#define TFLAG_BRC_OPT (1 << 1) /* may or may not have braces. opmasks {k1} */
#define TFLAG_BRC_ANY (TFLAG_BRC | TFLAG_BRC_OPT)
#define TFLAG_BRDCAST (1 << 2) /* broadcasting decorator */
#define TFLAG_WARN (1 << 3) /* warning only, treat as ID */
#define TFLAG_DUP (1 << 4) /* valid ID but also has context-specific use */
/*
* REX flags
*/
@ -699,17 +702,19 @@ enum ea_type {
*
* LOCK and REP used to be one slot; this is no longer the case since
* the introduction of HLE.
*
* Note: these are stored in a PPS_BITS-bit field in the token hash!
*
*/
enum prefix_pos {
PPS_TIMES = -1, /* TIMES (not a slot, handled separately) */
PPS_WAIT = 0, /* WAIT (technically not a prefix!) */
PPS_REP, /* REP/HLE prefix */
PPS_LOCK, /* LOCK prefix */
PPS_SEG, /* Segment override prefix */
PPS_OSIZE, /* Operand size prefix */
PPS_ASIZE, /* Address size prefix */
PPS_REX, /* REX/VEX type */
MAXPREFIX /* Total number of prefix slots */
PPS_WAIT = 0, /* WAIT (technically not a prefix!) */
PPS_REP, /* REP/HLE prefix */
PPS_LOCK, /* LOCK prefix */
PPS_SEG, /* Segment override prefix */
PPS_OSIZE, /* Operand size prefix */
PPS_ASIZE, /* Address size prefix */
PPS_REX, /* REX/VEX type */
MAXPREFIX /* Total number of prefix slots */
};
/*

View File

@ -1,29 +1,29 @@
./travis/test/br3392531.asm:1: error: label or instruction expected at start of line
./travis/test/br3392531.asm:4: error: invalid decorator token inside braces
./travis/test/br3392531.asm:4: error: unterminated braces at end of line
./travis/test/br3392531.asm:4: error: label or instruction expected at start of line
./travis/test/br3392531.asm:5: error: parser: instruction expected
./travis/test/br3392531.asm:5: error: instruction expected
./travis/test/br3392531.asm:7: error: `%macro' expects a parameter count
./travis/test/br3392531.asm:11: warning: unterminated string (missing ``') [-w+pp-open-string]
./travis/test/br3392531.asm:14: error: parser: instruction expected
./travis/test/br3392531.asm:14: error: instruction expected
./travis/test/br3392531.asm:17: error: `%$LRG': context stack is empty
./travis/test/br3392531.asm:17: error: `%$LRG': context stack is empty
./travis/test/br3392531.asm:17: error: label or instruction expected at start of line
./travis/test/br3392531.asm:18: error: label or instruction expected at start of line
./travis/test/br3392531.asm:19: error: parser: instruction expected
./travis/test/br3392531.asm:19: error: instruction expected
./travis/test/br3392531.asm:20: error: `%1': not in a macro call
./travis/test/br3392531.asm:20: error: label or instruction expected at start of line
./travis/test/br3392531.asm:21: error: label or instruction expected at start of line
./travis/test/br3392531.asm:8: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: parser: instruction expected
./travis/test/br3392531.asm:21: error: instruction expected
./travis/test/br3392531.asm:9: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: label or instruction expected at start of line
./travis/test/br3392531.asm:10: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: invalid macro parameter: `%4stru@namB'
./travis/test/br3392531.asm:11: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: parser: instruction expected
./travis/test/br3392531.asm:21: error: instruction expected
./travis/test/br3392531.asm:11: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: `%unmacro' expects a parameter count
./travis/test/br3392531.asm:12: ... from macro `section' defined here
./travis/test/br3392531.asm:21: error: `%unmacro' can't undefine the macro being expanded
./travis/test/br3392531.asm:12: ... from macro `section' defined here
./travis/test/br3392531.asm:22: error: parser: instruction expected
./travis/test/br3392531.asm:22: error: instruction expected

View File

@ -8,7 +8,7 @@
./travis/test/br3392716.asm:15: warning: unterminated string (missing `'') [-w+pp-open-string]
./travis/test/br3392716.asm:20: warning: unterminated string (missing `'') [-w+pp-open-string]
./travis/test/br3392716.asm:20: warning: multi-line macro `sst' exists, but not taking 1 parameter [-w+pp-macro-params-multi]
./travis/test/br3392716.asm:20: error: parser: instruction expected
./travis/test/br3392716.asm:20: error: instruction expected
./travis/test/br3392716.asm:21: error: `%%cTo': not in a macro call
./travis/test/br3392716.asm:21: error: label or instruction expected at start of line
./travis/test/br3392716.asm:6: ... from macro `sst' defined here
@ -18,7 +18,7 @@
./travis/test/br3392716.asm:7: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: label or instruction expected at start of line
./travis/test/br3392716.asm:8: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: parser: instruction expected
./travis/test/br3392716.asm:21: error: instruction expected
./travis/test/br3392716.asm:10: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: label or instruction expected at start of line
./travis/test/br3392716.asm:11: ... from macro `sst' defined here
@ -28,10 +28,10 @@
./travis/test/br3392716.asm:12: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: label or instruction expected at start of line
./travis/test/br3392716.asm:13: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: parser: instruction expected
./travis/test/br3392716.asm:21: error: instruction expected
./travis/test/br3392716.asm:15: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: `%macro' expects a macro name
./travis/test/br3392716.asm:16: ... from macro `sst' defined here
./travis/test/br3392716.asm:21: error: parser: instruction expected
./travis/test/br3392716.asm:21: error: instruction expected
./travis/test/br3392716.asm:17: ... from macro `sst' defined here
./travis/test/br3392716.asm:22: error: label or instruction expected at start of line

View File

@ -1 +1 @@
./travis/test/org.asm:5: error: parser: instruction expected
./travis/test/org.asm:5: error: instruction expected