apx: support parsing special constants like {dfv=}

A special constant such as {dfv=} is essentially an immediate. Treat it
as one during parsing, except that when it appears "naked" (not inside an
expression) it has special matching properties and does not need to be
followed by a comma.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2024-07-28 16:57:24 -07:00
parent bdfa9f952d
commit 1618fa745b
6 changed files with 125 additions and 59 deletions

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2023 The NASM Authors - All Rights Reserved
* Copyright 1996-2024 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -69,6 +69,7 @@ enum match_result {
MERR_BADREPNE,
MERR_REGSETSIZE,
MERR_REGSET,
MERR_WRONGIMM,
/*
* Matching success; the conditional ones first
*/
@ -920,6 +921,9 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
case MERR_REGSET:
nasm_nonfatal("register set not valid for operand");
break;
case MERR_WRONGIMM:
nasm_nonfatal("operand/operator invalid for this instruction");
break;
default:
nasm_nonfatal("invalid combination of opcode and operands");
break;
@ -2552,11 +2556,15 @@ static enum match_result matches(const struct itemplate *itemp,
}
/*
* Check that no spurious colons or TOs are present
* First, cursory operand filtering
*/
for (i = 0; i < itemp->operands; i++)
if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
for (i = 0; i < itemp->operands; i++) {
struct operand * const op = &instruction->oprs[i];
if (op->type & ~itemp->opd[i] & (COLON | TO))
return MERR_INVALOP;
if (op->iflag && !itemp_has(itemp, op->iflag))
return MERR_WRONGIMM;
}
/*
* Process size flags

View File

@ -160,7 +160,7 @@ static void process_size_override(insn *result, operand *op)
* decorators can be placed in any order. e.g. zmm1 {k2}{z} or zmm2
* {z}{k3} decorator(s) are placed at the end of an operand.
*/
static bool parse_braces(decoflags_t *decoflags)
static bool parse_decorators(decoflags_t *decoflags)
{
int i, j;
@ -650,6 +650,27 @@ static bool add_prefix(insn *result)
return true;
}
/*
 * Add the value-dependent type flags for an immediate operand.
 *
 * n is the value of the immediate and flags holds the operand type
 * bits gathered so far; the updated bits are returned.
 *
 * UNITY is added whenever n is exactly 1.  The size-range subclasses
 * are only added when optimization is enabled (optimizing.level >= 0)
 * and the operand has not been marked STRICT.
 */
static opflags_t imm_flags(int64_t n, opflags_t flags)
{
    if (n == 1)
        flags |= UNITY;

    if (optimizing.level >= 0 && !(flags & STRICT)) {
        /* The value as seen through progressively wider signed types */
        const int8_t  lo8  = (int8_t)n;
        const int16_t lo16 = (int16_t)n;
        const int32_t lo32 = (int32_t)n;

        /* The low 16 bits are just the sign-extended low byte */
        if (lo16 == lo8)
            flags |= SBYTEWORD;

        /* The low 32 bits are just the sign-extended low byte */
        if (lo32 == lo8)
            flags |= SBYTEDWORD;

        /* The whole value survives truncation to signed 32 bits */
        if (n == lo32)
            flags |= SDWORD;

        /* The whole value survives truncation to unsigned 32 bits */
        if ((uint64_t)n == (uint32_t)n)
            flags |= UDWORD;
    }

    return flags;
}
insn *parse_line(char *buffer, insn *result)
{
bool insn_is_label = false;
@ -757,7 +778,7 @@ restart_parse:
*/
result->opcode = I_RESB;
result->operands = 1;
result->oprs[0].type = IMMEDIATE;
result->oprs[0].type = imm_flags(0, IMM_NORMAL);
result->oprs[0].offset = 0;
result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
}
@ -863,20 +884,37 @@ restart_parse:
first = false;
if (opnum == 0) {
/*
* Allow braced prefix tokens like {evex} or {dfv} after
* the opcode mnemonic proper, but before the first
* operand. This is currently not allowed for non-braced
* prefix tokens.
* Allow braced prefix tokens like {evex} after the opcode
* mnemonic proper, but before the first operand. This is
* currently not allowed for non-braced prefix tokens.
*/
while ((tokval.t_flag & TFLAG_BRC) && add_prefix(result))
i = stdscan(NULL, &tokval);
}
if (i == TOKEN_EOS)
break;
op->type = 0; /* so far, no override */
/*
* Naked special immediate token. Terminates the expression
* without requiring a post-comma.
*/
if (i == TOKEN_BRCCONST) {
op->type = imm_flags(tokval.t_integer, IMMEDIATE);
op->opflags = 0;
op->offset = tokval.t_integer;
op->segment = NO_SEG;
op->wrt = NO_SEG;
op->iflag = tokval.t_inttwo;
i = stdscan(NULL, &tokval);
if (i != ',')
stdscan_pushback(&tokval);
continue; /* Next operand */
}
/* size specifiers */
while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {
switch (tokval.t_integer) {
@ -1100,7 +1138,7 @@ restart_parse:
if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
/* parse opmask (and zeroing) after an operand */
recover = parse_braces(&brace_flags);
recover = parse_decorators(&brace_flags);
i = tokval.t_type;
}
if (!recover && i != 0 && i != ',') {
@ -1117,7 +1155,7 @@ restart_parse:
op->type |= COLON;
} else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
/* parse opmask (and zeroing) after an operand */
recover = parse_braces(&brace_flags);
recover = parse_decorators(&brace_flags);
}
}
if (recover) {
@ -1146,7 +1184,7 @@ restart_parse:
recover = true;
} else { /* it's not a memory reference */
if (is_just_unknown(value)) { /* it's immediate but unknown */
op->type |= IMMEDIATE;
op->type |= IMM_NORMAL;
op->opflags |= OPFLAG_UNKNOWN;
op->offset = 0; /* don't care */
op->segment = NO_SEG; /* don't care again */
@ -1160,26 +1198,14 @@ restart_parse:
} else if (is_reloc(value)) { /* it's immediate */
uint64_t n = reloc_value(value);
op->type |= IMMEDIATE;
op->type |= IMM_NORMAL;
op->offset = n;
op->segment = reloc_seg(value);
op->wrt = reloc_wrt(value);
op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
if (is_simple(value)) {
if (n == 1)
op->type |= UNITY;
if (optimizing.level >= 0 && !(op->type & STRICT)) {
if ((uint32_t) (n + 128) <= 255)
op->type |= SBYTEDWORD;
if ((uint16_t) (n + 128) <= 255)
op->type |= SBYTEWORD;
if (n <= UINT64_C(0xFFFFFFFF))
op->type |= UDWORD;
if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
op->type |= SDWORD;
}
}
if (is_simple(value))
op->type = imm_flags(n, op->type);
} else if (value->type == EXPR_RDSAE) {
/*
* it's not an operand but a rounding or SAE decorator.

View File

@ -168,29 +168,34 @@ static int stdscan_parse_braces(struct tokenval *tv)
size_t prefix_len = 0;
size_t suffix_len = 0;
size_t brace_len;
const char *startp, *endp;
const char *pfx, *r;
const char *startp;
char *endp;
const char *pfx, *r, *e;
char *buf;
char nextchar;
int64_t t_integer, t_inttwo;
bool first;
startp = scan.bufptr; /* Beginning including { */
pfx = r = scan.bufptr = nasm_skip_spaces(++scan.bufptr);
/*
* read the entire buffer to advance the buffer pointer
* Read the token to advance the buffer pointer
* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
*/
while (nasm_isbrcchar(*scan.bufptr))
scan.bufptr++;
e = scan.bufptr;
/*
* Followed by equal sign?
*/
if (*scan.bufptr == '=') {
r = ++scan.bufptr;
prefix_len = scan.bufptr - pfx;
/* Note that the prefix includes = and the first suffix is blank */
scan.bufptr = nasm_skip_spaces(scan.bufptr);
if (r != e && *scan.bufptr == '=') {
prefix_len = e - pfx;
r = e = ++scan.bufptr;
/* Note that the first suffix is blank */
}
/*
@ -203,35 +208,31 @@ static int stdscan_parse_braces(struct tokenval *tv)
nasm_nonfatal("unterminated braces at end of line");
return tv->t_type = TOKEN_INVALID;
}
brace_len = endp - startp + 1;
brace_len = ++endp - startp;
buf = tv->t_charptr = stdscan_alloc(brace_len + 1);
memcpy(buf, pfx, prefix_len);
if (prefix_len) {
memcpy(buf, pfx, prefix_len);
buf[prefix_len++] = '=';
}
t_integer = t_inttwo = 0;
first = true;
do {
suffix_len = scan.bufptr - r;
scan.bufptr = nasm_skip_spaces(scan.bufptr);
nextchar = *scan.bufptr++;
if (nextchar != '}' && (!prefix_len || nextchar != ',')) {
nasm_nonfatal("invalid character `%c' in brace sequence",
nextchar);
return tv->t_type = TOKEN_INVALID;
}
while (1) {
suffix_len = e - r;
memcpy(buf + prefix_len, r, suffix_len);
buf[prefix_len + suffix_len] = '\0';
/* handle tokens inside braces */
nasm_token_hash(tv->t_charptr, tv);
/* Note: nasm_token_hash doesn't modify t_charptr */
nasm_token_hash(buf, tv);
if (!(tv->t_flag & TFLAG_BRC_ANY)) {
/* invalid token is put inside braces */
nasm_nonfatal("`{%.*s%.*s}' is not a valid brace token",
(int)prefix_len, pfx, (int)suffix_len, r);
return tv->t_type = TOKEN_INVALID;
nasm_nonfatal("`{%s}' is not a valid brace token", buf);
tv->t_type = TOKEN_INVALID;
break;
}
if (tv->t_type == TOKEN_REG &&
@ -247,7 +248,35 @@ static int stdscan_parse_braces(struct tokenval *tv)
t_integer = tv->t_integer;
t_inttwo = tv->t_inttwo;
}
} while (nextchar != '}');
scan.bufptr = nasm_skip_spaces(scan.bufptr);
nextchar = *scan.bufptr;
if (nextchar == '}')
break;
if (!prefix_len ||
!(nextchar == ',' || (first && nasm_isbrcchar(nextchar)))) {
nasm_nonfatal("invalid character `%c' in brace sequence",
nextchar);
tv->t_type = TOKEN_INVALID;
break;
}
if (nextchar == ',')
scan.bufptr = nasm_skip_spaces(++scan.bufptr);
r = scan.bufptr;
while (nasm_isbrcchar(*scan.bufptr))
scan.bufptr++;
e = scan.bufptr;
first = false;
}
memcpy(tv->t_charptr, startp, brace_len);
buf[brace_len] = '\0';
scan.bufptr = endp;
tv->t_integer = t_integer;
tv->t_inttwo = t_inttwo;

View File

@ -648,6 +648,7 @@ typedef struct operand { /* operand to an instruction */
int32_t wrt; /* segment base it's relative to */
int eaflags; /* special EA flags */
int opflags; /* see OPFLAG_* defines below */
int iflag; /* Requires a specific IF_* flag */
decoflags_t decoflags; /* decorator flags such as {...} */
} operand;
@ -758,7 +759,7 @@ typedef struct insn { /* an instruction itself */
char *label; /* the label defined, or NULL */
int prefixes[MAXPREFIX]; /* instruction prefixes, if any */
enum opcode opcode; /* the opcode - not just the string */
int operands; /* how many operands? 0-3 (more if db et al) */
int operands; /* how many operands? 0-7 (more if db et al) */
int addr_size; /* address size */
operand oprs[MAX_OPERANDS]; /* the operands, defined as above */
extop *eops; /* extended operands */

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2018 The NASM Authors - All Rights Reserved
* Copyright 1996-2024 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -328,6 +328,7 @@ static inline bool is_reg_class(opflags_t class, opflags_t reg)
#define SBYTEDWORD (GEN_SUBCLASS(2) | IMMEDIATE) /* operand is in the range -128..127 mod 2^32 */
#define SDWORD (GEN_SUBCLASS(3) | IMMEDIATE) /* operand is in the range -0x80000000..0x7FFFFFFF */
#define UDWORD (GEN_SUBCLASS(4) | IMMEDIATE) /* operand is in the range 0..0xFFFFFFFF */
#define IMM_NORMAL (GEN_SUBCLASS(5) | IMMEDIATE) /* operand is an ordinary immediate, NOT a braced special constant such as {dfv=} */
/* Register set sizes */
#define RS2 GEN_REGSET(0)

View File

@ -1,7 +1,7 @@
#!/usr/bin/perl
## --------------------------------------------------------------------------
##
## Copyright 1996-2020 The NASM Authors - All Rights Reserved
## Copyright 1996-2024 The NASM Authors - All Rights Reserved
## See the file AUTHORS included with the NASM distribution for
## the specific copyright holders.
##
@ -535,7 +535,8 @@ sub format_insn($$$$$) {
}
$opp =~ s/^mem$/memory/;
$opp =~ s/^memory_offs$/mem_offs/;
$opp =~ s/^imm$/immediate/;
$opp =~ s/^spec$/immediate/; # Immediate or special immediate
$opp =~ s/^imm$/imm_normal/; # Normal immediates only
$opp =~ s/^([a-z]+)rm$/rm_$1/;
$opp =~ s/^rm$/rm_gpr/;
$opp =~ s/^reg$/reg_gpr/;