Support self-relative expressions in offsets and immediates

Handle, hopefully correctly, self-relative expressions (that is,
expressions of the form X - Y where Y is a symbol in the current
segment, possibly $ or $$) used as offsets or immediates, as opposed
to arguments to Dx statements (which have already been supported for a
while).

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2017-02-20 02:39:56 -08:00
parent 9b4b92b014
commit 164d24677a
5 changed files with 241 additions and 124 deletions

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2016 The NASM Authors - All Rights Reserved
* Copyright 1996-2017 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -245,6 +245,12 @@ static void add_asp(insn *, int);
static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *);
/*
 * Return true if the operand is a plain absolute quantity: it has no
 * segment, no WRT segment, and is not flagged as a self-relative
 * expression (OPFLAG_RELATIVE).
 */
static inline bool absolute_op(const struct operand *o)
{
    if (o->segment != NO_SEG)
        return false;           /* carries a segment base */
    if (o->wrt != NO_SEG)
        return false;           /* carries a WRT part */

    return (o->opflags & OPFLAG_RELATIVE) == 0;
}
static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
{
return ins->prefixes[pos] == prefix;
@ -295,7 +301,7 @@ static void warn_overflow_const(int64_t data, int size)
static void warn_overflow_opd(const struct operand *o, int size)
{
if (o->wrt == NO_SEG && o->segment == NO_SEG) {
if (absolute_op(o)) {
if (overflow_general(o->offset, size))
warn_overflow(ERR_PASS2, size);
}
@ -426,10 +432,11 @@ static inline void out_reserve(struct out_data *data, uint64_t size)
out(data);
}
static inline void out_imm(struct out_data *data, struct operand *opx,
static inline void out_imm(struct out_data *data, const struct operand *opx,
int size, enum out_sign sign)
{
data->type = OUT_ADDRESS;
data->type =
(opx->opflags & OPFLAG_RELATIVE) ? OUT_RELADDR : OUT_ADDRESS;
data->sign = sign;
data->size = size;
data->toffset = opx->offset;
@ -438,9 +445,12 @@ static inline void out_imm(struct out_data *data, struct operand *opx,
out(data);
}
static inline void out_reladdr(struct out_data *data, struct operand *opx,
int size)
static void out_reladdr(struct out_data *data, const struct operand *opx,
int size)
{
if (opx->opflags & OPFLAG_RELATIVE)
nasm_error(ERR_NONFATAL, "invalid use of self-relative expression");
data->type = OUT_RELADDR;
data->sign = OUT_SIGNED;
data->size = size;
@ -450,7 +460,8 @@ static inline void out_reladdr(struct out_data *data, struct operand *opx,
out(data);
}
static inline void out_segment(struct out_data *data, struct operand *opx)
static inline void out_segment(struct out_data *data,
const struct operand *opx)
{
data->type = OUT_SEGMENT;
data->sign = OUT_UNSIGNED;
@ -1126,7 +1137,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
break;
case 0340:
if (ins->oprs[0].segment != NO_SEG)
if (!absolute_op(&ins->oprs[0]))
nasm_error(ERR_NONFATAL, "attempt to reserve non-constant"
" quantity of BSS space");
else if (ins->oprs[0].opflags & OPFLAG_FORWARD)
@ -1631,7 +1642,7 @@ static void gencode(struct out_data *data, insn *ins)
c = *codes++;
opx = &ins->oprs[c >> 3];
opy = &ins->oprs[c & 7];
if (opy->segment != NO_SEG || opy->wrt != NO_SEG) {
if (!absolute_op(opy)) {
nasm_error(ERR_NONFATAL,
"non-absolute expression not permitted as argument %d",
c & 7);
@ -1657,7 +1668,7 @@ static void gencode(struct out_data *data, insn *ins)
break;
case4(0254):
if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
if (absolute_op(opx) &&
(int32_t)opx->offset != (int64_t)opx->offset) {
nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
"signed dword immediate exceeds bounds");
@ -2429,11 +2440,12 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
/*
* It's a pure offset.
*/
if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
input->segment == NO_SEG) {
nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
input->type &= ~IP_REL;
input->type |= MEMORY;
if (bits == 64 && ((input->type & IP_REL) == IP_REL)) {
if (input->segment == NO_SEG || (input->opflags & OPFLAG_RELATIVE)) {
nasm_error(ERR_WARNING | ERR_PASS2, "absolute address can not be RIP-relative");
input->type &= ~IP_REL;
input->type |= MEMORY;
}
}
if (bits == 64 &&

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2009 The NASM Authors - All Rights Reserved
*
* Copyright 1996-2017 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -14,7 +14,7 @@
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@ -43,82 +43,88 @@
* Return true if the argument is a simple scalar. (Or a far-
* absolute, which counts.)
*/
int is_simple(expr * vect)
bool is_simple(const expr *vect)
{
while (vect->type && !vect->value)
vect++;
if (!vect->type)
return 1;
return true;
if (vect->type != EXPR_SIMPLE)
return 0;
return false;
do {
vect++;
} while (vect->type && !vect->value);
if (vect->type && vect->type < EXPR_SEGBASE + SEG_ABS)
return 0;
return 1;
return false;
return true;
}
/*
* Return true if the argument is a simple scalar, _NOT_ a far-
* absolute.
*/
int is_really_simple(expr * vect)
bool is_really_simple(const expr *vect)
{
while (vect->type && !vect->value)
vect++;
if (!vect->type)
return 1;
return true;
if (vect->type != EXPR_SIMPLE)
return 0;
return false;
do {
vect++;
} while (vect->type && !vect->value);
if (vect->type)
return 0;
return 1;
return false;
return true;
}
/*
* Return true if the argument is relocatable (i.e. a simple
* scalar, plus at most one segment-base, plus possibly a WRT).
* scalar, plus at most one segment-base, possibly a subtraction
* of the current segment base, plus possibly a WRT).
*/
int is_reloc(expr * vect)
bool is_reloc(const expr *vect)
{
while (vect->type && !vect->value) /* skip initial value-0 terms */
vect++;
if (!vect->type) /* trivially return true if nothing */
return 1; /* is present apart from value-0s */
if (vect->type < EXPR_SIMPLE) /* false if a register is present */
return 0;
if (vect->type == EXPR_SIMPLE) { /* skip over a pure number term... */
do {
vect++;
} while (vect->type && !vect->value);
if (!vect->type) /* ...returning true if that's all */
return 1;
bool has_rel = false; /* Has a self-segment-subtract */
bool has_seg = false; /* Has a segment base */
for (; vect->type; vect++) {
if (!vect->value) {
/* skip value-0 terms */
continue;
} else if (vect->type < EXPR_SIMPLE) {
/* false if a register is present */
return false;
} else if (vect->type == EXPR_SIMPLE) {
/* skip over a pure number term... */
continue;
} else if (vect->type == EXPR_WRT) {
/* skip over a WRT term... */
continue;
} else if (vect->type < EXPR_SEGBASE) {
/* other special type -> problem */
return false;
} else if (vect->value == 1) {
if (has_seg)
return false; /* only one segbase allowed */
has_seg = true;
} else if (vect->value == -1) {
if (vect->type != location.segment + EXPR_SEGBASE)
return false; /* can only subtract current segment */
if (has_rel)
return false; /* already is relative */
has_rel = true;
}
}
if (vect->type == EXPR_WRT) { /* skip over a WRT term... */
do {
vect++;
} while (vect->type && !vect->value);
if (!vect->type) /* ...returning true if that's all */
return 1;
}
if (vect->value != 0 && vect->value != 1)
return 0; /* segment base multiplier non-unity */
do { /* skip over _one_ seg-base term... */
vect++;
} while (vect->type && !vect->value);
if (!vect->type) /* ...returning true if that's all */
return 1;
return 0; /* And return false if there's more */
return true;
}
/*
* Return true if the argument contains an `unknown' part.
*/
int is_unknown(expr * vect)
bool is_unknown(const expr *vect)
{
while (vect->type && vect->type < EXPR_UNKNOWN)
vect++;
@ -129,7 +135,7 @@ int is_unknown(expr * vect)
* Return true if the argument contains nothing but an `unknown'
* part.
*/
int is_just_unknown(expr * vect)
bool is_just_unknown(const expr *vect)
{
while (vect->type && !vect->value)
vect++;
@ -140,7 +146,7 @@ int is_just_unknown(expr * vect)
* Return the scalar part of a relocatable vector. (Including
* simple scalar vectors - those qualify as relocatable.)
*/
int64_t reloc_value(expr * vect)
int64_t reloc_value(const expr *vect)
{
while (vect->type && !vect->value)
vect++;
@ -156,26 +162,21 @@ int64_t reloc_value(expr * vect)
* Return the segment number of a relocatable vector, or NO_SEG for
* simple scalars.
*/
int32_t reloc_seg(expr * vect)
int32_t reloc_seg(const expr *vect)
{
while (vect->type && (vect->type == EXPR_WRT || !vect->value))
vect++;
if (vect->type == EXPR_SIMPLE) {
do {
vect++;
} while (vect->type && (vect->type == EXPR_WRT || !vect->value));
for (; vect->type; vect++) {
if (vect->type >= EXPR_SEGBASE && vect->value == 1)
return vect->type - EXPR_SEGBASE;
}
if (!vect->type)
return NO_SEG;
else
return vect->type - EXPR_SEGBASE;
return NO_SEG;
}
/*
* Return the WRT segment number of a relocatable vector, or NO_SEG
* if no WRT part is present.
*/
int32_t reloc_wrt(expr * vect)
int32_t reloc_wrt(const expr *vect)
{
while (vect->type && vect->type < EXPR_WRT)
vect++;
@ -184,3 +185,58 @@ int32_t reloc_wrt(expr * vect)
} else
return NO_SEG;
}
/*
 * Return true if this expression contains a subtraction of the
 * current location's segment, i.e. some term of the vector has the
 * type of the current segment's base (location.segment + EXPR_SEGBASE)
 * with a coefficient of exactly -1.  The vector is scanned up to its
 * terminating zero-type entry.
 */
bool is_self_relative(const expr *vect)
{
    const expr *term = vect;

    while (term->type) {
        if (term->value == -1 &&
            term->type == location.segment + EXPR_SEGBASE)
            return true;
        term++;
    }

    return false;
}
/*
* Debug support: dump a description of an expression vector to stdout
*/
/*
 * Map an expression term type to a human-readable name for dump_expr().
 *
 * Fixed types map to string literals and register types map to the
 * corresponding nasm_reg_names[] entry.  Segment-base types are
 * formatted into a static buffer, so that result is only valid until
 * the next call which formats a segment-base type.  Anything else
 * yields "ERR".
 */
static const char *expr_type(int32_t type)
{
    static char seg_str[64];

    switch (type) {
    case 0:
        return "null";
    case EXPR_UNKNOWN:
        return "unknown";
    case EXPR_SIMPLE:
        return "simple";
    case EXPR_WRT:
        return "wrt";
    case EXPR_RDSAE:
        return "sae";
    default:
        break;
    }

    /* Register terms: index the register name table */
    if (type >= EXPR_REG_START && type <= EXPR_REG_END)
        return nasm_reg_names[type - EXPR_REG_START];

    /* Not a register and below the segment-base range: unrecognized */
    if (type < EXPR_SEGBASE)
        return "ERR";

    /* Segment base; mark it if it is the segment being assembled */
    snprintf(seg_str, sizeof seg_str, "%sseg %d",
             (type - EXPR_SEGBASE) == location.segment ? "this " : "",
             type - EXPR_SEGBASE);
    return seg_str;
}
/*
 * Debug aid: print an expression vector to stdout on a single line.
 *
 * Each term up to the terminating zero-type entry is printed as
 * <name(type),value>, where name comes from expr_type(); the whole
 * list is wrapped in square brackets and followed by a newline.
 */
void dump_expr(const expr *e)
{
    printf("[");
    for (; e->type; e++) {
        /*
         * e->value is a 64-bit quantity; "%ld" is only correct where
         * long is 64 bits wide.  Cast to long long and use "%lld" so
         * the format matches the argument on every platform.
         */
        printf("<%s(%d),%lld>", expr_type(e->type), e->type,
               (long long)e->value);
    }
    printf("]\n");
}

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2016 The NASM Authors - All Rights Reserved
* Copyright 1996-2017 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -247,6 +247,7 @@ static int parse_mref(operand *op, const expr *e)
b = i = -1;
o = s = 0;
op->segment = op->wrt = NO_SEG;
if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */
bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
@ -272,17 +273,16 @@ static int parse_mref(operand *op, const expr *e)
b = e->type;
e++;
}
if (e->type != 0) { /* is there an offset? */
if (e->type) { /* is there an offset? */
if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */
nasm_error(ERR_NONFATAL,
"beroset-p-603-invalid effective address");
"invalid effective address: impossible register");
return -1;
} else {
if (e->type == EXPR_UNKNOWN) {
op->opflags |= OPFLAG_UNKNOWN;
o = 0; /* doesn't matter what */
op->wrt = NO_SEG; /* nor this */
op->segment = NO_SEG; /* or this */
while (e->type)
e++; /* go to the end of the line */
} else {
@ -293,48 +293,44 @@ static int parse_mref(operand *op, const expr *e)
if (e->type == EXPR_WRT) {
op->wrt = e->value;
e++;
} else
op->wrt = NO_SEG;
}
/*
* Look for a segment base type.
*/
if (e->type && e->type < EXPR_SEGBASE) {
nasm_error(ERR_NONFATAL,
"beroset-p-630-invalid effective address");
return -1;
}
while (e->type && e->value == 0)
e++;
if (e->type && e->value != 1) {
nasm_error(ERR_NONFATAL,
"beroset-p-637-invalid effective address");
return -1;
}
if (e->type) {
op->segment = e->type - EXPR_SEGBASE;
e++;
} else
op->segment = NO_SEG;
while (e->type && e->value == 0)
e++;
if (e->type) {
nasm_error(ERR_NONFATAL,
"beroset-p-650-invalid effective address");
return -1;
for (; e->type; e++) {
if (!e->value)
continue;
if (e->type <= EXPR_REG_END) {
nasm_error(ERR_NONFATAL,
"invalid effective address: too many registers");
return -1;
} else if (e->type < EXPR_SEGBASE) {
nasm_error(ERR_NONFATAL,
"invalid effective address: bad subexpression type");
return -1;
} else if (e->value == 1) {
if (op->segment != NO_SEG) {
nasm_error(ERR_NONFATAL,
"invalid effective address: multiple base segments");
return -1;
}
op->segment = e->type - EXPR_SEGBASE;
} else if (e->value == -1 &&
e->type == location.segment + EXPR_SEGBASE &&
!(op->opflags & OPFLAG_RELATIVE)) {
op->opflags |= OPFLAG_RELATIVE;
} else {
nasm_error(ERR_NONFATAL,
"invalid effective address: impossible segment base multiplier");
return -1;
}
}
}
}
} else {
o = 0;
op->wrt = NO_SEG;
op->segment = NO_SEG;
}
if (e->type != 0) { /* there'd better be nothing left! */
nasm_error(ERR_NONFATAL,
"beroset-p-663-invalid effective address");
return -1;
}
nasm_assert(!e->type); /* We should be at the end */
op->basereg = b;
op->indexreg = i;
@ -1056,6 +1052,7 @@ is_expression:
op->offset = reloc_value(value);
op->segment = reloc_seg(value);
op->wrt = reloc_wrt(value);
op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
if (is_simple(value)) {
uint64_t n = reloc_value(value);
@ -1073,7 +1070,7 @@ is_expression:
op->type |= SDWORD;
}
}
} else if(value->type == EXPR_RDSAE) {
} else if (value->type == EXPR_RDSAE) {
/*
* it's not an operand but a rounding or SAE decorator.
* put the decorator information in the (opflag_t) type field

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2016 The NASM Authors - All Rights Reserved
* Copyright 1996-2017 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -273,14 +273,16 @@ typedef struct {
/*
* Library routines to manipulate expression data types.
*/
int is_reloc(expr *vect);
int is_simple(expr *vect);
int is_really_simple(expr *vect);
int is_unknown(expr *vect);
int is_just_unknown(expr *vect);
int64_t reloc_value(expr *vect);
int32_t reloc_seg(expr *vect);
int32_t reloc_wrt(expr *vect);
bool is_reloc(const expr *vect);
bool is_simple(const expr *vect);
bool is_really_simple(const expr *vect);
bool is_unknown(const expr *vect);
bool is_just_unknown(const expr *vect);
int64_t reloc_value(const expr *vect);
int32_t reloc_seg(const expr *vect);
int32_t reloc_wrt(const expr *vect);
bool is_self_relative(const expr *vect);
void dump_expr(const expr *vect);
/*
* The evaluator can also return hints about which of two registers
@ -575,7 +577,6 @@ typedef struct operand { /* operand to an instruction */
int32_t segment; /* immediate segment, if needed */
int64_t offset; /* any immediate number */
int32_t wrt; /* segment base it's relative to */
bool relative; /* self-relative expression */
int eaflags; /* special EA flags */
int opflags; /* see OPFLAG_* defines below */
decoflags_t decoflags; /* decorator flags such as {...} */
@ -584,8 +585,9 @@ typedef struct operand { /* operand to an instruction */
#define OPFLAG_FORWARD 1 /* operand is a forward reference */
#define OPFLAG_EXTERN 2 /* operand is an external reference */
#define OPFLAG_UNKNOWN 4 /* operand is an unknown reference
* (always a forward reference also)
*/
(always a forward reference also) */
#define OPFLAG_RELATIVE 8 /* operand is self-relative, e.g. [foo - $]
where foo is not in the current segment */
typedef struct extop { /* extended operand */
struct extop *next; /* linked list */

50
test/pcrel.asm Normal file
View File

@ -0,0 +1,50 @@
; Test of self-relative expressions (label - $ and label - label) used
; as memory offsets and as immediates.  Three kinds of label are used:
; foo (defined before the uses), bar (defined after the uses) and baz
; (defined in a different section).
;
; --- 32-bit code ---
bits 32
foo: ; Backwards reference
; foo: label defined before its uses
mov eax,[foo - $]
mov ebx,[ebx + foo - $]
mov ecx,foo - $
mov edx,foo - bar
; bar: label defined after its uses
mov eax,[bar - $]
mov ebx,[ebx + bar - $]
mov ecx,bar - $
mov edx,bar - foo
; baz: label defined in another section
mov eax,[baz - $]
mov ebx,[ebx + baz - $]
mov esi,[baz - bar]
mov ecx,baz - $
mov edx,baz - bar
; --- 64-bit code with "default rel" in effect ---
; The same groups again, plus a plain [label] form and an explicit
; [abs ...] form of the memory references.
; NOTE(review): presumably [abs ...] overrides the RIP-relative default
; selected by "default rel" -- confirm against the NASM manual.
bits 64
default rel
; foo: label defined before its uses
mov eax,[foo]
mov eax,[foo - $]
mov eax,[abs foo - $]
mov ebx,[ebx + foo - $]
mov ecx,foo - $
mov edx,foo - bar
; bar: label defined after its uses
mov eax,[bar]
mov eax,[bar - $]
mov eax,[abs bar - $]
mov ebx,[ebx + bar - $]
mov ecx,bar - $
mov edx,bar - foo
; baz: label defined in another section
mov eax,[baz]
mov eax,[baz - $]
mov eax,[abs baz - $]
mov ebx,[ebx + baz - $]
mov esi,[baz - bar]
mov esi,[abs baz - bar]
mov ecx,baz - $
mov edx,baz - bar
bar: ; Forwards reference
hlt
; Separate section holding the other-segment target label
section ".data"
baz: ; Other-segment reference
dd 0