From 164d24677a590c2b24d75aa085cbb13b6bccab61 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Mon, 20 Feb 2017 02:39:56 -0800
Subject: [PATCH] Support self-relative expressions in offsets and immediates

Handle, hopefully correctly, self-relative expressions (that is,
expressions of the form X - Y where Y is a symbol in the current
segment, possibly $ or $$) used as offsets or immediates, as opposed
to arguments to Dx statements (which have already been supported for a
while.)

Signed-off-by: H. Peter Anvin
---
Review notes (below the "---" line, so not part of the commit message):
 - is_reloc(): a segment-base term whose multiplier is neither +1 nor -1
   is now rejected, as the code being replaced did ("segment base
   multiplier non-unity") and as the new parse_mref() does.
 - dump_expr(): the term value is printed with %lld through a
   (long long) cast; %ld is a mismatch wherever long is narrower than
   the value's type.
 - The exprlib.c hunk headers and the diffstat were adjusted for the
   three added lines.

 asm/assemble.c |  42 +++++++-----
 asm/exprlib.c  | 173 ++++++++++++++++++++++++++++++++-----------------
 asm/parser.c   |  77 +++++++++++-----------
 include/nasm.h |  26 ++++----
 test/pcrel.asm |  50 +++++++++++++++
 5 files changed, 244 insertions(+), 124 deletions(-)
 create mode 100644 test/pcrel.asm

diff --git a/asm/assemble.c b/asm/assemble.c
index 10011c99..1748a0ec 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -245,6 +245,12 @@ static void add_asp(insn *, int);
 
 static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *);
 
+static inline bool absolute_op(const struct operand *o)
+{
+    return o->segment == NO_SEG && o->wrt == NO_SEG &&
+        !(o->opflags & OPFLAG_RELATIVE);
+}
+
 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 {
     return ins->prefixes[pos] == prefix;
@@ -295,7 +301,7 @@ static void warn_overflow_const(int64_t data, int size)
 
 static void warn_overflow_opd(const struct operand *o, int size)
 {
-    if (o->wrt == NO_SEG && o->segment == NO_SEG) {
+    if (absolute_op(o)) {
         if (overflow_general(o->offset, size))
             warn_overflow(ERR_PASS2, size);
     }
@@ -426,10 +432,11 @@ static inline void out_reserve(struct out_data *data, uint64_t size)
     out(data);
 }
 
-static inline void out_imm(struct out_data *data, struct operand *opx,
+static inline void out_imm(struct out_data *data, const struct operand *opx,
                            int size, enum out_sign sign)
 {
-    data->type = OUT_ADDRESS;
+    data->type =
+        (opx->opflags & OPFLAG_RELATIVE) ? OUT_RELADDR : OUT_ADDRESS;
     data->sign = sign;
     data->size = size;
     data->toffset = opx->offset;
@@ -438,9 +445,12 @@ static inline void out_imm(struct out_data *data, struct operand *opx,
     out(data);
 }
 
-static inline void out_reladdr(struct out_data *data, struct operand *opx,
-                               int size)
+static void out_reladdr(struct out_data *data, const struct operand *opx,
+                        int size)
 {
+    if (opx->opflags & OPFLAG_RELATIVE)
+        nasm_error(ERR_NONFATAL, "invalid use of self-relative expression");
+
     data->type = OUT_RELADDR;
     data->sign = OUT_SIGNED;
     data->size = size;
@@ -450,7 +460,8 @@ static inline void out_reladdr(struct out_data *data, struct operand *opx,
     out(data);
 }
 
-static inline void out_segment(struct out_data *data, struct operand *opx)
+static inline void out_segment(struct out_data *data,
+                               const struct operand *opx)
 {
     data->type = OUT_SEGMENT;
     data->sign = OUT_UNSIGNED;
@@ -1126,7 +1137,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             break;
 
         case 0340:
-            if (ins->oprs[0].segment != NO_SEG)
+            if (!absolute_op(&ins->oprs[0]))
                 nasm_error(ERR_NONFATAL, "attempt to reserve non-constant"
                            " quantity of BSS space");
             else if (ins->oprs[0].opflags & OPFLAG_FORWARD)
@@ -1631,7 +1642,7 @@ static void gencode(struct out_data *data, insn *ins)
             c = *codes++;
             opx = &ins->oprs[c >> 3];
             opy = &ins->oprs[c & 7];
-            if (opy->segment != NO_SEG || opy->wrt != NO_SEG) {
+            if (!absolute_op(opy)) {
                 nasm_error(ERR_NONFATAL,
                            "non-absolute expression not permitted as argument %d",
                            c & 7);
@@ -1657,7 +1668,7 @@ static void gencode(struct out_data *data, insn *ins)
             break;
 
         case4(0254):
-            if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
+            if (absolute_op(opx) &&
                 (int32_t)opx->offset != (int64_t)opx->offset) {
                 nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
                            "signed dword immediate exceeds bounds");
@@ -2429,11 +2440,12 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
             /*
              * It's a pure offset.
              */
-            if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
-                input->segment == NO_SEG) {
-                nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
-                input->type &= ~IP_REL;
-                input->type |= MEMORY;
+            if (bits == 64 && ((input->type & IP_REL) == IP_REL)) {
+                if (input->segment == NO_SEG || (input->opflags & OPFLAG_RELATIVE)) {
+                    nasm_error(ERR_WARNING | ERR_PASS2, "absolute address can not be RIP-relative");
+                    input->type &= ~IP_REL;
+                    input->type |= MEMORY;
+                }
             }
 
             if (bits == 64 &&
diff --git a/asm/exprlib.c b/asm/exprlib.c
index 7eb3436c..6315ff19 100644
--- a/asm/exprlib.c
+++ b/asm/exprlib.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *
+ *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -14,7 +14,7 @@
  *     copyright notice, this list of conditions and the following
  *     disclaimer in the documentation and/or other materials provided
  *     with the distribution.
- *     
+ *
  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -43,82 +43,90 @@
  * Return true if the argument is a simple scalar. (Or a far-
  * absolute, which counts.)
  */
-int is_simple(expr * vect)
+bool is_simple(const expr *vect)
 {
     while (vect->type && !vect->value)
         vect++;
     if (!vect->type)
-        return 1;
+        return true;
     if (vect->type != EXPR_SIMPLE)
-        return 0;
+        return false;
     do {
         vect++;
     } while (vect->type && !vect->value);
     if (vect->type && vect->type < EXPR_SEGBASE + SEG_ABS)
-        return 0;
-    return 1;
+        return false;
+    return true;
 }
 
 /*
  * Return true if the argument is a simple scalar, _NOT_ a far-
  * absolute.
  */
-int is_really_simple(expr * vect)
+bool is_really_simple(const expr *vect)
 {
     while (vect->type && !vect->value)
         vect++;
     if (!vect->type)
-        return 1;
+        return true;
     if (vect->type != EXPR_SIMPLE)
-        return 0;
+        return false;
     do {
         vect++;
     } while (vect->type && !vect->value);
     if (vect->type)
-        return 0;
-    return 1;
+        return false;
+    return true;
 }
 
 /*
  * Return true if the argument is relocatable (i.e. a simple
- * scalar, plus at most one segment-base, plus possibly a WRT).
+ * scalar, plus at most one segment-base, possibly a subtraction
+ * of the current segment base, plus possibly a WRT).
  */
-int is_reloc(expr * vect)
+bool is_reloc(const expr *vect)
 {
-    while (vect->type && !vect->value)  /* skip initial value-0 terms */
-        vect++;
-    if (!vect->type)            /* trivially return true if nothing */
-        return 1;               /* is present apart from value-0s */
-    if (vect->type < EXPR_SIMPLE)       /* false if a register is present */
-        return 0;
-    if (vect->type == EXPR_SIMPLE) {    /* skip over a pure number term... */
-        do {
-            vect++;
-        } while (vect->type && !vect->value);
-        if (!vect->type)        /* ...returning true if that's all */
-            return 1;
+    bool has_rel = false;       /* Has a self-segment-subtract */
+    bool has_seg = false;       /* Has a segment base */
+
+    for (; vect->type; vect++) {
+        if (!vect->value) {
+            /* skip value-0 terms */
+            continue;
+        } else if (vect->type < EXPR_SIMPLE) {
+            /* false if a register is present */
+            return false;
+        } else if (vect->type == EXPR_SIMPLE) {
+            /* skip over a pure number term... */
+            continue;
+        } else if (vect->type == EXPR_WRT) {
+            /* skip over a WRT term... */
+            continue;
+        } else if (vect->type < EXPR_SEGBASE) {
+            /* other special type -> problem */
+            return false;
+        } else if (vect->value == 1) {
+            if (has_seg)
+                return false;   /* only one segbase allowed */
+            has_seg = true;
+        } else if (vect->value == -1) {
+            if (vect->type != location.segment + EXPR_SEGBASE)
+                return false;   /* can only subtract current segment */
+            if (has_rel)
+                return false;   /* already is relative */
+            has_rel = true;
+        } else {
+            return false;       /* segment base multiplier not +1 or -1 */
+        }
     }
-    if (vect->type == EXPR_WRT) {       /* skip over a WRT term... */
-        do {
-            vect++;
-        } while (vect->type && !vect->value);
-        if (!vect->type)        /* ...returning true if that's all */
-            return 1;
-    }
-    if (vect->value != 0 && vect->value != 1)
-        return 0;               /* segment base multiplier non-unity */
-    do {                        /* skip over _one_ seg-base term... */
-        vect++;
-    } while (vect->type && !vect->value);
-    if (!vect->type)            /* ...returning true if that's all */
-        return 1;
-    return 0;                   /* And return false if there's more */
+
+    return true;
 }
 
 /*
  * Return true if the argument contains an `unknown' part.
  */
-int is_unknown(expr * vect)
+bool is_unknown(const expr *vect)
 {
     while (vect->type && vect->type < EXPR_UNKNOWN)
         vect++;
@@ -129,7 +137,7 @@ int is_unknown(expr * vect)
  * Return true if the argument contains nothing but an `unknown'
  * part.
  */
-int is_just_unknown(expr * vect)
+bool is_just_unknown(const expr *vect)
 {
     while (vect->type && !vect->value)
         vect++;
@@ -140,7 +148,7 @@ int is_just_unknown(expr * vect)
  * Return the scalar part of a relocatable vector. (Including
  * simple scalar vectors - those qualify as relocatable.)
  */
-int64_t reloc_value(expr * vect)
+int64_t reloc_value(const expr *vect)
 {
     while (vect->type && !vect->value)
         vect++;
@@ -156,26 +164,21 @@ int64_t reloc_value(expr * vect)
  * Return the segment number of a relocatable vector, or NO_SEG for
  * simple scalars.
  */
-int32_t reloc_seg(expr * vect)
+int32_t reloc_seg(const expr *vect)
 {
-    while (vect->type && (vect->type == EXPR_WRT || !vect->value))
-        vect++;
-    if (vect->type == EXPR_SIMPLE) {
-        do {
-            vect++;
-        } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
+    for (; vect->type; vect++) {
+        if (vect->type >= EXPR_SEGBASE && vect->value == 1)
+            return vect->type - EXPR_SEGBASE;
     }
-    if (!vect->type)
-        return NO_SEG;
-    else
-        return vect->type - EXPR_SEGBASE;
+
+    return NO_SEG;
 }
 
 /*
  * Return the WRT segment number of a relocatable vector, or NO_SEG
  * if no WRT part is present.
  */
-int32_t reloc_wrt(expr * vect)
+int32_t reloc_wrt(const expr *vect)
 {
     while (vect->type && vect->type < EXPR_WRT)
         vect++;
@@ -184,3 +187,59 @@ int32_t reloc_wrt(expr * vect)
     } else
         return NO_SEG;
 }
+
+/*
+ * Return true if this expression contains a subtraction of the location
+ */
+bool is_self_relative(const expr *vect)
+{
+    for (; vect->type; vect++) {
+        if (vect->type == location.segment + EXPR_SEGBASE && vect->value == -1)
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * Debug support: dump a description of an expression vector to stdout
+ */
+static const char *expr_type(int32_t type)
+{
+    static char seg_str[64];
+
+    switch (type) {
+    case 0:
+        return "null";
+    case EXPR_UNKNOWN:
+        return "unknown";
+    case EXPR_SIMPLE:
+        return "simple";
+    case EXPR_WRT:
+        return "wrt";
+    case EXPR_RDSAE:
+        return "sae";
+    default:
+        break;
+    }
+
+    if (type >= EXPR_REG_START && type <= EXPR_REG_END) {
+        return nasm_reg_names[type - EXPR_REG_START];
+    } else if (type >= EXPR_SEGBASE) {
+        snprintf(seg_str, sizeof seg_str, "%sseg %d",
+                 (type - EXPR_SEGBASE) == location.segment ? "this " : "",
+                 type - EXPR_SEGBASE);
+        return seg_str;
+    } else {
+        return "ERR";
+    }
+}
+
+void dump_expr(const expr *e)
+{
+    printf("[");
+    for (; e->type; e++)
+        printf("<%s(%d),%lld>", expr_type(e->type), e->type,
+               (long long)e->value);
+    printf("]\n");
+}
diff --git a/asm/parser.c b/asm/parser.c
index 5fc46679..48b49c02 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -247,6 +247,7 @@ static int parse_mref(operand *op, const expr *e)
 
     b = i = -1;
     o = s = 0;
+    op->segment = op->wrt = NO_SEG;
 
     if (e->type && e->type <= EXPR_REG_END) {   /* this bit's a register */
         bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
@@ -272,17 +273,16 @@ static int parse_mref(operand *op, const expr *e)
         b = e->type;
         e++;
     }
-    if (e->type != 0) {         /* is there an offset? */
+
+    if (e->type) {              /* is there an offset? */
         if (e->type <= EXPR_REG_END) {  /* in fact, is there an error? */
             nasm_error(ERR_NONFATAL,
-                       "beroset-p-603-invalid effective address");
+                       "invalid effective address: impossible register");
             return -1;
         } else {
             if (e->type == EXPR_UNKNOWN) {
                 op->opflags |= OPFLAG_UNKNOWN;
                 o = 0;  /* doesn't matter what */
-                op->wrt = NO_SEG;       /* nor this */
-                op->segment = NO_SEG;   /* or this */
                 while (e->type)
                     e++;        /* go to the end of the line */
             } else {
@@ -293,48 +293,44 @@ static int parse_mref(operand *op, const expr *e)
                 if (e->type == EXPR_WRT) {
                     op->wrt = e->value;
                     e++;
-                } else
-                    op->wrt = NO_SEG;
+                }
                 /*
                  * Look for a segment base type.
                  */
-                if (e->type && e->type < EXPR_SEGBASE) {
-                    nasm_error(ERR_NONFATAL,
-                               "beroset-p-630-invalid effective address");
-                    return -1;
-                }
-                while (e->type && e->value == 0)
-                    e++;
-                if (e->type && e->value != 1) {
-                    nasm_error(ERR_NONFATAL,
-                               "beroset-p-637-invalid effective address");
-                    return -1;
-                }
-                if (e->type) {
-                    op->segment = e->type - EXPR_SEGBASE;
-                    e++;
-                } else
-                    op->segment = NO_SEG;
-                while (e->type && e->value == 0)
-                    e++;
-                if (e->type) {
-                    nasm_error(ERR_NONFATAL,
-                               "beroset-p-650-invalid effective address");
-                    return -1;
+                for (; e->type; e++) {
+                    if (!e->value)
+                        continue;
+
+                    if (e->type <= EXPR_REG_END) {
+                        nasm_error(ERR_NONFATAL,
+                                   "invalid effective address: too many registers");
+                        return -1;
+                    } else if (e->type < EXPR_SEGBASE) {
+                        nasm_error(ERR_NONFATAL,
+                                   "invalid effective address: bad subexpression type");
+                        return -1;
+                    } else if (e->value == 1) {
+                        if (op->segment != NO_SEG) {
+                            nasm_error(ERR_NONFATAL,
+                                       "invalid effective address: multiple base segments");
+                            return -1;
+                        }
+                        op->segment = e->type - EXPR_SEGBASE;
+                    } else if (e->value == -1 &&
+                               e->type == location.segment + EXPR_SEGBASE &&
+                               !(op->opflags & OPFLAG_RELATIVE)) {
+                        op->opflags |= OPFLAG_RELATIVE;
+                    } else {
+                        nasm_error(ERR_NONFATAL,
+                                   "invalid effective address: impossible segment base multiplier");
+                        return -1;
+                    }
                 }
             }
         }
-    } else {
-        o = 0;
-        op->wrt = NO_SEG;
-        op->segment = NO_SEG;
     }
 
-    if (e->type != 0) {         /* there'd better be nothing left! */
-        nasm_error(ERR_NONFATAL,
-                   "beroset-p-663-invalid effective address");
-        return -1;
-    }
+    nasm_assert(!e->type);      /* We should be at the end */
 
     op->basereg = b;
     op->indexreg = i;
@@ -1056,6 +1052,7 @@ is_expression:
             op->offset    = reloc_value(value);
             op->segment   = reloc_seg(value);
             op->wrt       = reloc_wrt(value);
+            op->opflags  |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
 
             if (is_simple(value)) {
                 uint64_t n = reloc_value(value);
@@ -1073,7 +1070,7 @@ is_expression:
                         op->type |= SDWORD;
                 }
             }
-        } else if(value->type == EXPR_RDSAE) {
+        } else if (value->type == EXPR_RDSAE) {
             /*
              * it's not an operand but a rounding or SAE decorator.
              * put the decorator information in the (opflag_t) type field
diff --git a/include/nasm.h b/include/nasm.h
index d1b13e13..e373d767 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -273,14 +273,16 @@ typedef struct {
 /*
  * Library routines to manipulate expression data types.
  */
-int is_reloc(expr *vect);
-int is_simple(expr *vect);
-int is_really_simple(expr *vect);
-int is_unknown(expr *vect);
-int is_just_unknown(expr *vect);
-int64_t reloc_value(expr *vect);
-int32_t reloc_seg(expr *vect);
-int32_t reloc_wrt(expr *vect);
+bool is_reloc(const expr *vect);
+bool is_simple(const expr *vect);
+bool is_really_simple(const expr *vect);
+bool is_unknown(const expr *vect);
+bool is_just_unknown(const expr *vect);
+int64_t reloc_value(const expr *vect);
+int32_t reloc_seg(const expr *vect);
+int32_t reloc_wrt(const expr *vect);
+bool is_self_relative(const expr *vect);
+void dump_expr(const expr *vect);
 
 /*
  * The evaluator can also return hints about which of two registers
@@ -575,7 +577,6 @@ typedef struct operand { /* operand to an instruction */
     int32_t         segment;    /* immediate segment, if needed */
     int64_t         offset;     /* any immediate number */
     int32_t         wrt;        /* segment base it's relative to */
-    bool            relative;   /* self-relative expression */
     int             eaflags;    /* special EA flags */
     int             opflags;    /* see OPFLAG_* defines below */
     decoflags_t     decoflags;  /* decorator flags such as {...} */
@@ -584,8 +585,9 @@ typedef struct operand { /* operand to an instruction */
 #define OPFLAG_FORWARD          1       /* operand is a forward reference */
 #define OPFLAG_EXTERN           2       /* operand is an external reference */
 #define OPFLAG_UNKNOWN          4       /* operand is an unknown reference
-                                         * (always a forward reference also)
-                                         */
+                                           (always a forward reference also) */
+#define OPFLAG_RELATIVE         8       /* operand is self-relative, e.g. [foo - $]
+                                           where foo is not in the current segment */
 
 typedef struct extop {          /* extended operand */
     struct extop *next;         /* linked list */
diff --git a/test/pcrel.asm b/test/pcrel.asm
new file mode 100644
index 00000000..239e8cb8
--- /dev/null
+++ b/test/pcrel.asm
@@ -0,0 +1,50 @@
+        bits 32
+foo:                            ; Backwards reference
+        mov eax,[foo - $]
+        mov ebx,[ebx + foo - $]
+        mov ecx,foo - $
+        mov edx,foo - bar
+
+        mov eax,[bar - $]
+        mov ebx,[ebx + bar - $]
+        mov ecx,bar - $
+        mov edx,bar - foo
+
+        mov eax,[baz - $]
+        mov ebx,[ebx + baz - $]
+        mov esi,[baz - bar]
+        mov ecx,baz - $
+        mov edx,baz - bar
+
+        bits 64
+        default rel
+
+        mov eax,[foo]
+        mov eax,[foo - $]
+        mov eax,[abs foo - $]
+        mov ebx,[ebx + foo - $]
+        mov ecx,foo - $
+        mov edx,foo - bar
+
+        mov eax,[bar]
+        mov eax,[bar - $]
+        mov eax,[abs bar - $]
+        mov ebx,[ebx + bar - $]
+        mov ecx,bar - $
+        mov edx,bar - foo
+
+        mov eax,[baz]
+        mov eax,[baz - $]
+        mov eax,[abs baz - $]
+        mov ebx,[ebx + baz - $]
+        mov esi,[baz - bar]
+        mov esi,[abs baz - bar]
+        mov ecx,baz - $
+        mov edx,baz - bar
+
+bar:                            ; Forwards reference
+        hlt
+
+        section ".data"
+baz:                            ; Other-segment reference
+        dd 0