64-bit addressing and prefix handling changes

Revamp the address- and prefix-handling code to make more sense in
64-bit mode.  We are now a lot closer to where we want to be, but
we're not quite there yet.

ndisasm may very well have problems, or give counterintuitive output.
However, checking it in so we can make forward progress.
This commit is contained in:
H. Peter Anvin 2007-10-01 15:41:25 -07:00
parent d1cf2de1c0
commit de4b89bb3e
6 changed files with 387 additions and 225 deletions

View File

@ -22,8 +22,7 @@
* assembly mode or the operand-size override on the operand
* \40..\43 - a long immediate operand, from operand 0..3
* \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
* depending on assembly mode or the address-size override
* on the operand.
* depending on the address size of the instruction.
* \50..\53 - a byte relative operand, from operand 0..3
* \54..\57 - a qword immediate operand, from operand 0..3
* \60..\63 - a word relative operand, from operand 0..3
@ -115,25 +114,50 @@ static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int32_t, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t, int);
static void add_asp(insn *, int);
static int has_prefix(insn * ins, enum prefixes prefix)
static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
{
int j;
for (j = 0; j < ins->nprefix; j++) {
if (ins->prefixes[j] == prefix)
return 1;
return ins->prefixes[pos] == prefix;
}
/*
 * Complain (non-fatally) if prefix slot `pos` of instruction `ins`
 * holds any prefix at all; the message names the prefix found there.
 * An empty slot is silently accepted.
 */
static void assert_no_prefix(insn * ins, enum prefix_pos pos)
{
    const int occupant = ins->prefixes[pos];

    if (!occupant)
        return;                 /* slot is empty: nothing to report */

    errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(occupant));
}
/*
 * Translate an operand size in bytes into the keyword naming it
 * ("byte", "word", "dword", "qword", "tword", "oword").
 * Any size without a keyword yields the placeholder "???".
 */
static const char *size_name(int size)
{
    static const struct {
        int bytes;
        const char *keyword;
    } known_sizes[] = {
        {  1, "byte"  },
        {  2, "word"  },
        {  4, "dword" },
        {  8, "qword" },
        { 10, "tword" },
        { 16, "oword" }
    };
    unsigned int k;

    for (k = 0; k < sizeof known_sizes / sizeof known_sizes[0]; k++) {
        if (known_sizes[k].bytes == size)
            return known_sizes[k].keyword;
    }
    return "???";
}
static void assert_no_prefix(insn * ins, enum prefixes prefix)
static void warn_overflow(int size, int64_t data)
{
if (has_prefix(ins, prefix))
errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(prefix));
}
if (size < 8) {
int64_t lim = (1 << (size*8))-1;
if (data < ~lim || data > lim)
errfunc(ERR_WARNING, "%s data exceeds bounds", size_name(size));
}
}
/*
* This routine wraps the real output format's output routine,
* in order to pass a copy of the data off to the listing file
@ -433,7 +457,7 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
error(ERR_PANIC, "errors made it through from pass one");
else
while (itimes--) {
for (j = 0; j < instruction->nprefix; j++) {
for (j = 0; j < MAXPREFIX; j++) {
uint8_t c = 0;
switch (instruction->prefixes[j]) {
case P_LOCK:
@ -492,15 +516,23 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
error(ERR_NONFATAL,
"16-bit addressing is not supported "
"in 64-bit mode");
break;
}
if (bits != 16)
} else if (bits != 16)
c = 0x67;
break;
case P_A32:
if (bits != 32)
c = 0x67;
break;
case P_A64:
if (bits != 64) {
error(ERR_NONFATAL,
"64-bit addressing is only supported "
"in 64-bit mode");
}
break;
case P_ASP:
c = 0x67;
break;
case P_O16:
if (bits != 16)
c = 0x66;
@ -509,6 +541,14 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
if (bits == 16)
c = 0x66;
break;
case P_O64:
/* REX.W */
break;
case P_OSP:
c = 0x66;
break;
case P_none:
break;
default:
error(ERR_PANIC, "invalid instruction prefix");
}
@ -634,7 +674,8 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
strncpy(fname, instruction->eops->stringval, len);
fname[len] = '\0';
while (1) { /* added by alexfru: 'incbin' uses include paths */
/* added by alexfru: 'incbin' uses include paths */
while (1) {
combine = nasm_malloc(strlen(prefix) + len + 1);
strcpy(combine, prefix);
strcat(combine, fname);
@ -689,7 +730,7 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
isize = calcsize(segment, offset, bits, instruction, codes);
if (isize < 0)
return -1;
for (j = 0; j < instruction->nprefix; j++) {
for (j = 0; j < MAXPREFIX; j++) {
switch (instruction->prefixes[j]) {
case P_A16:
if (bits != 16)
@ -707,6 +748,10 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
if (bits == 16)
isize++;
break;
case P_A64:
case P_O64:
case P_none:
break;
default:
isize++;
break;
@ -745,6 +790,9 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
int rex_mask = ~0;
ins->rex = 0; /* Ensure REX is reset */
if (ins->prefixes[PPS_OSIZE] == P_O64)
ins->rex |= REX_W;
(void)segment; /* Don't warn that this parameter is unused */
(void)offset; /* Don't warn that this parameter is unused */
@ -812,8 +860,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 045:
case 046:
case 047:
length += ((ins->oprs[c - 044].addr_size ?
ins->oprs[c - 044].addr_size : bits) >> 3);
length += ins->addr_size >> 3;
break;
case 050:
case 051:
@ -909,15 +956,16 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 0310:
if (bits == 64)
return -1;
length += (bits != 16) && !has_prefix(ins,P_A16);
length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
break;
case 0311:
length += (bits != 32) && !has_prefix(ins,P_A32);
length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
break;
case 0312:
break;
case 0313:
if (bits != 64 || has_prefix(ins,P_A16) || has_prefix(ins,P_A32))
if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
has_prefix(ins, PPS_ASIZE, P_A32))
return -1;
break;
case 0320:
@ -944,7 +992,6 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
length++;
break;
case 0334:
assert_no_prefix(ins, P_LOCK);
ins->rex |= REX_L;
break;
case 0335:
@ -990,7 +1037,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
if (!process_ea
(&ins->oprs[(c >> 3) & 7], &ea_data, bits,
rfield, rflags, ins->forw_ref)) {
ins->addr_size, rfield, rflags, ins->forw_ref)) {
errfunc(ERR_NONFATAL, "invalid effective address");
return -1;
} else {
@ -1019,10 +1066,13 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
if (ins->rex & REX_H) {
errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
return -1;
} else if (bits == 64 ||
((ins->rex & REX_L) &&
!(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
cpu >= IF_X86_64)) {
} else if (bits == 64) {
length++;
} else if ((ins->rex & REX_L) &&
!(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
cpu >= IF_X86_64) {
/* LOCK-as-REX.R */
assert_no_prefix(ins, PPS_LREP);
length++;
} else {
errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
@ -1179,13 +1229,10 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 031:
case 032:
case 033:
if (ins->oprs[c - 030].segment == NO_SEG &&
ins->oprs[c - 030].wrt == NO_SEG &&
(ins->oprs[c - 030].offset < -65536L ||
ins->oprs[c - 030].offset > 65535L)) {
errfunc(ERR_WARNING, "word value exceeds bounds");
}
data = ins->oprs[c - 030].offset;
if (ins->oprs[c - 030].segment == NO_SEG &&
ins->oprs[c - 030].wrt == NO_SEG)
warn_overflow(2, data);
out(offset, segment, &data, OUT_ADDRESS + 2,
ins->oprs[c - 030].segment, ins->oprs[c - 030].wrt);
offset += 2;
@ -1200,8 +1247,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
else
size = (bits == 16) ? 2 : 4;
data = ins->oprs[c - 034].offset;
if (size == 2 && (data < -65536L || data > 65535L))
errfunc(ERR_WARNING, "word value exceeds bounds");
warn_overflow(size, data);
out(offset, segment, &data, OUT_ADDRESS + size,
ins->oprs[c - 034].segment, ins->oprs[c - 034].wrt);
offset += size;
@ -1222,10 +1268,8 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 046:
case 047:
data = ins->oprs[c - 044].offset;
size = ((ins->oprs[c - 044].addr_size ?
ins->oprs[c - 044].addr_size : bits) >> 3);
if (size == 2 && (data < -65536L || data > 65535L))
errfunc(ERR_WARNING, "word value exceeds bounds");
size = ins->addr_size >> 3;
warn_overflow(size, data);
out(offset, segment, &data, OUT_ADDRESS + size,
ins->oprs[c - 044].segment, ins->oprs[c - 044].wrt);
offset += size;
@ -1337,10 +1381,8 @@ static void gencode(int32_t segment, int32_t offset, int bits,
offset++;
} else {
if (ins->oprs[c - 0140].segment == NO_SEG &&
ins->oprs[c - 0140].wrt == NO_SEG &&
(data < -65536L || data > 65535L)) {
errfunc(ERR_WARNING, "word value exceeds bounds");
}
ins->oprs[c - 0140].wrt == NO_SEG)
warn_overflow(2, data);
out(offset, segment, &data, OUT_ADDRESS + 2,
ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
offset += 2;
@ -1424,7 +1466,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0310:
if (bits == 32 && !has_prefix(ins,P_A16)) {
if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
*bytes = 0x67;
out(offset, segment, bytes,
OUT_RAWDATA + 1, NO_SEG, NO_SEG);
@ -1434,7 +1476,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0311:
if (bits != 32 && !has_prefix(ins,P_A32)) {
if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
*bytes = 0x67;
out(offset, segment, bytes,
OUT_RAWDATA + 1, NO_SEG, NO_SEG);
@ -1562,7 +1604,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
if (!process_ea
(&ins->oprs[(c >> 3) & 7], &ea_data, bits,
rfield, rflags, ins->forw_ref)) {
ins->addr_size, rfield, rflags, ins->forw_ref)) {
errfunc(ERR_NONFATAL, "invalid effective address");
}
@ -1700,7 +1742,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
if (instruction->oprs[i].type != instruction->oprs[j].type ||
instruction->oprs[i].basereg != instruction->oprs[j].basereg)
return 0;
} else if (itemp->opd[i] & ~instruction->oprs[i].type ||
} else if (itemp->opd[i] & ~instruction->oprs[i].type ||
((itemp->opd[i] & SIZE_MASK) &&
((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) ||
@ -1824,8 +1866,8 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
return ret;
}
static ea *process_ea(operand * input, ea * output, int addrbits,
int rfield, int32_t rflags, int forw_ref)
static ea *process_ea(operand * input, ea * output, int bits,
int addrbits, int rfield, int32_t rflags, int forw_ref)
{
output->rip = false;
@ -1854,10 +1896,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
if (input->basereg == -1
&& (input->indexreg == -1 || input->scale == 0)) {
/* it's a pure offset */
if (input->addr_size)
addrbits = input->addr_size;
if (globalbits == 64 && (~input->type & IP_REL)) {
if (bits == 64 && (~input->type & IP_REL)) {
int scale, index, base;
output->sib_present = true;
scale = 0;
@ -1871,7 +1910,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
output->sib_present = false;
output->bytes = (addrbits != 16 ? 4 : 2);
output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
output->rip = globalbits == 64;
output->rip = bits == 64;
}
} else { /* it's an indirection */
int i = input->indexreg, b = input->basereg, s = input->scale;
@ -1921,11 +1960,15 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
sok &= ~bx;
}
/* While we're here, ensure the user didn't specify WORD. */
if (input->addr_size == 16 ||
(input->addr_size == 32 && !(sok & BITS32)) ||
(input->addr_size == 64 && !(sok & BITS64)))
return NULL;
/* While we're here, ensure the user didn't specify
WORD or QWORD. */
if (input->disp_size == 16 || input->disp_size == 64)
return NULL;
if (addrbits == 16 ||
(addrbits == 32 && !(sok & BITS32)) ||
(addrbits == 64 && !(sok & BITS64)))
return NULL;
/* now reorganize base/index */
if (s == 1 && bt != it && bt != -1 && it != -1 &&
@ -2051,7 +2094,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
return NULL;
/* ensure the user didn't specify DWORD/QWORD */
if (input->addr_size == 32 || input->addr_size == 64)
if (input->disp_size == 32 || input->disp_size == 64)
return NULL;
if (s != 1 && i != -1)
@ -2130,36 +2173,56 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
return output;
}
static void add_asp(insn *instruction, int addrbits)
static void add_asp(insn *ins, int addrbits)
{
int j, valid;
int defdisp;
valid = (addrbits == 64) ? 64|32 : 32|16;
for (j = 0; j < instruction->operands; j++) {
if (!(MEMORY & ~instruction->oprs[j].type)) {
switch (ins->prefixes[PPS_ASIZE]) {
case P_A16:
valid &= 16;
break;
case P_A32:
valid &= 32;
break;
case P_A64:
valid &= 64;
break;
case P_ASP:
valid &= (addrbits == 32) ? 16 : 32;
break;
default:
break;
}
for (j = 0; j < ins->operands; j++) {
if (!(MEMORY & ~ins->oprs[j].type)) {
int32_t i, b;
/* Verify as Register */
if (instruction->oprs[j].indexreg < EXPR_REG_START
|| instruction->oprs[j].indexreg >= REG_ENUM_LIMIT)
if (ins->oprs[j].indexreg < EXPR_REG_START
|| ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
i = 0;
else
i = reg_flags[instruction->oprs[j].indexreg];
i = reg_flags[ins->oprs[j].indexreg];
/* Verify as Register */
if (instruction->oprs[j].basereg < EXPR_REG_START
|| instruction->oprs[j].basereg >= REG_ENUM_LIMIT)
if (ins->oprs[j].basereg < EXPR_REG_START
|| ins->oprs[j].basereg >= REG_ENUM_LIMIT)
b = 0;
else
b = reg_flags[instruction->oprs[j].basereg];
b = reg_flags[ins->oprs[j].basereg];
if (instruction->oprs[j].scale == 0)
if (ins->oprs[j].scale == 0)
i = 0;
if (!i && !b) {
if (instruction->oprs[j].addr_size)
valid &= instruction->oprs[j].addr_size;
int ds = ins->oprs[j].disp_size;
if ((addrbits != 64 && ds > 8) ||
(addrbits == 64 && ds == 16))
valid &= ds;
} else {
if (!(REG16 & ~b))
valid &= 16;
@ -2179,18 +2242,27 @@ static void add_asp(insn *instruction, int addrbits)
}
if (valid & addrbits) {
/* Don't do anything */
ins->addr_size = addrbits;
} else if (valid & ((addrbits == 32) ? 16 : 32)) {
/* Add an instruction size prefix */
/* Add an address size prefix */
enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
for (j = 0; j < instruction->nprefix; j++) {
if (instruction->prefixes[j] == pref)
return; /* Already there */
}
instruction->prefixes[j] = pref;
instruction->nprefix++;
ins->prefixes[PPS_ASIZE] = pref;
ins->addr_size = (addrbits == 32) ? 16 : 32;
} else {
/* Impossible... */
errfunc(ERR_NONFATAL, "impossible combination of address sizes");
ins->addr_size = addrbits; /* Error recovery */
}
defdisp = ins->addr_size == 16 ? 16 : 32;
for (j = 0; j < ins->operands; j++) {
if (!(MEM_OFFS & ~ins->oprs[j].type) &&
(ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
!= ins->addr_size) {
/* mem_offs sizes must match the address size; if not,
strip the MEM_OFFS bit and match only EA instructions */
ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
}
}
}

View File

@ -215,7 +215,7 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
return data;
}
op->addr_size = 0;
op->disp_size = 0;
op->eaflags = 0;
if (asize == 16) {
@ -260,7 +260,7 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
if (rm == 6 && mod == 0) { /* special case */
op->basereg = -1;
if (segsize != 16)
op->addr_size = 16;
op->disp_size = 16;
mod = 2; /* fake disp16 */
}
switch (mod) {
@ -307,7 +307,7 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
}
if (asize != 64)
op->addr_size = asize;
op->disp_size = asize;
op->basereg = -1;
mod = 2; /* fake disp32 */
@ -336,7 +336,7 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
op->basereg = rd_reg32[base | ((rex & REX_B) ? 8 : 0)];
if (segsize != 32)
op->addr_size = 32;
op->disp_size = 32;
}
switch (mod) {
@ -375,7 +375,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
int i;
for (i = 0; i < MAX_OPERANDS; i++) {
ins->oprs[i].segment = ins->oprs[i].addr_size =
ins->oprs[i].segment = ins->oprs[i].disp_size =
(segsize == 64 ? SEG_64BIT : segsize == 32 ? SEG_32BIT : 0);
}
ins->condition = -1;
@ -478,7 +478,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
data += 2;
}
if (segsize != asize)
ins->oprs[c - 034].addr_size = asize;
ins->oprs[c - 034].disp_size = asize;
} else if (c >= 040 && c <= 043) {
ins->oprs[c - 040].offset = getu32(data);
data += 4;
@ -498,7 +498,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
break;
}
if (segsize != asize)
ins->oprs[c - 044].addr_size = asize;
ins->oprs[c - 044].disp_size = asize;
} else if (c >= 050 && c <= 053) {
ins->oprs[c - 050].offset = gets8(data++);
ins->oprs[c - 050].segment |= SEG_RELATIVE;
@ -658,15 +658,26 @@ static int matches(const struct itemplate *t, uint8_t *data,
a_used = true;
}
ins->nprefix = 0;
if (lock)
ins->prefixes[ins->nprefix++] = P_LOCK;
if (drep)
ins->prefixes[ins->nprefix++] = drep;
if (!a_used && asize != segsize)
ins->prefixes[ins->nprefix++] = asize == 16 ? P_A16 : P_A32;
if (!o_used && osize == ((segsize == 16) ? 32 : 16))
ins->prefixes[ins->nprefix++] = osize == 16 ? P_O16 : P_O32;
if (lock) {
if (ins->prefixes[PPS_LREP])
return false;
ins->prefixes[PPS_LREP] = P_LOCK;
}
if (drep) {
if (ins->prefixes[PPS_LREP])
return false;
ins->prefixes[PPS_LREP] = drep;
}
if (!o_used && osize == ((segsize == 16) ? 32 : 16)) {
if (ins->prefixes[PPS_OSIZE])
return false;
ins->prefixes[PPS_OSIZE] = osize == 16 ? P_O16 : P_O32;
}
if (!a_used && asize != segsize) {
if (ins->prefixes[PPS_ASIZE])
return false;
ins->prefixes[PPS_ASIZE] = asize == 16 ? P_A16 : P_A32;
}
/* Fix: check for redundant REX prefixes */
@ -780,13 +791,18 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
* selection.
*/
if (works) {
int i, nprefix;
goodness = ((*p)->flags & IF_PFMASK) ^ prefer;
if (tmp_ins.nprefix < best_pref ||
(tmp_ins.nprefix == best_pref && goodness < best)) {
nprefix = 0;
for (i = 0; i < MAXPREFIX; i++)
if (tmp_ins.prefixes[i])
nprefix++;
if (nprefix < best_pref ||
(nprefix == best_pref && goodness < best)) {
/* This is the best one found so far */
best = goodness;
best_p = p;
best_pref = tmp_ins.nprefix;
best_pref = nprefix;
best_length = length;
ins = tmp_ins;
}
@ -810,7 +826,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
* the return value is "sane." Maybe a macro wrapper could
* be used for that purpose.
*/
for (i = 0; i < ins.nprefix; i++)
for (i = 0; i < MAXPREFIX; i++)
switch (ins.prefixes[i]) {
case P_LOCK:
slen += snprintf(output + slen, outbufsize - slen, "lock ");
@ -930,8 +946,8 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
snprintf(output + slen, outbufsize - slen, "[%s%s%s0x%"PRIx64"]",
(segover ? segover : ""),
(segover ? ":" : ""),
(o->addr_size ==
32 ? "dword " : o->addr_size ==
(o->disp_size ==
32 ? "dword " : o->disp_size ==
16 ? "word " : ""), offs);
segover = NULL;
} else if (!(REGMEM & ~t)) {
@ -957,11 +973,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
slen +=
snprintf(output + slen, outbufsize - slen, "near ");
output[slen++] = '[';
if (o->addr_size)
if (o->disp_size)
slen += snprintf(output + slen, outbufsize - slen, "%s",
(o->addr_size == 64 ? "qword " :
o->addr_size == 32 ? "dword " :
o->addr_size == 16 ? "word " :
(o->disp_size == 64 ? "qword " :
o->disp_size == 32 ? "dword " :
o->disp_size == 16 ? "word " :
""));
if (o->eaflags & EAF_REL)
slen += snprintf(output + slen, outbufsize - slen, "rel ");

View File

@ -722,15 +722,15 @@ MOV reg16,reg16 \320\1\x8B\110 8086
MOV reg32,mem \321\1\x8B\110 386,SM
MOV reg32,reg32 \321\1\x8B\110 386
MOV reg64,mem \324\1\x8B\110 X64,SM
MOV reg64,reg64 \324\1\x8B\110 X64,SM
MOV reg64,reg64 \324\1\x8B\110 X64
MOV reg8,imm \10\xB0\21 8086,SM
MOV reg16,imm \320\10\xB8\31 8086,SM
MOV reg32,imm \321\10\xB8\41 386,SM
MOV reg64,imm \324\10\xB8\55 X64,SQ
MOV reg64,imm \324\10\xB8\55 X64,SM
MOV rm8,imm \1\xC6\200\21 8086,SM
MOV rm16,imm \320\1\xC7\200\31 8086,SM
MOV rm32,imm \321\1\xC7\200\41 386,SM
MOV rm64,imm \324\1\xC7\200\41 X64,SD
MOV rm64,imm \324\1\xC7\200\41 X64,SM
MOV mem,imm8 \1\xC6\200\21 8086,SM
MOV mem,imm16 \320\1\xC7\200\31 8086,SM
MOV mem,imm32 \321\1\xC7\200\41 386,SM

41
nasm.h
View File

@ -541,9 +541,12 @@ enum ccode { /* condition code names */
* register names do not overlap.
*/
enum prefixes { /* instruction prefixes */
P_none = 0,
PREFIX_ENUM_START = REG_ENUM_LIMIT,
P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32,
P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES
P_A16 = PREFIX_ENUM_START, P_A32, P_A64, P_ASP,
P_LOCK, P_O16, P_O32, P_O64, P_OSP,
P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES,
PREFIX_ENUM_LIMIT
};
enum { /* extended operand types */
@ -565,9 +568,9 @@ enum eval_hint { /* values for `hinttype' */
EAH_NOTBASE = 2 /* try _not_ to make reg the base */
};
typedef struct { /* operand to an instruction */
typedef struct operand { /* operand to an instruction */
int32_t type; /* type of operand */
int addr_size; /* 0 means default; 16; 32; 64 */
int disp_size; /* 0 means default; 16; 32; 64 */
enum reg_enum basereg, indexreg; /* address registers */
int scale; /* index scale */
int hintbase;
@ -592,17 +595,32 @@ typedef struct extop { /* extended operand */
int32_t wrt; /* ... and here */
} extop;
#define MAXPREFIX 4
/*
 * Prefix slots.
 *
 * Each kind of prefix is stored in its own fixed slot.  The slot
 * order determines the order in which prefixes appear in the
 * assembled output; that order shouldn't matter to the processor, so
 * it can be rearranged to taste.  REX prefixes are handled
 * differently for the time being.
 *
 * LOCK and REP deliberately share one slot: this is an x86
 * architectural constraint.
 */
enum prefix_pos {
    PPS_LREP  = 0,              /* Lock or REP prefix */
    PPS_SEG   = 1,              /* Segment override prefix */
    PPS_OSIZE = 2,              /* Operand size prefix */
    PPS_ASIZE = 3,              /* Address size prefix */
    MAXPREFIX = 4               /* Total number of prefix slots */
};
#define MAX_OPERANDS 4
typedef struct { /* an instruction itself */
char *label; /* the label defined, or NULL */
typedef struct insn { /* an instruction itself */
char *label; /* the label defined, or NULL */
enum prefixes prefixes[MAXPREFIX]; /* instruction prefixes, if any */
int nprefix; /* number of entries in above */
enum opcode opcode; /* the opcode - not just the string */
enum ccode condition; /* the condition code, if Jcc/SETcc */
int operands; /* how many operands? 0-3
* (more if db et al) */
int addr_size; /* address size */
operand oprs[MAX_OPERANDS]; /* the operands, defined as above */
extop *eops; /* extended operands */
int eops_float; /* true if DD and floating */
@ -934,8 +952,11 @@ struct dfmt {
*/
enum special_tokens {
S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT,
S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD
SPECIAL_ENUM_START = PREFIX_ENUM_LIMIT,
S_ABS = SPECIAL_ENUM_START,
S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT,
S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD,
SPECIAL_ENUM_LIMIT
};
/*

255
parser.c
View File

@ -44,12 +44,133 @@ void parser_global_info(struct ofmt *output, struct location * locp)
location = locp;
}
/*
 * Return the prefix slot (an enum prefix_pos value) in which `prefix`
 * is stored.  Segment registers go in PPS_SEG, LOCK and the REP
 * family in PPS_LREP, operand-size prefixes in PPS_OSIZE and
 * address-size prefixes in PPS_ASIZE.
 *
 * Any other value is reported through error() with ERR_PANIC; -1 is
 * the result should that call return.
 */
static int prefix_slot(enum prefixes prefix)
{
    int slot;

    switch (prefix) {
    /* Address-size prefixes */
    case P_ASP:
    case P_A64:
    case P_A32:
    case P_A16:
        slot = PPS_ASIZE;
        break;

    /* Operand-size prefixes */
    case P_OSP:
    case P_O64:
    case P_O32:
    case P_O16:
        slot = PPS_OSIZE;
        break;

    /* LOCK and the REP family share a slot */
    case P_REPNZ:
    case P_REPNE:
    case P_REPZ:
    case P_REPE:
    case P_REP:
    case P_LOCK:
        slot = PPS_LREP;
        break;

    /* Segment overrides */
    case R_GS:
    case R_FS:
    case R_ES:
    case R_SS:
    case R_DS:
    case R_CS:
        slot = PPS_SEG;
        break;

    default:
        error(ERR_PANIC, "Invalid value %d passed to prefix_slot()", prefix);
        slot = -1;
        break;
    }

    return slot;
}
/*
 * Apply the size/addressing keyword held in the current token
 * (tokval.t_integer) to operand number `operand` of `result`.
 *
 * In TASM compatibility mode the keyword sets a BITSnn flag in the
 * operand's type.  In standard NASM mode it sets the operand's
 * displacement size and/or effective-address flags, or, for
 * a16/a32/a64, records an address-size prefix on the instruction.
 * Unrecognised keywords are reported as non-fatal errors and leave
 * the instruction untouched.
 */
static void process_size_override(insn * result, int operand)
{
    const int keyword = (int)tokval.t_integer;
    int disp_bits = 0;          /* new displacement size, 0 = unchanged */
    int ea_bits = 0;            /* effective-address flags to add */

    if (tasm_compatible_mode) {
        /*
         * For TASM compatibility a size override inside the
         * brackets changes the size of the operand, not the
         * address type of the operand as it does in standard
         * NASM syntax. Hence:
         *
         *     mov eax,[DWORD val]
         *
         * is valid syntax in TASM compatibility mode. Note that
         * you lose the ability to override the default address
         * type for the instruction, but we never use anything
         * but 32-bit flat model addressing in our code.
         */
        int32_t size_bits;

        switch (keyword) {
        case S_BYTE:
            size_bits = BITS8;
            break;
        case S_WORD:
            size_bits = BITS16;
            break;
        case S_DWORD:
        case S_LONG:
            size_bits = BITS32;
            break;
        case S_QWORD:
            size_bits = BITS64;
            break;
        case S_TWORD:
            size_bits = BITS80;
            break;
        case S_OWORD:
            size_bits = BITS128;
            break;
        default:
            error(ERR_NONFATAL,
                  "invalid operand size specification");
            return;
        }

        result->oprs[operand].type |= size_bits;
        return;
    }

    /* Standard NASM compatible syntax */
    switch (keyword) {
    case S_NOSPLIT:
        ea_bits = EAF_TIMESTWO;
        break;
    case S_REL:
        ea_bits = EAF_REL;
        break;
    case S_ABS:
        ea_bits = EAF_ABS;
        break;
    case S_BYTE:
        disp_bits = 8;
        ea_bits = EAF_BYTEOFFS;
        break;
    case S_WORD:
        disp_bits = 16;
        ea_bits = EAF_WORDOFFS;
        break;
    case S_DWORD:
    case S_LONG:
        disp_bits = 32;
        ea_bits = EAF_WORDOFFS;
        break;
    case S_QWORD:
        disp_bits = 64;
        ea_bits = EAF_WORDOFFS;
        break;
    case P_A16:
    case P_A32:
    case P_A64:
        /* An address-size keyword belongs to the instruction, not the
           operand; it may repeat an earlier one but not contradict it. */
        if (result->prefixes[PPS_ASIZE] &&
            result->prefixes[PPS_ASIZE] != tokval.t_integer)
            error(ERR_NONFATAL,
                  "conflicting address size specifications");
        else
            result->prefixes[PPS_ASIZE] = tokval.t_integer;
        return;
    default:
        error(ERR_NONFATAL, "invalid size specification in"
              " effective address");
        return;
    }

    if (disp_bits)
        result->oprs[operand].disp_size = disp_bits;
    result->oprs[operand].eaflags |= ea_bits;
}
insn *parse_line(int pass, char *buffer, insn * result,
efunc errfunc, evalfunc evaluate, ldfunc ldef)
{
int operand;
int critical;
struct eval_hints hints;
int j;
result->forw_ref = false;
error = errfunc;
@ -101,7 +222,8 @@ insn *parse_line(int pass, char *buffer, insn * result,
return result;
}
result->nprefix = 0;
for (j = 0; j < MAXPREFIX; j++)
result->prefixes[j] = P_none;
result->times = 1L;
while (i == TOKEN_PREFIX ||
@ -134,17 +256,25 @@ insn *parse_line(int pass, char *buffer, insn * result,
}
}
} else {
if (result->nprefix == MAXPREFIX)
error(ERR_NONFATAL,
"instruction has more than %d prefixes", MAXPREFIX);
else
result->prefixes[result->nprefix++] = tokval.t_integer;
int slot = prefix_slot(tokval.t_integer);
if (result->prefixes[slot]) {
error(ERR_NONFATAL,
"instruction has conflicting prefixes");
}
result->prefixes[slot] = tokval.t_integer;
i = stdscan(NULL, &tokval);
}
}
if (i != TOKEN_INSN) {
if (result->nprefix > 0 && i == 0) {
int j;
enum prefixes pfx;
for (j = 0; j < MAXPREFIX; j++)
if ((pfx = result->prefixes[j]) != P_none)
break;
if (i == 0 && pfx != P_none) {
/*
* Instruction prefixes are present, but no actual
* instruction. This is allowed: at this point we
@ -358,12 +488,12 @@ insn *parse_line(int pass, char *buffer, insn * result,
* of these, separated by commas, and terminated by a zero token. */
for (operand = 0; operand < MAX_OPERANDS; operand++) {
expr *value; /* used most of the time */
expr *value; /* used most of the time */
int mref; /* is this going to be a memory ref? */
int bracket; /* is it a [] mref, or a & mref? */
int setsize = 0;
result->oprs[operand].addr_size = 0; /* have to zero this whatever */
result->oprs[operand].disp_size = 0; /* have to zero this whatever */
result->oprs[operand].eaflags = 0; /* and this */
result->oprs[operand].opflags = 0;
@ -428,78 +558,10 @@ insn *parse_line(int pass, char *buffer, insn * result,
if (i == '[' || i == '&') { /* memory reference */
mref = true;
bracket = (i == '[');
while ((i = stdscan(NULL, &tokval)) == TOKEN_SPECIAL) {
/* check for address directives */
if (tasm_compatible_mode) {
switch ((int)tokval.t_integer) {
/* For TASM compatibility a size override inside the
* brackets changes the size of the operand, not the
* address type of the operand as it does in standard
* NASM syntax. Hence:
*
* mov eax,[DWORD val]
*
* is valid syntax in TASM compatibility mode. Note that
* you lose the ability to override the default address
* type for the instruction, but we never use anything
* but 32-bit flat model addressing in our code.
*/
case S_BYTE:
result->oprs[operand].type |= BITS8;
break;
case S_WORD:
result->oprs[operand].type |= BITS16;
break;
case S_DWORD:
case S_LONG:
result->oprs[operand].type |= BITS32;
break;
case S_QWORD:
result->oprs[operand].type |= BITS64;
break;
case S_TWORD:
result->oprs[operand].type |= BITS80;
break;
case S_OWORD:
result->oprs[operand].type |= BITS128;
break;
default:
error(ERR_NONFATAL,
"invalid operand size specification");
}
} else {
/* Standard NASM compatible syntax */
switch ((int)tokval.t_integer) {
case S_NOSPLIT:
result->oprs[operand].eaflags |= EAF_TIMESTWO;
break;
case S_REL:
result->oprs[operand].eaflags |= EAF_REL;
break;
case S_ABS:
result->oprs[operand].eaflags |= EAF_ABS;
break;
case S_BYTE:
result->oprs[operand].eaflags |= EAF_BYTEOFFS;
break;
case S_WORD:
result->oprs[operand].addr_size = 16;
result->oprs[operand].eaflags |= EAF_WORDOFFS;
break;
case S_DWORD:
case S_LONG:
result->oprs[operand].addr_size = 32;
result->oprs[operand].eaflags |= EAF_WORDOFFS;
break;
case S_QWORD:
result->oprs[operand].addr_size = 64;
result->oprs[operand].eaflags |= EAF_WORDOFFS;
break;
default:
error(ERR_NONFATAL, "invalid size specification in"
" effective address");
}
}
i = stdscan(NULL, &tokval); /* then skip the colon */
while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
process_size_override(result, operand);
i = stdscan(NULL, &tokval);
}
} else { /* immediate operand, or register */
mref = false;
@ -529,32 +591,18 @@ insn *parse_line(int pass, char *buffer, insn * result,
if (value[1].type != 0 || value->value != 1 ||
REG_SREG & ~reg_flags[value->type])
error(ERR_NONFATAL, "invalid segment override");
else if (result->nprefix == MAXPREFIX)
else if (result->prefixes[PPS_SEG])
error(ERR_NONFATAL,
"instruction has more than %d prefixes", MAXPREFIX);
"instruction has conflicting segment overrides");
else {
result->prefixes[result->nprefix++] = value->type;
result->prefixes[PPS_SEG] = value->type;
if (!(REG_FSGS & ~reg_flags[value->type]))
result->oprs[operand].eaflags |= EAF_FSGS;
}
i = stdscan(NULL, &tokval); /* then skip the colon */
if (i == TOKEN_SPECIAL) { /* another check for size override */
switch ((int)tokval.t_integer) {
case S_WORD:
result->oprs[operand].addr_size = 16;
break;
case S_DWORD:
case S_LONG:
result->oprs[operand].addr_size = 32;
break;
case S_QWORD:
result->oprs[operand].addr_size = 64;
break;
default:
error(ERR_NONFATAL, "invalid size specification in"
" effective address");
}
while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
process_size_override(result, operand);
i = stdscan(NULL, &tokval);
}
value = evaluate(stdscan, NULL, &tokval,
@ -700,7 +748,7 @@ insn *parse_line(int pass, char *buffer, insn * result,
!(result->oprs[operand].eaflags & EAF_FSGS)) ||
(result->oprs[operand].eaflags & EAF_REL));
result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS;
result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS;
}
result->oprs[operand].basereg = b;
result->oprs[operand].indexreg = i;
@ -770,8 +818,9 @@ insn *parse_line(int pass, char *buffer, insn * result,
result->operands = operand; /* set operand count */
while (operand < 3) /* clear remaining operands */
result->oprs[operand++].type = 0;
/* clear remaining operands */
while (operand < MAX_OPERANDS)
result->oprs[operand++].type = 0;
/*
* Transform RESW, RESD, RESQ, REST, RESO into RESB.

View File

@ -5,9 +5,13 @@
% TOKEN_PREFIX, 0, P_*
a16
a32
a64
asp
lock
o16
o32
o64
osp
rep
repe
repne