From 7eb4a387939955c1c0b41fbc8b1216419082321f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 15:49:30 -0700 Subject: [PATCH 01/29] Initial support for four arguments per instruction For SSE5, we will need to support four arguments per instruction. --- assemble.c | 275 ++++++++++++++++++++++++++++++++--------------------- disasm.c | 54 +++++------ insns.dat | 162 +++++++++++++++---------------- insns.h | 18 ++-- insns.pl | 69 ++++++++------ nasm.h | 3 +- 6 files changed, 325 insertions(+), 256 deletions(-) diff --git a/assemble.c b/assemble.c index 54522712..7dc2b25b 100644 --- a/assemble.c +++ b/assemble.c @@ -12,39 +12,36 @@ * (POP is never used for CS) depending on operand 0 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending * on operand 0 - * \10, \11, \12 - a literal byte follows in the code stream, to be added - * to the register value of operand 0, 1 or 2 - * \17 - encodes the literal byte 0. (Some compilers don't take - * kindly to a zero byte in the _middle_ of a compile time - * string constant, so I had to put this hack in.) 
- * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2 - * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2 - * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2 - * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2 - * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit + * \10..\13 - a literal byte follows in the code stream, to be added + * to the register value of operand 0..3 + * \14..\17 - a signed byte immediate operand, from operand 0..3 + * \20..\23 - a byte immediate operand, from operand 0..3 + * \24..\27 - an unsigned byte immediate operand, from operand 0..3 + * \30..\33 - a word immediate operand, from operand 0..3 + * \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit * assembly mode or the operand-size override on the operand - * \37 - a word constant, from the _segment_ part of operand 0 - * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2 - * \44, \45, \46 - select between \3[012], \4[012] and \5[456] + * \40..\43 - a long immediate operand, from operand 0..3 + * \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7] * depending on assembly mode or the address-size override * on the operand. 
- * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2 - * \54, \55, \56 - a qword immediate operand, from operand 0, 1 or 2 - * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2 - * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit + * \50..\53 - a byte relative operand, from operand 0..3 + * \54..\57 - a qword immediate operand, from operand 0..3 + * \60..\63 - a word relative operand, from operand 0..3 + * \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit * assembly mode or the operand-size override on the operand - * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2 + * \70..\73 - a long relative operand, from operand 0..3 + * \74..\77 - a word constant, from the _segment_ part of operand 0..3 * \1ab - a ModRM, calculated on EA in operand a, with the spare * field the register value of operand b. - * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2 - * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 + * \140..\143 - an immediate word or signed byte for operand 0..3 + * \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3 * is a signed byte rather than a word. - * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2 - * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 + * \150..\153 - an immediate dword or signed byte for operand 0..3 + * \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3 * is a signed byte rather than a dword. - * \150,\151,\152 - an immediate qword or signed byte for operand 0, 1, or 2 - * \153,\154,\155 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 - * is a signed byte rather than a qword. + * \170 - encodes the literal byte 0. (Some compilers don't take + * kindly to a zero byte in the _middle_ of a compile time + * string constant, so I had to put this hack in.) 
* \2ab - a ModRM, calculated on EA in operand a, with the spare * field equal to digit b. * \30x - might be an 0x67 byte, depending on the address size of @@ -730,73 +727,79 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 010: case 011: case 012: + case 013: ins->rex |= op_rexflags(&ins->oprs[c - 010], REX_B|REX_H|REX_P|REX_W); codes++, length++; break; - case 017: - length++; - break; case 014: case 015: case 016: + case 017: length++; break; case 020: case 021: case 022: + case 023: length++; break; case 024: case 025: case 026: + case 027: length++; break; case 030: case 031: case 032: + case 033: length += 2; break; case 034: case 035: case 036: + case 037: if (ins->oprs[c - 034].type & (BITS16 | BITS32 | BITS64)) length += (ins->oprs[c - 034].type & BITS16) ? 2 : 4; else length += (bits == 16) ? 2 : 4; break; - case 037: - length += 2; - break; case 040: case 041: case 042: + case 043: length += 4; break; case 044: case 045: case 046: + case 047: length += ((ins->oprs[c - 044].addr_size ? ins->oprs[c - 044].addr_size : bits) >> 3); break; case 050: case 051: case 052: + case 053: length++; break; case 054: case 055: case 056: + case 057: length += 8; /* MOV reg64/imm */ break; case 060: case 061: case 062: + case 063: length += 2; break; case 064: case 065: case 066: + case 067: if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64)) length += (ins->oprs[c - 064].type & BITS16) ? 2 : 4; else @@ -805,33 +808,48 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 070: case 071: case 072: + case 073: length += 4; break; - case 0130: - case 0131: - case 0132: - length += is_sbyte(ins, c - 0130, 16) ? 1 : 2; - break; - case 0133: - case 0134: - case 0135: - codes += 2; - length++; + case 074: + case 075: + case 076: + case 077: + length += 2; break; case 0140: case 0141: case 0142: - length += is_sbyte(ins, c - 0140, 32) ? 1 : 4; + case 0143: + length += is_sbyte(ins, c - 0140, 16) ? 
1 : 2; break; - case 0143: case 0144: case 0145: + case 0146: + case 0147: codes += 2; length++; break; + case 0150: + case 0151: + case 0152: + case 0153: + length += is_sbyte(ins, c - 0150, 32) ? 1 : 4; + break; + case 0154: + case 0155: + case 0156: + case 0157: + codes += 2; + length++; + break; + case 0170: + length++; + break; case 0300: case 0301: case 0302: + case 0303: length += chsize(&ins->oprs[c - 0300], bits); break; case 0310: @@ -1020,21 +1038,17 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 010: case 011: case 012: + case 013: EMIT_REX(); bytes[0] = *codes++ + ((regval(&ins->oprs[c - 010])) & 7); out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); offset += 1; break; - case 017: - bytes[0] = 0; - out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); - offset += 1; - break; - case 014: case 015: case 016: + case 017: if (ins->oprs[c - 014].offset < -128 || ins->oprs[c - 014].offset > 127) { errfunc(ERR_WARNING, "signed byte value exceeds bounds"); @@ -1055,6 +1069,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 020: case 021: case 022: + case 023: if (ins->oprs[c - 020].offset < -256 || ins->oprs[c - 020].offset > 255) { errfunc(ERR_WARNING, "byte value exceeds bounds"); @@ -1074,6 +1089,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 024: case 025: case 026: + case 027: if (ins->oprs[c - 024].offset < 0 || ins->oprs[c - 024].offset > 255) errfunc(ERR_WARNING, "unsigned byte value exceeds bounds"); @@ -1092,6 +1108,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 030: case 031: case 032: + case 033: if (ins->oprs[c - 030].segment == NO_SEG && ins->oprs[c - 030].wrt == NO_SEG && (ins->oprs[c - 030].offset < -65536L || @@ -1107,6 +1124,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 034: case 035: case 036: + case 037: if (ins->oprs[c - 034].type & (BITS16 | BITS32)) size = (ins->oprs[c - 034].type & BITS16) ? 
2 : 4; else @@ -1119,20 +1137,10 @@ static void gencode(int32_t segment, int32_t offset, int bits, offset += size; break; - case 037: - if (ins->oprs[0].segment == NO_SEG) - errfunc(ERR_NONFATAL, "value referenced by FAR is not" - " relocatable"); - data = 0L; - out(offset, segment, &data, OUT_ADDRESS + 2, - outfmt->segbase(1 + ins->oprs[0].segment), - ins->oprs[0].wrt); - offset += 2; - break; - case 040: case 041: case 042: + case 043: data = ins->oprs[c - 040].offset; out(offset, segment, &data, OUT_ADDRESS + 4, ins->oprs[c - 040].segment, ins->oprs[c - 040].wrt); @@ -1142,6 +1150,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 044: case 045: case 046: + case 047: data = ins->oprs[c - 044].offset; size = ((ins->oprs[c - 044].addr_size ? ins->oprs[c - 044].addr_size : bits) >> 3); @@ -1155,6 +1164,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 050: case 051: case 052: + case 053: if (ins->oprs[c - 050].segment != segment) errfunc(ERR_NONFATAL, "short relative jump outside segment"); @@ -1169,6 +1179,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 054: case 055: case 056: + case 057: data = (int64_t)ins->oprs[c - 054].offset; out(offset, segment, &data, OUT_ADDRESS + 8, ins->oprs[c - 054].segment, ins->oprs[c - 054].wrt); @@ -1178,6 +1189,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 060: case 061: case 062: + case 063: if (ins->oprs[c - 060].segment != segment) { data = ins->oprs[c - 060].offset; out(offset, segment, &data, @@ -1194,6 +1206,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 064: case 065: case 066: + case 067: if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64)) size = (ins->oprs[c - 064].type & BITS16) ? 
2 : 4; else @@ -1214,6 +1227,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 070: case 071: case 072: + case 073: if (ins->oprs[c - 070].segment != segment) { data = ins->oprs[c - 070].offset; out(offset, segment, &data, @@ -1227,70 +1241,95 @@ static void gencode(int32_t segment, int32_t offset, int bits, offset += 4; break; - case 0130: - case 0131: - case 0132: - data = ins->oprs[c - 0130].offset; - if (is_sbyte(ins, c - 0130, 16)) { - bytes[0] = data; - out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, - NO_SEG); - offset++; - } else { - if (ins->oprs[c - 0130].segment == NO_SEG && - ins->oprs[c - 0130].wrt == NO_SEG && - (data < -65536L || data > 65535L)) { - errfunc(ERR_WARNING, "word value exceeds bounds"); - } - out(offset, segment, &data, OUT_ADDRESS + 2, - ins->oprs[c - 0130].segment, ins->oprs[c - 0130].wrt); - offset += 2; - } - break; - - case 0133: - case 0134: - case 0135: - EMIT_REX(); - codes++; - bytes[0] = *codes++; - if (is_sbyte(ins, c - 0133, 16)) - bytes[0] |= 2; /* s-bit */ - out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); - offset++; + case 074: + case 075: + case 076: + case 077: + if (ins->oprs[c - 074].segment == NO_SEG) + errfunc(ERR_NONFATAL, "value referenced by FAR is not" + " relocatable"); + data = 0L; + out(offset, segment, &data, OUT_ADDRESS + 2, + outfmt->segbase(1 + ins->oprs[c - 074].segment), + ins->oprs[c - 074].wrt); + offset += 2; break; case 0140: case 0141: case 0142: + case 0143: data = ins->oprs[c - 0140].offset; - if (is_sbyte(ins, c - 0140, 32)) { + if (is_sbyte(ins, c - 0140, 16)) { + bytes[0] = data; + out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, + NO_SEG); + offset++; + } else { + if (ins->oprs[c - 0140].segment == NO_SEG && + ins->oprs[c - 0140].wrt == NO_SEG && + (data < -65536L || data > 65535L)) { + errfunc(ERR_WARNING, "word value exceeds bounds"); + } + out(offset, segment, &data, OUT_ADDRESS + 2, + ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt); + 
offset += 2; + } + break; + + case 0144: + case 0145: + case 0146: + case 0147: + EMIT_REX(); + codes++; + bytes[0] = *codes++; + if (is_sbyte(ins, c - 0144, 16)) + bytes[0] |= 2; /* s-bit */ + out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); + offset++; + break; + + case 0150: + case 0151: + case 0152: + case 0153: + data = ins->oprs[c - 0150].offset; + if (is_sbyte(ins, c - 0150, 32)) { bytes[0] = data; out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); offset++; } else { out(offset, segment, &data, OUT_ADDRESS + 4, - ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt); + ins->oprs[c - 0150].segment, ins->oprs[c - 0150].wrt); offset += 4; } break; - case 0143: - case 0144: - case 0145: + case 0154: + case 0155: + case 0156: + case 0157: EMIT_REX(); codes++; bytes[0] = *codes++; - if (is_sbyte(ins, c - 0143, 32)) + if (is_sbyte(ins, c - 0154, 32)) bytes[0] |= 2; /* s-bit */ out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); offset++; break; + case 0170: + bytes[0] = 0; + out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); + offset += 1; + break; + case 0300: case 0301: case 0302: + case 0303: if (chsize(&ins->oprs[c - 0300], bits)) { *bytes = 0x67; out(offset, segment, bytes, @@ -1537,7 +1576,7 @@ static int rexflags(int val, int32_t flags, int mask) static int matches(const struct itemplate *itemp, insn * instruction, int bits) { - int i, size[3], asize, oprs, ret; + int i, size[MAX_OPERANDS], asize, oprs, ret; ret = 100; @@ -1579,7 +1618,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) * Check operand sizes */ if (itemp->flags & IF_ARMASK) { - size[0] = size[1] = size[2] = 0; + memset(size, 0, sizeof size); switch (itemp->flags & IF_ARMASK) { case IF_AR0: @@ -1591,34 +1630,54 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) case IF_AR2: i = 2; break; +#if 0 /* Need to reorganize instruction flags to fit IF_AR3 */ + case IF_AR3: + i = 3; + break; 
+#endif default: break; /* Shouldn't happen */ } - if (itemp->flags & IF_SB) { + switch (itemp->flags & IF_SMASK) { + case IF_SB: size[i] = BITS8; - } else if (itemp->flags & IF_SW) { + break; + case IF_SW: size[i] = BITS16; - } else if (itemp->flags & IF_SD) { + break; + case IF_SD: size[i] = BITS32; - } else if (itemp->flags & IF_SQ) { + break; + case IF_SQ: size[i] = BITS64; + break; + default: + break; } } else { asize = 0; - if (itemp->flags & IF_SB) { + switch (itemp->flags & IF_SMASK) { + case IF_SB: asize = BITS8; oprs = itemp->operands; - } else if (itemp->flags & IF_SW) { + break; + case IF_SW: asize = BITS16; oprs = itemp->operands; - } else if (itemp->flags & IF_SD) { + break; + case IF_SD: asize = BITS32; oprs = itemp->operands; - } else if (itemp->flags & IF_SQ) { + break; + case IF_SQ: asize = BITS64; oprs = itemp->operands; + break; + default: + break; } - size[0] = size[1] = size[2] = asize; + for (i = 0; i < MAX_OPERANDS; i++) + size[i] = asize; } if (itemp->flags & (IF_SM | IF_SM2)) { diff --git a/disasm.c b/disasm.c index 0452c295..cfe86938 100644 --- a/disasm.c +++ b/disasm.c @@ -341,12 +341,12 @@ static int matches(const struct itemplate *t, uint8_t *data, uint8_t lock = prefix->lock; int osize = prefix->osize; int asize = prefix->asize; + int i; - ins->oprs[0].segment = ins->oprs[1].segment = - ins->oprs[2].segment = - ins->oprs[0].addr_size = ins->oprs[1].addr_size = - ins->oprs[2].addr_size = (segsize == 64 ? SEG_64BIT : - segsize == 32 ? SEG_32BIT : 0); + for (i = 0; i < MAX_OPERANDS; i++) { + ins->oprs[i].segment = ins->oprs[i].addr_size = + (segsize == 64 ? SEG_64BIT : segsize == 32 ? 
SEG_32BIT : 0); + } ins->condition = -1; ins->rex = prefix->rex; @@ -419,7 +419,7 @@ static int matches(const struct itemplate *t, uint8_t *data, default: return FALSE; } - } else if (c >= 010 && c <= 012) { + } else if (c >= 010 && c <= 013) { int t = *r++, d = *data++; if (d < t || d > t + 7) return FALSE; @@ -428,20 +428,17 @@ static int matches(const struct itemplate *t, uint8_t *data, (ins->rex & REX_B ? 8 : 0); ins->oprs[c - 010].segment |= SEG_RMREG; } - } else if (c == 017) { - if (*data++) - return FALSE; - } else if (c >= 014 && c <= 016) { + } else if (c >= 014 && c <= 017) { ins->oprs[c - 014].offset = (int8_t)*data++; ins->oprs[c - 014].segment |= SEG_SIGNED; - } else if (c >= 020 && c <= 022) { + } else if (c >= 020 && c <= 023) { ins->oprs[c - 020].offset = *data++; - } else if (c >= 024 && c <= 026) { + } else if (c >= 024 && c <= 027) { ins->oprs[c - 024].offset = *data++; - } else if (c >= 030 && c <= 032) { + } else if (c >= 030 && c <= 033) { ins->oprs[c - 030].offset = getu16(data); data += 2; - } else if (c >= 034 && c <= 036) { + } else if (c >= 034 && c <= 037) { if (osize == 32) { ins->oprs[c - 034].offset = getu32(data); data += 4; @@ -451,10 +448,10 @@ static int matches(const struct itemplate *t, uint8_t *data, } if (segsize != asize) ins->oprs[c - 034].addr_size = asize; - } else if (c >= 040 && c <= 042) { + } else if (c >= 040 && c <= 043) { ins->oprs[c - 040].offset = getu32(data); data += 4; - } else if (c >= 044 && c <= 046) { + } else if (c >= 044 && c <= 047) { switch (asize) { case 16: ins->oprs[c - 044].offset = getu16(data); @@ -471,18 +468,18 @@ static int matches(const struct itemplate *t, uint8_t *data, } if (segsize != asize) ins->oprs[c - 044].addr_size = asize; - } else if (c >= 050 && c <= 052) { + } else if (c >= 050 && c <= 053) { ins->oprs[c - 050].offset = gets8(data++); ins->oprs[c - 050].segment |= SEG_RELATIVE; - } else if (c >= 054 && c <= 056) { + } else if (c >= 054 && c <= 057) { ins->oprs[c - 054].offset = 
getu64(data); data += 8; - } else if (c >= 060 && c <= 062) { + } else if (c >= 060 && c <= 063) { ins->oprs[c - 060].offset = gets16(data); data += 2; ins->oprs[c - 060].segment |= SEG_RELATIVE; ins->oprs[c - 060].segment &= ~SEG_32BIT; - } else if (c >= 064 && c <= 066) { + } else if (c >= 064 && c <= 067) { if (osize == 16) { ins->oprs[c - 064].offset = getu16(data); data += 2; @@ -498,30 +495,33 @@ static int matches(const struct itemplate *t, uint8_t *data, (ins->oprs[c - 064].type & ~SIZE_MASK) | ((osize == 16) ? BITS16 : BITS32); } - } else if (c >= 070 && c <= 072) { + } else if (c >= 070 && c <= 073) { ins->oprs[c - 070].offset = getu32(data); data += 4; ins->oprs[c - 070].segment |= SEG_32BIT | SEG_RELATIVE; - } else if (c >= 0100 && c < 0130) { + } else if (c >= 0100 && c < 0140) { int modrm = *data++; ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+ (ins->rex & REX_R ? 8 : 0); ins->oprs[c & 07].segment |= SEG_RMREG; data = do_ea(data, modrm, asize, segsize, &ins->oprs[(c >> 3) & 07], ins->rex); - } else if (c >= 0130 && c <= 0132) { - ins->oprs[c - 0130].offset = getu16(data); + } else if (c >= 0140 && c <= 0143) { + ins->oprs[c - 0140].offset = getu16(data); data += 2; - } else if (c >= 0140 && c <= 0142) { - ins->oprs[c - 0140].offset = getu32(data); + } else if (c >= 0150 && c <= 0153) { + ins->oprs[c - 0150].offset = getu32(data); data += 4; + } else if (c == 0170) { + if (*data++) + return FALSE; } else if (c >= 0200 && c <= 0277) { int modrm = *data++; if (((modrm >> 3) & 07) != (c & 07)) return FALSE; /* spare field doesn't match up */ data = do_ea(data, modrm, asize, segsize, &ins->oprs[(c >> 3) & 07], ins->rex); - } else if (c >= 0300 && c <= 0302) { + } else if (c >= 0300 && c <= 0303) { a_used = TRUE; } else if (c == 0310) { if (asize != 16) diff --git a/insns.dat b/insns.dat index 422109e1..1595ba69 100644 --- a/insns.dat +++ b/insns.dat @@ -47,14 +47,14 @@ ADC reg_eax,imm \321\1\x15\41 386,SM ADC reg_rax,sbyte \321\1\x83\202\15 X64,SM,ND ADC 
reg_rax,imm \321\1\x15\41 X64,SM ADC rm8,imm \300\1\x80\202\21 8086,SM -ADC rm16,imm \320\300\134\1\x81\202\131 8086,SM -ADC rm32,imm \321\300\144\1\x81\202\141 386,SM -ADC rm64,imm \324\300\144\1\x81\202\141 X64,SM +ADC rm16,imm \320\300\145\1\x81\202\141 8086,SM +ADC rm32,imm \321\300\155\1\x81\202\151 386,SM +ADC rm64,imm \324\300\155\1\x81\202\151 X64,SM ADC mem,imm8 \300\1\x80\202\21 8086,SM -ADC mem,imm16 \320\300\134\1\x81\202\131 8086,SM -ADC mem,imm32 \321\300\144\1\x81\202\141 386,SM -ADD mem,reg8 \300\17\101 8086,SM -ADD reg8,reg8 \17\101 8086 +ADC mem,imm16 \320\300\145\1\x81\202\141 8086,SM +ADC mem,imm32 \321\300\155\1\x81\202\151 386,SM +ADD mem,reg8 \300\170\101 8086,SM +ADD reg8,reg8 \170\101 8086 ADD mem,reg16 \320\300\1\x01\101 8086,SM ADD reg16,reg16 \320\1\x01\101 8086 ADD mem,reg32 \321\300\1\x01\101 386,SM @@ -80,12 +80,12 @@ ADD reg_eax,imm \321\1\x05\41 386,SM ADD reg_rax,sbyte \321\1\x83\200\15 X64,SM,ND ADD reg_rax,imm \323\1\x05\41 X64,SM ADD rm8,imm \300\1\x80\200\21 8086,SM -ADD rm16,imm \320\300\134\1\x81\200\131 8086,SM -ADD rm32,imm \321\300\144\1\x81\200\141 386,SM -ADD rm64,imm \324\300\144\1\x81\200\141 X64,SM +ADD rm16,imm \320\300\145\1\x81\200\141 8086,SM +ADD rm32,imm \321\300\155\1\x81\200\151 386,SM +ADD rm64,imm \324\300\155\1\x81\200\151 X64,SM ADD mem,imm8 \300\1\x80\200\21 8086,SM -ADD mem,imm16 \320\300\134\1\x81\200\131 8086,SM -ADD mem,imm32 \321\300\144\1\x81\200\141 386,SM +ADD mem,imm16 \320\300\145\1\x81\200\141 8086,SM +ADD mem,imm32 \321\300\155\1\x81\200\151 386,SM AND mem,reg8 \300\1\x20\101 8086,SM AND reg8,reg8 \1\x20\101 8086 AND mem,reg16 \320\300\1\x21\101 8086,SM @@ -113,12 +113,12 @@ AND reg_eax,imm \321\1\x25\41 386,SM AND reg_rax,sbyte \321\1\x83\204\15 X64,SM,ND AND reg_rax,imm \324\1\x25\41 X64,SM AND rm8,imm \300\1\x80\204\21 8086,SM -AND rm16,imm \320\300\134\1\x81\204\131 8086,SM -AND rm32,imm \321\300\144\1\x81\204\141 386,SM -AND rm64,imm \324\300\144\1\x81\204\141 X64,SM +AND rm16,imm 
\320\300\145\1\x81\204\141 8086,SM +AND rm32,imm \321\300\155\1\x81\204\151 386,SM +AND rm64,imm \324\300\155\1\x81\204\151 X64,SM AND mem,imm8 \300\1\x80\204\21 8086,SM -AND mem,imm16 \320\300\134\1\x81\204\131 8086,SM -AND mem,imm32 \321\300\144\1\x81\204\141 386,SM +AND mem,imm16 \320\300\145\1\x81\204\141 8086,SM +AND mem,imm32 \321\300\155\1\x81\204\151 386,SM ARPL mem,reg16 \300\1\x63\101 286,PROT,SM,NOLONG ARPL reg16,reg16 \1\x63\101 286,PROT,NOLONG BOUND reg16,mem \320\301\1\x62\110 186,NOLONG @@ -175,13 +175,13 @@ BTS rm32,imm \321\300\2\x0F\xBA\205\25 386,SB BTS rm64,imm \324\300\2\x0F\xBA\205\25 X64,SB CALL imm \322\1\xE8\64 8086 CALL imm|near \322\1\xE8\64 8086 -CALL imm|far \322\1\x9A\34\37 8086,ND,NOLONG +CALL imm|far \322\1\x9A\34\74 8086,ND,NOLONG CALL imm16 \320\1\xE8\64 8086 CALL imm16|near \320\1\xE8\64 8086 -CALL imm16|far \320\1\x9A\34\37 8086,ND,NOLONG +CALL imm16|far \320\1\x9A\34\74 8086,ND,NOLONG CALL imm32 \321\1\xE8\64 386 CALL imm32|near \321\1\xE8\64 386 -CALL imm32|far \321\1\x9A\34\37 386,ND,NOLONG +CALL imm32|far \321\1\x9A\34\74 386,ND,NOLONG CALL imm:imm \322\1\x9A\35\30 8086,NOLONG CALL imm16:imm \320\1\x9A\31\30 8086,NOLONG CALL imm:imm16 \320\1\x9A\31\30 8086,NOLONG @@ -238,12 +238,12 @@ CMP reg_eax,imm \321\1\x3D\41 386,SM CMP reg_rax,sbyte \321\1\x83\207\15 X64,SM,ND CMP reg_rax,imm \321\1\x3D\41 X64,SM CMP rm8,imm \300\1\x80\207\21 8086,SM -CMP rm16,imm \320\300\134\1\x81\207\131 8086,SM -CMP rm32,imm \321\300\144\1\x81\207\141 386,SM -CMP rm64,imm \324\300\144\1\x81\207\141 X64,SM +CMP rm16,imm \320\300\145\1\x81\207\141 8086,SM +CMP rm32,imm \321\300\155\1\x81\207\151 386,SM +CMP rm64,imm \324\300\155\1\x81\207\151 X64,SM CMP mem,imm8 \300\1\x80\207\21 8086,SM -CMP mem,imm16 \320\300\134\1\x81\207\131 8086,SM -CMP mem,imm32 \321\300\144\1\x81\207\141 386,SM +CMP mem,imm16 \320\300\145\1\x81\207\141 8086,SM +CMP mem,imm32 \321\300\155\1\x81\207\151 386,SM CMPSB void \335\1\xA6 8086 CMPSD void \335\321\1\xA7 386 CMPSQ void 
\335\324\1\xA7 X64 @@ -497,38 +497,38 @@ IMUL reg64,reg64 \324\2\x0F\xAF\110 X64 IMUL reg16,mem,imm8 \320\301\1\x6B\110\16 186,SM IMUL reg16,mem,sbyte \320\301\1\x6B\110\16 186,SM,ND IMUL reg16,mem,imm16 \320\301\1\x69\110\32 186,SM -IMUL reg16,mem,imm \320\301\135\1\x69\110\132 186,SM,ND +IMUL reg16,mem,imm \320\301\146\1\x69\110\142 186,SM,ND IMUL reg16,reg16,imm8 \320\1\x6B\110\16 186 IMUL reg16,reg16,sbyte \320\1\x6B\110\16 186,SM,ND IMUL reg16,reg16,imm16 \320\1\x69\110\32 186 -IMUL reg16,reg16,imm \320\135\1\x69\110\132 186,SM,ND +IMUL reg16,reg16,imm \320\146\1\x69\110\142 186,SM,ND IMUL reg32,mem,imm8 \321\301\1\x6B\110\16 386,SM IMUL reg32,mem,sbyte \321\301\1\x6B\110\16 386,SM,ND IMUL reg32,mem,imm32 \321\301\1\x69\110\42 386,SM -IMUL reg32,mem,imm \321\301\145\1\x69\110\142 386,SM,ND +IMUL reg32,mem,imm \321\301\156\1\x69\110\152 386,SM,ND IMUL reg32,reg32,imm8 \321\1\x6B\110\16 386 IMUL reg32,reg32,sbyte \321\1\x6B\110\16 386,SM,ND IMUL reg32,reg32,imm32 \321\1\x69\110\42 386 -IMUL reg32,reg32,imm \321\145\1\x69\110\142 386,SM,ND +IMUL reg32,reg32,imm \321\156\1\x69\110\152 386,SM,ND IMUL reg64,mem,imm8 \324\301\1\x6B\110\16 X64,SM IMUL reg64,mem,sbyte \324\301\1\x6B\110\16 X64,SM,ND IMUL reg64,mem,imm32 \324\301\1\x69\110\42 X64,SM -IMUL reg64,mem,imm \324\301\145\1\x69\110\142 X64,SM,ND +IMUL reg64,mem,imm \324\301\156\1\x69\110\152 X64,SM,ND IMUL reg64,reg64,imm8 \324\1\x6B\110\16 X64 IMUL reg64,reg64,sbyte \324\1\x6B\110\16 X64,SM,ND IMUL reg64,reg64,imm32 \324\1\x69\110\42 X64 -IMUL reg64,reg64,imm \324\145\1\x69\110\142 X64,SM,ND +IMUL reg64,reg64,imm \324\156\1\x69\110\152 X64,SM,ND IMUL reg16,imm8 \320\1\x6B\100\15 186 IMUL reg16,sbyte \320\1\x6B\100\15 186,SM,ND IMUL reg16,imm16 \320\1\x69\100\31 186 -IMUL reg16,imm \320\134\1\x69\100\131 186,SM,ND +IMUL reg16,imm \320\145\1\x69\100\141 186,SM,ND IMUL reg32,imm8 \321\1\x6B\100\15 386 IMUL reg32,sbyte \321\1\x6B\100\15 386,SM,ND IMUL reg32,imm32 \321\1\x69\100\41 386 -IMUL reg32,imm 
\321\144\1\x69\100\141 386,SM,ND +IMUL reg32,imm \321\155\1\x69\100\151 386,SM,ND IMUL reg64,sbyte \324\1\x6B\100\15 X64,SM,ND IMUL reg64,imm32 \324\1\x69\100\41 X64 -IMUL reg64,imm \324\144\1\x69\100\141 X64,SM,ND +IMUL reg64,imm \324\155\1\x69\100\151 X64,SM,ND IN reg_al,imm \1\xE4\25 8086,SB IN reg_ax,imm \320\1\xE5\25 8086,SB IN reg_eax,imm \321\1\xE5\25 386,SB @@ -564,13 +564,13 @@ JMP imm|short \1\xEB\50 8086 JMP imm \371\1\xEB\50 8086,ND JMP imm \322\1\xE9\64 8086 JMP imm|near \322\1\xE9\64 8086,ND -JMP imm|far \322\1\xEA\34\37 8086,ND,NOLONG +JMP imm|far \322\1\xEA\34\74 8086,ND,NOLONG JMP imm16 \320\1\xE9\64 8086 JMP imm16|near \320\1\xE9\64 8086,ND -JMP imm16|far \320\1\xEA\34\37 8086,ND,NOLONG +JMP imm16|far \320\1\xEA\34\74 8086,ND,NOLONG JMP imm32 \321\1\xE9\64 386 JMP imm32|near \321\1\xE9\64 386,ND -JMP imm32|far \321\1\xEA\34\37 386,ND,NOLONG +JMP imm32|far \321\1\xEA\34\74 386,ND,NOLONG JMP imm:imm \322\1\xEA\35\30 8086,NOLONG JMP imm16:imm \320\1\xEA\31\30 8086,NOLONG JMP imm:imm16 \320\1\xEA\31\30 8086,NOLONG @@ -618,9 +618,9 @@ LGDT mem \300\2\x0F\x01\202 286,PRIV LGS reg16,mem \320\301\2\x0F\xB5\110 386 LGS reg32,mem \321\301\2\x0F\xB5\110 386 LIDT mem \300\2\x0F\x01\203 286,PRIV -LLDT mem \300\1\x0F\17\202 286,PROT,PRIV -LLDT mem16 \300\1\x0F\17\202 286,PROT,PRIV -LLDT reg16 \1\x0F\17\202 286,PROT,PRIV +LLDT mem \300\1\x0F\170\202 286,PROT,PRIV +LLDT mem16 \300\1\x0F\170\202 286,PROT,PRIV +LLDT reg16 \1\x0F\170\202 286,PROT,PRIV LMSW mem \300\2\x0F\x01\206 286,PRIV LMSW mem16 \300\2\x0F\x01\206 286,PRIV LMSW reg16 \2\x0F\x01\206 286,PRIV @@ -658,9 +658,9 @@ LSL reg64,mem \324\301\2\x0F\x03\110 X64,SM LSL reg64,reg64 \324\2\x0F\x03\110 X64,PROT LSS reg16,mem \320\301\2\x0F\xB2\110 386 LSS reg32,mem \321\301\2\x0F\xB2\110 386 -LTR mem \300\1\x0F\17\203 286,PROT,PRIV -LTR mem16 \300\1\x0F\17\203 286,PROT,PRIV,NOLONG -LTR reg16 \1\x0F\17\203 286,PROT,PRIV,NOLONG +LTR mem \300\1\x0F\170\203 286,PROT,PRIV +LTR mem16 \300\1\x0F\170\203 
286,PROT,PRIV,NOLONG +LTR reg16 \1\x0F\170\203 286,PROT,PRIV,NOLONG MFENCE void \3\x0F\xAE\xF0 X64,AMD MONITOR void \3\x0F\x01\xC8 PRESCOTT MONITOR reg_eax,reg_ecx,reg_edx \3\x0F\x01\xC8 PRESCOTT,ND @@ -788,12 +788,12 @@ OR reg_eax,imm \321\1\x0D\41 386,SM OR reg_rax,sbyte \321\1\x83\201\15 X64,SM,ND OR reg_rax,imm \321\1\x0D\41 X64,SM OR rm8,imm \300\1\x80\201\21 8086,SM -OR rm16,imm \320\300\134\1\x81\201\131 8086,SM -OR rm32,imm \321\300\144\1\x81\201\141 386,SM -OR rm64,imm \324\300\144\1\x81\201\141 X64,SM +OR rm16,imm \320\300\145\1\x81\201\141 8086,SM +OR rm32,imm \321\300\155\1\x81\201\151 386,SM +OR rm64,imm \324\300\155\1\x81\201\151 X64,SM OR mem,imm8 \300\1\x80\201\21 8086,SM -OR mem,imm16 \320\300\134\1\x81\201\131 8086,SM -OR mem,imm32 \321\300\144\1\x81\201\141 386,SM +OR mem,imm16 \320\300\145\1\x81\201\141 8086,SM +OR mem,imm32 \321\300\155\1\x81\201\151 386,SM OUT imm,reg_al \1\xE6\24 8086,SB OUT imm,reg_ax \320\1\xE7\24 8086,SB OUT imm,reg_eax \321\1\xE7\24 386,SB @@ -987,9 +987,9 @@ PUSH reg_dess \6 8086,NOLONG PUSH reg_fsgs \1\x0F\7 386 PUSH imm8 \1\x6A\14 186 PUSH sbyte \1\x6A\14 186,ND -PUSH imm16 \320\133\1\x68\130 186 -PUSH imm32 \321\143\1\x68\140 386,NOLONG -PUSH imm64 \321\143\1\x68\140 X64 +PUSH imm16 \320\144\1\x68\140 186 +PUSH imm32 \321\154\1\x68\150 386,NOLONG +PUSH imm64 \321\154\1\x68\150 X64 PUSH imm \1\x68\34 186 PUSHA void \322\1\x60 186,NOLONG PUSHAD void \321\1\x60 386,NOLONG @@ -1121,12 +1121,12 @@ SBB reg_eax,imm \321\1\x1D\41 386,SM SBB reg_rax,sbyte \321\1\x83\203\15 X64,SM,ND SBB reg_rax,imm \321\1\x1D\41 X64,SM SBB rm8,imm \300\1\x80\203\21 8086,SM -SBB rm16,imm \320\300\134\1\x81\203\131 8086,SM -SBB rm32,imm \321\300\144\1\x81\203\141 386,SM -SBB rm64,imm \324\300\144\1\x81\203\141 X64,SM +SBB rm16,imm \320\300\145\1\x81\203\141 8086,SM +SBB rm32,imm \321\300\155\1\x81\203\151 386,SM +SBB rm64,imm \324\300\155\1\x81\203\151 X64,SM SBB mem,imm8 \300\1\x80\203\21 8086,SM -SBB mem,imm16 \320\300\134\1\x81\203\131 
8086,SM -SBB mem,imm32 \321\300\144\1\x81\203\141 386,SM +SBB mem,imm16 \320\300\145\1\x81\203\141 8086,SM +SBB mem,imm32 \321\300\155\1\x81\203\151 386,SM SCASB void \335\1\xAE 8086 SCASD void \335\321\1\xAF 386 SCASQ void \335\324\1\xAF X64 @@ -1182,10 +1182,10 @@ SHRD reg32,reg32,reg_cl \321\2\x0F\xAD\101 386 SHRD mem,reg64,reg_cl \300\324\2\x0F\xAD\101 X64,SM SHRD reg64,reg64,reg_cl \324\2\x0F\xAD\101 X64 SIDT mem \300\2\x0F\x01\201 286 -SLDT mem \300\1\x0F\17\200 286 -SLDT mem16 \300\1\x0F\17\200 286 -SLDT reg16 \320\1\x0F\17\200 286 -SLDT reg32 \321\1\x0F\17\200 386 +SLDT mem \300\1\x0F\170\200 286 +SLDT mem16 \300\1\x0F\170\200 286 +SLDT reg16 \320\1\x0F\170\200 286 +SLDT reg32 \321\1\x0F\170\200 386 SKINIT void \3\x0F\x01\xDE X64 SMI void \1\xF1 386,UNDOC SMINT void \2\x0F\x38 P6,CYRIX @@ -1203,11 +1203,11 @@ STOSB void \1\xAA 8086 STOSD void \321\1\xAB 386 STOSQ void \324\1\xAB X64 STOSW void \320\1\xAB 8086 -STR mem \300\1\x0F\17\201 286,PROT -STR mem16 \300\1\x0F\17\201 286,PROT -STR reg16 \320\1\x0F\17\201 286,PROT -STR reg32 \321\1\x0F\17\201 386,PROT -STR reg64 \324\1\x0F\17\201 X64 +STR mem \300\1\x0F\170\201 286,PROT +STR mem16 \300\1\x0F\170\201 286,PROT +STR reg16 \320\1\x0F\170\201 286,PROT +STR reg32 \321\1\x0F\170\201 386,PROT +STR reg64 \324\1\x0F\170\201 X64 SUB mem,reg8 \300\1\x28\101 8086,SM SUB reg8,reg8 \1\x28\101 8086 SUB mem,reg16 \320\300\1\x29\101 8086,SM @@ -1235,12 +1235,12 @@ SUB reg_eax,imm \321\1\x2D\41 386,SM SUB reg_rax,sbyte \321\1\x83\205\15 X64,SM,ND SUB reg_rax,imm \321\1\x2D\41 X64,SM SUB rm8,imm \300\1\x80\205\21 8086,SM -SUB rm16,imm \320\300\134\1\x81\205\131 8086,SM -SUB rm32,imm \321\300\144\1\x81\205\141 386,SM -SUB rm64,imm \324\300\144\1\x81\205\141 X64,SM +SUB rm16,imm \320\300\145\1\x81\205\141 8086,SM +SUB rm32,imm \321\300\155\1\x81\205\151 386,SM +SUB rm64,imm \324\300\155\1\x81\205\151 X64,SM SUB mem,imm8 \300\1\x80\205\21 8086,SM -SUB mem,imm16 \320\300\134\1\x81\205\131 8086,SM -SUB mem,imm32 
\321\300\144\1\x81\205\141 386,SM +SUB mem,imm16 \320\300\145\1\x81\205\141 8086,SM +SUB mem,imm32 \321\300\155\1\x81\205\151 386,SM SVDC mem80,reg_sreg \300\2\x0F\x78\101 486,CYRIX,SMM SVLDT mem80 \300\2\x0F\x7A\200 486,CYRIX,SMM SVTS mem80 \300\2\x0F\x7C\200 486,CYRIX,SMM @@ -1287,12 +1287,12 @@ UMOV reg16,mem \320\301\2\x0F\x13\110 386,UNDOC,SM UMOV reg16,reg16 \320\2\x0F\x13\110 386,UNDOC UMOV reg32,mem \321\301\2\x0F\x13\110 386,UNDOC,SM UMOV reg32,reg32 \321\2\x0F\x13\110 386,UNDOC -VERR mem \300\1\x0F\17\204 286,PROT -VERR mem16 \300\1\x0F\17\204 286,PROT -VERR reg16 \1\x0F\17\204 286,PROT -VERW mem \300\1\x0F\17\205 286,PROT -VERW mem16 \300\1\x0F\17\205 286,PROT -VERW reg16 \1\x0F\17\205 286,PROT +VERR mem \300\1\x0F\170\204 286,PROT +VERR mem16 \300\1\x0F\170\204 286,PROT +VERR reg16 \1\x0F\170\204 286,PROT +VERW mem \300\1\x0F\170\205 286,PROT +VERW mem16 \300\1\x0F\170\205 286,PROT +VERW reg16 \1\x0F\170\205 286,PROT WAIT void \1\x9B 8086 FWAIT void \1\x9B 8086 WBINVD void \2\x0F\x09 486,PRIV @@ -1360,12 +1360,12 @@ XOR reg_eax,imm \321\1\x35\41 386,SM XOR reg_rax,sbyte \321\1\x83\206\15 X64,SM,ND XOR reg_rax,imm \321\1\x35\41 X64,SM XOR rm8,imm \300\1\x80\206\21 8086,SM -XOR rm16,imm \320\300\134\1\x81\206\131 8086,SM -XOR rm32,imm \321\300\144\1\x81\206\141 386,SM -XOR rm64,imm \324\300\144\1\x81\206\141 X64,SM +XOR rm16,imm \320\300\145\1\x81\206\141 8086,SM +XOR rm32,imm \321\300\155\1\x81\206\151 386,SM +XOR rm64,imm \324\300\155\1\x81\206\151 X64,SM XOR mem,imm8 \300\1\x80\206\21 8086,SM -XOR mem,imm16 \320\300\134\1\x81\206\131 8086,SM -XOR mem,imm32 \321\300\144\1\x81\206\141 386,SM +XOR mem,imm16 \320\300\145\1\x81\206\141 8086,SM +XOR mem,imm32 \321\300\155\1\x81\206\151 386,SM XSTORE void \3\x0F\xA7\xC0 P6,CYRIX CMOVcc reg16,mem \320\301\1\x0F\330\x40\110 P6,SM CMOVcc reg16,reg16 \320\1\x0F\330\x40\110 P6 diff --git a/insns.h b/insns.h index 4deccf94..21dfd93b 100644 --- a/insns.h +++ b/insns.h @@ -9,7 +9,7 @@ #ifndef NASM_INSNS_H #define 
NASM_INSNS_H -#include "insnsi.h" /* instruction opcode enum */ +#include "nasm.h" /* max length of any instruction, register name etc. */ #if MAX_INSLEN > 9 /* MAX_INSLEN defined in insnsi.h */ @@ -21,7 +21,7 @@ struct itemplate { enum opcode opcode; /* the token, passed from "parser.c" */ int operands; /* number of operands */ - int32_t opd[3]; /* bit flags for operand types */ + int32_t opd[MAX_OPERANDS]; /* bit flags for operand types */ const char *code; /* the code it assembles to */ uint32_t flags; /* some flags */ }; @@ -66,12 +66,14 @@ extern const struct itemplate * const * const itable[]; #define IF_SM2 0x00000002UL /* size match first two operands */ #define IF_SB 0x00000004UL /* unsized operands can't be non-byte */ #define IF_SW 0x00000008UL /* unsized operands can't be non-word */ -#define IF_SD 0x00000010UL /* unsized operands can't be non-dword */ -#define IF_SQ 0x00000020UL /* unsized operands can't be non-qword */ -#define IF_AR0 0x00000040UL /* SB, SW, SD applies to argument 0 */ -#define IF_AR1 0x00000080UL /* SB, SW, SD applies to argument 1 */ -#define IF_AR2 0x000000C0UL /* SB, SW, SD applies to argument 2 */ -#define IF_ARMASK 0x000000C0UL /* mask for unsized argument spec */ +#define IF_SD 0x0000000CUL /* unsized operands can't be non-dword */ +#define IF_SQ 0x00000010UL /* unsized operands can't be non-qword */ +#define IF_SMASK 0x0000001CUL /* mask for unsized argument size */ +#define IF_AR0 0x00000020UL /* SB, SW, SD applies to argument 0 */ +#define IF_AR1 0x00000040UL /* SB, SW, SD applies to argument 1 */ +#define IF_AR2 0x00000060UL /* SB, SW, SD applies to argument 2 */ +#define IF_AR3 0x00000080UL /* SB, SW, SD applies to argument 2 */ +#define IF_ARMASK 0x000000E0UL /* mask for unsized argument spec */ #define IF_PRIV 0x00000100UL /* it's a privileged instruction */ #define IF_SMM 0x00000200UL /* it's only valid in SMM */ #define IF_PROT 0x00000400UL /* it's protected mode only */ diff --git a/insns.pl b/insns.pl index 
421f16aa..e596b48b 100644 --- a/insns.pl +++ b/insns.pl @@ -203,45 +203,52 @@ if ( !defined($output) || $output eq 'n' ) { printf STDERR "Done: %d instructions\n", $insns; sub format { - local ($opcode, $operands, $codes, $flags) = @_; - local $num, $nd = 0; + my ($opcode, $operands, $codes, $flags) = @_; + my $num, $nd = 0; - return (undef, undef) if $operands eq "ignore"; - - # format the operands - $operands =~ s/:/|colon,/g; - $operands =~ s/mem(\d+)/mem|bits$1/g; - $operands =~ s/mem/memory/g; - $operands =~ s/memory_offs/mem_offs/g; - $operands =~ s/imm(\d+)/imm|bits$1/g; - $operands =~ s/imm/immediate/g; - $operands =~ s/rm(\d+)/rm_gpr|bits$1/g; - $operands =~ s/mmxrm/rm_mmx/g; - $operands =~ s/xmmrm/rm_xmm/g; - $num = 3; - $operands = '0,0,0', $num = 0 if $operands eq 'void'; - $operands .= ',0', $num-- while $operands !~ /,.*,/; - $operands =~ tr/a-z/A-Z/; - - # format the flags - $flags =~ s/,/|IF_/g; - $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/; - $flags = "IF_" . $flags; - - ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); + return (undef, undef) if $operands eq "ignore"; + + # format the operands + $operands =~ s/:/|colon,/g; + $operands =~ s/mem(\d+)/mem|bits$1/g; + $operands =~ s/mem/memory/g; + $operands =~ s/memory_offs/mem_offs/g; + $operands =~ s/imm(\d+)/imm|bits$1/g; + $operands =~ s/imm/immediate/g; + $operands =~ s/rm(\d+)/rm_gpr|bits$1/g; + $operands =~ s/mmxrm/rm_mmx/g; + $operands =~ s/xmmrm/rm_xmm/g; + if ($operands eq 'void') { + @ops = (); + } else { + @ops = split(/\,/, $operands); + } + $num = scalar(@ops); + while (scalar(@ops) < 4) { + push(@ops, '0'); + } + $operands = join(',', @ops); + $operands =~ tr/a-z/A-Z/; + + # format the flags + $flags =~ s/,/|IF_/g; + $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/; + $flags = "IF_" . $flags; + + ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); } # Here we determine the range of possible starting bytes for a given # instruction. 
We need only consider the codes: # \1 \2 \3 mean literal bytes, of course # \4 \5 \6 \7 mean PUSH/POP of segment registers: special case -# \10 \11 \12 mean byte plus register value -# \17 means byte zero +# \1[0123] mean byte plus register value +# \170 means byte zero # \330 means byte plus condition code # \0 or \340 mean give up and return empty set sub startbyte { - local ($codes) = @_; - local $word, @range; + my ($codes) = @_; + my $word, @range; while (1) { die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/; @@ -251,8 +258,8 @@ sub startbyte { return (0xA1, 0xA9) if $word eq "\\5"; return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6"; return (0xA0, 0xA8) if $word eq "\\7"; - $start=hex $1, $r=8, last if $word =~ /^\\1[012]$/ && $codes =~/^\\x(..)/; - return (0) if $word eq "\\17"; + $start=hex $1, $r=8, last if $word =~ /^\\1[0123]$/ && $codes =~/^\\x(..)/; + return (0) if $word eq "\\170"; $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/; return () if $word eq "\\0" || $word eq "\\340"; } diff --git a/nasm.h b/nasm.h index 4ae93b61..7c5a1b75 100644 --- a/nasm.h +++ b/nasm.h @@ -607,6 +607,7 @@ typedef struct extop { /* extended operand */ } extop; #define MAXPREFIX 4 +#define MAX_OPERANDS 4 typedef struct { /* an instruction itself */ char *label; /* the label defined, or NULL */ @@ -616,7 +617,7 @@ typedef struct { /* an instruction itself */ enum ccode condition; /* the condition code, if Jcc/SETcc */ int operands; /* how many operands? 0-3 * (more if db et al) */ - operand oprs[3]; /* the operands, defined as above */ + operand oprs[MAX_OPERANDS]; /* the operands, defined as above */ extop *eops; /* extended operands */ int eops_float; /* true if DD and floating */ int32_t times; /* repeat count (TIMES prefix) */ From 19315e012fda54ec3e4af65849170ab335dcc36f Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 17 Sep 2007 16:20:45 -0700 Subject: [PATCH 02/29] Enable IF_AR3 Enable IF_AR3, which was incorrectly disabled in a previous checkin. --- assemble.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/assemble.c b/assemble.c index 7dc2b25b..0821101a 100644 --- a/assemble.c +++ b/assemble.c @@ -1630,11 +1630,9 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) case IF_AR2: i = 2; break; -#if 0 /* Need to reorganize instruction flags to fit IF_AR3 */ case IF_AR3: i = 3; break; -#endif default: break; /* Shouldn't happen */ } From 8f94f988f0413c35520095866e00ac358d36c99c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 16:31:33 -0700 Subject: [PATCH 03/29] Fix a few instances of missing renumbers parser.c: change hard-coded argument count 3 to MAX_OPERANDS assemble.c: change a few missed code renumbers --- assemble.c | 4 ++-- parser.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assemble.c b/assemble.c index 0821101a..f1b4dbb9 100644 --- a/assemble.c +++ b/assemble.c @@ -1272,7 +1272,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, errfunc(ERR_WARNING, "word value exceeds bounds"); } out(offset, segment, &data, OUT_ADDRESS + 2, - ins->oprs[c - 0140].segment, ins->oprs[c - 0130].wrt); + ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt); offset += 2; } break; @@ -1302,7 +1302,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, offset++; } else { out(offset, segment, &data, OUT_ADDRESS + 4, - ins->oprs[c - 0150].segment, ins->oprs[c - 0140].wrt); + ins->oprs[c - 0150].segment, ins->oprs[c - 0150].wrt); offset += 4; } break; diff --git a/parser.c b/parser.c index 1c7b8d9b..16164d77 100644 --- a/parser.c +++ b/parser.c @@ -339,10 +339,10 @@ insn *parse_line(int pass, char *buffer, insn * result, return result; } - /* right. Now we begin to parse the operands. There may be up to three + /* right. Now we begin to parse the operands.
There may be up to four * of these, separated by commas, and terminated by a zero token. */ - for (operand = 0; operand < 3; operand++) { + for (operand = 0; operand < MAX_OPERANDS; operand++) { expr *value; /* used most of the time */ int mref; /* is this going to be a memory ref? */ int bracket; /* is it a [] mref, or a & mref? */ From 401c07e20d14130a2d147468a408fce9edd1faff Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 16:55:04 -0700 Subject: [PATCH 04/29] Initial support for generating DREX suffixes Initial support for generating DREX suffixes. Not used yet. No disassembler support yet, and no support for "operand X must match operand Y." --- assemble.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- nasm.h | 5 ++++- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/assemble.c b/assemble.c index f1b4dbb9..a140c729 100644 --- a/assemble.c +++ b/assemble.c @@ -39,9 +39,16 @@ * \150..\153 - an immediate dword or signed byte for operand 0..3 * \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3 * is a signed byte rather than a dword. + * \160..\163 - this instruction uses DREX rather than REX, with the + * OC0 field set to 0, and the dest field taken from + * operand 0..3. + * \164..\167 - this instruction uses DREX rather than REX, with the + * OC0 field set to 1, and the dest field taken from + * operand 0..3. * \170 - encodes the literal byte 0. (Some compilers don't take * kindly to a zero byte in the _middle_ of a compile time * string constant, so I had to put this hack in.) + * \171 - placement of DREX suffix in the absence of an EA * \2ab - a ModRM, calculated on EA in operand a, with the spare * field equal to digit b. 
* \30x - might be an 0x67 byte, depending on the address size of @@ -843,9 +850,25 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, codes += 2; length++; break; + case 0160: + case 0161: + case 0162: + case 0163: + length++; + ins->rex |= REX_D; + break; + case 0164: + case 0165: + case 0166: + case 0167: + length++; + ins->rex |= REX_D|REX_OC; + break; case 0170: length++; break; + case 0171: + break; case 0300: case 0301: case 0302: @@ -945,7 +968,14 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, } ins->rex &= rex_mask; - if (ins->rex & REX_REAL) { + + if (ins->rex & REX_D) { + if (ins->rex & REX_H) { + errfunc(ERR_NONFATAL, "cannot use high register in drex instruction"); + return -1; + } + length++; + } else if (ins->rex & REX_REAL) { if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "cannot use high register in rex instruction"); return -1; @@ -964,7 +994,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, } #define EMIT_REX() \ - if((ins->rex & REX_REAL) && (bits == 64)) { \ + if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) && (bits == 64)) { \ ins->rex = (ins->rex & REX_REAL)|REX_P; \ out(offset, segment, &ins->rex, OUT_RAWDATA+1, NO_SEG, NO_SEG); \ ins->rex = 0; \ @@ -1320,12 +1350,33 @@ static void gencode(int32_t segment, int32_t offset, int bits, offset++; break; + case 0160: + case 0161: + case 0162: + case 0163: + case 0164: + case 0165: + case 0166: + case 0167: + ins->drexdst = regval(&ins->oprs[c & 3]); + break; + case 0170: bytes[0] = 0; out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); offset += 1; break; + case 0171: + bytes[0] = + (ins->drexdst << 4) | + (ins->rex & REX_OC ? 
0x08 : 0) | + (ins->rex & (REX_R|REX_X|REX_B)); + ins->rex = 0; + out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); + offset++; + break; + case 0300: case 0301: case 0302: @@ -1487,6 +1538,15 @@ static void gencode(int32_t segment, int32_t offset, int bits, if (ea_data.sib_present) *p++ = ea_data.sib; + /* DREX suffixes come between the SIB and the displacement */ + if (ins->rex & REX_D) { + *p++ = + (ins->drexdst << 4) | + (ins->rex & REX_OC ? 0x08 : 0) | + (ins->rex & (REX_R|REX_X|REX_B)); + ins->rex = 0; + } + s = p - bytes; out(offset, segment, bytes, OUT_RAWDATA + s, NO_SEG, NO_SEG); diff --git a/nasm.h b/nasm.h index 7c5a1b75..93c35de6 100644 --- a/nasm.h +++ b/nasm.h @@ -540,6 +540,8 @@ enum ccode { /* condition code names */ /* * REX flags */ +#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */ +#define REX_D 0x0100 /* Instruction uses DREX instead of REX */ #define REX_H 0x80 /* High register present, REX forbidden */ #define REX_P 0x40 /* REX prefix present/required */ #define REX_L 0x20 /* Use LOCK prefix instead of REX.R */ @@ -622,7 +624,8 @@ typedef struct { /* an instruction itself */ int eops_float; /* true if DD and floating */ int32_t times; /* repeat count (TIMES prefix) */ int forw_ref; /* is there a forward reference? */ - uint8_t rex; /* Special REX Prefix */ + int rex; /* Special REX Prefix */ + int drexdst; /* Destination register for DREX suffix */ } insn; enum geninfo { GI_SWITCH }; From cf5180a9553e43bbaa46fd1a77c75dc8b7f6da42 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 17:25:27 -0700 Subject: [PATCH 05/29] Actually generate SSE5 instructions This checkin completes what is required to actually generate SSE5 instructions. No support in the disassembler yet. This checkin covers: - Support for actually generating DREX prefixes. 
- Support for matching operand "operand X must match Y" --- assemble.c | 19 +++++++++++++++---- insns.dat | 18 ++++++++++++++++++ insns.h | 3 ++- insns.pl | 1 + nasm.h | 12 +++++++++++- test/fmsub.asm | 16 ++++++++++++++++ 6 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 test/fmsub.asm diff --git a/assemble.c b/assemble.c index a140c729..ec3b1124 100644 --- a/assemble.c +++ b/assemble.c @@ -856,6 +856,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 0163: length++; ins->rex |= REX_D; + ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0164: case 0165: @@ -863,6 +864,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 0167: length++; ins->rex |= REX_D|REX_OC; + ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0170: length++; @@ -974,6 +976,11 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, errfunc(ERR_NONFATAL, "cannot use high register in drex instruction"); return -1; } + if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) || + ins->drexdst > 7)) { + errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); + return -1; + } length++; } else if (ins->rex & REX_REAL) { if (ins->rex & REX_H) { @@ -985,8 +992,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, cpu >= IF_X86_64)) { length++; } else { - errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); - return -1; + errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); + return -1; } } @@ -1358,7 +1365,6 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 0165: case 0166: case 0167: - ins->drexdst = regval(&ins->oprs[c & 3]); break; case 0170: @@ -1663,7 +1669,12 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) * Check that the operand flags all match up */ for (i = 0; i < itemp->operands; i++) { - if (itemp->opd[i] & ~instruction->oprs[i].type || + if (itemp->opd[i] & SAME_AS) { + int j = itemp->opd[i] & ~SAME_AS; + if 
(instruction->oprs[i].type != instruction->oprs[j].type || + instruction->oprs[i].basereg != instruction->oprs[j].basereg) + return 0; + } else if (itemp->opd[i] & ~instruction->oprs[i].type || ((itemp->opd[i] & SIZE_MASK) && ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) { if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) || diff --git a/insns.dat b/insns.dat index 4c7b2f5b..60bfa047 100644 --- a/insns.dat +++ b/insns.dat @@ -2020,3 +2020,21 @@ PCMPGTQ xmmreg,xmmrm \366\3\x0F\x38\x37\110 SSE42 POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM POPCNT reg64,rm32 \324\333\2\x0F\xB8\110 NEHALEM,X64 + +; AMD SSE5 instructions +FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5 +FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5 +FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5 +FMSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0C\112 SSE5 +FMSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x09\132 SSE5 +FMSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x09\123 SSE5 +FMSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0D\121 SSE5 +FMSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0D\112 SSE5 +FMSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0A\132 SSE5 +FMSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0A\123 SSE5 +FMSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0E\121 SSE5 +FMSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0E\112 SSE5 +FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5 +FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5 +FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5 +FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5 diff --git a/insns.h b/insns.h index 21dfd93b..c7fa75a0 100644 --- a/insns.h +++ b/insns.h @@ -21,7 +21,7 @@ struct itemplate { enum opcode opcode; /* the token, passed from "parser.c" */ int operands; /* number of operands */ - int32_t opd[MAX_OPERANDS]; /* bit flags for operand types */ + opflags_t opd[MAX_OPERANDS]; /* bit flags 
for operand types */ const char *code; /* the code it assembles to */ uint32_t flags; /* some flags */ }; @@ -90,6 +90,7 @@ extern const struct itemplate * const * const itable[]; #define IF_SSSE3 0x00200000UL /* it's an SSSE3 instruction */ #define IF_SSE41 0x00400000UL /* it's an SSE4.1 instruction */ #define IF_SSE42 0x00800000UL /* it's an SSE4.2 instruction */ +#define IF_SSE5 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_PMASK 0xFF000000UL /* the mask for processor types */ #define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */ /* also the highest possible processor */ diff --git a/insns.pl b/insns.pl index e596b48b..30f59c65 100644 --- a/insns.pl +++ b/insns.pl @@ -218,6 +218,7 @@ sub format { $operands =~ s/rm(\d+)/rm_gpr|bits$1/g; $operands =~ s/mmxrm/rm_mmx/g; $operands =~ s/xmmrm/rm_xmm/g; + $operands =~ s/\=([0-9]+)/same_as|$1/g; if ($operands eq 'void') { @ops = (); } else { diff --git a/nasm.h b/nasm.h index 93c35de6..f5d64946 100644 --- a/nasm.h +++ b/nasm.h @@ -438,9 +438,16 @@ enum { * 25: RM_MMX (MMXREG) * 26: RM_XMM (XMMREG) * - * Bits 27-31 are currently unallocated. + * Bits 27-29 & 31 are currently unallocated. + * + * 30: SAME_AS + * Special flag only used in instruction patterns; means this operand + * has to be identical to another operand. Currently only supported + * for registers. */ +typedef uint32_t opflags_t; + /* Size, and other attributes, of the operand */ #define BITS8 0x00000001L #define BITS16 0x00000002L @@ -527,6 +534,9 @@ enum { #define UNITY 0x00012000L /* for shift/rotate instructions */ #define SBYTE 0x00022000L /* for op r16/32,immediate instrs. 
*/ +/* special flags */ +#define SAME_AS 0x40000000L + /* Register names automatically generated from regs.dat */ #include "regs.h" diff --git a/test/fmsub.asm b/test/fmsub.asm new file mode 100644 index 00000000..7f087cd7 --- /dev/null +++ b/test/fmsub.asm @@ -0,0 +1,16 @@ + bits 64 + + fmsubps xmm0,xmm0,xmm1,xmm2 + fmsubps xmm0,xmm0,xmm1,[rax] + fmsubps xmm0,xmm0,xmm1,[rax+0x77] + fmsubps xmm0,xmm0,xmm1,[rax+0x7777] + fmsubps xmm1,xmm2,xmm3,xmm1 + fmsubps xmm1,xmm2,[rax],xmm1 + fmsubps xmm1,xmm2,[rax+0x77],xmm1 + fmsubps xmm1,xmm2,[rax+0x7777],xmm1 + fmsubps xmm0,[rax],xmm2,xmm0 + fmsubps xmm0,[rax+0x77],xmm2,xmm0 + fmsubps xmm0,[rax+0x7777],xmm2,xmm0 + fmsubps xmm14,[rax],xmm2,xmm14 + fmsubps xmm14,[rax+0x77],xmm2,xmm14 + fmsubps xmm14,[rax+0x7777],xmm2,xmm14 From 0a80739c46f2a6e7217c56f0b96248388a8ea1c9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 17:27:46 -0700 Subject: [PATCH 06/29] insns.dat: All SSE5 instructions are AMD SSE5 is an AMD-defined instruction set, so tag those AMD. 
--- insns.dat | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/insns.dat b/insns.dat index 60bfa047..fcf0bec8 100644 --- a/insns.dat +++ b/insns.dat @@ -2022,19 +2022,19 @@ POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM POPCNT reg64,rm32 \324\333\2\x0F\xB8\110 NEHALEM,X64 ; AMD SSE5 instructions -FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5 -FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5 -FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5 -FMSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0C\112 SSE5 -FMSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x09\132 SSE5 -FMSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x09\123 SSE5 -FMSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0D\121 SSE5 -FMSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0D\112 SSE5 -FMSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0A\132 SSE5 -FMSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0A\123 SSE5 -FMSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0E\121 SSE5 -FMSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0E\112 SSE5 -FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5 -FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5 -FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5 -FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5 +FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5,AMD +FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5,AMD +FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5,AMD +FMSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0C\112 SSE5,AMD +FMSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x09\132 SSE5,AMD +FMSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x09\123 SSE5,AMD +FMSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0D\121 SSE5,AMD +FMSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0D\112 SSE5,AMD +FMSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0A\132 SSE5,AMD +FMSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0A\123 SSE5,AMD +FMSUBSS xmmreg,xmmreg,xmmrm,=0 
\160\3\x0F\x24\x0E\121 SSE5,AMD +FMSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0E\112 SSE5,AMD +FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5,AMD +FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5,AMD +FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5,AMD +FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5,AMD From 7786c364b455806e991b3ef785618ec16f940ee5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 Sep 2007 18:45:44 -0700 Subject: [PATCH 07/29] Disassembler support for SSE5 instructions Support for the SSE5 instruction format in the disassembler. Also adds some comments to insnsd.c for easier debugging. --- disasm.c | 207 +++++++++++++++++++++++++++++++++++-------------------- insns.pl | 5 +- 2 files changed, 134 insertions(+), 78 deletions(-) diff --git a/disasm.c b/disasm.c index cfe86938..3a8f710d 100644 --- a/disasm.c +++ b/disasm.c @@ -166,17 +166,47 @@ static const char *whichcond(int condval) return conditions[conds[condval]]; } +/* + * Process a DREX suffix + */ +static uint8_t *do_drex(uint8_t *data, insn *ins) +{ + uint8_t drex = *data++; + operand *dst = &ins->oprs[ins->drexdst]; + + if ((drex & 8) != ((ins->rex & REX_OC) ? 8 : 0)) + return NULL; /* OC0 mismatch */ + ins->rex = (ins->rex & ~7) | (drex & 7); + + dst->segment = SEG_RMREG; + dst->basereg = drex >> 4; + return data; +} + + /* * Process an effective address (ModRM) specification. */ static uint8_t *do_ea(uint8_t *data, int modrm, int asize, - int segsize, operand * op, int rex) + int segsize, operand * op, insn *ins) { int mod, rm, scale, index, base; + int rex; + uint8_t sib = 0; mod = (modrm >> 6) & 03; rm = modrm & 07; + if (mod != 3 && rm == 4 && asize != 16) + sib = *data++; + + if (ins->rex & REX_D) { + data = do_drex(data, ins); + if (!data) + return NULL; + } + rex = ins->rex; + if (mod == 3) { /* pure register version */ op->basereg = rm+(rex & REX_B ? 
8 : 0); op->segment |= SEG_RMREG; @@ -282,10 +312,9 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize, } if (rm == 4) { /* process SIB */ - scale = (*data >> 6) & 03; - index = (*data >> 3) & 07; - base = *data & 07; - data++; + scale = (sib >> 6) & 03; + index = (sib >> 3) & 07; + base = sib & 07; op->scale = 1 << scale; @@ -501,26 +530,37 @@ static int matches(const struct itemplate *t, uint8_t *data, ins->oprs[c - 070].segment |= SEG_32BIT | SEG_RELATIVE; } else if (c >= 0100 && c < 0140) { int modrm = *data++; - ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+ - (ins->rex & REX_R ? 8 : 0); ins->oprs[c & 07].segment |= SEG_RMREG; data = do_ea(data, modrm, asize, segsize, - &ins->oprs[(c >> 3) & 07], ins->rex); + &ins->oprs[(c >> 3) & 07], ins); + if (!data) + return FALSE; + ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+ + (ins->rex & REX_R ? 8 : 0); } else if (c >= 0140 && c <= 0143) { ins->oprs[c - 0140].offset = getu16(data); data += 2; } else if (c >= 0150 && c <= 0153) { ins->oprs[c - 0150].offset = getu32(data); data += 4; + } else if (c >= 0160 && c <= 0167) { + ins->rex |= (c & 4) ? REX_D|REX_OC : REX_D; + ins->drexdst = c & 3; } else if (c == 0170) { if (*data++) return FALSE; + } else if (c == 0171) { + data = do_drex(data, ins); + if (!data) + return FALSE; } else if (c >= 0200 && c <= 0277) { int modrm = *data++; if (((modrm >> 3) & 07) != (c & 07)) return FALSE; /* spare field doesn't match up */ data = do_ea(data, modrm, asize, segsize, - &ins->oprs[(c >> 3) & 07], ins->rex); + &ins->oprs[(c >> 3) & 07], ins); + if (!data) + return FALSE; } else if (c >= 0300 && c <= 0303) { a_used = TRUE; } else if (c == 0310) { @@ -605,6 +645,10 @@ static int matches(const struct itemplate *t, uint8_t *data, } } + /* REX cannot be combined with DREX */ + if ((ins->rex & REX_D) && (prefix->rex)) + return FALSE; + /* * Check for unused rep or a/o prefixes. 
*/ @@ -692,19 +736,21 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, * XXX: Need to make sure this is actually correct. */ for (i = 0; i < (*p)->operands; i++) { - if ( - /* If it's a mem-only EA but we have a register, die. */ - ((tmp_ins.oprs[i].segment & SEG_RMREG) && - !(MEMORY & ~(*p)->opd[i])) || - /* If it's a reg-only EA but we have a memory ref, die. */ - (!(tmp_ins.oprs[i].segment & SEG_RMREG) && - !(REG_EA & ~(*p)->opd[i]) && - !((*p)->opd[i] & REG_SMASK)) || - /* Register type mismatch (eg FS vs REG_DESS): die. */ - ((((*p)->opd[i] & (REGISTER | FPUREG)) || - (tmp_ins.oprs[i].segment & SEG_RMREG)) && - !whichreg((*p)->opd[i], - tmp_ins.oprs[i].basereg, tmp_ins.rex))) { + if (!((*p)->opd[i] & SAME_AS) && + ( + /* If it's a mem-only EA but we have a register, die. */ + ((tmp_ins.oprs[i].segment & SEG_RMREG) && + !(MEMORY & ~(*p)->opd[i])) || + /* If it's a reg-only EA but we have a memory ref, die. */ + (!(tmp_ins.oprs[i].segment & SEG_RMREG) && + !(REG_EA & ~(*p)->opd[i]) && + !((*p)->opd[i] & REG_SMASK)) || + /* Register type mismatch (eg FS vs REG_DESS): die. */ + ((((*p)->opd[i] & (REGISTER | FPUREG)) || + (tmp_ins.oprs[i].segment & SEG_RMREG)) && + !whichreg((*p)->opd[i], + tmp_ins.oprs[i].basereg, tmp_ins.rex)) + )) { works = FALSE; break; } @@ -793,107 +839,116 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, colon = FALSE; length += data - origdata; /* fix up for prefixes */ for (i = 0; i < (*p)->operands; i++) { + opflags_t t = (*p)->opd[i]; + const operand *o = &ins.oprs[i]; + int64_t offs; + + if (t & SAME_AS) { + o = &ins.oprs[t & ~SAME_AS]; + t = (*p)->opd[t & ~SAME_AS]; + } + output[slen++] = (colon ? ':' : i == 0 ? 
' ' : ','); - if (ins.oprs[i].segment & SEG_RELATIVE) { - ins.oprs[i].offset += offset + length; + offs = o->offset; + if (o->segment & SEG_RELATIVE) { + offs += offset + length; /* * sort out wraparound */ - if (!(ins.oprs[i].segment & (SEG_32BIT|SEG_64BIT))) - ins.oprs[i].offset &= 0xffff; + if (!(o->segment & (SEG_32BIT|SEG_64BIT))) + offs &= 0xffff; /* * add sync marker, if autosync is on */ if (autosync) - add_sync(ins.oprs[i].offset, 0L); + add_sync(offs, 0L); } - if ((*p)->opd[i] & COLON) + if (t & COLON) colon = TRUE; else colon = FALSE; - if (((*p)->opd[i] & (REGISTER | FPUREG)) || - (ins.oprs[i].segment & SEG_RMREG)) { - ins.oprs[i].basereg = whichreg((*p)->opd[i], - ins.oprs[i].basereg, ins.rex); - if ((*p)->opd[i] & TO) + if ((t & (REGISTER | FPUREG)) || + (o->segment & SEG_RMREG)) { + enum reg_enum reg; + reg = whichreg(t, o->basereg, ins.rex); + if (t & TO) slen += snprintf(output + slen, outbufsize - slen, "to "); slen += snprintf(output + slen, outbufsize - slen, "%s", - reg_names[ins.oprs[i].basereg - - EXPR_REG_START]); - } else if (!(UNITY & ~(*p)->opd[i])) { + reg_names[reg - EXPR_REG_START]); + } else if (!(UNITY & ~t)) { output[slen++] = '1'; - } else if ((*p)->opd[i] & IMMEDIATE) { - if ((*p)->opd[i] & BITS8) { + } else if (t & IMMEDIATE) { + if (t & BITS8) { slen += snprintf(output + slen, outbufsize - slen, "byte "); - if (ins.oprs[i].segment & SEG_SIGNED) { - if (ins.oprs[i].offset < 0) { - ins.oprs[i].offset *= -1; + if (o->segment & SEG_SIGNED) { + if (offs < 0) { + offs *= -1; output[slen++] = '-'; } else output[slen++] = '+'; } - } else if ((*p)->opd[i] & BITS16) { + } else if (t & BITS16) { slen += snprintf(output + slen, outbufsize - slen, "word "); - } else if ((*p)->opd[i] & BITS32) { + } else if (t & BITS32) { slen += snprintf(output + slen, outbufsize - slen, "dword "); - } else if ((*p)->opd[i] & BITS64) { + } else if (t & BITS64) { slen += snprintf(output + slen, outbufsize - slen, "qword "); - } else if ((*p)->opd[i] & NEAR) { 
+ } else if (t & NEAR) { slen += snprintf(output + slen, outbufsize - slen, "near "); - } else if ((*p)->opd[i] & SHORT) { + } else if (t & SHORT) { slen += snprintf(output + slen, outbufsize - slen, "short "); } slen += snprintf(output + slen, outbufsize - slen, "0x%"PRIx64"", - ins.oprs[i].offset); - } else if (!(MEM_OFFS & ~(*p)->opd[i])) { + offs); + } else if (!(MEM_OFFS & ~t)) { slen += snprintf(output + slen, outbufsize - slen, "[%s%s%s0x%"PRIx64"]", (segover ? segover : ""), (segover ? ":" : ""), - (ins.oprs[i].addr_size == - 32 ? "dword " : ins.oprs[i].addr_size == - 16 ? "word " : ""), ins.oprs[i].offset); + (o->addr_size == + 32 ? "dword " : o->addr_size == + 16 ? "word " : ""), offs); segover = NULL; - } else if (!(REGMEM & ~(*p)->opd[i])) { + } else if (!(REGMEM & ~t)) { int started = FALSE; - if ((*p)->opd[i] & BITS8) + if (t & BITS8) slen += snprintf(output + slen, outbufsize - slen, "byte "); - if ((*p)->opd[i] & BITS16) + if (t & BITS16) slen += snprintf(output + slen, outbufsize - slen, "word "); - if ((*p)->opd[i] & BITS32) + if (t & BITS32) slen += snprintf(output + slen, outbufsize - slen, "dword "); - if ((*p)->opd[i] & BITS64) + if (t & BITS64) slen += snprintf(output + slen, outbufsize - slen, "qword "); - if ((*p)->opd[i] & BITS80) + if (t & BITS80) slen += snprintf(output + slen, outbufsize - slen, "tword "); - if ((*p)->opd[i] & FAR) + if (t & FAR) slen += snprintf(output + slen, outbufsize - slen, "far "); - if ((*p)->opd[i] & NEAR) + if (t & NEAR) slen += snprintf(output + slen, outbufsize - slen, "near "); output[slen++] = '['; - if (ins.oprs[i].addr_size) + if (o->addr_size) slen += snprintf(output + slen, outbufsize - slen, "%s", - (ins.oprs[i].addr_size == 64 ? "qword " : - ins.oprs[i].addr_size == 32 ? "dword " : - ins.oprs[i].addr_size == 16 ? "word " : + (o->addr_size == 64 ? "qword " : + o->addr_size == 32 ? "dword " : + o->addr_size == 16 ? 
"word " : "")); - if (ins.oprs[i].eaflags & EAF_REL) + if (o->eaflags & EAF_REL) slen += snprintf(output + slen, outbufsize - slen, "rel "); if (segover) { slen += @@ -901,27 +956,27 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, segover); segover = NULL; } - if (ins.oprs[i].basereg != -1) { + if (o->basereg != -1) { slen += snprintf(output + slen, outbufsize - slen, "%s", - reg_names[(ins.oprs[i].basereg - + reg_names[(o->basereg - EXPR_REG_START)]); started = TRUE; } - if (ins.oprs[i].indexreg != -1) { + if (o->indexreg != -1) { if (started) output[slen++] = '+'; slen += snprintf(output + slen, outbufsize - slen, "%s", - reg_names[(ins.oprs[i].indexreg - + reg_names[(o->indexreg - EXPR_REG_START)]); - if (ins.oprs[i].scale > 1) + if (o->scale > 1) slen += snprintf(output + slen, outbufsize - slen, "*%d", - ins.oprs[i].scale); + o->scale); started = TRUE; } - if (ins.oprs[i].segment & SEG_DISP8) { + if (o->segment & SEG_DISP8) { int minus = 0; - int8_t offset = ins.oprs[i].offset; + int8_t offset = offs; if (offset < 0) { minus = 1; offset = -offset; @@ -929,9 +984,9 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, slen += snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx8"", minus ? "-" : "+", offset); - } else if (ins.oprs[i].segment & SEG_DISP16) { + } else if (o->segment & SEG_DISP16) { int minus = 0; - int16_t offset = ins.oprs[i].offset; + int16_t offset = offs; if (offset < 0) { minus = 1; offset = -offset; @@ -939,9 +994,9 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, slen += snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx16"", minus ? "-" : started ? 
"+" : "", offset); - } else if (ins.oprs[i].segment & SEG_DISP32) { + } else if (o->segment & SEG_DISP32) { char *prefix = ""; - int32_t offset = ins.oprs[i].offset; + int32_t offset = offs; if (offset < 0) { offset = -offset; prefix = "-"; diff --git a/insns.pl b/insns.pl index 30f59c65..6e961dec 100644 --- a/insns.pl +++ b/insns.pl @@ -102,10 +102,11 @@ if ( !defined($output) || $output eq 'd' ) { print D "\n"; print D "static const struct itemplate instrux[] = {\n"; + $n = 0; foreach $j (@big) { - print D " $j\n"; + printf D " /* %4d */ %s\n", $n++, $j; } - print D " ITEMPLATE_END\n};\n\n"; + print D " ITEMPLATE_END\n};\n\n"; for ($c=0; $c<256; $c++) { $h = sprintf "%02X", $c; From 24196047b52a410c281248796164187c275cf768 Mon Sep 17 00:00:00 2001 From: Frank Kotler Date: Tue, 18 Sep 2007 02:06:09 -0400 Subject: [PATCH 08/29] add "const" to output/outdbg.c apparently we missed that when updating "const" in other files --- output/outdbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/output/outdbg.c b/output/outdbg.c index 045ad371..0dda75a6 100644 --- a/output/outdbg.c +++ b/output/outdbg.c @@ -100,7 +100,7 @@ static void dbg_deflabel(char *name, int32_t segment, int32_t offset, is_global, special ? ": " : "", special); } -static void dbg_out(int32_t segto, void *data, uint32_t type, +static void dbg_out(int32_t segto, const void *data, uint32_t type, int32_t segment, int32_t wrt) { int32_t realbytes = type & OUT_SIZMASK; From 3ce3715fba17d5ad54278a42dd0fe75a5b0ebc8d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 12:23:21 -0700 Subject: [PATCH 09/29] SSE5 instruction table Implement the full SSE5 instruction table. 
--- insns.dat | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/insns.dat b/insns.dat index fcf0bec8..5214ee52 100644 --- a/insns.dat +++ b/insns.dat @@ -2022,6 +2022,24 @@ POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM POPCNT reg64,rm32 \324\333\2\x0F\xB8\110 NEHALEM,X64 ; AMD SSE5 instructions + +; Four operands with DREX +FMADDPS xmmreg,=0,xmmreg,xmmrm \160\2\x0F\x24\170\132 SSE5,AMD +FMADDPS xmmreg,=0,xmmrm,xmmreg \164\2\x0F\x24\170\123 SSE5,AMD +FMADDPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x04\121 SSE5,AMD +FMADDPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x04\112 SSE5,AMD +FMADDPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x01\132 SSE5,AMD +FMADDPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x01\123 SSE5,AMD +FMADDPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x05\121 SSE5,AMD +FMADDPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x05\112 SSE5,AMD +FMADDSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x02\132 SSE5,AMD +FMADDSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x02\123 SSE5,AMD +FMADDSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x06\121 SSE5,AMD +FMADDSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x06\112 SSE5,AMD +FMADDSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x03\132 SSE5,AMD +FMADDSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x03\123 SSE5,AMD +FMADDSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x07\121 SSE5,AMD +FMADDSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x07\112 SSE5,AMD FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5,AMD FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5,AMD FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5,AMD @@ -2038,3 +2056,133 @@ FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5,AMD FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5,AMD FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5,AMD FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5,AMD +FMNADDPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x10\132 SSE5,AMD +FMNADDPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x10\123 
SSE5,AMD +FMNADDPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x14\121 SSE5,AMD +FMNADDPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x14\112 SSE5,AMD +FMNADDPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x11\132 SSE5,AMD +FMNADDPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x11\123 SSE5,AMD +FMNADDPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x15\121 SSE5,AMD +FMNADDPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x15\112 SSE5,AMD +FMNADDSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x12\132 SSE5,AMD +FMNADDSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x12\123 SSE5,AMD +FMNADDSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x16\121 SSE5,AMD +FMNADDSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x16\112 SSE5,AMD +FMNADDSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x13\132 SSE5,AMD +FMNADDSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x13\123 SSE5,AMD +FMNADDSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x17\121 SSE5,AMD +FMNADDSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x17\112 SSE5,AMD +FMNSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x18\132 SSE5,AMD +FMNSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x18\123 SSE5,AMD +FMNSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1C\121 SSE5,AMD +FMNSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1C\112 SSE5,AMD +FMNSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x19\132 SSE5,AMD +FMNSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x19\123 SSE5,AMD +FMNSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1D\121 SSE5,AMD +FMNSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1D\112 SSE5,AMD +FMNSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x1A\132 SSE5,AMD +FMNSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x1A\123 SSE5,AMD +FMNSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1E\121 SSE5,AMD +FMNSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1E\112 SSE5,AMD +FMNSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x1B\132 SSE5,AMD +FMNSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x1B\123 SSE5,AMD +FMNSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1F\121 SSE5,AMD +FMNSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1F\112 SSE5,AMD +COMPS xmmreg,xmmreg,xmmrm,imm 
\160\3\x0F\x25\x2C\121\27 SSE5,AMD +COMPD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2D\121\27 SSE5,AMD +COMSS xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2E\121\27 SSE5,AMD +COMSD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2F\121\27 SSE5,AMD +PCOMB xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4C\121\27 SSE5,AMD +PCOMW xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4D\121\27 SSE5,AMD +PCOMD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4E\121\27 SSE5,AMD +PCOMQ xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4F\121\27 SSE5,AMD +PCOMUB xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6C\121\27 SSE5,AMD +PCOMUW xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6D\121\27 SSE5,AMD +PCOMUD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6E\121\27 SSE5,AMD +PCOMUQ xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6F\121\27 SSE5,AMD +PERMPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x20\132 SSE5,AMD +PERMPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x20\123 SSE5,AMD +PERMPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x24\121 SSE5,AMD +PERMPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x24\112 SSE5,AMD +PERMPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x21\132 SSE5,AMD +PERMPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x21\123 SSE5,AMD +PERMPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x25\121 SSE5,AMD +PERMPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x25\112 SSE5,AMD +PCMOV xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x22\132 SSE5,AMD +PCMOV xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x22\123 SSE5,AMD +PCMOV xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x26\121 SSE5,AMD +PCMOV xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x26\112 SSE5,AMD +PPERM xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x23\132 SSE5,AMD +PPERM xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x23\123 SSE5,AMD +PPERM xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x27\121 SSE5,AMD +PPERM xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x27\112 SSE5,AMD +PMACSSWW xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x85\121 SSE5,AMD +PMACSWW xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x95\121 SSE5,AMD +PMACSSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x86\121 SSE5,AMD +PMACSWD xmmreg,xmmreg,xmmrm,=0 
\160\3\x0F\x24\x96\121 SSE5,AMD +PMACSSDD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x8E\121 SSE5,AMD +PMACSDD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x9E\121 SSE5,AMD +PMACSSDQL xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x87\121 SSE5,AMD +PMACSDQL xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x97\121 SSE5,AMD +PMACSSDQH xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x8F\121 SSE5,AMD +PMACSDQH xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x9F\121 SSE5,AMD +PMADCSSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\xA6\121 SSE5,AMD +PMADCSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\xB6\121 SSE5,AMD + +; Three operands with DREX +PROTB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x40\121 SSE5,AMD +PROTB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x40\112 SSE5,AMD +PROTW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x41\121 SSE5,AMD +PROTW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x41\112 SSE5,AMD +PROTD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x42\121 SSE5,AMD +PROTD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x42\112 SSE5,AMD +PROTQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x43\121 SSE5,AMD +PROTQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x43\112 SSE5,AMD +PSHLB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x44\121 SSE5,AMD +PSHLB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x44\112 SSE5,AMD +PSHLW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x45\121 SSE5,AMD +PSHLW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x45\112 SSE5,AMD +PSHLD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x46\121 SSE5,AMD +PSHLD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x46\112 SSE5,AMD +PSHLQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x47\121 SSE5,AMD +PSHLQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x47\112 SSE5,AMD +PSHAB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x48\121 SSE5,AMD +PSHAB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x48\112 SSE5,AMD +PSHAW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x49\121 SSE5,AMD +PSHAW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x49\112 SSE5,AMD +PSHAD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x4A\121 SSE5,AMD +PSHAD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x4A\112 SSE5,AMD +PSHAQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x4B\121 SSE5,AMD +PSHAQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x4B\112 SSE5,AMD 
+ +; Non-DREX +FRCZPS xmmreg,xmmrm \3\x0F\x7A\x10\110 SSE5,AMD +FRCZPD xmmreg,xmmrm \3\x0F\x7A\x11\110 SSE5,AMD +FRCZSS xmmreg,xmmrm \3\x0F\x7A\x12\110 SSE5,AMD +FRCZSD xmmreg,xmmrm \3\x0F\x7A\x13\110 SSE5,AMD +CVTPH2PS xmmreg,xmmrm \3\x0F\x7A\x30\110 SSE5,AMD,SQ +CVTPS2PH xmmrm,xmmreg \3\x0F\x7A\x31\101 SSE5,AMD,SQ +PHADDBW xmmreg,xmmrm \3\x0F\x7A\x41\110 SSE5,AMD +PHADDBD xmmreg,xmmrm \3\x0F\x7A\x42\110 SSE5,AMD +PHADDBQ xmmreg,xmmrm \3\x0F\x7A\x43\110 SSE5,AMD +PHADDWD xmmreg,xmmrm \3\x0F\x7A\x46\110 SSE5,AMD +PHADDWQ xmmreg,xmmrm \3\x0F\x7A\x47\110 SSE5,AMD +PHADDDQ xmmreg,xmmrm \3\x0F\x7A\x4B\110 SSE5,AMD +PHADDUBW xmmreg,xmmrm \3\x0F\x7A\x51\110 SSE5,AMD +PHADDUBD xmmreg,xmmrm \3\x0F\x7A\x52\110 SSE5,AMD +PHADDUBQ xmmreg,xmmrm \3\x0F\x7A\x53\110 SSE5,AMD +PHADDUWD xmmreg,xmmrm \3\x0F\x7A\x56\110 SSE5,AMD +PHADDUWQ xmmreg,xmmrm \3\x0F\x7A\x57\110 SSE5,AMD +PHADDUDQ xmmreg,xmmrm \3\x0F\x7A\x5B\110 SSE5,AMD +PHSUBBW xmmreg,xmmrm \3\x0F\x7A\x61\110 SSE5,AMD +PHSUBWD xmmreg,xmmrm \3\x0F\x7A\x62\110 SSE5,AMD +PHSUBDQ xmmreg,xmmrm \3\x0F\x7A\x63\110 SSE5,AMD +PROTB xmmreg,xmmrm,imm \3\x0F\x7B\x40\110\26 SSE5,AMD +PROTW xmmreg,xmmrm,imm \3\x0F\x7B\x41\110\26 SSE5,AMD +PROTD xmmreg,xmmrm,imm \3\x0F\x7B\x42\110\26 SSE5,AMD +PROTQ xmmreg,xmmrm,imm \3\x0F\x7B\x43\110\26 SSE5,AMD +PTEST xmmreg,xmmrm \366\3\x0F\x38\x17\110 SSE5,AMD +ROUNDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD +ROUNDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD +ROUNDSS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD +ROUNDSD xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD From 5255fd1f36eece1cbf4000ffc3120dbcb9bf5038 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 12:38:07 -0700 Subject: [PATCH 10/29] Change the token prehash function for better convergence Combining arithmetic (add) and bitwise (xor) mixing seems to give better results than either. With the new prehash function, we find a valid hash much quicker.
--- perllib/phash.ph | 4 ++-- pptok.pl | 4 ++-- tokhash.pl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/perllib/phash.ph b/perllib/phash.ph index 60334272..3bb3a05b 100644 --- a/perllib/phash.ph +++ b/perllib/phash.ph @@ -42,8 +42,8 @@ sub prehash($$$) { foreach $c (unpack("C*", $key)) { $ko1 = $k1; $ko2 = $k2; - $k1 = int32(rot($ko1,$s0)-rot($ko2, $s1)+$c); - $k2 = int32(rot($ko2,$s2)-rot($ko1, $s3)+$c); + $k1 = int32(rot($ko1,$s0)^int32(rot($ko2, $s1)+$c)); + $k2 = int32(rot($ko2,$s2)^int32(rot($ko1, $s3)+$c)); } # Create a bipartite graph... diff --git a/pptok.pl b/pptok.pl index a0425b7c..a835bf3e 100755 --- a/pptok.pl +++ b/pptok.pl @@ -191,8 +191,8 @@ if ($what eq 'c') { print OUT " while ((c = *p++) != 0) {\n"; print OUT " uint32_t kn1, kn2;\n"; print OUT " c |= 0x20; /* convert to lower case */\n"; - printf OUT " kn1 = rot(k1,%2d) - rot(k2,%2d) + c;\n", ${$sv}[0], ${$sv}[1]; - printf OUT " kn2 = rot(k2,%2d) - rot(k1,%2d) + c;\n", ${$sv}[2], ${$sv}[3]; + printf OUT " kn1 = rot(k1,%2d)^(rot(k2,%2d) + c);\n", ${$sv}[0], ${$sv}[1]; + printf OUT " kn2 = rot(k2,%2d)^(rot(k1,%2d) + c);\n", ${$sv}[2], ${$sv}[3]; print OUT " k1 = kn1; k2 = kn2;\n"; print OUT " }\n"; print OUT "\n"; diff --git a/tokhash.pl b/tokhash.pl index 5f1a9f4c..9d5888be 100755 --- a/tokhash.pl +++ b/tokhash.pl @@ -187,8 +187,8 @@ print " const char *p = token;\n"; print "\n"; print " while ((c = *p++) != 0) {\n"; -printf " uint32_t kn1 = rot(k1,%2d) - rot(k2,%2d) + c;\n", ${$sv}[0], ${$sv}[1]; -printf " uint32_t kn2 = rot(k2,%2d) - rot(k1,%2d) + c;\n", ${$sv}[2], ${$sv}[3]; +printf " uint32_t kn1 = rot(k1,%2d)^(rot(k2,%2d) + c);\n", ${$sv}[0], ${$sv}[1]; +printf " uint32_t kn2 = rot(k2,%2d)^(rot(k1,%2d) + c);\n", ${$sv}[2], ${$sv}[3]; print " k1 = kn1; k2 = kn2;\n"; print " }\n"; print "\n"; From 41c9f6fde06091199f1a95e0c045230baaa25bf4 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Tue, 18 Sep 2007 13:01:32 -0700 Subject: [PATCH 11/29] Implement "oword" (128 bits) as a first-class size Implement oword, reso, do, as well as the SO flag to instructions. No instructions are actually flagged with SO yet, but this allows us to specify 128-bit sizes in instruction patterns. --- assemble.c | 7 +++++++ insns.dat | 26 ++++++++++++++++---------- insns.h | 1 + nasm.h | 10 ++++++---- parser.c | 35 +++++++++++++++++++++++++---------- tokens.dat | 1 + 6 files changed, 56 insertions(+), 24 deletions(-) diff --git a/assemble.c b/assemble.c index ec3b1124..e5384548 100644 --- a/assemble.c +++ b/assemble.c @@ -1720,6 +1720,9 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) case IF_SQ: size[i] = BITS64; break; + case IF_SO: + size[i] = BITS128; + break; default: break; } @@ -1742,6 +1745,10 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) asize = BITS64; oprs = itemp->operands; break; + case IF_SO: + asize = BITS128; + oprs = itemp->operands; + break; default: break; } diff --git a/insns.dat b/insns.dat index 5214ee52..f95b157e 100644 --- a/insns.dat +++ b/insns.dat @@ -14,6 +14,22 @@ ; see the comment at the top of assemble.c. For a detailed description ; of the flags (fourth field), please see insns.h. ; + +; Special instructions... 
+DB ignore ignore ignore +DW ignore ignore ignore +DD ignore ignore ignore +DQ ignore ignore ignore +DT ignore ignore ignore +DO ignore ignore ignore +RESB imm \340 8086 +RESW ignore ignore ignore +RESD ignore ignore ignore +RESQ ignore ignore ignore +REST ignore ignore ignore +RESO ignore ignore ignore + +; Conventional instructions AAA void \1\x37 8086,NOLONG AAD void \2\xD5\x0A 8086,NOLONG AAD imm \1\xD5\24 8086,SB,NOLONG @@ -270,8 +286,6 @@ CWD void \320\1\x99 8086 CWDE void \321\1\x98 386 DAA void \1\x27 8086,NOLONG DAS void \1\x2F 8086,NOLONG -DB ignore ignore ignore -DD ignore ignore ignore DEC reg16 \320\10\x48 8086,NOLONG DEC reg32 \321\10\x48 386,NOLONG DEC rm8 \300\1\xFE\201 8086 @@ -282,9 +296,6 @@ DIV rm8 \300\1\xF6\206 8086 DIV rm16 \320\300\1\xF7\206 8086 DIV rm32 \321\300\1\xF7\206 386 DIV rm64 \324\300\1\xF7\206 X64 -DQ ignore ignore ignore -DT ignore ignore ignore -DW ignore ignore ignore EMMS void \2\x0F\x77 PENT,MMX ENTER imm,imm \1\xC8\30\25 186 EQU imm \0 8086 @@ -1029,11 +1040,6 @@ RDMSR void \2\x0F\x32 PENT,PRIV RDPMC void \2\x0F\x33 P6 RDTSC void \2\x0F\x31 PENT RDTSCP void \3\x0F\x01\xF9 X64 -RESB imm \340 8086 -RESD ignore ignore ignore -RESQ ignore ignore ignore -REST ignore ignore ignore -RESW ignore ignore ignore RET void \1\xC3 8086 RET imm \1\xC2\30 8086,SW RETF void \1\xCB 8086 diff --git a/insns.h b/insns.h index c7fa75a0..b5d6caf7 100644 --- a/insns.h +++ b/insns.h @@ -68,6 +68,7 @@ extern const struct itemplate * const * const itable[]; #define IF_SW 0x00000008UL /* unsized operands can't be non-word */ #define IF_SD 0x0000000CUL /* unsized operands can't be non-dword */ #define IF_SQ 0x00000010UL /* unsized operands can't be non-qword */ +#define IF_SO 0x00000014UL /* unsized operands can't be non-oword */ #define IF_SMASK 0x0000001CUL /* mask for unsized argument size */ #define IF_AR0 0x00000020UL /* SB, SW, SD applies to argument 0 */ #define IF_AR1 0x00000040UL /* SB, SW, SD applies to argument 1 */ diff --git a/nasm.h 
b/nasm.h index f5d64946..f4afad36 100644 --- a/nasm.h +++ b/nasm.h @@ -375,7 +375,7 @@ enum { * * The bits are assigned as follows: * - * Bits 0-7: sizes + * Bits 0-7, 29: sizes * 0: 8 bits (BYTE) * 1: 16 bits (WORD) * 2: 32 bits (DWORD) @@ -384,6 +384,7 @@ enum { * 5: FAR * 6: NEAR * 7: SHORT + * 29: 128 bits (OWORD) * * Bits 8-11 modifiers * 8: TO @@ -454,12 +455,13 @@ typedef uint32_t opflags_t; #define BITS32 0x00000004L #define BITS64 0x00000008L /* x64 and FPU only */ #define BITS80 0x00000010L /* FPU only */ +#define BITS128 0x20000000L #define FAR 0x00000020L /* grotty: this means 16:16 or */ /* 16:32, like in CALL/JMP */ #define NEAR 0x00000040L #define SHORT 0x00000080L /* and this means what it says :) */ -#define SIZE_MASK 0x000000FFL /* all the size attributes */ +#define SIZE_MASK 0x200000FFL /* all the size attributes */ /* Modifiers */ #define MODIFIER_MASK 0x00000f00L @@ -959,8 +961,8 @@ struct dfmt { */ enum special_tokens { - S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, S_QWORD, S_REL, - S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD + S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, + S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD }; /* diff --git a/parser.c b/parser.c index 16164d77..ca12a097 100644 --- a/parser.c +++ b/parser.c @@ -175,23 +175,25 @@ insn *parse_line(int pass, char *buffer, insn * result, * For the moment, EQU has the same difficulty, so we'll * include that. */ - if (result->opcode == I_RESB || result->opcode == I_RESW || result->opcode == I_RESD || result->opcode == I_RESQ || result->opcode == I_REST || result->opcode == I_EQU || result->opcode == I_INCBIN) { /* fbk */ + if (result->opcode == I_RESB || result->opcode == I_RESW || + result->opcode == I_RESD || result->opcode == I_RESQ || + result->opcode == I_REST || result->opcode == I_RESO || + result->opcode == I_EQU || result->opcode == I_INCBIN) { critical = pass0; } else critical = (pass == 2 ? 
2 : 0); - if (result->opcode == I_DB || - result->opcode == I_DW || - result->opcode == I_DD || - result->opcode == I_DQ || - result->opcode == I_DT || result->opcode == I_INCBIN) { + if (result->opcode == I_DB || result->opcode == I_DW || + result->opcode == I_DD || result->opcode == I_DQ || + result->opcode == I_DT || result->opcode == I_DO || + result->opcode == I_INCBIN) { extop *eop, **tail = &result->eops, **fixptr; int oper_num = 0; result->eops_float = FALSE; /* - * Begin to read the DB/DW/DD/DQ/DT/INCBIN operands. + * Begin to read the DB/DW/DD/DQ/DT/DO/INCBIN operands. */ while (1) { i = stdscan(NULL, &tokval); @@ -234,6 +236,8 @@ insn *parse_line(int pass, char *buffer, insn * result, eop->stringlen = 8; else if (result->opcode == I_DT) eop->stringlen = 10; + else if (result->opcode == I_DO) + eop->stringlen = 16; else { error(ERR_NONFATAL, "floating-point constant" " encountered in `D%c' instruction", @@ -245,8 +249,7 @@ insn *parse_line(int pass, char *buffer, insn * result, */ eop->stringlen = 0; } - eop = - nasm_realloc(eop, sizeof(extop) + eop->stringlen); + eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen); tail = &eop->next; *fixptr = eop; eop->stringval = (char *)eop + sizeof(extop); @@ -384,6 +387,11 @@ insn *parse_line(int pass, char *buffer, insn * result, result->oprs[operand].type |= BITS80; setsize = 1; break; + case S_OWORD: + if (!setsize) + result->oprs[operand].type |= BITS128; + setsize = 1; + break; case S_TO: result->oprs[operand].type |= TO; break; @@ -440,6 +448,9 @@ insn *parse_line(int pass, char *buffer, insn * result, case S_TWORD: result->oprs[operand].type |= BITS80; break; + case S_OWORD: + result->oprs[operand].type |= BITS128; + break; default: error(ERR_NONFATAL, "invalid operand size specification"); @@ -751,7 +762,7 @@ insn *parse_line(int pass, char *buffer, insn * result, result->oprs[operand++].type = 0; /* - * Transform RESW, RESD, RESQ, REST into RESB. + * Transform RESW, RESD, RESQ, REST, RESO into RESB. 
*/ switch (result->opcode) { case I_RESW: @@ -770,6 +781,10 @@ insn *parse_line(int pass, char *buffer, insn * result, result->opcode = I_RESB; result->oprs[0].offset *= 10; break; + case I_RESO: + result->opcode = I_RESB; + result->oprs[0].offset *= 16; + break; default: break; } diff --git a/tokens.dat b/tokens.dat index 6acaba49..c84b8fb3 100644 --- a/tokens.dat +++ b/tokens.dat @@ -23,6 +23,7 @@ far long near nosplit +oword qword rel short From 0edc309505e659345cf353f81fb77793f8f5c291 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 13:45:12 -0700 Subject: [PATCH 12/29] Document oword, do and reso Document oword and the associated do and reso pseudoinstructions. --- doc/nasmdoc.src | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 13ae013d..2530b2b5 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1115,19 +1115,19 @@ indicate what size of \i{memory operand} it refers to. \H{pseudop} \i{Pseudo-Instructions} Pseudo-instructions are things which, though not real x86 machine -instructions, are used in the instruction field anyway because -that's the most convenient place to put them. The current -pseudo-instructions are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and -\i\c{DT}, their \i{uninitialized} counterparts \i\c{RESB}, -\i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST}, the \i\c{INCBIN} +instructions, are used in the instruction field anyway because that's +the most convenient place to put them. The current pseudo-instructions +are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT} and \i\c{DO}; +their \i{uninitialized} counterparts \i\c{RESB}, \i\c{RESW}, +\i\c{RESD}, \i\c{RESQ}, \i\c{REST} and \i\c{RESO}; the \i\c{INCBIN} command, the \i\c{EQU} command, and the \i\c{TIMES} prefix. 
\S{db} \c{DB} and friends: Declaring initialized Data -\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and \i\c{DT} are used, much -as in MASM, to declare initialized data in the output file. They can -be invoked in a wide range of ways: +\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT} and \i\c{DO} are +used, much as in MASM, to declare initialized data in the output +file. They can be invoked in a wide range of ways: \I{floating-point}\I{character constant}\I{string constant} \c db 0x55 ; just the byte 0x55 @@ -1144,20 +1144,20 @@ be invoked in a wide range of ways: \c dq 1.234567e20 ; double-precision float \c dt 1.234567e20 ; extended-precision float -\c{DT} does not accept \i{numeric constants} as operands. +\c{DT} and \c{DO} do not accept \i{numeric constants} as operands. \S{resb} \c{RESB} and friends: Declaring \i{Uninitialized} Data -\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST} are -designed to be used in the BSS section of a module: they declare -\e{uninitialized} storage space. Each takes a single operand, which -is the number of bytes, words, doublewords or whatever to reserve. -As stated in \k{qsother}, NASM does not support the MASM/TASM syntax -of reserving uninitialized space by writing \I\c{?}\c{DW ?} or -similar things: this is what it does instead. The operand to a -\c{RESB}-type pseudo-instruction is a \i\e{critical expression}: see -\k{crit}. +\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ}, \i\c{REST} and +\i\c{RESO} are designed to be used in the BSS section of a module: +they declare \e{uninitialized} storage space. Each takes a single +operand, which is the number of bytes, words, doublewords or whatever +to reserve. As stated in \k{qsother}, NASM does not support the +MASM/TASM syntax of reserving uninitialized space by writing +\I\c{?}\c{DW ?} or similar things: this is what it does instead. The +operand to a \c{RESB}-type pseudo-instruction is a \i\e{critical +expression}: see \k{crit}. 
For example: @@ -1560,11 +1560,11 @@ invent one using the macro processor. When assembling with the optimizer set to level 2 or higher (see \k{opt-On}), NASM will use size specifiers (\c{BYTE}, \c{WORD}, -\c{DWORD}, \c{QWORD}, or \c{TWORD}), but will give them the smallest -possible size. The keyword \c{STRICT} can be used to inhibit +\c{DWORD}, \c{QWORD}, \c{TWORD} or \c{OWORD}), but will give them the +smallest possible size. The keyword \c{STRICT} can be used to inhibit optimization and force a particular operand to be emitted in the -specified size. For example, with the optimizer on, and in -\c{BITS 16} mode, +specified size. For example, with the optimizer on, and in \c{BITS 16} +mode, \c push dword 33 From 19e201053689be68d0e45077fa86e9538d74daa1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 15:08:20 -0700 Subject: [PATCH 13/29] Speed up the disassembler by allowing prefixed instruction tables Modify the disassembler so that we can have separate instruction tables for prefixed instructions. As it was, all instructions which started with 0F were linearly searched, and that is by now more than half the instruction set. 
--- disasm.c | 13 ++++- insns.h | 11 +++- insns.pl | 151 +++++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 139 insertions(+), 36 deletions(-) diff --git a/disasm.c b/disasm.c index 3a8f710d..a6c1c729 100644 --- a/disasm.c +++ b/disasm.c @@ -671,9 +671,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, int32_t offset, int autosync, uint32_t prefer) { const struct itemplate * const *p, * const *best_p; + const struct disasm_index *ix; + uint8_t *dp; int length, best_length = 0; char *segover; - int i, slen, colon; + int i, slen, colon, n; uint8_t *origdata; int works; insn tmp_ins, ins; @@ -728,7 +730,14 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, best_p = NULL; best_pref = INT_MAX; - for (p = itable[*data]; *p; p++) { + dp = data; + ix = itable + *dp++; + while (ix->n == (size_t)-1) { + ix = (const struct disasm_index *)ix->p + *dp++; + } + + p = (const struct itemplate * const *)ix->p; + for (n = ix->n; n; n--, p++) { if ((length = matches(*p, data, &prefix, segsize, &tmp_ins))) { works = TRUE; /* diff --git a/insns.h b/insns.h index b5d6caf7..b025c7a5 100644 --- a/insns.h +++ b/insns.h @@ -26,9 +26,18 @@ struct itemplate { uint32_t flags; /* some flags */ }; +/* Disassembler table structure */ +/* If n == -1, then p points to another table of 256 + struct disasm_index, otherwise p points to a list of n + struct itemplates to consider. */ +struct disasm_index { + const void *p; + int n; +}; + /* Tables for the assembler and disassembler, respectively */ extern const struct itemplate * const nasm_instructions[]; -extern const struct itemplate * const * const itable[]; +extern const struct disasm_index itable[256]; /* * this define is used to signify the end of an itemplate diff --git a/insns.pl b/insns.pl index 6e961dec..c5f280c6 100644 --- a/insns.pl +++ b/insns.pl @@ -7,6 +7,10 @@ # redistributable under the licence given in the file "Licence" # distributed in the NASM archive. 
+# Opcode prefixes which need their own opcode tables +# LONGER PREFIXES FIRST! +@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0FC2 0F); + print STDERR "Reading insns.dat...\n"; @args = (); @@ -26,6 +30,8 @@ foreach $arg ( @ARGV ) { $fname = "insns.dat" unless $fname = $args[0]; open (F, $fname) || die "unable to open $fname"; +%dinstables = (); + $line = 0; $insns = 0; while () { @@ -50,9 +56,11 @@ while () { } if ($formatted && !$nd) { push @big, $formatted; - foreach $i (&startbyte($_[2])) { - $aname = sprintf "dd_%02X",$i; - push @$aname, $#big; + foreach $i (startseq($_[2])) { + if (!defined($dinstables{$i})) { + $dinstables{$i} = []; + } + push(@{$dinstables{$i}}, $#big); } } } @@ -106,23 +114,38 @@ if ( !defined($output) || $output eq 'd' ) { foreach $j (@big) { printf D " /* %4d */ %s\n", $n++, $j; } - print D " ITEMPLATE_END\n};\n\n"; - - for ($c=0; $c<256; $c++) { - $h = sprintf "%02X", $c; - print D "static const struct itemplate * const itable_${h}[] = {\n"; - $aname = "dd_$h"; - foreach $j (@$aname) { + print D "};\n"; + + foreach $h (sort(keys(%dinstables))) { + print D "\nstatic const struct itemplate * const itable_${h}[] = {\n"; + foreach $j (@{$dinstables{$h}}) { print D " instrux + $j,\n"; } - print D " NULL\n};\n\n"; - } - - print D "const struct itemplate * const * const itable[] = {\n"; - for ($c=0; $c<256; $c++) { - printf D " itable_%02X,\n", $c; + print D "};\n"; } + + foreach $h (@disasm_prefixes, '') { + $is_prefix{$h} = 1; + print D "\n"; + print D "static " unless ($h eq ''); + print D "const struct disasm_index "; + print D ($h eq '') ? 
'itable' : "itable_$h"; + print D "[256] = {\n"; + for ($c = 0; $c < 256; $c++) { + $nn = sprintf("%s%02X", $h, $c); + if ($is_prefix{$nn}) { + die "$0: ambiguous decoding of $nn\n" + if (defined($dinstables{$nn})); + printf D " { itable_%s, -1 },\n", $nn; + } elsif (defined($dinstables{$nn})) { + printf D " { itable_%s, %u },\n", + $nn, scalar(@{$dinstables{$nn}}); + } else { + printf D " { NULL, 0 },\n"; + } + } print D "};\n"; + } close D; } @@ -240,6 +263,17 @@ sub format { ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); } +sub hexlist($$$) { + my($prefix, $start, $n) = @_; + my $i; + my @l = (); + + for ($i = 0; $i < $n; $i++) { + push(@l, sprintf("%s%02X", $prefix, $start+$i)); + } + return @l; +} + # Here we determine the range of possible starting bytes for a given # instruction. We need only consider the codes: # \1 \2 \3 mean literal bytes, of course @@ -248,24 +282,75 @@ sub format { # \170 means byte zero # \330 means byte plus condition code # \0 or \340 mean give up and return empty set -sub startbyte { - my ($codes) = @_; +sub startseq($) { + my ($codestr) = @_; my $word, @range; + my @codes = (); + my $c = $codestr; + my $c0, $c1, $i; + my $prefix = ''; - while (1) { - die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/; - $word = $1, $codes = $2; - return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/; - return (0x07, 0x17, 0x1F) if $word eq "\\4"; - return (0xA1, 0xA9) if $word eq "\\5"; - return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6"; - return (0xA0, 0xA8) if $word eq "\\7"; - $start=hex $1, $r=8, last if $word =~ /^\\1[0123]$/ && $codes =~/^\\x(..)/; - return (0) if $word eq "\\170"; - $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/; - return () if $word eq "\\0" || $word eq "\\340"; + # Although these are C-syntax strings, by convention they should have + # only octal escapes (for directives) and hexadecimal escapes + # (for verbatim bytes) + while ($c ne '') { + if ($c =~ 
/^\\x([0-9a-f]+)(.*)$/i) { + push(@codes, hex $1); + $c = $2; + next; + } elsif ($c =~ /^\\([0-7]{1,3})(.*)$/) { + push(@codes, oct $1); + $c = $2; + next; + } else { + die "$0: unknown code format in \"$codestr\"\n"; + } } - @range = (); - push @range, $start++ while ($r-- > 0); - @range; + + while ($c0 = shift(@codes)) { + $c1 = $codes[0]; + if ($c0 == 01 || $c0 == 02 || $c0 == 03 || $c0 == 0170) { + # Fixed byte string + my $fbs = $prefix; + while (1) { + if ($c0 == 01 || $c0 == 02 || $c0 == 03) { + while ($c0--) { + $fbs .= sprintf("%02X", shift(@codes)); + } + } elsif ($c0 == 0170) { + $fbs .= '00'; + } else { + last; + } + $c0 = shift(@codes); + } + + foreach $pfx (@disasm_prefixes) { + if ($fbs =~ /^$pfx(.*)$/) { + $prefix = $pfx; + $fbs = $1; + last; + } + } + + if ($fbs ne '') { + return ($prefix.substr($fbs,0,2)); + } + } elsif ($c0 == 04) { + return ("07", "17", "1F"); + } elsif ($c0 == 05) { + return ("A1", "A9"); + } elsif ($c0 == 06) { + return ("06", "0E", "16", "1E"); + } elsif ($c0 == 07) { + return ("A0", "A8"); + } elsif ($c0 >= 010 && $c0 <= 013) { + return hexlist($prefix, $c1, 8); + } elsif ($c0 == 0330) { + return hexlist($prefix, $c1, 16); + } elsif ($c0 == 0 || $c0 == 0340) { + return (); + } + } + return (); } From 76815bf60b5db3bb0f9711920562ea4afc3f5c85 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 15:24:38 -0700 Subject: [PATCH 14/29] Remove 0FC2 from list of instruction prefixes 0FC2 is not really an instruction prefix; it's the opcode for CMPPS/CMPSS, which takes a control immediate which Intel chose to have opcode aliases for. However, we can't dispatch on a tail byte, so it's useless. --- insns.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/insns.pl b/insns.pl index c5f280c6..356c183d 100644 --- a/insns.pl +++ b/insns.pl @@ -9,7 +9,7 @@ # Opcode prefixes which need their own opcode tables # LONGER PREFIXES FIRST! 
-@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0FC2 0F); +@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0F); print STDERR "Reading insns.dat...\n"; From 141d7cf68d60f6c77c078fea7ff85526db668c6f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 16:39:03 -0700 Subject: [PATCH 15/29] Support 16-bit IEEE floating point; used in SSE5 SSE5 supports standard IEEE 16-bit floating point, so we should support that too. --- float.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- parser.c | 24 ++++++++++++------- 2 files changed, 82 insertions(+), 13 deletions(-) diff --git a/float.c b/float.c index 099e23f2..afa84d2e 100644 --- a/float.c +++ b/float.c @@ -213,6 +213,7 @@ static int ieee_round(uint16_t *mant, int i) #define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) ) +/* 64-bit format with 52-bit mantissa and 11-bit exponent */ static int to_double(char *str, int32_t sign, uint8_t *result, efunc error) { @@ -275,6 +276,7 @@ static int to_double(char *str, int32_t sign, uint8_t *result, return 1; /* success */ } +/* 32-bit format with 23-bit mantissa and 8-bit exponent */ static int to_float(char *str, int32_t sign, uint8_t *result, efunc error) { @@ -330,6 +332,64 @@ static int to_float(char *str, int32_t sign, uint8_t *result, return 1; } +/* 16-bit format with 10-bit mantissa and 5-bit exponent. + Defined in IEEE 754r. Used in SSE5. See the AMD SSE5 manual, AMD + document number 43479. */ +static int to_float16(char *str, int32_t sign, uint8_t *result, + efunc error) +{ + uint16_t mant[MANT_WORDS]; + int32_t exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + ieee_flconvert(str, mant, &exponent, error); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -14 && exponent <= 16) { + /* + * Normalised. + */ + exponent += 15; + ieee_shr(mant, 5); + ieee_round(mant, 1); + if (mant[0] & 0x800) /* did we scale up by one? 
*/ + ieee_shr(mant, 1), exponent++; + mant[0] &= 0x3FF; /* remove leading one */ + put(result + 0, (exponent << 7) | mant[0] | sign); + } else if (exponent < -14 && exponent >= -24) { + /* + * Denormal. + */ + int shift = -(exponent + 8); + int sh = shift % 16, wds = shift / 16; + ieee_shr(mant, sh); + if (ieee_round(mant, 1 - wds) + || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { + ieee_shr(mant, 1); + if (sh == 0) + mant[0] |= 0x8000; + exponent++; + } + put(result + 0, (wds == 0 ? mant[0] : 0) | sign); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset(result, 0, 2); + } + } else { + memset(result, 0, 2); + } + return 1; +} + +/* 80-bit format with 64-bit mantissa *including an explicit integer 1* + and 15-bit exponent. */ static int to_ldoub(char *str, int32_t sign, uint8_t *result, efunc error) { @@ -394,13 +454,16 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result, int float_const(char *number, int32_t sign, uint8_t *result, int bytes, efunc error) { - if (bytes == 4) + switch (bytes) { + case 2: + return to_float16(number, sign, result, error); + case 4: return to_float(number, sign, result, error); - else if (bytes == 8) + case 8: return to_double(number, sign, result, error); - else if (bytes == 10) + case 10: return to_ldoub(number, sign, result, error); - else { + default: error(ERR_PANIC, "strange value %d passed to float_const", bytes); return 0; } diff --git a/parser.c b/parser.c index ca12a097..69ae3790 100644 --- a/parser.c +++ b/parser.c @@ -230,30 +230,36 @@ insn *parse_line(int pass, char *buffer, insn * result, if (i == TOKEN_FLOAT) { eop->type = EOT_DB_STRING; result->eops_float = TRUE; - if (result->opcode == I_DD) + switch (result->opcode) { + case I_DW: + eop->stringlen = 2; + break; + case I_DD: eop->stringlen = 4; - else if (result->opcode == I_DQ) + break; + case I_DQ: eop->stringlen = 8; - else if (result->opcode == I_DT) + break; + case I_DT: 
eop->stringlen = 10; - else if (result->opcode == I_DO) - eop->stringlen = 16; - else { + break; + default: error(ERR_NONFATAL, "floating-point constant" - " encountered in `D%c' instruction", - result->opcode == I_DW ? 'W' : 'B'); + " encountered in `d%c' instruction" + ? (result->opcode == I_DO) ? 'o' : 'b'); /* * fix suggested by Pedro Gimeno... original line * was: * eop->type = EOT_NOTHING; */ eop->stringlen = 0; + break; } eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen); tail = &eop->next; *fixptr = eop; eop->stringval = (char *)eop + sizeof(extop); - if (eop->stringlen < 4 || + if (!eop->stringlen || !float_const(tokval.t_charptr, sign, (uint8_t *)eop->stringval, eop->stringlen, error)) From cfbe7c3cc2dbdfe1268e2d0a19fc59b52cbcfcc5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 17:49:09 -0700 Subject: [PATCH 16/29] Fix handling of DO; support unary + for floating-point numbers Floating-point users generally expect to be able to use a unary plus. Fix support for the DO instruction in several places. 
--- assemble.c | 13 +++++++++---- parser.c | 18 +++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/assemble.c b/assemble.c index e5384548..efb02207 100644 --- a/assemble.c +++ b/assemble.c @@ -250,6 +250,9 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp, case I_DT: wsize = 10; break; + case I_DO: + wsize = 16; + break; default: break; } @@ -564,10 +567,9 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp, if (instruction->opcode == -1) return 0; - if (instruction->opcode == I_DB || - instruction->opcode == I_DW || - instruction->opcode == I_DD || - instruction->opcode == I_DQ || instruction->opcode == I_DT) { + if (instruction->opcode == I_DB || instruction->opcode == I_DW || + instruction->opcode == I_DD || instruction->opcode == I_DQ || + instruction->opcode == I_DT || instruction->opcode == I_DO) { extop *e; int32_t isize, osize, wsize = 0; /* placate gcc */ @@ -588,6 +590,9 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp, case I_DT: wsize = 10; break; + case I_DO: + wsize = 16; + break; default: break; } diff --git a/parser.c b/parser.c index 69ae3790..31c3612a 100644 --- a/parser.c +++ b/parser.c @@ -214,16 +214,18 @@ insn *parse_line(int pass, char *buffer, insn * result, continue; } - if ((i == TOKEN_FLOAT && is_comma_next()) || i == '-') { - int32_t sign = +1L; + if ((i == TOKEN_FLOAT && is_comma_next()) + || i == '-' || i == '+') { + int32_t sign = +1; - if (i == '-') { + if (i == '+' || i == '-') { char *save = stdscan_bufptr; + int token = i; + sign = (i == '-') ? 
-1 : 1; i = stdscan(NULL, &tokval); - sign = -1L; if (i != TOKEN_FLOAT || !is_comma_next()) { stdscan_bufptr = save; - i = tokval.t_type = '-'; + i = tokval.t_type = token; } } @@ -243,10 +245,12 @@ insn *parse_line(int pass, char *buffer, insn * result, case I_DT: eop->stringlen = 10; break; + case I_DO: + eop->stringlen = 16; + break; default: error(ERR_NONFATAL, "floating-point constant" - " encountered in `d%c' instruction" - ? (result->opcode == I_DO) ? 'o' : 'b'); + " encountered in `db' instruction"); /* * fix suggested by Pedro Gimeno... original line * was: From e31747e95bba75c7e27d0a76f0e385c6d12351e2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 17:50:34 -0700 Subject: [PATCH 17/29] Unify all standard IEEE floating-point formats; add 128-bit Unify all the standard IEEE formats into one function, add support for IEEE standard 128-bit floating point numbers. The 80-bit format is still special since it explicitly represents the integer portion. --- float.c | 206 +++++++++++++++---------------------------------- test/float.asm | 103 +++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 142 deletions(-) create mode 100644 test/float.asm diff --git a/float.c b/float.c index afa84d2e..a6ad3936 100644 --- a/float.c +++ b/float.c @@ -18,8 +18,8 @@ #define TRUE 1 #define FALSE 0 -#define MANT_WORDS 6 /* 64 bits + 32 for accuracy == 96 */ -#define MANT_DIGITS 28 /* 29 digits don't fit in 96 bits */ +#define MANT_WORDS 10 /* 112 bits + 48 for accuracy == 160 */ +#define MANT_DIGITS 49 /* 50 digits don't fit in 160 bits */ /* * guaranteed top bit of from is set @@ -47,9 +47,8 @@ static int ieee_multiply(uint16_t *to, uint16_t *from) temp[i] &= 0xFFFF; } if (temp[0] & 0x8000) { - for (i = 0; i < MANT_WORDS; i++) - to[i] = temp[i] & 0xFFFF; - return 0; + memcpy(to, temp, 2*MANT_WORDS); + return 0; } else { for (i = 0; i < MANT_WORDS; i++) to[i] = (temp[i] << 1) + !!(temp[i + 1] & 0x8000); @@ -213,75 +212,33 @@ static int 
ieee_round(uint16_t *mant, int i) #define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) ) -/* 64-bit format with 52-bit mantissa and 11-bit exponent */ -static int to_double(char *str, int32_t sign, uint8_t *result, - efunc error) -{ - uint16_t mant[MANT_WORDS]; - int32_t exponent; +/* Produce standard IEEE formats, with implicit "1" bit; this makes + the following assumptions: - sign = (sign < 0 ? 0x8000L : 0L); + - the sign bit is the MSB, followed by the exponent. + - the sign bit plus exponent fit in 16 bits. + - the exponent bias is 2^(n-1)-1 for an n-bit exponent */ - ieee_flconvert(str, mant, &exponent, error); - if (mant[0] & 0x8000) { - /* - * Non-zero. - */ - exponent--; - if (exponent >= -1022 && exponent <= 1024) { - /* - * Normalised. - */ - exponent += 1023; - ieee_shr(mant, 11); - ieee_round(mant, 4); - if (mant[0] & 0x20) /* did we scale up by one? */ - ieee_shr(mant, 1), exponent++; - mant[0] &= 0xF; /* remove leading one */ - put(result + 6, (exponent << 4) | mant[0] | sign); - put(result + 4, mant[1]); - put(result + 2, mant[2]); - put(result + 0, mant[3]); - } else if (exponent < -1022 && exponent >= -1074) { - /* - * Denormal. - */ - int shift = -(exponent + 1011); - int sh = shift % 16, wds = shift / 16; - ieee_shr(mant, sh); - if (ieee_round(mant, 4 - wds) - || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { - ieee_shr(mant, 1); - if (sh == 0) - mant[0] |= 0x8000; - exponent++; - } - put(result + 6, (wds == 0 ? mant[0] : 0) | sign); - put(result + 4, (wds <= 1 ? mant[1 - wds] : 0)); - put(result + 2, (wds <= 2 ? mant[2 - wds] : 0)); - put(result + 0, (wds <= 3 ? mant[3 - wds] : 0)); - } else { - if (exponent > 0) { - error(ERR_NONFATAL, "overflow in floating-point constant"); - return 0; - } else - memset(result, 0, 8); - } - } else { - /* - * Zero. 
- */ - memset(result, 0, 8); - } - return 1; /* success */ -} +struct ieee_format { + int words; + int mantissa; /* Bits in the mantissa */ + int exponent; /* Bits in the exponent */ +}; -/* 32-bit format with 23-bit mantissa and 8-bit exponent */ +static const struct ieee_format ieee_16 = { 1, 10, 5 }; +static const struct ieee_format ieee_32 = { 2, 23, 8 }; +static const struct ieee_format ieee_64 = { 4, 52, 11 }; +static const struct ieee_format ieee_128 = { 8, 112, 15 }; + +/* Produce all the standard IEEE formats: 16, 32, 64, and 128 bits */ static int to_float(char *str, int32_t sign, uint8_t *result, - efunc error) + const struct ieee_format *fmt, efunc error) { - uint16_t mant[MANT_WORDS]; + uint16_t mant[MANT_WORDS], *mp; int32_t exponent; + int32_t expmax = 1 << (fmt->exponent-1); + uint16_t implicit_one = 0x8000 >> fmt->exponent; + int i; sign = (sign < 0 ? 0x8000L : 0L); @@ -291,101 +248,64 @@ static int to_float(char *str, int32_t sign, uint8_t *result, * Non-zero. */ exponent--; - if (exponent >= -126 && exponent <= 128) { + if (exponent >= 2-expmax && exponent <= expmax) { /* * Normalised. */ - exponent += 127; - ieee_shr(mant, 8); - ieee_round(mant, 2); - if (mant[0] & 0x100) /* did we scale up by one? */ - ieee_shr(mant, 1), exponent++; - mant[0] &= 0x7F; /* remove leading one */ - put(result + 2, (exponent << 7) | mant[0] | sign); - put(result + 0, mant[1]); - } else if (exponent < -126 && exponent >= -149) { + exponent += expmax; + ieee_shr(mant, fmt->exponent); + ieee_round(mant, fmt->words); + /* did we scale up by one? */ + if (mant[0] & (implicit_one << 1)) { + ieee_shr(mant, 1); + exponent++; + } + + mant[0] &= (implicit_one-1); /* remove leading one */ + mant[0] |= exponent << (15 - fmt->exponent); + } else if (exponent < 2-expmax && exponent >= 2-expmax-fmt->mantissa) { /* * Denormal. 
*/ - int shift = -(exponent + 118); + int shift = -(exponent + expmax-2-fmt->exponent); int sh = shift % 16, wds = shift / 16; ieee_shr(mant, sh); - if (ieee_round(mant, 2 - wds) + if (ieee_round(mant, fmt->words - wds) || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { ieee_shr(mant, 1); if (sh == 0) mant[0] |= 0x8000; exponent++; } - put(result + 2, (wds == 0 ? mant[0] : 0) | sign); - put(result + 0, (wds <= 1 ? mant[1 - wds] : 0)); + + if (wds) { + for (i = fmt->words-1; i >= wds; i--) + mant[i] = mant[i-wds]; + for (; i >= 0; i--) + mant[i] = 0; + } } else { if (exponent > 0) { error(ERR_NONFATAL, "overflow in floating-point constant"); return 0; - } else - memset(result, 0, 4); + } else { + memset(mant, 0, 2*fmt->words); + } } } else { - memset(result, 0, 4); + /* Zero */ + memset(mant, 0, 2*fmt->words); } - return 1; -} -/* 16-bit format with 10-bit mantissa and 5-bit exponent. - Defined in IEEE 754r. Used in SSE5. See the AMD SSE5 manual, AMD - document number 43479. */ -static int to_float16(char *str, int32_t sign, uint8_t *result, - efunc error) -{ - uint16_t mant[MANT_WORDS]; - int32_t exponent; + mant[0] |= sign; - sign = (sign < 0 ? 0x8000L : 0L); - - ieee_flconvert(str, mant, &exponent, error); - if (mant[0] & 0x8000) { - /* - * Non-zero. - */ - exponent--; - if (exponent >= -14 && exponent <= 16) { - /* - * Normalised. - */ - exponent += 15; - ieee_shr(mant, 5); - ieee_round(mant, 1); - if (mant[0] & 0x800) /* did we scale up by one? */ - ieee_shr(mant, 1), exponent++; - mant[0] &= 0x3FF; /* remove leading one */ - put(result + 0, (exponent << 7) | mant[0] | sign); - } else if (exponent < -14 && exponent >= -24) { - /* - * Denormal. - */ - int shift = -(exponent + 8); - int sh = shift % 16, wds = shift / 16; - ieee_shr(mant, sh); - if (ieee_round(mant, 1 - wds) - || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { - ieee_shr(mant, 1); - if (sh == 0) - mant[0] |= 0x8000; - exponent++; - } - put(result + 0, (wds == 0 ? 
mant[0] : 0) | sign); - } else { - if (exponent > 0) { - error(ERR_NONFATAL, "overflow in floating-point constant"); - return 0; - } else - memset(result, 0, 2); - } - } else { - memset(result, 0, 2); + for (mp = &mant[fmt->words], i = 0; i < fmt->words; i++) { + uint16_t m = *--mp; + put(result, m); + result += 2; } - return 1; + + return 1; /* success */ } /* 80-bit format with 64-bit mantissa *including an explicit integer 1* @@ -456,13 +376,15 @@ int float_const(char *number, int32_t sign, uint8_t *result, int bytes, { switch (bytes) { case 2: - return to_float16(number, sign, result, error); + return to_float(number, sign, result, &ieee_16, error); case 4: - return to_float(number, sign, result, error); + return to_float(number, sign, result, &ieee_32, error); case 8: - return to_double(number, sign, result, error); + return to_float(number, sign, result, &ieee_64, error); case 10: return to_ldoub(number, sign, result, error); + case 16: + return to_float(number, sign, result, &ieee_128, error); default: error(ERR_PANIC, "strange value %d passed to float_const", bytes); return 0; diff --git a/test/float.asm b/test/float.asm new file mode 100644 index 00000000..30d1f062 --- /dev/null +++ b/test/float.asm @@ -0,0 +1,103 @@ +; +; Test of floating-point formats +; + +; 16-bit + dw 1.0 + dw +1.0 + dw -1.0 + dw 0.0 + dw +0.0 + dw -0.0 + dw 1.83203125 + dw +1.83203125 + dw -1.83203125 + dw 1.83203125e3 + dw +1.83203125e3 + dw -1.83203125e3 + dw 1.83203125e-3 + dw +1.83203125e-3 + dw -1.83203125e-3 + dw 1.83203125e-6 ; Denormal! + dw +1.83203125e-6 ; Denormal! + dw -1.83203125e-6 ; Denormal! + +; 32-bit + dd 1.0 + dd +1.0 + dd -1.0 + dd 0.0 + dd +0.0 + dd -0.0 + dd 1.83203125 + dd +1.83203125 + dd -1.83203125 + dd 1.83203125e15 + dd +1.83203125e15 + dd -1.83203125e15 + dd 1.83203125e-15 + dd +1.83203125e-15 + dd -1.83203125e-15 + dd 1.83203125e-40 ; Denormal! + dd +1.83203125e-40 ; Denormal! + dd -1.83203125e-40 ; Denormal! 
+ +; 64-bit + dq 1.0 + dq +1.0 + dq -1.0 + dq 0.0 + dq +0.0 + dq -0.0 + dq 1.83203125 + dq +1.83203125 + dq -1.83203125 + dq 1.83203125e300 + dq +1.83203125e300 + dq -1.83203125e300 + dq 1.83203125e-300 + dq +1.83203125e-300 + dq -1.83203125e-300 + dq 1.83203125e-320 ; Denormal! + dq +1.83203125e-320 ; Denormal! + dq -1.83203125e-320 ; Denormal! + +; 80-bit + dt 1.0 + dt +1.0 + dt -1.0 + dt 0.0 + dt +0.0 + dt -0.0 + dt 1.83203125 + dt +1.83203125 + dt -1.83203125 + dt 1.83203125e+4000 + dt +1.83203125e+4000 + dt -1.83203125e+4000 + dt 1.83203125e-4000 + dt +1.83203125e-4000 + dt -1.83203125e-4000 + dt 1.83203125e-4940 ; Denormal! + dt +1.83203125e-4940 ; Denormal! + dt -1.83203125e-4940 ; Denormal! + +; 128-bit + do 1.0 + do +1.0 + do -1.0 + do 0.0 + do +0.0 + do -0.0 + do 1.83203125 + do +1.83203125 + do -1.83203125 + do 1.83203125e+4000 + do +1.83203125e+4000 + do -1.83203125e+4000 + do 1.83203125e-4000 + do +1.83203125e-4000 + do -1.83203125e-4000 + do 1.83203125e-4940 ; Denormal! + do +1.83203125e-4940 ; Denormal! + do -1.83203125e-4940 ; Denormal! From fe2177fe4287bbfa9205bcd362694f47870a3c30 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 18:31:26 -0700 Subject: [PATCH 18/29] Support C99-style hexadecimal floating point. Add support for C99-style hexadecimal floating point. The format is 0x, then a hexadecimal mantissa, then p, then a binary exponent written in decimal. 0x1.0p+1 thus is the same as 2.0. 
--- float.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ stdscan.c | 4 ++- 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/float.c b/float.c index a6ad3936..08dfdb47 100644 --- a/float.c +++ b/float.c @@ -8,6 +8,7 @@ * initial version 13/ix/96 by Simon Tatham */ +#include #include #include #include @@ -56,6 +57,91 @@ static int ieee_multiply(uint16_t *to, uint16_t *from) } } +static int hexval(char c) +{ + if (c >= '0' && c <= '9') + return c-'0'; + else if (c >= 'a' && c <= 'f') + return c-'a'+10; + else + return c-'A'+10; +} + +static void ieee_flconvert_hex(char *string, uint16_t *mant, + int32_t *exponent, efunc error) +{ + static const int log2tbl[16] = + { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + uint16_t mult[MANT_WORDS+1], *mp; + int ms; + int32_t twopwr; + int seendot, seendigit; + unsigned char c; + + twopwr = 0; + seendot = seendigit = 0; + + memset(mult, 0, sizeof mult); + + while ((c = *string++) != '\0') { + if (c == '.') { + if (!seendot) + seendot = TRUE; + else { + error(ERR_NONFATAL, + "too many periods in floating-point constant"); + return; + } + } else if (isxdigit(c)) { + int v = hexval(c); + + if (!seendigit && v) { + int l = log2tbl[v]; + + seendigit = 1; + mp = mult; + ms = 15-l; + + twopwr = seendot ? 
twopwr-4+l : l-3; + } + + if (seendigit) { + if (ms <= 0) { + *mp |= v >> -ms; + mp++; + if (mp > &mult[MANT_WORDS]) + mp = &mult[MANT_WORDS]; /* Guard slot */ + ms += 16; + } + *mp |= v << ms; + ms -= 4; + + if (!seendot) + twopwr += 4; + } else { + if (seendot) + twopwr -= 4; + } + } else if (c == 'p' || c == 'P') { + twopwr += atoi(string); + break; + } else { + error(ERR_NONFATAL, + "floating-point constant: `%c' is invalid character", + *string); + return; + } + } + + if (!seendigit) { + memset(mant, 0, 2*MANT_WORDS); /* Zero */ + *exponent = 0; + } else { + memcpy(mant, mult, 2*MANT_WORDS); + *exponent = twopwr; + } +} + static void ieee_flconvert(char *string, uint16_t *mant, int32_t *exponent, efunc error) { @@ -66,6 +152,11 @@ static void ieee_flconvert(char *string, uint16_t *mant, int32_t tenpwr, twopwr; int extratwos, started, seendot; + if (string[0] == '0' && (string[1] == 'x' || string[1] == 'X')) { + ieee_flconvert_hex(string+2, mant, exponent, error); + return; + } + p = digits; tenpwr = 0; started = seendot = FALSE; diff --git a/stdscan.c b/stdscan.c index d4ad696d..b6a4ee8f 100644 --- a/stdscan.c +++ b/stdscan.c @@ -130,7 +130,9 @@ int stdscan(void *private_data, struct tokenval *tv) stdscan_bufptr++; while (isnumchar(*stdscan_bufptr) || ((stdscan_bufptr[-1] == 'e' - || stdscan_bufptr[-1] == 'E') + || stdscan_bufptr[-1] == 'E' + || stdscan_bufptr[-1] == 'p' + || stdscan_bufptr[-1] == 'P') && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) { stdscan_bufptr++; } From 26976a187fa0b3e393118b6bf1a72707b0767ddb Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Tue, 18 Sep 2007 18:33:17 -0700 Subject: [PATCH 19/29] Fix error-reporting in hexadecimal floating-point numbers --- float.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/float.c b/float.c index 08dfdb47..b87db848 100644 --- a/float.c +++ b/float.c @@ -128,7 +128,7 @@ static void ieee_flconvert_hex(char *string, uint16_t *mant, } else { error(ERR_NONFATAL, "floating-point constant: `%c' is invalid character", - *string); + c); return; } } From 72ac77bb0b37990aa7cae7a058ae646135280301 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 18:37:36 -0700 Subject: [PATCH 20/29] Simple test for hexadecimal floating-point numbers Very trivial test for hexadecimal floating-point numbers --- test/floatx.asm | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 test/floatx.asm diff --git a/test/floatx.asm b/test/floatx.asm new file mode 100644 index 00000000..f513ec83 --- /dev/null +++ b/test/floatx.asm @@ -0,0 +1,125 @@ +; +; floatx.asm +; +; Test hexadecimal floating-point numbers + +; 16-bit + dw 1.0 + dw 0x1.0 + dw 2.0 + dw 0x2.0 + dw 0x1.0p+1 + dw 0x1.0p-1 + dw 0x0.0 + dw 0x1.23456789 + dw 0x0.123456789 + dw 0x0.0000123456789 + dw 0x1.23456789p10 + dw 0x1.23456789p+10 + dw 0x1.23456789p-10 + dw 0x0.123456789p10 + dw 0x0.123456789p+10 + dw 0x0.123456789abcdef0123456789abcdef012345p-10 + dw 0x0.0000123456789 + dw 0x0.0000123456789p+10 + dw 0x0.0000123456789p-10 + +; 32-bit + dd 1.0 + dd 0x1.0 + dd 2.0 + dd 0x2.0 + dd 0x1.0p+1 + dd 0x1.0p-1 + dd 0x0.0 + dd 0x1.23456789 + dd 0x0.123456789 + dd 0x0.0000123456789 + dd 0x1.23456789p10 + dd 0x1.23456789p+10 + dd 0x1.23456789p-10 + dd 0x0.123456789p10 + dd 0x0.123456789p+10 + dd 0x0.123456789abcdef0123456789abcdef012345p-10 + dd 0x0.0000123456789 + dd 0x0.0000123456789p+10 + dd 0x0.0000123456789p-10 + dd 0x123456789.0 + dd 0x0000123456789.0 + dd 0x123456789.0p+0 + dd 0x123456789.0p+64 + +; 64-bit + dq 1.0 + dq 0x1.0 + 
dq 2.0 + dq 0x2.0 + dq 0x1.0p+1 + dq 0x1.0p-1 + dq 0x0.0 + dq 0x1.23456789 + dq 0x0.123456789 + dq 0x0.0000123456789 + dq 0x1.23456789p10 + dq 0x1.23456789p+10 + dq 0x1.23456789p-10 + dq 0x0.123456789p10 + dq 0x0.123456789p+10 + dq 0x0.123456789abcdef0123456789abcdef012345p-10 + dq 0x0.0000123456789 + dq 0x0.0000123456789p+10 + dq 0x0.0000123456789p-10 + dq 0x123456789.0 + dq 0x0000123456789.0 + dq 0x123456789.0p+0 + dq 0x123456789.0p+300 + +; 80-bit + dt 1.0 + dt 0x1.0 + dt 2.0 + dt 0x2.0 + dt 0x1.0p+1 + dt 0x1.0p-1 + dt 0x0.0 + dt 0x1.23456789 + dt 0x0.123456789 + dt 0x0.0000123456789 + dt 0x1.23456789p10 + dt 0x1.23456789p+10 + dt 0x1.23456789p-10 + dt 0x0.123456789p10 + dt 0x0.123456789p+10 + dt 0x0.123456789abcdef0123456789abcdef012345p-10 + dt 0x0.0000123456789 + dt 0x0.0000123456789p+10 + dt 0x0.0000123456789p-10 + dt 0x123456789.0 + dt 0x0000123456789.0 + dt 0x123456789.0p+0 + dt 0x123456789.0p+1024 + +; 128-bit + do 1.0 + do 0x1.0 + do 2.0 + do 0x2.0 + do 0x1.0p+1 + do 0x1.0p-1 + do 0x0.0 + do 0x1.23456789 + do 0x0.123456789 + do 0x0.0000123456789 + do 0x1.23456789p10 + do 0x1.23456789p+10 + do 0x1.23456789p-10 + do 0x0.123456789p10 + do 0x0.123456789p+10 + do 0x0.123456789abcdef0123456789abcdef012345p-10 + do 0x0.0000123456789 + do 0x0.0000123456789p+10 + do 0x0.0000123456789p-10 + do 0x123456789.0 + do 0x0000123456789.0 + do 0x123456789.0p+0 + do 0x123456789.0p+1024 From 5107d672a006750a8f20b227d1fe33ead77ddd41 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 19:12:26 -0700 Subject: [PATCH 21/29] Update documentation Document new floating-point capabilities, and clean up the discussion about BITS 64 and REX prefixes. 
--- doc/nasmdoc.src | 61 ++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 2530b2b5..96faefbe 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1093,7 +1093,7 @@ syntax in which register names must be prefixed by a \c{%} sign), or they can be \i{effective addresses} (see \k{effaddr}), constants (\k{const}) or expressions (\k{expr}). -For \i{floating-point} instructions, NASM accepts a wide range of +For x87 \i{floating-point} instructions, NASM accepts a wide range of syntaxes: you can use two-operand forms like MASM supports, or you can use NASM's native single-operand forms in most cases. \# Details of @@ -1107,7 +1107,7 @@ For example, you can code: \c fadd st1,st0 ; this sets st1 := st1 + st0 \c fadd to st1 ; so does this -Almost any floating-point instruction that references memory must +Almost any x87 floating-point instruction that references memory must use one of the prefixes \i\c{DWORD}, \i\c{QWORD} or \i\c{TWORD} to indicate what size of \i{memory operand} it refers to. @@ -1145,6 +1145,7 @@ file. They can be invoked in a wide range of ways: \c dt 1.234567e20 ; extended-precision float \c{DT} and \c{DO} do not accept \i{numeric constants} as operands. +\c{DB} does not accept \i{floating-point} numbers as operands. \S{resb} \c{RESB} and friends: Declaring \i{Uninitialized} Data @@ -1390,20 +1391,28 @@ when they are operands to \c{dw}. \S{fltconst} \I{floating-point, constants}Floating-Point Constants \i{Floating-point} constants are acceptable only as arguments to -\i\c{DD}, \i\c{DQ} and \i\c{DT}. They are expressed in the -traditional form: digits, then a period, then optionally more -digits, then optionally an \c{E} followed by an exponent. The period -is mandatory, so that NASM can distinguish between \c{dd 1}, which -declares an integer constant, and \c{dd 1.0} which declares a -floating-point constant. 
+\i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}. They are +expressed in the traditional form: digits, then a period, then +optionally more digits, then optionally an \c{E} followed by an +exponent. The period is mandatory, so that NASM can distinguish +between \c{dd 1}, which declares an integer constant, and \c{dd 1.0} +which declares a floating-point constant. + +NASM also supports C99-style hexadecimal floating-point: \c{0x}, +hexadecimal digits, period, optionally more hexadecimal digits, then +optionally a \c{P} followed by a \e{binary} (not hexadecimal) exponent +in decimal notation. Some examples: +\c dw -0.5 ; IEEE half precision \c dd 1.2 ; an easy one +\c dd 0x1p+2 ; 1.0x2^2 = 4.0 \c dq 1.e10 ; 10,000,000,000 \c dq 1.e+10 ; synonymous with 1.e10 \c dq 1.e-10 ; 0.000 000 000 1 \c dt 3.141592653589793238462 ; pi +\c do 1.e+4000 ; IEEE quad precision NASM cannot do compile-time arithmetic on floating-point constants. This is because NASM is designed to be portable - although it always @@ -1418,15 +1427,9 @@ size of the assembler for very little benefit. \H{expr} \i{Expressions} -Expressions in NASM are similar in syntax to those in C. - -NASM does not guarantee the size of the integers used to evaluate -expressions at compile time: since NASM can compile and run on -64-bit systems quite happily, don't assume that expressions are -evaluated in 32-bit registers and so try to make deliberate use of -\i{integer overflow}. It might not always work. The only thing NASM -will guarantee is what's guaranteed by ANSI C: you always have \e{at -least} 32 bits to work in. +Expressions in NASM are similar in syntax to those in C. Expressions +are evaluated as 64-bit integers which are then adjusted to the +appropriate size. NASM supports two special tokens in expressions, allowing calculations to involve the current assembly position: the @@ -3425,15 +3428,21 @@ using 16-bit data need an 0x66 and those working on 16-bit addresses need an 0x67. 
When NASM is in \c{BITS 64} mode, most instructions operate the same -as they do for \c{BITS 32} mode. However, 16-bit addresses are depreciated -in the x86-64 architecture extension and the 0x67 prefix is used for 32-bit -addressing. This is due to the default of 64-bit addressing. When the \c{REX} -prefix is used, the processor does not know how to address the AH, BH, CH or -DH (high 8-bit legacy) registers. This because the x86-64 has added a new -set of registers and the capability to address the low 8-bits of the SP, BP -SI and DI registers as SPL, BPL, SIL and DIL, respectively; but only when -the REX prefix is used. In summary, the \c{REX} prefix causes the addressing -of AH, BH, CH and DH to be replaced by SPL, BPL, SIL and DIL. +as they do for \c{BITS 32} mode. However, there are 8 more general and +SSE registers, and 16-bit addressing is no longer supported. + +The default address size is 64 bits; 32-bit addressing can be selected +with the 0x67 prefix. The default operand size is still 32 bits, +however, and the 0x66 prefix selects 16-bit operand size. The \c{REX} +prefix is used both to select 64-bit operand size, and to access the +new registers. NASM automatically inserts REX prefixes when +necessary. + +When the \c{REX} prefix is used, the processor does not know how to +address the AH, BH, CH or DH (high 8-bit legacy) registers. Instead, +it is possible to access the low 8 bits of the SP, BP, SI and DI +registers as SPL, BPL, SIL and DIL, respectively; but only when the +REX prefix is used. The \c{BITS} directive has an exactly equivalent primitive form, \c{[BITS 16]}, \c{[BITS 32]} and \c{[BITS 64]}. The user-level form is From f48bc6fb485de852c128c5756c77acd0611c2b87 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 21:55:56 -0700 Subject: [PATCH 22/29] Support generating NaNs and infinities Support generating NaNs and infinities as part of floating-point constants. 
--- float.c | 184 +++++++++++++++++++++++++++++++++---------------- insns.h | 4 +- test/float.asm | 30 ++++++++ tokens.dat | 6 ++ 4 files changed, 161 insertions(+), 63 deletions(-) diff --git a/float.c b/float.c index b87db848..d22aa19c 100644 --- a/float.c +++ b/float.c @@ -303,6 +303,12 @@ static int ieee_round(uint16_t *mant, int i) #define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) ) +/* Set a bit, using *bigendian* bit numbering (0 = MSB) */ +static void set_bit(uint16_t *mant, int bit) +{ + mant[bit >> 4] |= 1 << (~bit & 15); +} + /* Produce standard IEEE formats, with implicit "1" bit; this makes the following assumptions: @@ -333,59 +339,84 @@ static int to_float(char *str, int32_t sign, uint8_t *result, sign = (sign < 0 ? 0x8000L : 0L); - ieee_flconvert(str, mant, &exponent, error); - if (mant[0] & 0x8000) { - /* - * Non-zero. - */ - exponent--; - if (exponent >= 2-expmax && exponent <= expmax) { - /* - * Normalised. - */ - exponent += expmax; - ieee_shr(mant, fmt->exponent); - ieee_round(mant, fmt->words); - /* did we scale up by one? */ - if (mant[0] & (implicit_one << 1)) { - ieee_shr(mant, 1); - exponent++; - } + if (str[0] == '_') { + /* NaN or Infinity */ + int32_t expmask = (1 << fmt->exponent)-1; - mant[0] &= (implicit_one-1); /* remove leading one */ - mant[0] |= exponent << (15 - fmt->exponent); - } else if (exponent < 2-expmax && exponent >= 2-expmax-fmt->mantissa) { - /* - * Denormal. 
- */ - int shift = -(exponent + expmax-2-fmt->exponent); - int sh = shift % 16, wds = shift / 16; - ieee_shr(mant, sh); - if (ieee_round(mant, fmt->words - wds) - || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { - ieee_shr(mant, 1); - if (sh == 0) - mant[0] |= 0x8000; - exponent++; - } + memset(mant, 0, sizeof mant); + mant[0] = expmask << (15-fmt->exponent); /* Exponent: all bits one */ - if (wds) { - for (i = fmt->words-1; i >= wds; i--) - mant[i] = mant[i-wds]; - for (; i >= 0; i--) - mant[i] = 0; - } - } else { - if (exponent > 0) { - error(ERR_NONFATAL, "overflow in floating-point constant"); - return 0; - } else { - memset(mant, 0, 2*fmt->words); - } - } + switch (str[2]) { + case 'n': /* __nan__ */ + case 'N': + case 'q': /* __qnan__ */ + case 'Q': + set_bit(mant, fmt->exponent+1); /* Highest bit in mantissa */ + break; + case 's': /* __snan__ */ + case 'S': + set_bit(mant, fmt->exponent+fmt->mantissa); /* Last bit */ + break; + case 'i': /* __infinity__ */ + case 'I': + break; + } } else { - /* Zero */ - memset(mant, 0, 2*fmt->words); + ieee_flconvert(str, mant, &exponent, error); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= 2-expmax && exponent <= expmax) { + /* + * Normalised. + */ + exponent += expmax; + ieee_shr(mant, fmt->exponent); + ieee_round(mant, fmt->words); + /* did we scale up by one? */ + if (mant[0] & (implicit_one << 1)) { + ieee_shr(mant, 1); + exponent++; + } + + mant[0] &= (implicit_one-1); /* remove leading one */ + mant[0] |= exponent << (15 - fmt->exponent); + } else if (exponent < 2-expmax && + exponent >= 2-expmax-fmt->mantissa) { + /* + * Denormal. 
+ */ + int shift = -(exponent + expmax-2-fmt->exponent); + int sh = shift % 16, wds = shift / 16; + ieee_shr(mant, sh); + if (ieee_round(mant, fmt->words - wds) + || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) { + ieee_shr(mant, 1); + if (sh == 0) + mant[0] |= 0x8000; + exponent++; + } + + if (wds) { + for (i = fmt->words-1; i >= wds; i--) + mant[i] = mant[i-wds]; + for (; i >= 0; i--) + mant[i] = 0; + } + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else { + memset(mant, 0, 2*fmt->words); + } + } + } else { + /* Zero */ + memset(mant, 0, 2*fmt->words); + } } mant[0] |= sign; @@ -409,6 +440,31 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result, sign = (sign < 0 ? 0x8000L : 0L); + if (str[0] == '_') { + uint16_t is_snan = 0, is_qnan = 0x8000; + switch (str[2]) { + case 'n': + case 'N': + case 'q': + case 'Q': + is_qnan = 0xc000; + break; + case 's': + case 'S': + is_snan = 1; + break; + case 'i': + case 'I': + break; + } + put(result + 0, is_snan); + put(result + 2, 0); + put(result + 4, 0); + put(result + 6, is_qnan); + put(result + 8, 0x7fff|sign); + return 1; + } + ieee_flconvert(str, mant, &exponent, error); if (mant[0] & 0x8000) { /* @@ -422,11 +478,11 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result, exponent += 16383; if (ieee_round(mant, 4)) /* did we scale up by one? */ ieee_shr(mant, 1), mant[0] |= 0x8000, exponent++; - put(result + 8, exponent | sign); - put(result + 6, mant[0]); - put(result + 4, mant[1]); - put(result + 2, mant[2]); put(result + 0, mant[3]); + put(result + 2, mant[2]); + put(result + 4, mant[1]); + put(result + 6, mant[0]); + put(result + 8, exponent | sign); } else if (exponent < -16383 && exponent >= -16446) { /* * Denormal. @@ -441,23 +497,29 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result, mant[0] |= 0x8000; exponent++; } - put(result + 8, sign); - put(result + 6, (wds == 0 ? mant[0] : 0)); - put(result + 4, (wds <= 1 ? 
mant[1 - wds] : 0)); - put(result + 2, (wds <= 2 ? mant[2 - wds] : 0)); put(result + 0, (wds <= 3 ? mant[3 - wds] : 0)); + put(result + 2, (wds <= 2 ? mant[2 - wds] : 0)); + put(result + 4, (wds <= 1 ? mant[1 - wds] : 0)); + put(result + 6, (wds == 0 ? mant[0] : 0)); + put(result + 8, sign); } else { if (exponent > 0) { error(ERR_NONFATAL, "overflow in floating-point constant"); return 0; - } else - memset(result, 0, 10); + } else { + goto zero; + } } } else { /* * Zero. */ - memset(result, 0, 10); + zero: + put(result + 0, 0); + put(result + 2, 0); + put(result + 4, 0); + put(result + 6, 0); + put(result + 8, sign); } return 1; } diff --git a/insns.h b/insns.h index b025c7a5..314737af 100644 --- a/insns.h +++ b/insns.h @@ -12,10 +12,10 @@ #include "nasm.h" /* max length of any instruction, register name etc. */ -#if MAX_INSLEN > 9 /* MAX_INSLEN defined in insnsi.h */ +#if MAX_INSLEN > 12 /* MAX_INSLEN defined in insnsi.h */ #define MAX_KEYWORD MAX_INSLEN #else -#define MAX_KEYWORD 9 +#define MAX_KEYWORD 12 #endif struct itemplate { diff --git a/test/float.asm b/test/float.asm index 30d1f062..bcb2ec28 100644 --- a/test/float.asm +++ b/test/float.asm @@ -21,6 +21,12 @@ dw 1.83203125e-6 ; Denormal! dw +1.83203125e-6 ; Denormal! dw -1.83203125e-6 ; Denormal! + dw __Infinity__ + dw +__Infinity__ + dw -__Infinity__ + dw __NaN__ + dw __QNaN__ + dw __SNaN__ ; 32-bit dd 1.0 @@ -41,6 +47,12 @@ dd 1.83203125e-40 ; Denormal! dd +1.83203125e-40 ; Denormal! dd -1.83203125e-40 ; Denormal! + dd __Infinity__ + dd +__Infinity__ + dd -__Infinity__ + dd __NaN__ + dd __QNaN__ + dd __SNaN__ ; 64-bit dq 1.0 @@ -61,6 +73,12 @@ dq 1.83203125e-320 ; Denormal! dq +1.83203125e-320 ; Denormal! dq -1.83203125e-320 ; Denormal! + dq __Infinity__ + dq +__Infinity__ + dq -__Infinity__ + dq __NaN__ + dq __QNaN__ + dq __SNaN__ ; 80-bit dt 1.0 @@ -81,6 +99,12 @@ dt 1.83203125e-4940 ; Denormal! dt +1.83203125e-4940 ; Denormal! dt -1.83203125e-4940 ; Denormal! 
+ dt __Infinity__ + dt +__Infinity__ + dt -__Infinity__ + dt __NaN__ + dt __QNaN__ + dt __SNaN__ ; 128-bit do 1.0 @@ -101,3 +125,9 @@ do 1.83203125e-4940 ; Denormal! do +1.83203125e-4940 ; Denormal! do -1.83203125e-4940 ; Denormal! + do __Infinity__ + do +__Infinity__ + do -__Infinity__ + do __NaN__ + do __QNaN__ + do __SNaN__ diff --git a/tokens.dat b/tokens.dat index c84b8fb3..e7c1cb29 100644 --- a/tokens.dat +++ b/tokens.dat @@ -32,6 +32,12 @@ to tword word +% TOKEN_FLOAT, 0, 0 +__infinity__ +__nan__ +__qnan__ +__snan__ + % TOKEN_*, 0, 0 seg wrt From 8084f105a251b025176b293a5232295fcd39f1d3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 18 Sep 2007 22:08:04 -0700 Subject: [PATCH 23/29] Document Infinity and NaN Add __Infinity__, __QNaN__, and __SNaN__ to the documentation. --- doc/nasmdoc.src | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 96faefbe..c79cd39b 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -151,6 +151,7 @@ convention \IR{ms-dos} MS-DOS \IR{ms-dos device drivers} MS-DOS device drivers \IR{multipush} \c{multipush} macro +\IR{nan} NaN \IR{nasm version} NASM version \IR{netbsd} NetBSD \IR{omf} OMF @@ -1424,6 +1425,15 @@ do floating arithmetic it would have to include its own complete set of floating-point routines, which would significantly increase the size of the assembler for very little benefit. +The special tokens \i\c{__Infinity__}, \i\c{__QNaN__} (or +\i\c{__NaN__}) and \i\c{__SNaN__} can be used to generate +\I{infinity}infinities, quiet \i{NaN}s, and signalling NaNs, +respectively. These are normally used as macros: + +\c %define Inf __Infinity__ +\c %define NaN __QNaN__ +\c +\c dq +1.5, -Inf, NaN ; Double-precision constants \H{expr} \i{Expressions} From bf9a24f46471abad75fa3efba059646a6c4f5026 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Tue, 18 Sep 2007 22:54:40 -0700 Subject: [PATCH 24/29] Slightly optimize the interface to nasm_token_hash() Instead of returning -1 from nasm_token_hash, set tv->t_type to TOKEN_ID and return TOKEN_ID, since that's what stdscan.c wants to do with it anyway. This allows us to simply tailcall nasm_token_hash(). --- stdscan.c | 6 +----- tokhash.pl | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/stdscan.c b/stdscan.c index b6a4ee8f..aecbd4a7 100644 --- a/stdscan.c +++ b/stdscan.c @@ -75,7 +75,6 @@ int stdscan(void *private_data, struct tokenval *tv) (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) { /* now we've got an identifier */ int is_sym = FALSE; - int t; if (*stdscan_bufptr == '$') { is_sym = TRUE; @@ -99,10 +98,7 @@ int stdscan(void *private_data, struct tokenval *tv) *r = '\0'; /* right, so we have an identifier sitting in temp storage. now, * is it actually a register or instruction name, or what? */ - if ((t = nasm_token_hash(ourcopy, tv)) != -1) - return t; - else - return tv->t_type = TOKEN_ID; + return nasm_token_hash(ourcopy, tv); } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) { /* * It's a $ sign with no following hex number; this must diff --git a/tokhash.pl b/tokhash.pl index 9d5888be..a63e55f3 100755 --- a/tokhash.pl +++ b/tokhash.pl @@ -194,14 +194,14 @@ print " }\n"; print "\n"; printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1; printf " if (ix >= %d)\n", scalar(@tokendata); -print " return -1;\n"; +print " return tv->t_type = TOKEN_ID;\n"; print "\n"; print " data = &tokendata[ix];\n"; # print " fprintf(stderr, \"Looked for: %s found: %s\\n\", token, data->string);\n\n"; print " if (strcmp(data->string, token))\n"; -print " return -1;\n"; +print " return tv->t_type = TOKEN_ID;\n"; print "\n"; print " tv->t_integer = data->num;\n"; print " tv->t_inttwo = data->aux;\n"; From b4b43178783e963e95fb290e82f1a0c6d6725520 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 19 Sep 2007 16:15:22 -0700 Subject: [PATCH 25/29] test/Makefile: make a bit more useful --- test/Makefile | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index bdb55a62..f48e3d9f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,2 +1,23 @@ +.SUFFIXES: .bin .o .o64 .obj .obj64 .exe .asm .lst + +NASM = ../nasm + +.asm.bin: + $(NASM) -f bin -o $@ -l $*.lst $< + +.asm.o: + $(NASM) -f elf32 -o $@ -l $*.lst $< + +.asm.o64: + $(NASM) -f elf64 -o $@ -l $*.lst $< + +.asm.obj: + $(NASM) -f win32 -o $@ -l $*.lst $< + +.asm.obj64: + $(NASM) -f win64 -o $@ -l $*.lst $< + +all: + clean: - rm -f *test *.com *.o *.obj *so *.exe + rm -f *.com *.o *.o64 *.obj *.obj64 *.exe *.lst From 4ff711889f0c29165c0b1d523b0ac53cf5e10763 Mon Sep 17 00:00:00 2001 From: Frank Kotler Date: Wed, 19 Sep 2007 21:07:32 -0400 Subject: [PATCH 26/29] Version 0.99.03 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 10afd2eb..a3ebc112 100644 --- a/version +++ b/version @@ -1 +1 @@ -0.99.02 +0.99.03 From 87f252aaa53f8ce5305d84c95a8751592f647dc2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 19 Sep 2007 21:40:37 -0700 Subject: [PATCH 27/29] Make nasm_malloc() et al available from inside ndisasm Clean up nasmlib to remove functions irrelevant for ndisasm; make nasm_malloc() etc usable inside ndisasm. 
--- Makefile.in | 27 +++++---- Mkfiles/msvc.mak | 27 +++++---- exprlib.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++ nasm.h | 55 ++++------------- nasmlib.c | 146 -------------------------------------------- nasmlib.h | 58 +++++++++++++----- ndisasm.c | 14 +++++ 7 files changed, 253 insertions(+), 228 deletions(-) create mode 100644 exprlib.c diff --git a/Makefile.in b/Makefile.in index 0dc6aed7..1a43f551 100644 --- a/Makefile.in +++ b/Makefile.in @@ -58,7 +58,7 @@ NASM = nasm.$(O) nasmlib.$(O) float.$(O) insnsa.$(O) assemble.$(O) \ output/outobj.$(O) output/outas86.$(O) output/outrdf2.$(O) \ output/outdbg.$(O) output/outieee.$(O) output/outmacho.$(O) \ preproc.$(O) pptok.$(O) \ - listing.$(O) eval.$(O) stdscan.$(O) tokhash.$(O) + listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) tokhash.$(O) NDISASM = ndisasm.$(O) disasm.$(O) sync.$(O) nasmlib.$(O) insnsd.$(O) @@ -211,16 +211,19 @@ assemble.$(O): assemble.c assemble.h compiler.h config.h insns.h insnsi.h \ nasm.h nasmlib.h pptok.h preproc.h regflags.c regs.h regvals.c version.h crc64.$(O): crc64.c disasm.$(O): disasm.c compiler.h config.h disasm.h insns.h insnsi.h insnsn.c \ - names.c nasm.h regdis.c regs.c regs.h sync.h version.h + names.c nasm.h nasmlib.h regdis.c regs.c regs.h sync.h version.h eval.$(O): eval.c compiler.h config.h eval.h insnsi.h labels.h nasm.h \ nasmlib.h regs.h version.h -float.$(O): float.c compiler.h config.h insnsi.h nasm.h regs.h version.h +exprlib.$(O): exprlib.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \ + version.h +float.$(O): float.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \ + version.h hashtbl.$(O): hashtbl.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h -insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h -insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h +insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ + regs.h 
version.h +insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ + regs.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h @@ -235,8 +238,8 @@ nasmlib.$(O): nasmlib.c compiler.h config.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h version.h ndisasm.$(O): ndisasm.c compiler.h config.h disasm.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h sync.h version.h -outform.$(O): outform.c compiler.h config.h insnsi.h nasm.h outform.h regs.h \ - version.h +outform.$(O): outform.c compiler.h config.h insnsi.h nasm.h nasmlib.h \ + outform.h regs.h version.h output/outaout.$(O): output/outaout.c compiler.h config.h insnsi.h nasm.h \ nasmlib.h outform.h regs.h stdscan.h version.h output/outas86.$(O): output/outas86.c compiler.h config.h insnsi.h nasm.h \ @@ -272,6 +275,6 @@ regs.$(O): regs.c regvals.$(O): regvals.c stdscan.$(O): stdscan.c compiler.h config.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h stdscan.h version.h -sync.$(O): sync.c sync.h -tokhash.$(O): tokhash.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h +sync.$(O): sync.c compiler.h config.h nasmlib.h sync.h +tokhash.$(O): tokhash.c compiler.h config.h insns.h insnsi.h nasm.h \ + nasmlib.h regs.h version.h diff --git a/Mkfiles/msvc.mak b/Mkfiles/msvc.mak index d70973d9..96b5449c 100644 --- a/Mkfiles/msvc.mak +++ b/Mkfiles/msvc.mak @@ -38,7 +38,7 @@ NASM = nasm.$(O) nasmlib.$(O) float.$(O) insnsa.$(O) assemble.$(O) \ output/outobj.$(O) output/outas86.$(O) output/outrdf2.$(O) \ output/outdbg.$(O) output/outieee.$(O) output/outmacho.$(O) \ preproc.$(O) pptok.$(O) \ - listing.$(O) eval.$(O) stdscan.$(O) tokhash.$(O) + listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) tokhash.$(O) NDISASM = ndisasm.$(O) disasm.$(O) sync.$(O) nasmlib.$(O) insnsd.$(O) @@ -169,16 +169,19 @@ assemble.$(O): assemble.c assemble.h compiler.h config.h insns.h insnsi.h \ nasm.h nasmlib.h pptok.h preproc.h regflags.c regs.h regvals.c 
version.h crc64.$(O): crc64.c disasm.$(O): disasm.c compiler.h config.h disasm.h insns.h insnsi.h insnsn.c \ - names.c nasm.h regdis.c regs.c regs.h sync.h version.h + names.c nasm.h nasmlib.h regdis.c regs.c regs.h sync.h version.h eval.$(O): eval.c compiler.h config.h eval.h insnsi.h labels.h nasm.h \ nasmlib.h regs.h version.h -float.$(O): float.c compiler.h config.h insnsi.h nasm.h regs.h version.h +exprlib.$(O): exprlib.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \ + version.h +float.$(O): float.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \ + version.h hashtbl.$(O): hashtbl.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h -insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h -insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h +insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ + regs.h version.h +insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ + regs.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h @@ -193,8 +196,8 @@ nasmlib.$(O): nasmlib.c compiler.h config.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h version.h ndisasm.$(O): ndisasm.c compiler.h config.h disasm.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h sync.h version.h -outform.$(O): outform.c compiler.h config.h insnsi.h nasm.h outform.h regs.h \ - version.h +outform.$(O): outform.c compiler.h config.h insnsi.h nasm.h nasmlib.h \ + outform.h regs.h version.h output/outaout.$(O): output/outaout.c compiler.h config.h insnsi.h nasm.h \ nasmlib.h outform.h regs.h stdscan.h version.h output/outas86.$(O): output/outas86.c compiler.h config.h insnsi.h nasm.h \ @@ -230,6 +233,6 @@ regs.$(O): regs.c regvals.$(O): regvals.c stdscan.$(O): stdscan.c compiler.h config.h insns.h insnsi.h nasm.h \ nasmlib.h regs.h stdscan.h version.h -sync.$(O): sync.c sync.h 
-tokhash.$(O): tokhash.c compiler.h config.h insns.h insnsi.h nasm.h regs.h \ - version.h +sync.$(O): sync.c compiler.h config.h nasmlib.h sync.h +tokhash.$(O): tokhash.c compiler.h config.h insns.h insnsi.h nasm.h \ + nasmlib.h regs.h version.h diff --git a/exprlib.c b/exprlib.c new file mode 100644 index 00000000..2f03ff0a --- /dev/null +++ b/exprlib.c @@ -0,0 +1,154 @@ +/* + * exprlib.c + * + * Library routines to manipulate expression data types. + */ + +#include "nasm.h" + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) + */ +int is_simple(expr * vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type && vect->type < EXPR_SEGBASE + SEG_ABS) + return 0; + return 1; +} + +/* + * Return TRUE if the argument is a simple scalar, _NOT_ a far- + * absolute. + */ +int is_really_simple(expr * vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type) + return 0; + return 1; +} + +/* + * Return TRUE if the argument is relocatable (i.e. a simple + * scalar, plus at most one segment-base, plus possibly a WRT). + */ +int is_reloc(expr * vect) +{ + while (vect->type && !vect->value) /* skip initial value-0 terms */ + vect++; + if (!vect->type) /* trivially return TRUE if nothing */ + return 1; /* is present apart from value-0s */ + if (vect->type < EXPR_SIMPLE) /* FALSE if a register is present */ + return 0; + if (vect->type == EXPR_SIMPLE) { /* skip over a pure number term... */ + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + } + if (vect->type == EXPR_WRT) { /* skip over a WRT term... 
*/ + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + } + if (vect->value != 0 && vect->value != 1) + return 0; /* segment base multiplier non-unity */ + do { /* skip over _one_ seg-base term... */ + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + return 0; /* And return FALSE if there's more */ +} + +/* + * Return TRUE if the argument contains an `unknown' part. + */ +int is_unknown(expr * vect) +{ + while (vect->type && vect->type < EXPR_UNKNOWN) + vect++; + return (vect->type == EXPR_UNKNOWN); +} + +/* + * Return TRUE if the argument contains nothing but an `unknown' + * part. + */ +int is_just_unknown(expr * vect) +{ + while (vect->type && !vect->value) + vect++; + return (vect->type == EXPR_UNKNOWN); +} + +/* + * Return the scalar part of a relocatable vector. (Including + * simple scalar vectors - those qualify as relocatable.) + */ +int64_t reloc_value(expr * vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 0; + if (vect->type == EXPR_SIMPLE) + return vect->value; + else + return 0; +} + +/* + * Return the segment number of a relocatable vector, or NO_SEG for + * simple scalars. + */ +int32_t reloc_seg(expr * vect) +{ + while (vect->type && (vect->type == EXPR_WRT || !vect->value)) + vect++; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + } + if (!vect->type) + return NO_SEG; + else + return vect->type - EXPR_SEGBASE; +} + +/* + * Return the WRT segment number of a relocatable vector, or NO_SEG + * if no WRT part is present. 
+ */ +int32_t reloc_wrt(expr * vect) +{ + while (vect->type && vect->type < EXPR_WRT) + vect++; + if (vect->type == EXPR_WRT) { + return vect->value; + } else + return NO_SEG; +} + diff --git a/nasm.h b/nasm.h index f4afad36..a7c26c88 100644 --- a/nasm.h +++ b/nasm.h @@ -15,6 +15,7 @@ #include #include "version.h" /* generated NASM version macros */ #include "compiler.h" +#include "nasmlib.h" #include "insnsi.h" /* For enum opcode */ #ifndef NULL @@ -60,48 +61,6 @@ */ struct ofmt; -/* - * ------------------------- - * Error reporting functions - * ------------------------- - */ - -/* - * An error reporting function should look like this. - */ -typedef void (*efunc) (int severity, const char *fmt, ...); - -/* - * These are the error severity codes which get passed as the first - * argument to an efunc. - */ - -#define ERR_DEBUG 0x00000008 /* put out debugging message */ -#define ERR_WARNING 0x00000000 /* warn only: no further action */ -#define ERR_NONFATAL 0x00000001 /* terminate assembly after phase */ -#define ERR_FATAL 0x00000002 /* instantly fatal: exit with error */ -#define ERR_PANIC 0x00000003 /* internal error: panic instantly - * and dump core for reference */ -#define ERR_MASK 0x0000000F /* mask off the above codes */ -#define ERR_NOFILE 0x00000010 /* don't give source file name/line */ -#define ERR_USAGE 0x00000020 /* print a usage message */ -#define ERR_PASS1 0x00000040 /* only print this error on pass one */ - -/* - * These codes define specific types of suppressible warning. 
- */ - -#define ERR_WARN_MASK 0x0000FF00 /* the mask for this feature */ -#define ERR_WARN_SHR 8 /* how far to shift right */ - -#define ERR_WARN_MNP 0x00000100 /* macro-num-parameters warning */ -#define ERR_WARN_MSR 0x00000200 /* macro self-reference */ -#define ERR_WARN_OL 0x00000300 /* orphan label (no colon, and - * alone on line) */ -#define ERR_WARN_NOV 0x00000400 /* numeric overflow */ -#define ERR_WARN_GNUELF 0x00000500 /* using GNU ELF extensions */ -#define ERR_WARN_MAX 5 /* the highest numbered one */ - /* * ----------------------- * Other function typedefs @@ -242,6 +201,18 @@ typedef struct { int64_t value; /* must be >= 32 bits */ } expr; +/* + * Library routines to manipulate expression data types. + */ +int is_reloc(expr *); +int is_simple(expr *); +int is_really_simple(expr *); +int is_unknown(expr *); +int is_just_unknown(expr *); +int64_t reloc_value(expr *); +int32_t reloc_seg(expr *); +int32_t reloc_wrt(expr *); + /* * The evaluator can also return hints about which of two registers * used in an expression should be the base register. See also the diff --git a/nasmlib.c b/nasmlib.c index b96fe126..59971c9d 100644 --- a/nasmlib.c +++ b/nasmlib.c @@ -702,152 +702,6 @@ const char *prefix_name(int token) return prefix_names[prefix]; } -/* - * Return TRUE if the argument is a simple scalar. (Or a far- - * absolute, which counts.) - */ -int is_simple(expr * vect) -{ - while (vect->type && !vect->value) - vect++; - if (!vect->type) - return 1; - if (vect->type != EXPR_SIMPLE) - return 0; - do { - vect++; - } while (vect->type && !vect->value); - if (vect->type && vect->type < EXPR_SEGBASE + SEG_ABS) - return 0; - return 1; -} - -/* - * Return TRUE if the argument is a simple scalar, _NOT_ a far- - * absolute. 
- */ -int is_really_simple(expr * vect) -{ - while (vect->type && !vect->value) - vect++; - if (!vect->type) - return 1; - if (vect->type != EXPR_SIMPLE) - return 0; - do { - vect++; - } while (vect->type && !vect->value); - if (vect->type) - return 0; - return 1; -} - -/* - * Return TRUE if the argument is relocatable (i.e. a simple - * scalar, plus at most one segment-base, plus possibly a WRT). - */ -int is_reloc(expr * vect) -{ - while (vect->type && !vect->value) /* skip initial value-0 terms */ - vect++; - if (!vect->type) /* trivially return TRUE if nothing */ - return 1; /* is present apart from value-0s */ - if (vect->type < EXPR_SIMPLE) /* FALSE if a register is present */ - return 0; - if (vect->type == EXPR_SIMPLE) { /* skip over a pure number term... */ - do { - vect++; - } while (vect->type && !vect->value); - if (!vect->type) /* ...returning TRUE if that's all */ - return 1; - } - if (vect->type == EXPR_WRT) { /* skip over a WRT term... */ - do { - vect++; - } while (vect->type && !vect->value); - if (!vect->type) /* ...returning TRUE if that's all */ - return 1; - } - if (vect->value != 0 && vect->value != 1) - return 0; /* segment base multiplier non-unity */ - do { /* skip over _one_ seg-base term... */ - vect++; - } while (vect->type && !vect->value); - if (!vect->type) /* ...returning TRUE if that's all */ - return 1; - return 0; /* And return FALSE if there's more */ -} - -/* - * Return TRUE if the argument contains an `unknown' part. - */ -int is_unknown(expr * vect) -{ - while (vect->type && vect->type < EXPR_UNKNOWN) - vect++; - return (vect->type == EXPR_UNKNOWN); -} - -/* - * Return TRUE if the argument contains nothing but an `unknown' - * part. - */ -int is_just_unknown(expr * vect) -{ - while (vect->type && !vect->value) - vect++; - return (vect->type == EXPR_UNKNOWN); -} - -/* - * Return the scalar part of a relocatable vector. (Including - * simple scalar vectors - those qualify as relocatable.) 
- */ -int64_t reloc_value(expr * vect) -{ - while (vect->type && !vect->value) - vect++; - if (!vect->type) - return 0; - if (vect->type == EXPR_SIMPLE) - return vect->value; - else - return 0; -} - -/* - * Return the segment number of a relocatable vector, or NO_SEG for - * simple scalars. - */ -int32_t reloc_seg(expr * vect) -{ - while (vect->type && (vect->type == EXPR_WRT || !vect->value)) - vect++; - if (vect->type == EXPR_SIMPLE) { - do { - vect++; - } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); - } - if (!vect->type) - return NO_SEG; - else - return vect->type - EXPR_SEGBASE; -} - -/* - * Return the WRT segment number of a relocatable vector, or NO_SEG - * if no WRT part is present. - */ -int32_t reloc_wrt(expr * vect) -{ - while (vect->type && vect->type < EXPR_WRT) - vect++; - if (vect->type == EXPR_WRT) { - return vect->value; - } else - return NO_SEG; -} - /* * Binary search. */ diff --git a/nasmlib.h b/nasmlib.h index 82a35e30..43342096 100644 --- a/nasmlib.h +++ b/nasmlib.h @@ -22,6 +22,48 @@ */ /* #define LOGALLOC */ +/* + * ------------------------- + * Error reporting functions + * ------------------------- + */ + +/* + * An error reporting function should look like this. + */ +typedef void (*efunc) (int severity, const char *fmt, ...); + +/* + * These are the error severity codes which get passed as the first + * argument to an efunc. 
+ */ + +#define ERR_DEBUG 0x00000008 /* put out debugging message */ +#define ERR_WARNING 0x00000000 /* warn only: no further action */ +#define ERR_NONFATAL 0x00000001 /* terminate assembly after phase */ +#define ERR_FATAL 0x00000002 /* instantly fatal: exit with error */ +#define ERR_PANIC 0x00000003 /* internal error: panic instantly + * and dump core for reference */ +#define ERR_MASK 0x0000000F /* mask off the above codes */ +#define ERR_NOFILE 0x00000010 /* don't give source file name/line */ +#define ERR_USAGE 0x00000020 /* print a usage message */ +#define ERR_PASS1 0x00000040 /* only print this error on pass one */ + +/* + * These codes define specific types of suppressible warning. + */ + +#define ERR_WARN_MASK 0x0000FF00 /* the mask for this feature */ +#define ERR_WARN_SHR 8 /* how far to shift right */ + +#define ERR_WARN_MNP 0x00000100 /* macro-num-parameters warning */ +#define ERR_WARN_MSR 0x00000200 /* macro self-reference */ +#define ERR_WARN_OL 0x00000300 /* orphan label (no colon, and + * alone on line) */ +#define ERR_WARN_NOV 0x00000400 /* numeric overflow */ +#define ERR_WARN_GNUELF 0x00000500 /* using GNU ELF extensions */ +#define ERR_WARN_MAX 5 /* the highest numbered one */ + /* * Wrappers around malloc, realloc and free. nasm_malloc will * fatal-error and die rather than return NULL; nasm_realloc will @@ -29,7 +71,6 @@ * passed a NULL pointer; nasm_free will do nothing if it is passed * a NULL pointer. 
*/ -#ifdef NASM_NASM_H /* need efunc defined for this */ void nasm_set_malloc_error(efunc); #ifndef LOGALLOC void *nasm_malloc(size_t); @@ -49,7 +90,6 @@ char *nasm_strndup_log(char *, int, char *, size_t); #define nasm_strdup(x) nasm_strdup_log(__FILE__,__LINE__,x) #define nasm_strndup(x,y) nasm_strndup_log(__FILE__,__LINE__,x,y) #endif -#endif /* * ANSI doesn't guarantee the presence of `stricmp' or @@ -234,20 +274,6 @@ void saa_fread(struct SAA *s, int32_t posn, void *p, int32_t len); /* fixup * void saa_fwrite(struct SAA *s, int32_t posn, void *p, int32_t len); /* fixup */ void saa_fpwrite(struct SAA *, FILE *); -#ifdef NASM_NASM_H -/* - * Library routines to manipulate expression data types. - */ -int is_reloc(expr *); -int is_simple(expr *); -int is_really_simple(expr *); -int is_unknown(expr *); -int is_just_unknown(expr *); -int64_t reloc_value(expr *); -int32_t reloc_seg(expr *); -int32_t reloc_wrt(expr *); -#endif - /* * Binary search routine. Returns index into `array' of an entry * matching `string', or <0 if no match. `array' is taken to diff --git a/ndisasm.c b/ndisasm.c index f9793838..ea4dc537 100644 --- a/ndisasm.c +++ b/ndisasm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -36,6 +37,17 @@ static const char *help = static void output_ins(uint32_t, uint8_t *, int, char *); static void skip(uint32_t dist, FILE * fp); +static void ndisasm_error(int severity, const char *fmt, ...) +{ + va_list va; + + va_start(va, fmt); + vfprintf(stderr, fmt, va); + + if (severity & ERR_FATAL) + exit(1); +} + int main(int argc, char **argv) { char buffer[INSN_MAX * 2], *p, *ep, *q; @@ -53,6 +65,8 @@ int main(int argc, char **argv) int32_t offset; FILE *fp; + nasm_set_malloc_error(ndisasm_error); + offset = 0; init_sync(); From 8d024e7965efb208b0831ee7289329f85cf4433f Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 19 Sep 2007 21:41:02 -0700 Subject: [PATCH 28/29] Remove limit on number of sync points Make it possible for ndisasm to allocate more memory for sync points as needed. --- sync.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/sync.c b/sync.c index 88d882a2..562c59d6 100644 --- a/sync.c +++ b/sync.c @@ -11,9 +11,10 @@ #include #include +#include "nasmlib.h" #include "sync.h" -#define SYNC_MAX 4096 /* max # of sync points */ +#define SYNC_MAX 4096 /* max # of sync points (initial) */ /* * This lot manages the current set of sync points by means of a @@ -24,29 +25,12 @@ static struct Sync { uint32_t pos; uint32_t length; } *synx; -static int nsynx; +static int max_synx, nsynx; void init_sync(void) { - /* - * I'd like to allocate an array of size SYNC_MAX, then write - * `synx--' which would allow numbering the array from one - * instead of zero without wasting memory. Sadly I don't trust - * this to work in 16-bit Large model, so it's staying the way - * it is. Btw, we don't care about freeing this array, since it - * has to last for the duration of the program and will then be - * auto-freed on exit. And I'm lazy ;-) - * - * Speaking of 16-bit Large model, that's also the reason I'm - * not declaring this array statically - by doing it - * dynamically I avoid problems with the total size of DGROUP - * in Borland C. 
- */ - synx = malloc((SYNC_MAX + 1) * sizeof(*synx)); - if (!synx) { - fprintf(stderr, "ndisasm: not enough memory for sync array\n"); - exit(1); - } + max_synx = SYNC_MAX-1; + synx = nasm_malloc(SYNC_MAX * sizeof(*synx)); nsynx = 0; } @@ -54,8 +38,10 @@ void add_sync(uint32_t pos, uint32_t length) { int i; - if (nsynx == SYNC_MAX) - return; /* can't do anything - overflow */ + if (nsynx >= max_synx) { + max_synx = (max_synx << 1)+1; + synx = nasm_realloc(synx, (max_synx+1) * sizeof(*synx)); + } nsynx++; synx[nsynx].pos = pos; From d9a979559e76028f671891483134251656793d0c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 19 Sep 2007 21:41:27 -0700 Subject: [PATCH 29/29] Update manual pages Update manual pages to include 64-bit support, and remove section about sync point limits in ndisasm. --- nasm.1 | 10 ++++++---- ndisasm.1 | 7 +------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/nasm.1 b/nasm.1 index e3284406..7b5d2929 100644 --- a/nasm.1 +++ b/nasm.1 @@ -192,9 +192,10 @@ is reserved using the .IR RESB , .IR RESW , .IR RESD , -.I RESQ -and +.IR RESQ , .I REST +and +.I RESO pseudo-opcodes, each taking one parameter which gives the number of bytes, words, doublewords, quadwords or ten-byte words to reserve. .PP @@ -297,9 +298,10 @@ finished doing absolute assembly, you must issue another .I SECTION directive to return to normal assembly. .PP -.I BITS 16 -or +.I BITS 16, .I BITS 32 +or +.I BITS 64 switches the default processor mode for which .B nasm is generating code: it is equivalent to diff --git a/ndisasm.1 b/ndisasm.1 index d48a1827..622500f9 100644 --- a/ndisasm.1 +++ b/ndisasm.1 @@ -88,7 +88,7 @@ means of examining the target addresses of the relative jumps and calls it disassembles. .TP .BI \-b " bits" -Specifies either 16-bit or 32-bit mode. The default is 16-bit mode. +Specifies 16-, 32- or 64-bit mode. The default is 16-bit mode. .TP .B \-u Specifies 32-bit mode, more compactly than using `-b 32'. 
@@ -125,10 +125,5 @@ or calls result from disassembling non-machine-code data, sync markers may get placed in strange places. Feel free to turn auto-sync off and go back to doing it manually if necessary. .PP -.B ndisasm -can only keep track of 8192 sync markers internally at once: this is -to do with portability, since DOS machines don't take kindly to more -than 64K being allocated at a time. -.PP .SH SEE ALSO .BR objdump "(" 1 ")."