diff --git a/assemble.c b/assemble.c index 9288aab9..54522712 100644 --- a/assemble.c +++ b/assemble.c @@ -64,10 +64,10 @@ * to the condition code value of the instruction. * \331 - instruction not valid with REP prefix. Hint for * disassembler only; for SSE instructions. - * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep. - * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded - * as a literal byte in order to aid the disassembler. + * \332 - REP prefix (0xF2 byte) used as opcode extension. + * \333 - REP prefix (0xF3 byte) used as opcode extension. * \334 - LOCK prefix used instead of REX.R + * \335 - disassemble a rep (0xF3 byte) prefix as repe not rep. * \340 - reserve bytes of uninitialized storage. * Operand 0 had better be a segmentless constant. * \364 - operand-size prefix (0x66) not permitted @@ -862,8 +862,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, codes++, length++; break; case 0331: - case 0332: break; + case 0332: case 0333: length++; break; @@ -871,6 +871,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, assert_no_prefix(ins, P_LOCK); ins->rex |= REX_L; break; + case 0335: + break; case 0340: case 0341: case 0342: @@ -1360,11 +1362,11 @@ static void gencode(int32_t segment, int32_t offset, int bits, break; case 0331: - case 0332: break; + case 0332: case 0333: - *bytes = 0xF3; + *bytes = c - 0332 + 0xF2; out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); offset += 1; break; @@ -1378,6 +1380,9 @@ static void gencode(int32_t segment, int32_t offset, int bits, ins->rex &= ~(REX_L|REX_R); break; + case 0335: + break; + case 0340: case 0341: case 0342: diff --git a/disasm.c b/disasm.c index cf7cd710..0452c295 100644 --- a/disasm.c +++ b/disasm.c @@ -573,9 +573,9 @@ static int matches(const struct itemplate *t, uint8_t *data, } else if (c == 0331) { if (prefix->rep) return FALSE; - } else if (c == 0332) { - if (drep == P_REP) - drep = P_REPE; + } else if (c == 0332) { + 
if (prefix->rep != 0xF2) + return FALSE; } else if (c == 0333) { if (prefix->rep != 0xF3) return FALSE; @@ -585,6 +585,9 @@ static int matches(const struct itemplate *t, uint8_t *data, ins->rex |= REX_R; lock = 0; } + } else if (c == 0335) { + if (drep == P_REP) + drep = P_REPE; } else if (c == 0364) { if (prefix->osp) return FALSE; diff --git a/insns.dat b/insns.dat index 9b1fcade..a1ad815f 100644 --- a/insns.dat +++ b/insns.dat @@ -244,10 +244,10 @@ CMP rm64,imm \324\300\144\1\x81\207\141 X64,SM CMP mem,imm8 \300\1\x80\207\21 8086,SM CMP mem,imm16 \320\300\134\1\x81\207\131 8086,SM CMP mem,imm32 \321\300\144\1\x81\207\141 386,SM -CMPSB void \332\1\xA6 8086 -CMPSD void \332\321\1\xA7 386 -CMPSQ void \332\324\1\xA7 X64 -CMPSW void \332\320\1\xA7 8086 +CMPSB void \335\1\xA6 8086 +CMPSD void \335\321\1\xA7 386 +CMPSQ void \335\324\1\xA7 X64 +CMPSW void \335\320\1\xA7 8086 CMPXCHG mem,reg8 \300\2\x0F\xB0\101 PENT,SM CMPXCHG reg8,reg8 \2\x0F\xB0\101 PENT CMPXCHG mem,reg16 \320\300\2\x0F\xB1\101 PENT,SM @@ -1127,10 +1127,10 @@ SBB rm64,imm \324\300\144\1\x81\203\141 X64,SM SBB mem,imm8 \300\1\x80\203\21 8086,SM SBB mem,imm16 \320\300\134\1\x81\203\131 8086,SM SBB mem,imm32 \321\300\144\1\x81\203\141 386,SM -SCASB void \332\1\xAE 8086 -SCASD void \332\321\1\xAF 386 -SCASQ void \332\324\1\xAF X64 -SCASW void \332\320\1\xAF 8086 +SCASB void \335\1\xAE 8086 +SCASD void \335\321\1\xAF 386 +SCASQ void \335\324\1\xAF X64 +SCASW void \335\320\1\xAF 8086 SFENCE void \3\x0F\xAE\xF8 X64,AMD SGDT mem \300\2\x0F\x01\200 286 SHL rm8,unity \300\1\xD0\204 8086 @@ -1948,3 +1948,75 @@ PSIGNW mmxreg,mmxrm \364\3\x0F\x38\x09\110 SSSE3,MMX,SQ PSIGNW xmmreg,xmmrm \366\3\x0F\x38\x09\110 SSSE3 PSIGND mmxreg,mmxrm \364\3\x0F\x38\x0A\110 SSSE3,MMX,SQ PSIGND xmmreg,xmmrm \366\3\x0F\x38\x0A\110 SSSE3 + +; Penryn New Instructions (SSE4.1) +BLENDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0D\110\26 SSE41 +BLENDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0C\110\26 SSE41 +BLENDVPD xmmreg,xmmrm,xmm0 
\366\3\x0F\x38\x15\110 SSE41 +BLENDVPS xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x14\110 SSE41 +DPPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x41\110\26 SSE41 +DPPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x40\110\26 SSE41 +EXTRACTPS rm32,xmmreg,imm \366\3\x0F\x3A\x17\101\26 SSE41 +EXTRACTPS reg64,xmmreg,imm \324\366\3\x0F\x3A\x17\101\26 SSE41,X64 +INSERTPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x21\110\26 SSE41,SD +MOVNTDQA xmmreg,mem \366\3\x0F\x38\x2A\110 SSE41 +MPSADBW xmmreg,xmmrm,imm \366\3\x0F\x3A\x42\110\26 SSE41 +PACKUSDW xmmreg,xmmrm \366\3\x0F\x38\x2B\110 SSE41 +PBLENDVB xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x10\110 SSE41 +PBLENDW xmmreg,xmmrm,imm \366\3\x0F\x3A\x0E\110\26 SSE41 +PCMPEQQ xmmreg,xmmrm \366\3\x0F\x38\x29\110 SSE41 +PEXTRB reg32,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41 +PEXTRB mem8,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41 +PEXTRB reg64,xmmreg,imm \324\366\3\x0F\x3A\x14\101\26 SSE41,X64 +PEXTRD rm32,xmmreg,imm \366\3\x0F\x3A\x16\101\26 SSE41 +PEXTRQ rm64,xmmreg,imm \324\366\3\x0F\x3A\x16\101\26 SSE41,X64 +PEXTRW reg32,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41 +PEXTRW mem16,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41 +PEXTRW reg64,xmmreg,imm \324\366\3\x0F\x3A\x15\101\26 SSE41,X64 +PHMINPOSUW xmmreg,xmmrm \366\3\x0F\x38\x41\110 SSE41 +PINSRB xmmreg,reg32,imm \366\3\x0F\x3A\x20\110\26 SSE41 +PINSRB xmmreg,mem8,imm \366\3\x0F\x3A\x20\110\26 SSE41 +PINSRD xmmreg,rm32,imm \366\3\x0F\x3A\x22\110\26 SSE41 +PINSRQ xmmreg,rm64,imm \324\366\3\x0F\x3A\x22\110\26 SSE41,X64 +PMAXSB xmmreg,xmmrm \366\3\x0F\x38\x3C\110 SSE41 +PMAXSD xmmreg,xmmrm \366\3\x0F\x38\x3D\110 SSE41 +PMAXUD xmmreg,xmmrm \366\3\x0F\x38\x3F\110 SSE41 +PMAXUW xmmreg,xmmrm \366\3\x0F\x38\x3E\110 SSE41 +PMINSB xmmreg,xmmrm \366\3\x0F\x38\x38\110 SSE41 +PMINSD xmmreg,xmmrm \366\3\x0F\x38\x39\110 SSE41 +PMINUD xmmreg,xmmrm \366\3\x0F\x38\x3B\110 SSE41 +PMINUW xmmreg,xmmrm \366\3\x0F\x38\x3A\110 SSE41 +PMOVSXBW xmmreg,xmmrm \366\3\x0F\x38\x20\110 SSE41,SQ +PMOVSXBD xmmreg,xmmrm \366\3\x0F\x38\x21\110 SSE41,SD +PMOVSXBQ
xmmreg,xmmrm \366\3\x0F\x38\x22\110 SSE41,SW +PMOVSXWD xmmreg,xmmrm \366\3\x0F\x38\x23\110 SSE41,SQ +PMOVSXWQ xmmreg,xmmrm \366\3\x0F\x38\x24\110 SSE41,SD +PMOVSXDQ xmmreg,xmmrm \366\3\x0F\x38\x25\110 SSE41,SQ +PMOVZXBW xmmreg,xmmrm \366\3\x0F\x38\x30\110 SSE41,SQ +PMOVZXBD xmmreg,xmmrm \366\3\x0F\x38\x31\110 SSE41,SD +PMOVZXBQ xmmreg,xmmrm \366\3\x0F\x38\x32\110 SSE41,SW +PMOVZXWD xmmreg,xmmrm \366\3\x0F\x38\x33\110 SSE41,SQ +PMOVZXWQ xmmreg,xmmrm \366\3\x0F\x38\x34\110 SSE41,SD +PMOVZXDQ xmmreg,xmmrm \366\3\x0F\x38\x35\110 SSE41,SQ +PMULDQ xmmreg,xmmrm \366\3\x0F\x38\x28\110 SSE41 +PMULLD xmmreg,xmmrm \366\3\x0F\x38\x40\110 SSE41 +PTEST xmmreg,xmmrm \366\3\x0F\x38\x17\110 SSE41 +ROUNDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x09\110\26 SSE41 +ROUNDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE41 +ROUNDSD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0B\110\26 SSE41 +ROUNDSS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0A\110\26 SSE41 + +; Nehalem New Instructions (SSE4.2) +CRC32 reg32,rm8 \332\3\x0F\x38\xF0\110 SSE42 +CRC32 reg32,rm16 \320\332\3\x0F\x38\xF1\110 SSE42 +CRC32 reg32,rm32 \321\332\3\x0F\x38\xF1\110 SSE42 +CRC32 reg64,rm8 \324\332\3\x0F\x38\xF0\110 SSE42,X64 +CRC32 reg64,rm64 \324\332\3\x0F\x38\xF1\110 SSE42,X64 +PCMPESTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x61\110\26 SSE42 +PCMPESTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x60\110\26 SSE42 +PCMPISTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x63\110\26 SSE42 +PCMPISTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x62\110\26 SSE42 +PCMPGTQ xmmreg,xmmrm \366\3\x0F\x38\x37\110 SSE42 +POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM +POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM +POPCNT reg64,rm64 \324\333\2\x0F\xB8\110 NEHALEM,X64 diff --git a/insns.h b/insns.h index 706ee7cd..4deccf94 100644 --- a/insns.h +++ b/insns.h @@ -103,6 +103,7 @@ extern const struct itemplate * const * const itable[]; #define IF_WILLAMETTE 0x08000000UL /* Willamette instructions */ #define IF_PRESCOTT 0x09000000UL /* Prescott instructions */ #define IF_X86_64 0x0A000000UL /* x86-64
instruction (long or legacy mode) */ +#define IF_NEHALEM 0x0B000000UL /* Nehalem instruction */ #define IF_X64 (IF_LONG|IF_X86_64) #define IF_IA64 0x0F000000UL /* IA64 instructions (in x86 mode) */ #define IF_CYRIX 0x10000000UL /* Cyrix-specific instruction */ diff --git a/nasm.h b/nasm.h index 34d6d05e..6e231b61 100644 --- a/nasm.h +++ b/nasm.h @@ -422,6 +422,9 @@ enum { * With FPUREG: * 16: FPU0 * + * With XMMREG: + * 16: XMM0 + * * With MEMORY: * 16: MEM_OFFS (this is a simple offset) * 17: IP_REL (IP-relative offset) @@ -485,6 +488,7 @@ enum { #define MMXREG 0x02009000L /* MMX register */ #define RM_XMM 0x04008000L /* XMM (SSE) operand */ #define XMMREG 0x04009000L /* XMM (SSE) register */ +#define XMM0 0x04019000L /* XMM register zero */ #define REG_CDT 0x00101004L /* CRn, DRn and TRn */ #define REG_CREG 0x00111004L /* CRn */ #define REG_DREG 0x00121004L /* DRn */ diff --git a/regs.dat b/regs.dat index f70e3298..64fa2a0b 100644 --- a/regs.dat +++ b/regs.dat @@ -78,4 +78,5 @@ st1-7 FPUREG fpureg 1 mm0-7 MMXREG mmxreg 0 # SSE registers -xmm0-15 XMMREG xmmreg 0 +xmm0 XMM0 xmmreg 0 +xmm1-15 XMMREG xmmreg 1