Add (untested!) SSSE3, SSE4.1, SSE4.2 instructions

Add the SSSE3, SSE4.1 and SSE4.2 instruction sets.  Change \332 to be
a literal 0xF2 prefix, by analogy with \333 for the 0xF3 prefix (the
previous \332 flag has been changed to \335).  This is necessary to get
the REX prefix in the right place for instructions that use it.

We are going to have to go in and change existing instruction patterns
which use these, as well.
This commit is contained in:
H. Peter Anvin 2007-09-12 21:58:51 -07:00
parent daffd79372
commit cb9b690ae6
6 changed files with 104 additions and 18 deletions

View File

@ -64,10 +64,10 @@
* to the condition code value of the instruction.
* \331 - instruction not valid with REP prefix. Hint for
* disassembler only; for SSE instructions.
* \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
* as a literal byte in order to aid the disassembler.
* \332 - REPNE prefix (0xF2 byte) used as opcode extension.
* \333 - REP prefix (0xF3 byte) used as opcode extension.
* \334 - LOCK prefix used instead of REX.R
* \335 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \340 - reserve <operand 0> bytes of uninitialized storage.
* Operand 0 had better be a segmentless constant.
* \364 - operand-size prefix (0x66) not permitted
@ -862,8 +862,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
codes++, length++;
break;
case 0331:
case 0332:
break;
case 0332:
case 0333:
length++;
break;
@ -871,6 +871,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
assert_no_prefix(ins, P_LOCK);
ins->rex |= REX_L;
break;
case 0335:
break;
case 0340:
case 0341:
case 0342:
@ -1360,11 +1362,11 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0331:
case 0332:
break;
case 0332:
case 0333:
*bytes = 0xF3;
*bytes = c - 0332 + 0xF2;
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
offset += 1;
break;
@ -1378,6 +1380,9 @@ static void gencode(int32_t segment, int32_t offset, int bits,
ins->rex &= ~(REX_L|REX_R);
break;
case 0335:
break;
case 0340:
case 0341:
case 0342:

View File

@ -573,9 +573,9 @@ static int matches(const struct itemplate *t, uint8_t *data,
} else if (c == 0331) {
if (prefix->rep)
return FALSE;
} else if (c == 0332) {
if (drep == P_REP)
drep = P_REPE;
} else if (c == 0332) {
if (prefix->rep != 0xF2)
return FALSE;
} else if (c == 0333) {
if (prefix->rep != 0xF3)
return FALSE;
@ -585,6 +585,9 @@ static int matches(const struct itemplate *t, uint8_t *data,
ins->rex |= REX_R;
lock = 0;
}
} else if (c == 0335) {
if (drep == P_REP)
drep = P_REPE;
} else if (c == 0364) {
if (prefix->osp)
return FALSE;

View File

@ -244,10 +244,10 @@ CMP rm64,imm \324\300\144\1\x81\207\141 X64,SM
CMP mem,imm8 \300\1\x80\207\21 8086,SM
CMP mem,imm16 \320\300\134\1\x81\207\131 8086,SM
CMP mem,imm32 \321\300\144\1\x81\207\141 386,SM
CMPSB void \332\1\xA6 8086
CMPSD void \332\321\1\xA7 386
CMPSQ void \332\324\1\xA7 X64
CMPSW void \332\320\1\xA7 8086
CMPSB void \335\1\xA6 8086
CMPSD void \335\321\1\xA7 386
CMPSQ void \335\324\1\xA7 X64
CMPSW void \335\320\1\xA7 8086
CMPXCHG mem,reg8 \300\2\x0F\xB0\101 PENT,SM
CMPXCHG reg8,reg8 \2\x0F\xB0\101 PENT
CMPXCHG mem,reg16 \320\300\2\x0F\xB1\101 PENT,SM
@ -1127,10 +1127,10 @@ SBB rm64,imm \324\300\144\1\x81\203\141 X64,SM
SBB mem,imm8 \300\1\x80\203\21 8086,SM
SBB mem,imm16 \320\300\134\1\x81\203\131 8086,SM
SBB mem,imm32 \321\300\144\1\x81\203\141 386,SM
SCASB void \332\1\xAE 8086
SCASD void \332\321\1\xAF 386
SCASQ void \332\324\1\xAF X64
SCASW void \332\320\1\xAF 8086
SCASB void \335\1\xAE 8086
SCASD void \335\321\1\xAF 386
SCASQ void \335\324\1\xAF X64
SCASW void \335\320\1\xAF 8086
SFENCE void \3\x0F\xAE\xF8 X64,AMD
SGDT mem \300\2\x0F\x01\200 286
SHL rm8,unity \300\1\xD0\204 8086
@ -1948,3 +1948,75 @@ PSIGNW mmxreg,mmxrm \364\3\x0F\x38\x09\110 SSSE3,MMX,SQ
PSIGNW xmmreg,xmmrm \366\3\x0F\x38\x09\110 SSSE3
PSIGND mmxreg,mmxrm \364\3\x0F\x38\x0A\110 SSSE3,MMX,SQ
PSIGND xmmreg,xmmrm \366\3\x0F\x38\x0A\110 SSSE3
; Penryn New Instructions (SSE4.1)
BLENDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0D\110\26 SSE41
BLENDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0C\110\26 SSE41
BLENDVPD xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x15\110 SSE41
BLENDVPS xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x14\110 SSE41
DPPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x41\110\26 SSE41
DPPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x40\110\26 SSE41
EXTRACTPS rm32,xmmreg,imm \366\3\x0F\x3A\x17\101\26 SSE41
EXTRACTPS reg64,xmmreg,imm \324\366\3\x0F\x3A\x17\101\26 SSE41,X64
INSERTPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x21\110\26 SSE41,SD
MOVNTDQA xmmreg,mem \366\3\x0F\x38\x2A\110 SSE41
MPSADBW xmmreg,xmmrm,imm \366\3\x0F\x3A\x42\110\26 SSE41
PACKUSDW xmmreg,xmmrm \366\3\x0F\x38\x2B\110 SSE41
PBLENDVB xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x10\110 SSE41
PBLENDW xmmreg,xmmrm,imm \366\3\x0F\x3A\x0E\110\26 SSE41
PCMPEQQ xmmreg,xmmrm \366\3\x0F\x38\x29\110 SSE41
PEXTRB reg32,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41
PEXTRB mem8,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41
PEXTRB reg64,xmmreg,imm \324\366\3\x0F\x3A\x14\101\26 SSE41,X64
PEXTRD rm32,xmmreg,imm \366\3\x0F\x3A\x16\101\26 SSE41
; PEXTRQ shares opcode 66 0F 3A 16 with PEXTRD; REX.W (\324) selects the 64-bit form.
PEXTRQ rm64,xmmreg,imm \324\366\3\x0F\x3A\x16\101\26 SSE41,X64
PEXTRW reg32,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41
PEXTRW mem16,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41
PEXTRW reg64,xmmreg,imm \324\366\3\x0F\x3A\x15\101\26 SSE41,X64
PHMINPOSUW xmmreg,xmmrm \366\3\x0F\x38\x41\110 SSE41
PINSRB xmmreg,reg32,imm \366\3\x0F\x3A\x20\110\26 SSE41
PINSRB xmmreg,mem8,imm \366\3\x0F\x3A\x20\110\26 SSE41
PINSRD xmmreg,rm32,imm \366\3\x0F\x3A\x22\110\26 SSE41
PINSRQ xmmreg,rm64,imm \324\366\3\x0F\x3A\x22\110\26 SSE41,X64
PMAXSB xmmreg,xmmrm \366\3\x0F\x38\x3C\110 SSE41
PMAXSD xmmreg,xmmrm \366\3\x0F\x38\x3D\110 SSE41
PMAXUD xmmreg,xmmrm \366\3\x0F\x38\x3F\110 SSE41
PMAXUW xmmreg,xmmrm \366\3\x0F\x38\x3E\110 SSE41
PMINSB xmmreg,xmmrm \366\3\x0F\x38\x38\110 SSE41
PMINSD xmmreg,xmmrm \366\3\x0F\x38\x39\110 SSE41
PMINUD xmmreg,xmmrm \366\3\x0F\x38\x3B\110 SSE41
PMINUW xmmreg,xmmrm \366\3\x0F\x38\x3A\110 SSE41
PMOVSXBW xmmreg,xmmrm \366\3\x0F\x38\x20\110 SSE41,SQ
PMOVSXBD xmmreg,xmmrm \366\3\x0F\x38\x21\110 SSE41,SD
PMOVSXBQ xmmreg,xmmrm \366\3\x0F\x38\x22\110 SSE41,SW
PMOVSXWD xmmreg,xmmrm \366\3\x0F\x38\x23\110 SSE41,SQ
PMOVSXWQ xmmreg,xmmrm \366\3\x0F\x38\x24\110 SSE41,SD
PMOVSXDQ xmmreg,xmmrm \366\3\x0F\x38\x25\110 SSE41,SQ
PMOVZXBW xmmreg,xmmrm \366\3\x0F\x38\x30\110 SSE41,SQ
PMOVZXBD xmmreg,xmmrm \366\3\x0F\x38\x31\110 SSE41,SD
PMOVZXBQ xmmreg,xmmrm \366\3\x0F\x38\x32\110 SSE41,SW
PMOVZXWD xmmreg,xmmrm \366\3\x0F\x38\x33\110 SSE41,SQ
PMOVZXWQ xmmreg,xmmrm \366\3\x0F\x38\x34\110 SSE41,SD
PMOVZXDQ xmmreg,xmmrm \366\3\x0F\x38\x35\110 SSE41,SQ
PMULDQ xmmreg,xmmrm \366\3\x0F\x38\x28\110 SSE41
PMULLD xmmreg,xmmrm \366\3\x0F\x38\x40\110 SSE41
PTEST xmmreg,xmmrm \366\3\x0F\x38\x17\110 SSE41
ROUNDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x09\110\26 SSE41
ROUNDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE41
ROUNDSD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0B\110\26 SSE41
ROUNDSS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0A\110\26 SSE41
; Nehalem New Instructions (SSE4.2)
; CRC32 is F2 0F 38 F0 /r (byte source) or F2 0F 38 F1 /r (word/dword/qword source).
; \3 introduces exactly three literal opcode bytes; \320/\321/\324 select the
; 16-/32-/64-bit operand size for the F1 forms.
CRC32 reg32,rm8 \332\3\x0F\x38\xF0\110 SSE42
CRC32 reg32,rm16 \320\332\3\x0F\x38\xF1\110 SSE42
CRC32 reg32,rm32 \321\332\3\x0F\x38\xF1\110 SSE42
CRC32 reg64,rm8 \324\332\3\x0F\x38\xF0\110 SSE42,X64
CRC32 reg64,rm64 \324\332\3\x0F\x38\xF1\110 SSE42,X64
PCMPESTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x61\110\26 SSE42
PCMPESTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x60\110\26 SSE42
PCMPISTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x63\110\26 SSE42
PCMPISTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x62\110\26 SSE42
PCMPGTQ xmmreg,xmmrm \366\3\x0F\x38\x37\110 SSE42
; POPCNT is F3 0F B8 /r; source and destination always have the same operand size.
POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM
POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM
POPCNT reg64,rm64 \324\333\2\x0F\xB8\110 NEHALEM,X64

View File

@ -103,6 +103,7 @@ extern const struct itemplate * const * const itable[];
#define IF_WILLAMETTE 0x08000000UL /* Willamette instructions */
#define IF_PRESCOTT 0x09000000UL /* Prescott instructions */
#define IF_X86_64 0x0A000000UL /* x86-64 instruction (long or legacy mode) */
#define IF_NEHALEM 0x0B000000UL /* Nehalem instruction */
#define IF_X64 (IF_LONG|IF_X86_64)
#define IF_IA64 0x0F000000UL /* IA64 instructions (in x86 mode) */
#define IF_CYRIX 0x10000000UL /* Cyrix-specific instruction */

4
nasm.h
View File

@ -422,6 +422,9 @@ enum {
* With FPUREG:
* 16: FPU0
*
* With XMMREG:
* 16: XMM0
*
* With MEMORY:
* 16: MEM_OFFS (this is a simple offset)
* 17: IP_REL (IP-relative offset)
@ -485,6 +488,7 @@ enum {
#define MMXREG 0x02009000L /* MMX register */
#define RM_XMM 0x04008000L /* XMM (SSE) operand */
#define XMMREG 0x04009000L /* XMM (SSE) register */
#define XMM0 0x04019000L /* XMM register zero */
#define REG_CDT 0x00101004L /* CRn, DRn and TRn */
#define REG_CREG 0x00111004L /* CRn */
#define REG_DREG 0x00121004L /* DRn */

View File

@ -78,4 +78,5 @@ st1-7 FPUREG fpureg 1
mm0-7 MMXREG mmxreg 0
# SSE registers
xmm0-15 XMMREG xmmreg 0
xmm0 XMM0 xmmreg 0
xmm1-15 XMMREG xmmreg 1