Add support for new instructions from ISE June 2020

Add support for new instructions as defined in the Instruction Set
Extensions manual as of June 2020.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin 2020-07-16 21:48:28 -07:00
parent 36814f1fc8
commit b31a4c9906
9 changed files with 172 additions and 36 deletions

View File

@ -63,17 +63,18 @@
* assembly mode or the operand-size override on the operand
* \70..\73 rel32 a long relative operand, from operand 0..3
* \74..\77 seg a word constant, from the _segment_ part of operand 0..3
* \1ab a ModRM, calculated on EA in operand a, with the spare
* \1ab /r a ModRM, calculated on EA in operand a, with the reg
* field the register value of operand b.
* \172\ab the register number from operand a in bits 7..4, with
* \171\mab /mrb (e.g /3r0) a ModRM, with the reg field taken from operand a, and the m
* and b fields set to the specified values.
* \172\ab /is4 the register number from operand a in bits 7..4, with
* the 4-bit immediate from operand b in bits 3..0.
* \173\xab the register number from operand a in bits 7..4, with
* the value b in bits 3..0.
* \174..\177 the register number from operand 0..3 in bits 7..4, and
* an arbitrary value in bits 3..0 (assembled as zero.)
* \2ab a ModRM, calculated on EA in operand a, with the spare
* \2ab /b a ModRM, calculated on EA in operand a, with the reg
* field equal to digit b.
*
* \240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the
* V field taken from operand 0..3.
* \250 this instruction uses EVEX rather than REX or VEX/XOP, with the
@ -103,12 +104,11 @@
* tup is tuple type for Disp8*N from %tuple_codes in insns.pl
* (compressed displacement encoding)
*
* \254..\257 id,s a signed 32-bit operand to be extended to 64 bits.
* \260..\263 this instruction uses VEX/XOP rather than REX, with the
* V field taken from operand 0..3.
* \270 this instruction uses VEX/XOP rather than REX, with the
* V field set to 1111b.
*
* \254..\257 id,s a signed 32-bit operand to be extended to 64 bits.
* \260..\263 this instruction uses VEX/XOP rather than REX, with the
* V field taken from operand 0..3.
* \270 this instruction uses VEX/XOP rather than REX, with the
* V field set to 1111b.
* VEX/XOP prefixes are followed by the sequence:
* \tmm\wlp where mm is the M field; and wlp is:
* 00 wwl lpp
@ -1317,6 +1317,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
length += 2;
break;
case 0171:
c = *codes++;
op2 = (op2 & ~3) | ((c >> 3) & 3);
opx = &ins->oprs[op2];
ins->rex |= op_rexflags(opx, REX_R|REX_H|REX_P|REX_W);
length++;
break;
case 0172:
case 0173:
codes++;
@ -1951,6 +1959,15 @@ static void gencode(struct out_data *data, insn *ins)
out_segment(data, opx);
break;
case 0171:
c = *codes++;
op2 = (op2 & ~3) | ((c >> 3) & 3);
opx = &ins->oprs[op2];
r = nasm_regvals[opx->basereg];
c = (c & ~070) | ((r & 7) << 3);
out_rawbyte(data, c);
break;
case 0172:
{
int mask = ins->prefixes[PPS_VEX] == P_EVEX ? 7 : 15;
@ -2807,7 +2824,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
input->disp_size != (addrbits != 16 ? 32 : 16)))
nasm_warn(WARN_OTHER, "displacement size ignored on absolute address");
if (bits == 64 && (~input->type & IP_REL)) {
if ((eaflags & EAF_MIB) || (bits == 64 && (~input->type & IP_REL))) {
output->sib_present = true;
output->sib = GEN_SIB(0, 4, 5);
output->bytes = 4;
@ -3026,7 +3043,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
output->rex |= rexflags(it, ix, REX_X);
output->rex |= rexflags(bt, bx, REX_B);
if (it == -1 && (bt & 7) != REG_NUM_ESP) {
if (it == -1 && (bt & 7) != REG_NUM_ESP && !(eaflags & EAF_MIB)) {
/* no SIB needed */
int mod, rm;

View File

@ -203,6 +203,8 @@ static enum reg_enum whichreg(opflags_t regflags, int regval, int rex)
return GET_REGISTER(nasm_rd_opmaskreg, regval);
if (!(BNDREG & ~regflags))
return GET_REGISTER(nasm_rd_bndreg, regval);
if (!(TMMREG & ~regflags))
return GET_REGISTER(nasm_rd_tmmreg, regval);
#undef GET_REGISTER
return 0;
@ -679,6 +681,22 @@ static int matches(const struct itemplate *t, uint8_t *data,
break;
}
case 0171:
{
uint8_t t = *r++;
uint8_t d = *data++;
if ((d ^ t) & ~070) {
return 0;
} else {
op2 = (op2 & ~3) | ((t >> 3) & 3);
opy = &ins->oprs[op2];
opy->basereg = ((d >> 3) & 7) +
(ins->rex & REX_R ? 8 : 0);
opy->segment |= SEG_RMREG;
}
break;
}
case 0172:
{
uint8_t ximm = *data++;

View File

@ -9,6 +9,9 @@ since 2007.
\S{cl-2.15.03} Version 2.15.03
\b Add instructions from the Intel Instruction Set Extensions and
Future Features Programming Reference, June 2020.
\b Properly display warnings in preprocess-only mode.
\b Fix copy-and-paste of examples from the PDF documentation.

View File

@ -81,19 +81,19 @@
/*
* Register classes.
*
* Bits: 7 - 16
* Bits: 7 - 17
*/
#define REG_CLASS_SHIFT (7)
#define REG_CLASS_BITS (10)
#define REG_CLASS_BITS (11)
#define REG_CLASS_MASK OP_GENMASK(REG_CLASS_BITS, REG_CLASS_SHIFT)
#define GEN_REG_CLASS(bit) OP_GENBIT(bit, REG_CLASS_SHIFT)
/*
* Subclasses. Depends on type of operand.
*
* Bits: 17 - 24
* Bits: 18 - 25
*/
#define SUBCLASS_SHIFT (17)
#define SUBCLASS_SHIFT (18)
#define SUBCLASS_BITS (8)
#define SUBCLASS_MASK OP_GENMASK(SUBCLASS_BITS, SUBCLASS_SHIFT)
#define GEN_SUBCLASS(bit) OP_GENBIT(bit, SUBCLASS_SHIFT)
@ -101,9 +101,9 @@
/*
* Special flags. Context dependant.
*
* Bits: 25 - 31
* Bits: 26 - 32
*/
#define SPECIAL_SHIFT (25)
#define SPECIAL_SHIFT (26)
#define SPECIAL_BITS (7)
#define SPECIAL_MASK OP_GENMASK(SPECIAL_BITS, SPECIAL_SHIFT)
#define GEN_SPECIAL(bit) OP_GENBIT(bit, SPECIAL_SHIFT)
@ -111,9 +111,9 @@
/*
* Sizes of the operands and attributes.
*
* Bits: 32 - 42
* Bits: 33 - 43
*/
#define SIZE_SHIFT (32)
#define SIZE_SHIFT (33)
#define SIZE_BITS (11)
#define SIZE_MASK OP_GENMASK(SIZE_BITS, SIZE_SHIFT)
#define GEN_SIZE(bit) OP_GENBIT(bit, SIZE_SHIFT)
@ -121,9 +121,9 @@
/*
* Register set count
*
* Bits: 47 - 43
* Bits: 44 - 48
*/
#define REGSET_SHIFT (43)
#define REGSET_SHIFT (44)
#define REGSET_BITS (5)
#define REGSET_MASK OP_GENMASK(REGSET_BITS, REGSET_SHIFT)
#define GEN_REGSET(bit) OP_GENBIT(bit, REGSET_SHIFT)
@ -138,11 +138,11 @@
*
* ............................................................1111 optypes
* .........................................................111.... modifiers
* ...............................................1111111111....... register classes
* .......................................11111111................. subclasses
* ................................1111111......................... specials
* .....................11111111111................................ sizes
* ................11111........................................... regset count
* ..............................................11111111111....... register classes
* ......................................11111111.................. subclasses
* ...............................1111111.......................... specials
* ....................11111111111................................. sizes
* ...............11111............................................ regset count
*/
#define REGISTER GEN_OPTYPE(0) /* register number in 'basereg' */
@ -176,6 +176,7 @@
#define REG_CLASS_RM_ZMM GEN_REG_CLASS(7)
#define REG_CLASS_OPMASK GEN_REG_CLASS(8)
#define REG_CLASS_BND GEN_REG_CLASS(9)
#define REG_CLASS_RM_TMM GEN_REG_CLASS(10)
static inline bool is_class(opflags_t class, opflags_t op)
{
@ -217,6 +218,7 @@ static inline bool is_reg_class(opflags_t class, opflags_t reg)
#define KREG OPMASKREG
#define RM_BND ( REG_CLASS_BND | REGMEM) /* Bounds operand */
#define BNDREG ( REG_CLASS_BND | REGMEM | REGISTER) /* Bounds register */
#define TMMREG ( REG_CLASS_RM_TMM | REGMEM | REGISTER) /* TMM (AMX) register */
#define REG_CDT ( REG_CLASS_CDT | BITS32 | REGISTER) /* CRn, DRn and TRn */
#define REG_CREG (GEN_SUBCLASS(1) | REG_CLASS_CDT | BITS32 | REGISTER) /* CRn */
#define REG_DREG (GEN_SUBCLASS(2) | REG_CLASS_CDT | BITS32 | REGISTER) /* DRn */

36
test/amx.asm Normal file
View File

@ -0,0 +1,36 @@
; Assembler test for the AMX tile instructions: every instruction form
; below is emitted once per tile register.
bits 64
; amx <n>: emit one instance of each AMX instruction, using tile
; register tmm<n> for every tile-register operand.
%macro amx 1
; treg expands to the register name formed by pasting "tmm" and the argument
%define treg tmm %+ %1
; Tile configuration load/store
ldtilecfg [rsi]
sttilecfg [rdi]
tilezero treg
; Tile loads: base only, base,index and base,index*scale memory operands
tileloadd treg, [rax]
tileloadd treg, [rax,rdx]
tileloadd treg, [rax,rdx*2]
tileloaddt1 treg, [rax]
tileloaddt1 treg, [rax,rdx]
tileloaddt1 treg, [rax,rdx*2]
; Tile dot-product instructions (same register used for all three operands)
tdpbf16ps treg, treg, treg
tdpbssd treg, treg, treg
tdpbusd treg, treg, treg
tdpbsud treg, treg, treg
tdpbuud treg, treg, treg
; Tile stores: same three memory operand shapes as the loads
tilestored [rax], treg
tilestored [rax,rdx], treg
tilestored [rax,rdx*2], treg
tilerelease
%endmacro
; Instantiate the macro eight times, with n counting 0 through 7
%assign n 0
%rep 8
amx n
%assign n n+1
%endrep

View File

@ -84,6 +84,16 @@ if_("AVX5124FMAPS", "AVX-512 4-iteration multiply-add");
if_("AVX5124VNNIW", "AVX-512 4-iteration dot product");
if_("SGX", "Intel Software Guard Extensions (SGX)");
if_("CET", "Intel Control-Flow Enforcement Technology (CET)");
if_("ENQCMD", "Enqueue command instructions");
if_("PCONFIG", "Platform configuration instruction");
if_("WBNOINVD", "Writeback and do not invalidate instruction");
if_("TSXLDTRK", "TSX suspend load address tracking");
if_("SERIALIZE", "SERIALIZE instruction");
if_("AVX512BF16", "AVX-512 bfloat16");
if_("AVX512VP2INTERSECT", "AVX-512 VP2INTERSECT instructions");
if_("AMXTILE", "AMX tile configuration instructions");
if_("AMXBF16", "AMX bfloat16 multiplication");
if_("AMXINT8", "AMX 8-bit integer multiplication");
# Put these last [hpa: why?]
if_("OBSOLETE", "Instruction removed from architecture");

View File

@ -5999,6 +5999,51 @@ WRUSSQ mem,reg64 [mr: o64 66 0f 38 f5 /r] CET,FUTURE,X64
WRSSD mem,reg32 [mr: o32 0f 38 f6 /r] CET,FUTURE
WRSSQ mem,reg64 [mr: o64 0f 38 f6 /r] CET,FUTURE,X64
;# Instructions from ISE doc 319433-040, June 2020
; ENQCMD and ENQCMDS share the 0F 38 F8 opcode and differ only in the
; mandatory prefix: F2 for ENQCMD, F3 for the privileged ENQCMDS.
; The a16/a32/a64 forms select the address size that matches the
; destination register; the ND reg32 form keeps the a16 encoding
; available for disassembly only.
ENQCMD reg16,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE
ENQCMD reg32,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE,ND
ENQCMD reg32,mem512 [rm: a32 f2 0f 38 f8 /r] ENQCMD,FUTURE
ENQCMD reg64,mem512 [rm: a64 f2 0f 38 f8 /r] ENQCMD,FUTURE,X64
ENQCMDS reg16,mem512 [rm: a16 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV
ENQCMDS reg32,mem512 [rm: a16 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,ND
ENQCMDS reg32,mem512 [rm: a32 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV
ENQCMDS reg64,mem512 [rm: a64 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,X64
PCONFIG void [ np 0f 01 c5] PCONFIG,FUTURE,PRIV
SERIALIZE void [ np 0f 01 e8] SERIALIZE,FUTURE
WBNOINVD void [ f3 0f 09] WBNOINVD,FUTURE,PRIV
; XSUSLDTRK/XRESLDTRK are the F2-prefixed neighbours of SERIALIZE (0F 01 E8)
XRESLDTRK void [ f2 0f 01 e9] TSXLDTRK,FUTURE
XSUSLDTRK void [ f2 0f 01 e8] TSXLDTRK,FUTURE
;# AVX512 Bfloat16 instructions
; Opcode 72 in map 0F38 is shared by two instructions, told apart by prefix:
;   F2 = VCVTNE2PS2BF16 (two sources, destination same width as sources)
;   F3 = VCVTNEPS2BF16  (one source, destination half the source width)
VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f2.0f38.w0 72 /r] AVX512BF16,FUTURE
VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f2.0f38.w0 72 /r] AVX512BF16,FUTURE
VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f2.0f38.w0 72 /r] AVX512BF16,FUTURE
VCVTNEPS2BF16 xmmreg|mask|z,xmmrm128|b32 [rm: evex.128.f3.0f38.w0 72 /r] AVX512BF16,FUTURE
VCVTNEPS2BF16 xmmreg|mask|z,ymmrm256|b32 [rm: evex.256.f3.0f38.w0 72 /r] AVX512BF16,FUTURE
VCVTNEPS2BF16 ymmreg|mask|z,zmmrm512|b32 [rm: evex.512.f3.0f38.w0 72 /r] AVX512BF16,FUTURE
; The memory operand width follows the vector length of each form
VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f3.0f38.w0 52 /r] AVX512BF16,FUTURE
VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f3.0f38.w0 52 /r] AVX512BF16,FUTURE
VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f3.0f38.w0 52 /r] AVX512BF16,FUTURE
;# AVX512 mask intersect instructions
; The destination is a pair of consecutive opmask registers (kreg|rs2).
; The memory operand width follows the vector length of each form, and the
; instructions belong to the AVX512VP2INTERSECT feature flag, not AVX512BF16.
VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm: evex.nds.128.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE
VP2INTERSECTD kreg|rs2,ymmreg,ymmrm256|b32 [rvm: evex.nds.256.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE
VP2INTERSECTD kreg|rs2,zmmreg,zmmrm512|b32 [rvm: evex.nds.512.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE
;# Intel Advanced Matrix Extensions (AMX)
; All AMX instructions are VEX.128, map 0F38, W0, 64-bit mode only.
; Opcode 49 with a memory operand and /0: tile configuration load (NP) / store (66).
LDTILECFG mem512 [m: vex.128.np.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64
STTILECFG mem512 [m: vex.128.66.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64
; Tile dot products: the mandatory prefix selects the operation.
TDPBF16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5c /r] AMXBF16,FUTURE,X64
TDPBSSD tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 5e /r] AMXINT8,FUTURE,X64
TDPBSUD tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5e /r] AMXINT8,FUTURE,X64
TDPBUSD tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 5e /r] AMXINT8,FUTURE,X64
TDPBUUD tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 5e /r] AMXINT8,FUTURE,X64
; Opcode 4B is shared by the tile memory operations, told apart by prefix:
;   F2 = TILELOADD, 66 = TILELOADDT1, F3 = TILESTORED.
; MIB forces a SIB byte in the memory operand encoding.
TILELOADD tmmreg,mem [rm: vex.128.f2.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64
TILELOADDT1 tmmreg,mem [rm: vex.128.66.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64
TILERELEASE void [ vex.128.np.0f38.w0 49 c0] AMXTILE,FUTURE,X64
TILESTORED mem,tmmreg [mr: vex.128.f3.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64
; /3r0: ModRM with mod=3, reg=tile register, rm=0
TILEZERO tmmreg [r: vex.128.f2.0f38.w0 49 /3r0] AMXTILE,FUTURE,X64
;# Systematic names for the hinting nop instructions
; These should be last in the file
HINT_NOP0 rm16 [m: o16 0f 18 /0] P6,UNDOC

View File

@ -880,11 +880,19 @@ sub byte_code_compile($$) {
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-7])$:) {
if (!defined($oppos{'m'})) {
die "$fname:$line: $op requires m operand\n";
die "$fname:$line: $op requires an m operand\n";
}
push(@codes, 06) if ($oppos{'m'} & 4);
push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1);
$prefix_ok = 0;
} elsif ($op =~ m:^/([0-3]?)r([0-7])$:) {
if (!defined($oppos{'r'})) {
die "$fname:$line: $op requires an r operand\n";
}
push(@codes, 05) if ($oppos{'r'} & 4);
push(@codes, 0171);
push(@codes, (($1+0) << 6) + (($oppos{'r'} & 3) << 3) + $2);
$prefix_ok = 0;
} elsif ($op =~ /^(vex|xop)(|\..*)$/) {
my $vexname = $1;
my $c = $vexmap{$vexname};
@ -907,7 +915,7 @@ sub byte_code_compile($$) {
$w = 2;
} elsif ($oq eq 'ww') {
$w = 3;
} elsif ($oq eq 'p0') {
} elsif ($oq eq 'np' || $oq eq 'p0') {
$p = 0;
} elsif ($oq eq '66' || $oq eq 'p1') {
$p = 1;
@ -935,9 +943,6 @@ sub byte_code_compile($$) {
if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
die "$fname:$line: missing fields in \U$vexname\E specification\n";
}
if (defined($oppos{'v'}) && !$has_nds) {
die "$fname:$line: 'v' operand without ${vexname}.nds or ${vexname}.ndd\n";
}
my $minmap = ($c == 1) ? 8 : 0; # 0-31 for VEX, 8-31 for XOP
if ($m < $minmap || $m > 31) {
die "$fname:$line: Only maps ${minmap}-31 are valid for \U${vexname}\n";
@ -966,7 +971,7 @@ sub byte_code_compile($$) {
$w = 2;
} elsif ($oq eq 'ww') {
$w = 3;
} elsif ($oq eq 'p0') {
} elsif ($oq eq 'np' || $oq eq 'p0') {
$p = 0;
} elsif ($oq eq '66' || $oq eq 'p1') {
$p = 1;
@ -994,9 +999,6 @@ sub byte_code_compile($$) {
if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
die "$fname:$line: missing fields in EVEX specification\n";
}
if (defined($oppos{'v'}) && !$has_nds) {
die "$fname:$line: 'v' operand without evex.nds or evex.ndd\n";
}
if ($m > 15) {
die "$fname:$line: Only maps 0-15 are valid for EVEX\n";
}

View File

@ -130,6 +130,9 @@ zmm0 ZMM0 zmmreg 0
zmm1-15 ZMM_L16 zmmreg 1
zmm16-31 ZMMREG zmmreg 16
# AMX tile registers
tmm0-7 TMMREG tmmreg 0
# Opmask registers
k0 OPMASK0 opmaskreg 0
k1-7 OPMASKREG opmaskreg 1 TFLAG_BRC_OPT