AVX-512: Change the data type for instruction flags

Increase the size of the data type for instruction flags from 32 bits to
64 bits, and define a new type (iflags_t) for better maintainability.

A bigger data type is needed because more instruction set types are coming
and there was not enough space for them. Since the instruction set types are
stored as values in a field rather than as individual bit masks, only one
instruction set is allowed for each instruction.

Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
This commit is contained in:
Jin Kyu Song 2013-08-26 20:28:42 -07:00 committed by Cyrill Gorcunov
parent d2d9c3ee38
commit 9bb987d8e0
10 changed files with 81 additions and 61 deletions

View File

@ -213,7 +213,7 @@ typedef struct {
#define GEN_MODRM(mod, reg, rm) \
(((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
static uint32_t cpu; /* cpu level received from nasm.c */
static iflags_t cpu; /* cpu level received from nasm.c */
static efunc errfunc;
static struct ofmt *outfmt;
static ListGen *list;
@ -377,7 +377,7 @@ static bool jmp_match(int32_t segment, int64_t offset, int bits,
return (isize >= -128 && isize <= 127); /* is it byte size? */
}
int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
int64_t assemble(int32_t segment, int64_t offset, int bits, iflags_t cp,
insn * instruction, struct ofmt *output, efunc error,
ListGen * listgen)
{
@ -680,7 +680,7 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
return 0;
}
int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
int64_t insn_size(int32_t segment, int64_t offset, int bits, iflags_t cp,
insn * instruction, efunc error)
{
const struct itemplate *temp;

View File

@ -38,9 +38,9 @@
#ifndef NASM_ASSEMBLE_H
#define NASM_ASSEMBLE_H
int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
int64_t insn_size(int32_t segment, int64_t offset, int bits, iflags_t cp,
insn * instruction, efunc error);
int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
int64_t assemble(int32_t segment, int64_t offset, int bits, iflags_t cp,
insn * instruction, struct ofmt *output, efunc error,
ListGen * listgen);

View File

@ -944,7 +944,7 @@ static const char * const condition_name[16] = {
};
int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
int32_t offset, int autosync, uint32_t prefer)
int32_t offset, int autosync, iflags_t prefer)
{
const struct itemplate * const *p, * const *best_p;
const struct disasm_index *ix;
@ -955,7 +955,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
uint8_t *origdata;
int works;
insn tmp_ins, ins;
uint32_t goodness, best;
iflags_t goodness, best;
int best_pref;
struct prefix_info prefix;
bool end_prefix;

View File

@ -41,7 +41,7 @@
#define INSN_MAX 32 /* one instruction can't be longer than this */
int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
int32_t offset, int autosync, uint32_t prefer);
int32_t offset, int autosync, iflags_t prefer);
int32_t eatbyte(uint8_t *data, char *output, int outbufsize, int segsize);
#endif

View File

@ -1514,8 +1514,8 @@ CMPPS xmmreg,xmmreg,imm [rmi: np 0f c2 /r ib,u] KATMAI,SSE,SB,AR2
CMPSS xmmreg,mem,imm [rmi: f3 0f c2 /r ib,u] KATMAI,SSE,SB,AR2
CMPSS xmmreg,xmmreg,imm [rmi: f3 0f c2 /r ib,u] KATMAI,SSE,SB,AR2
COMISS xmmreg,xmmrm32 [rm: np 0f 2f /r] KATMAI,SSE
CVTPI2PS xmmreg,mmxrm64 [rm: np 0f 2a /r] KATMAI,SSE,MMX
CVTPS2PI mmxreg,xmmrm64 [rm: np 0f 2d /r] KATMAI,SSE,MMX
CVTPI2PS xmmreg,mmxrm64 [rm: np 0f 2a /r] KATMAI,SSE
CVTPS2PI mmxreg,xmmrm64 [rm: np 0f 2d /r] KATMAI,SSE
CVTSI2SS xmmreg,mem [rm: f3 0f 2a /r] KATMAI,SSE,SD,AR1,ND
CVTSI2SS xmmreg,rm32 [rm: f3 0f 2a /r] KATMAI,SSE,SD,AR1
CVTSI2SS xmmreg,rm64 [rm: o64 f3 0f 2a /r] X64,SSE,SQ,AR1
@ -1523,7 +1523,7 @@ CVTSS2SI reg32,xmmreg [rm: f3 0f 2d /r] KATMAI,SSE,SD,AR1
CVTSS2SI reg32,mem [rm: f3 0f 2d /r] KATMAI,SSE,SD,AR1
CVTSS2SI reg64,xmmreg [rm: o64 f3 0f 2d /r] X64,SSE,SD,AR1
CVTSS2SI reg64,mem [rm: o64 f3 0f 2d /r] X64,SSE,SD,AR1
CVTTPS2PI mmxreg,xmmrm [rm: np 0f 2c /r] KATMAI,SSE,MMX,SQ
CVTTPS2PI mmxreg,xmmrm [rm: np 0f 2c /r] KATMAI,SSE,SQ
CVTTSS2SI reg32,xmmrm [rm: f3 0f 2c /r] KATMAI,SSE,SD,AR1
CVTTSS2SI reg64,xmmrm [rm: o64 f3 0f 2c /r] X64,SSE,SD,AR1
DIVPS xmmreg,xmmrm128 [rm: np 0f 5e /r] KATMAI,SSE
@ -1568,10 +1568,10 @@ UNPCKLPS xmmreg,xmmrm128 [rm: np 0f 14 /r] KATMAI,SSE
XORPS xmmreg,xmmrm128 [rm: np 0f 57 /r] KATMAI,SSE
;# Introduced in Deschutes but necessary for SSE support
FXRSTOR mem [m: np 0f ae /1] P6,SSE,FPU
FXRSTOR64 mem [m: o64 np 0f ae /1] X64,SSE,FPU
FXSAVE mem [m: np 0f ae /0] P6,SSE,FPU
FXSAVE64 mem [m: o64 np 0f ae /0] X64,SSE,FPU
FXRSTOR mem [m: np 0f ae /1] P6,SSE
FXRSTOR64 mem [m: o64 np 0f ae /1] X64,SSE
FXSAVE mem [m: np 0f ae /0] P6,SSE
FXSAVE64 mem [m: o64 np 0f ae /0] X64,SSE
;# XSAVE group (AVX and extended state)
; Introduced in late Penryn ... we really need to clean up the handling
@ -1863,37 +1863,37 @@ INVVPID reg32,mem [rm: 66 0f 38 81 /r] VMX,SO,NOLONG
INVVPID reg64,mem [rm: o64nw 66 0f 38 81 /r] VMX,SO,LONG
;# Tejas New Instructions (SSSE3)
PABSB mmxreg,mmxrm [rm: np 0f 38 1c /r] SSSE3,MMX,SQ
PABSB mmxreg,mmxrm [rm: np 0f 38 1c /r] SSSE3,SQ
PABSB xmmreg,xmmrm [rm: 66 0f 38 1c /r] SSSE3
PABSW mmxreg,mmxrm [rm: np 0f 38 1d /r] SSSE3,MMX,SQ
PABSW mmxreg,mmxrm [rm: np 0f 38 1d /r] SSSE3,SQ
PABSW xmmreg,xmmrm [rm: 66 0f 38 1d /r] SSSE3
PABSD mmxreg,mmxrm [rm: np 0f 38 1e /r] SSSE3,MMX,SQ
PABSD mmxreg,mmxrm [rm: np 0f 38 1e /r] SSSE3,SQ
PABSD xmmreg,xmmrm [rm: 66 0f 38 1e /r] SSSE3
PALIGNR mmxreg,mmxrm,imm [rmi: np 0f 3a 0f /r ib,u] SSSE3,MMX,SQ
PALIGNR mmxreg,mmxrm,imm [rmi: np 0f 3a 0f /r ib,u] SSSE3,SQ
PALIGNR xmmreg,xmmrm,imm [rmi: 66 0f 3a 0f /r ib,u] SSSE3
PHADDW mmxreg,mmxrm [rm: np 0f 38 01 /r] SSSE3,MMX,SQ
PHADDW mmxreg,mmxrm [rm: np 0f 38 01 /r] SSSE3,SQ
PHADDW xmmreg,xmmrm [rm: 66 0f 38 01 /r] SSSE3
PHADDD mmxreg,mmxrm [rm: np 0f 38 02 /r] SSSE3,MMX,SQ
PHADDD mmxreg,mmxrm [rm: np 0f 38 02 /r] SSSE3,SQ
PHADDD xmmreg,xmmrm [rm: 66 0f 38 02 /r] SSSE3
PHADDSW mmxreg,mmxrm [rm: np 0f 38 03 /r] SSSE3,MMX,SQ
PHADDSW mmxreg,mmxrm [rm: np 0f 38 03 /r] SSSE3,SQ
PHADDSW xmmreg,xmmrm [rm: 66 0f 38 03 /r] SSSE3
PHSUBW mmxreg,mmxrm [rm: np 0f 38 05 /r] SSSE3,MMX,SQ
PHSUBW mmxreg,mmxrm [rm: np 0f 38 05 /r] SSSE3,SQ
PHSUBW xmmreg,xmmrm [rm: 66 0f 38 05 /r] SSSE3
PHSUBD mmxreg,mmxrm [rm: np 0f 38 06 /r] SSSE3,MMX,SQ
PHSUBD mmxreg,mmxrm [rm: np 0f 38 06 /r] SSSE3,SQ
PHSUBD xmmreg,xmmrm [rm: 66 0f 38 06 /r] SSSE3
PHSUBSW mmxreg,mmxrm [rm: np 0f 38 07 /r] SSSE3,MMX,SQ
PHSUBSW mmxreg,mmxrm [rm: np 0f 38 07 /r] SSSE3,SQ
PHSUBSW xmmreg,xmmrm [rm: 66 0f 38 07 /r] SSSE3
PMADDUBSW mmxreg,mmxrm [rm: np 0f 38 04 /r] SSSE3,MMX,SQ
PMADDUBSW mmxreg,mmxrm [rm: np 0f 38 04 /r] SSSE3,SQ
PMADDUBSW xmmreg,xmmrm [rm: 66 0f 38 04 /r] SSSE3
PMULHRSW mmxreg,mmxrm [rm: np 0f 38 0b /r] SSSE3,MMX,SQ
PMULHRSW mmxreg,mmxrm [rm: np 0f 38 0b /r] SSSE3,SQ
PMULHRSW xmmreg,xmmrm [rm: 66 0f 38 0b /r] SSSE3
PSHUFB mmxreg,mmxrm [rm: np 0f 38 00 /r] SSSE3,MMX,SQ
PSHUFB mmxreg,mmxrm [rm: np 0f 38 00 /r] SSSE3,SQ
PSHUFB xmmreg,xmmrm [rm: 66 0f 38 00 /r] SSSE3
PSIGNB mmxreg,mmxrm [rm: np 0f 38 08 /r] SSSE3,MMX,SQ
PSIGNB mmxreg,mmxrm [rm: np 0f 38 08 /r] SSSE3,SQ
PSIGNB xmmreg,xmmrm [rm: 66 0f 38 08 /r] SSSE3
PSIGNW mmxreg,mmxrm [rm: np 0f 38 09 /r] SSSE3,MMX,SQ
PSIGNW mmxreg,mmxrm [rm: np 0f 38 09 /r] SSSE3,SQ
PSIGNW xmmreg,xmmrm [rm: 66 0f 38 09 /r] SSSE3
PSIGND mmxreg,mmxrm [rm: np 0f 38 0a /r] SSSE3,MMX,SQ
PSIGND mmxreg,mmxrm [rm: np 0f 38 0a /r] SSSE3,SQ
PSIGND xmmreg,xmmrm [rm: 66 0f 38 0a /r] SSSE3
;# AMD SSE4A

53
insns.h
View File

@ -19,7 +19,7 @@ struct itemplate {
opflags_t opd[MAX_OPERANDS]; /* bit flags for operand types */
decoflags_t deco[MAX_OPERANDS]; /* bit flags for operand decorators */
const uint8_t *code; /* the code it assembles to */
uint32_t flags; /* some flags */
iflags_t flags; /* some flags */
};
/* Disassembler table structure */
@ -72,6 +72,8 @@ extern const uint8_t nasm_bytecodes[];
* (The default state if neither IF_SM nor IF_SM2 is specified is
* that any operand with unspecified size in the template is
* required to have unspecified size in the instruction too...)
*
* iflags_t is defined to store these flags.
*/
#define IF_SM 0x00000001UL /* size match */
@ -103,33 +105,34 @@ extern const uint8_t nasm_bytecodes[];
#define IF_LONG 0x00001000UL /* long mode instruction */
#define IF_NOHLE 0x00002000UL /* HLE prefixes forbidden */
/* These flags are currently not used for anything - intended for insn set */
#define IF_UNDOC 0x00000000UL /* it's an undocumented instruction */
#define IF_FPU 0x00000000UL /* it's an FPU instruction */
#define IF_MMX 0x00000000UL /* it's an MMX instruction */
#define IF_3DNOW 0x00000000UL /* it's a 3DNow! instruction */
#define IF_SSE 0x00000000UL /* it's a SSE (KNI, MMX2) instruction */
#define IF_SSE2 0x00000000UL /* it's a SSE2 instruction */
#define IF_SSE3 0x00000000UL /* it's a SSE3 (PNI) instruction */
#define IF_VMX 0x00000000UL /* it's a VMX instruction */
#define IF_SSSE3 0x00000000UL /* it's an SSSE3 instruction */
#define IF_SSE4A 0x00000000UL /* AMD SSE4a */
#define IF_SSE41 0x00000000UL /* it's an SSE4.1 instruction */
#define IF_SSE42 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_SSE5 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_AVX 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_AVX2 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_AVX512 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_FMA 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_BMI1 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_BMI2 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_TBM 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_HLE 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_RTM 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_INVPCID 0x00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_UNDOC 0x8000000000UL /* it's an undocumented instruction */
#define IF_HLE 0x4000000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_FPU 0x0100000000UL /* it's an FPU instruction */
#define IF_MMX 0x0200000000UL /* it's an MMX instruction */
#define IF_3DNOW 0x0300000000UL /* it's a 3DNow! instruction */
#define IF_SSE 0x0400000000UL /* it's a SSE (KNI, MMX2) instruction */
#define IF_SSE2 0x0500000000UL /* it's a SSE2 instruction */
#define IF_SSE3 0x0600000000UL /* it's a SSE3 (PNI) instruction */
#define IF_VMX 0x0700000000UL /* it's a VMX instruction */
#define IF_SSSE3 0x0800000000UL /* it's an SSSE3 instruction */
#define IF_SSE4A 0x0900000000UL /* AMD SSE4a */
#define IF_SSE41 0x0A00000000UL /* it's an SSE4.1 instruction */
#define IF_SSE42 0x0B00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_SSE5 0x0C00000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_AVX 0x0D00000000UL /* it's an AVX (128b) instruction */
#define IF_AVX2 0x0E00000000UL /* it's an AVX2 (256b) instruction */
#define IF_AVX512 0x0F00000000UL /* it's an AVX-512 (512b) instruction */
#define IF_FMA 0x1000000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_BMI1 0x1100000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_BMI2 0x1200000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_TBM 0x1300000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_RTM 0x1400000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_INVPCID 0x1500000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_INSMASK 0xFF00000000UL /* the mask for instruction set types */
#define IF_PMASK 0xFF000000UL /* the mask for processor types */
#define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */
/* also the highest possible processor */
#define IF_PFMASK 0xF01FF800UL /* the mask for disassembly "prefer" */
#define IF_PFMASK 0xFFF0000000UL /* the mask for disassembly "prefer" */
#define IF_8086 0x00000000UL /* 8086 instruction */
#define IF_186 0x01000000UL /* 186+ instruction */
#define IF_286 0x02000000UL /* 286+ instruction */

View File

@ -427,6 +427,10 @@ sub format_insn($$$$$) {
my $num, $nd = 0;
my @bytecode;
my $op, @ops, $opp, @opx, @oppx, @decos, @opevex;
my @iflags = ( "FPU", "MMX", "3DNOW", "SSE", "SSE2",
"SSE3", "VMX", "SSSE3", "SSE4A", "SSE41",
"SSE42", "SSE5", "AVX", "AVX2", "AVX512",
"FMA", "BMI1", "BMI2", "TBM", "RTM", "INVPCID");
return (undef, undef) if $operands eq "ignore";
@ -476,6 +480,17 @@ sub format_insn($$$$$) {
}
$decorators =~ tr/a-z/A-Z/;
# check if two different insn set types are set
$cnt = 0;
foreach $fla (split(/,/, $flags)) {
if ($fla ~~ @iflags) {
$cnt++;
if ($cnt >= 2) {
die "Too many insn set flags in $flags\n";
}
}
}
# format the flags
$flags =~ s/,/|IF_/g;
$flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/;

8
nasm.c
View File

@ -74,7 +74,7 @@ struct forwrefinfo { /* info held on forward refs. */
};
static int get_bits(char *value);
static uint32_t get_cpu(char *cpu_str);
static iflags_t get_cpu(char *cpu_str);
static void parse_cmdline(int, char **);
static void assemble_file(char *, StrList **);
static void nasm_verror_gnu(int severity, const char *fmt, va_list args);
@ -106,8 +106,8 @@ static FILE *error_file; /* Where to write error messages */
FILE *ofile = NULL;
int optimizing = MAX_OPTIMIZE; /* number of optimization passes to take */
static int sb, cmd_sb = 16; /* by default */
static uint32_t cmd_cpu = IF_PLEVEL; /* highest level by default */
static uint32_t cpu = IF_PLEVEL; /* passed to insn_size & assemble.c */
static iflags_t cmd_cpu = IF_PLEVEL; /* highest level by default */
static iflags_t cpu = IF_PLEVEL; /* passed to insn_size & assemble.c */
int64_t global_offset_changed; /* referenced in labels.c */
int64_t prev_offset_changed;
int32_t stall_count;
@ -2006,7 +2006,7 @@ static void usage(void)
fputs("type `nasm -h' for help\n", error_file);
}
static uint32_t get_cpu(char *value)
static iflags_t get_cpu(char *value)
{
if (!strcmp(value, "8086"))
return IF_8086;

2
nasm.h
View File

@ -694,6 +694,8 @@ typedef struct insn { /* an instruction itself */
enum geninfo { GI_SWITCH };
typedef uint64_t iflags_t;
/*
* The data structure defining an output format driver, and the
* interfaces to the functions therein.

View File

@ -88,7 +88,7 @@ int main(int argc, char **argv)
bool autosync = false;
int bits = 16, b;
bool eof = false;
uint32_t prefer = 0;
iflags_t prefer = 0;
bool rn_error;
int32_t offset;
FILE *fp;