First cut at AVX machinery.

First cut at AVX machinery support.  The only instruction implemented
is VPERMIL2PS, and it's probably buggy.  I'm checking this in with the
hope that other people can start helping out with (a) testing this,
and (b) adding instructions.

NDISASM support is not there yet.
This commit is contained in:
H. Peter Anvin 2008-05-04 17:53:31 -07:00
parent 7117e80702
commit d85d250fa2
11 changed files with 169 additions and 35 deletions

View File

@ -48,11 +48,25 @@
* kindly to a zero byte in the _middle_ of a compile time
* string constant, so I had to put this hack in.)
* \171 - placement of DREX suffix in the absence of an EA
* \172\ab - the register number from operand a in bits 7..4, with
* the 4-bit immediate from operand b in bits 3..0.
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
* \250..\253 - same as \150..\153, except warn if the 64-bit operand
* is not equal to the truncated and sign-extended 32-bit
* operand; used for 32-bit immediates in 64-bit mode.
* \260..\263 - this instruction uses VEX rather than REX, with the
* V field taken from operand 0..3.
* \270 - this instruction uses VEX rather than REX, with the
* V field set to 1111b.
*
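* The VEX codes (\260..\263 and \270) are followed by the two-byte sequence:
* \1mm\1wp where mm is the M field; and wp is:
* 01 0ww lpp
* ww = 0 for W = 0
* ww = 1 for W = 1
* ww = 2 for W used as REX.W
* l and pp are the L and P fields; they are copied unchanged into
* the low three bits of the last byte of the VEX prefix.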
*
* \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
* \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
* \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
@ -190,7 +204,7 @@ static void out(int64_t offset, int32_t segto, const void *data,
errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
return;
}
WRITEADDR(q, *(int64_t *)data, size);
data = p;
type = OUT_RAWDATA;
@ -964,7 +978,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
case 0163:
length++;
ins->rex |= REX_D;
ins->drexdst = regval(&ins->oprs[c & 3]);
ins->drexdst = regval(opx);
break;
case 0164:
case 0165:
@ -972,19 +986,40 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
case 0167:
length++;
ins->rex |= REX_D|REX_OC;
ins->drexdst = regval(&ins->oprs[c & 3]);
ins->drexdst = regval(opx);
break;
case 0170:
length++;
break;
case 0171:
break;
case 0172:
codes++;
length++;
break;
case 0250:
case 0251:
case 0252:
case 0253:
length += is_sbyte64(ins, c & 3) ? 1 : 4;
break;
case 0260:
case 0261:
case 0262:
case 0263:
length += 2;
ins->rex |= REX_V;
ins->drexdst = regval(opx);
ins->vex_m = *codes++;
ins->vex_wlp = *codes++;
break;
case 0270:
length += 2;
ins->rex |= REX_V;
ins->drexdst = 0;
ins->vex_m = *codes++;
ins->vex_wlp = *codes++;
break;
case 0300:
case 0301:
case 0302:
@ -1093,12 +1128,40 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
ins->rex &= rex_mask;
if (ins->rex & REX_D) {
if (ins->rex & REX_V) {
int bad32 = REX_R|REX_W|REX_X|REX_B;
if (ins->rex & REX_H) {
errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
return -1;
}
switch (ins->vex_wlp & 030) {
case 000:
ins->rex &= ~REX_W;
break;
case 010:
ins->rex |= REX_W;
bad32 &= ~REX_W;
break;
default:
/* Follow REX_W */
break;
}
if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
return -1;
}
if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
length += 3;
else
length += 2;
} else if (ins->rex & REX_D) {
if (ins->rex & REX_H) {
errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
return -1;
}
if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) ||
if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
ins->drexdst > 7)) {
errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
return -1;
@ -1126,7 +1189,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
}
#define EMIT_REX() \
if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) && (bits == 64)) { \
if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \
ins->rex = (ins->rex & REX_REAL)|REX_P; \
out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
ins->rex = 0; \
@ -1507,6 +1570,26 @@ static void gencode(int32_t segment, int64_t offset, int bits,
offset++;
break;
case 0172:
c = *codes++;
opx = &ins->oprs[c >> 3];
bytes[0] = regvals[opx->basereg] << 4;
opx = &ins->oprs[c & 7];
if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
errfunc(ERR_NONFATAL,
"non-absolute expression not permitted as argument %d",
c & 7);
} else {
if (opx->offset & ~15) {
errfunc(ERR_WARNING | ERR_WARN_NOV,
"four-bit argument exceeds bounds");
}
bytes[0] |= opx->offset & 15;
}
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
offset++;
break;
case 0250:
case 0251:
case 0252:
@ -1525,6 +1608,28 @@ static void gencode(int32_t segment, int64_t offset, int bits,
}
break;
case 0260:
case 0261:
case 0262:
case 0263:
case 0270:
codes += 2;
if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
bytes[0] = 0xc4;
bytes[1] = ins->vex_m | ((ins->rex & 7) << 5);
bytes[2] = ((ins->rex & REX_W) << (7-3)) |
(ins->drexdst << 3) | (ins->vex_wlp & 07);
out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
offset += 3;
} else {
bytes[0] = 0xc5;
bytes[1] = ((ins->rex & REX_R) << (7-2)) |
(ins->drexdst << 3) | (ins->vex_wlp & 07);
out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
offset += 2;
}
break;
case 0300:
case 0301:
case 0302:
@ -1887,7 +1992,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
int32_t type = instruction->oprs[i].type;
if (!(type & SIZE_MASK))
type |= size[i];
if (itemp->opd[i] & SAME_AS) {
int j = itemp->opd[i] & ~SAME_AS;
if (type != instruction->oprs[j].type ||

View File

@ -643,6 +643,16 @@ static int matches(const struct itemplate *t, uint8_t *data,
return false;
break;
case 0172:
{
uint8_t ximm = *data++;
c = *r++;
ins->oprs[c >> 3].basereg = ximm >> 4;
ins->oprs[c >> 3].segment |= SEG_RMREG;
ins->oprs[c & 7].offset = ximm & 15;
}
break;
case4(0200):
case4(0204):
case4(0210):

View File

@ -2023,6 +2023,12 @@ GETSEC void \2\x0F\x37 KATMAI
PFRCP mmxreg,mmxrm \323\2\x0F\x0F\110\1\x86 PENT,3DNOW,SQ,CYRIX
PFRSQRT mmxreg,mmxrm \323\2\x0F\x0F\110\1\x87 PENT,3DNOW,SQ,CYRIX
;# Intel AVX instructions
VPERMIL2PS xmmreg,xmmreg,xmmrm,xmmreg,imm \260\103\101\1\x48\123\172\34 AVX,SANDYBANKS
VPERMIL2PS xmmreg,xmmreg,xmmreg,xmmrm,imm \260\103\111\1\x48\132\172\14 AVX,SANDYBANKS
VPERMIL2PS ymmreg,ymmreg,ymmrm,ymmreg,imm \260\103\105\1\x48\123\172\34 AVX,SANDYBANKS
VPERMIL2PS ymmreg,ymmreg,ymmreg,ymmrm,imm \260\103\115\1\x48\132\172\14 AVX,SANDYBANKS
;# VIA (Centaur) security instructions
XSTORE void \3\x0F\xA7\xC0 PENT,CYRIX
XCRYPTECB void \333\3\x0F\xA7\xC8 PENT,CYRIX

View File

@ -98,6 +98,7 @@ extern const struct disasm_index itable[256];
#define IF_SSE41 0x00800000UL /* it's an SSE4.1 instruction */
#define IF_SSE42 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_SSE5 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_AVX 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_PMASK 0xFF000000UL /* the mask for processor types */
#define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */
/* also the highest possible processor */
@ -114,6 +115,7 @@ extern const struct disasm_index itable[256];
#define IF_PRESCOTT 0x09000000UL /* Prescott instructions */
#define IF_X86_64 0x0A000000UL /* x86-64 instruction (long or legacy mode) */
#define IF_NEHALEM 0x0B000000UL /* Nehalem instruction */
#define IF_SANDYBANKS 0x0C000000UL /* Sandy Banks instruction */
#define IF_X64 (IF_LONG|IF_X86_64)
#define IF_IA64 0x0F000000UL /* IA64 instructions (in x86 mode) */
#define IF_CYRIX 0x10000000UL /* Cyrix-specific instruction */

View File

@ -241,8 +241,7 @@ sub format {
$operands =~ s/imm(\d+)/imm|bits$1/g;
$operands =~ s/imm/immediate/g;
$operands =~ s/rm(\d+)/rm_gpr|bits$1/g;
$operands =~ s/mmxrm/rm_mmx/g;
$operands =~ s/xmmrm/rm_xmm/g;
$operands =~ s/(mmx|xmm|ymm)rm/rm_$1/g;
$operands =~ s/\=([0-9]+)/same_as|$1/g;
if ($operands eq 'void') {
@ops = ();

12
nasm.c
View File

@ -216,7 +216,7 @@ static void define_macros_early(void)
strftime(temp, sizeof temp, "__UTC_TIME_NUM__=%H%M%S", &gm);
pp_pre_define(temp);
}
if (gm_p)
posix_time = posix_mktime(&gm);
else if (lt_p)
@ -502,7 +502,7 @@ static bool process_arg(char *p, char *q)
case 'O': /* Optimization level */
{
int opt;
if (!*param) {
/* Naked -O == -Ox */
optimizing = INT_MAX >> 1; /* Almost unlimited */
@ -512,7 +512,7 @@ static bool process_arg(char *p, char *q)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
opt = strtoul(param, &param, 10);
/* -O0 -> optimizing == -1, 0.98 behaviour */
/* -O1 -> optimizing == 0, 0.98.09 behaviour */
if (opt < 2)
@ -520,18 +520,18 @@ static bool process_arg(char *p, char *q)
else
optimizing = opt;
break;
case 'v':
case '+':
param++;
opt_verbose_info = true;
break;
case 'x':
param++;
optimizing = INT_MAX >> 1; /* Almost unlimited */
break;
default:
report_error(ERR_FATAL,
"unknown optimization option -O%c\n",

27
nasm.h
View File

@ -540,7 +540,7 @@ typedef uint32_t opflags_t;
#define IP_REL 0x0002c000U /* IP-relative offset */
/* memory which matches any type of r/m operand */
#define MEMORY_ANY (MEMORY|RM_GPR|RM_MMX|RM_XMM)
#define MEMORY_ANY (MEMORY|RM_GPR|RM_MMX|RM_XMM|RM_YMM)
/* special type of immediate operand */
#define UNITY 0x00012000U /* for shift/rotate instructions */
@ -564,16 +564,17 @@ enum ccode { /* condition code names */
/*
* REX flags
*/
#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */
#define REX_D 0x0100 /* Instruction uses DREX instead of REX */
#define REX_H 0x80 /* High register present, REX forbidden */
#define REX_P 0x40 /* REX prefix present/required */
#define REX_L 0x20 /* Use LOCK prefix instead of REX.R */
#define REX_W 0x08 /* 64-bit operand size */
#define REX_R 0x04 /* ModRM reg extension */
#define REX_X 0x02 /* SIB index extension */
#define REX_B 0x01 /* ModRM r/m extension */
#define REX_REAL 0x4f /* Actual REX prefix bits */
#define REX_B 0x01 /* ModRM r/m extension */
#define REX_X 0x02 /* SIB index extension */
#define REX_R 0x04 /* ModRM reg extension */
#define REX_W 0x08 /* 64-bit operand size */
#define REX_L 0x20 /* Use LOCK prefix instead of REX.R */
#define REX_P 0x40 /* REX prefix present/required */
#define REX_H 0x80 /* High register present, REX forbidden */
#define REX_D 0x0100 /* Instruction uses DREX instead of REX */
#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */
#define REX_V 0x0400 /* Instruction uses VEX instead of REX */
/*
* Note that because segment registers may be used as instruction
@ -651,7 +652,7 @@ enum prefix_pos {
MAXPREFIX /* Total number of prefix slots */
};
#define MAX_OPERANDS 4
#define MAX_OPERANDS 5
typedef struct insn { /* an instruction itself */
char *label; /* the label defined, or NULL */
@ -667,7 +668,9 @@ typedef struct insn { /* an instruction itself */
int32_t times; /* repeat count (TIMES prefix) */
int forw_ref; /* is there a forward reference? */
int rex; /* Special REX Prefix */
int drexdst; /* Destination register for DREX suffix */
int drexdst; /* Destination register for DREX/VEX suffix */
int vex_m; /* M register for VEX prefix */
int vex_wlp; /* W, P and L information for VEX prefix */
} insn;
enum geninfo { GI_SWITCH };

View File

@ -671,7 +671,7 @@ void saa_wleb128u(struct SAA *psaa, int value)
ptemp++;
len++;
} while (value != 0);
saa_wbytes(psaa, temp, len);
saa_wbytes(psaa, temp, len);
}
/* write signed LEB128 value to SAA */
@ -703,8 +703,8 @@ void saa_wleb128s(struct SAA *psaa, int value)
*ptemp = byte;
ptemp++;
len++;
}
saa_wbytes(psaa, temp, len);
}
saa_wbytes(psaa, temp, len);
}
void saa_rewind(struct SAA *s)

View File

@ -1585,14 +1585,14 @@ static bool if_condition(Token * tline, enum preproc_token ct)
iftype:
t = tline = expand_smacro(tline);
while (tok_type_(t, TOK_WHITESPACE) ||
(needtype == TOK_NUMBER &&
tok_type_(t, TOK_OTHER) &&
(t->text[0] == '-' || t->text[0] == '+') &&
!t->text[1]))
t = t->next;
j = tok_type_(t, needtype);
break;

9
test/avx.asm Normal file
View File

@ -0,0 +1,9 @@
; Smoke test for the first AVX (VEX-encoded) instruction, VPERMIL2PS.
; Each line exercises one operand layout in 64-bit mode.
bits 64
; xmm forms: memory operand in the third, then the fourth position
vpermil2ps xmm0,xmm1,[rdi],xmm3,0
vpermil2ps xmm0,xmm1,xmm2,[rdi],1
; ymm forms: all-register, then memory in the fourth / third position
vpermil2ps ymm0,ymm1,ymm2,ymm3,2
vpermil2ps ymm0,ymm1,ymm2,[rdi],3
vpermil2ps ymm0,ymm1,[rdi],ymm3,2
; NOTE(review): same operands and immediate as two lines above -- confirm the repeat is intentional
vpermil2ps ymm0,ymm1,ymm2,[rdi],3

6
wsaa.h
View File

@ -32,13 +32,13 @@
#else /* !X86_MEMORY */
#define WSAACHAR(s,p,v) \
#define WSAACHAR(s,p,v) \
do { \
*(uint8_t *)(p) = (v); \
saa_wbytes(s, p, 1); \
} while (0)
#define WSAASHORT(s,p,v) \
#define WSAASHORT(s,p,v) \
do { \
uint16_t _wss_v = (v); \
uint8_t *_wss_p = (uint8_t *)(p); \
@ -58,7 +58,7 @@
saa_wbytes(s, _wsl_p, 4); \
} while (0)
#define WSAADLONG(s,p,v) \
#define WSAADLONG(s,p,v) \
do { \
uint64_t _wsq_v = (v); \
uint8_t *_wsq_p = (uint8_t *)(p); \