mirror of
https://github.com/netwide-assembler/nasm.git
synced 2024-11-21 03:14:19 +08:00
Document CPU LATEVEX, add CPU EVEX and CPU VEX flags
Document CPU LATEVEX and the associated prefixes; add CPU EVEX and CPU VEX flags to further control encodings. Fix the error message for invalid encodings due to flags. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
parent
494d9531dd
commit
55dc058356
@ -934,8 +934,12 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
|
||||
nasm_nonfatal("instruction not supported in %d-bit mode", bits);
|
||||
break;
|
||||
case MERR_ENCMISMATCH:
|
||||
nasm_nonfatal("instruction not encodable with %s prefix",
|
||||
prefix_name(instruction->prefixes[PPS_REX]));
|
||||
if (!instruction->prefixes[PPS_REX]) {
|
||||
nasm_nonfatal("instruction not encodable without explicit prefix");
|
||||
} else {
|
||||
nasm_nonfatal("instruction not encodable with %s prefix",
|
||||
prefix_name(instruction->prefixes[PPS_REX]));
|
||||
}
|
||||
break;
|
||||
case MERR_BADBND:
|
||||
case MERR_BADREPNE:
|
||||
@ -2552,9 +2556,16 @@ static enum match_result matches(const struct itemplate *itemp,
|
||||
return MERR_ENCMISMATCH;
|
||||
break;
|
||||
default:
|
||||
if (itemp_has(itemp, IF_LATEVEX)) {
|
||||
if (!iflag_test(&cpu, IF_LATEVEX))
|
||||
if (itemp_has(itemp, IF_EVEX)) {
|
||||
if (!iflag_test(&cpu, IF_EVEX))
|
||||
return MERR_ENCMISMATCH;
|
||||
} else if (itemp_has(itemp, IF_VEX)) {
|
||||
if (!iflag_test(&cpu, IF_VEX)) {
|
||||
return MERR_ENCMISMATCH;
|
||||
} else if (itemp_has(itemp, IF_LATEVEX)) {
|
||||
if (!iflag_test(&cpu, IF_LATEVEX) && iflag_test(&cpu, IF_EVEX))
|
||||
return MERR_ENCMISMATCH;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -111,7 +111,9 @@ void set_cpu(const char *value)
|
||||
{ "any", IF_ANY },
|
||||
{ "all", IF_ANY },
|
||||
{ "latevex", IF_LATEVEX },
|
||||
{ NULL, IF_DEFAULT } /* End of list */
|
||||
{ "evex", IF_EVEX },
|
||||
{ "vex", IF_VEX },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
if (!value) {
|
||||
|
@ -68,6 +68,20 @@ reservations (e.g. \c{dw ?}.)
|
||||
\b Allow forcing an instruction in 64-bit mode to have a (possibly
|
||||
redundant) REX prefix, using the syntax \i\c{\{rex\}} as a prefix.
|
||||
|
||||
\b Add a \c{\{vex\}} prefix to enforce VEX (AVX) encoding of an
|
||||
instruction, using either the 2- or 3-byte VEX prefix.
|
||||
|
||||
\b The \c{CPU} directive has been augmented to allow control of
|
||||
generation of VEX (AVX) versus EVEX (AVX-512) instruction formats, see
|
||||
\k{CPU}.
|
||||
|
||||
\b Some recent instructions that were previously available only
|
||||
using EVEX encodings are now also encodable using VEX (AVX)
|
||||
encodings. For backwards compatibility these encodings are not enabled
|
||||
by default, but can be generated either via an explicit \c{\{vex\}}
|
||||
prefix or by specifying either \c{CPU LATEVEX} or \c{CPU NOEVEX}; see
|
||||
\k{CPU}.
|
||||
|
||||
\b Document the already existing \c{%unimacro} directive. See \k{unmacro}.
|
||||
|
||||
\b Fix a code range generation bug in the DWARF debug format
|
||||
@ -767,9 +781,10 @@ options to indicate whether all relevant branches should be getting
|
||||
\c{BND} prefixes. This is expected to be the norm for use in MPX
|
||||
code.
|
||||
|
||||
\b Add \c{{evex}}, \c{{vex3}} and \c{{vex2}} instruction prefixes to
|
||||
have NASM encode the corresponding instruction, if possible, with an EVEX,
|
||||
3-byte VEX, or 2-byte VEX prefix, respectively.
|
||||
\b Add \c{\{evex\}}, \c{\{vex3\}} and \c{\{vex2\}} instruction
|
||||
prefixes to have NASM encode the corresponding instruction, if
|
||||
possible, with an EVEX, 3-byte VEX, or 2-byte VEX prefix,
|
||||
respectively.
|
||||
|
||||
\b Support for section names longer than 8 bytes in Win32/Win64 COFF.
|
||||
|
||||
|
@ -5594,47 +5594,87 @@ are excluded from the symbol mangling and also not marked as global.
|
||||
\H{CPU} \i\c{CPU}: Defining CPU Dependencies
|
||||
|
||||
The \i\c{CPU} directive restricts assembly to those instructions which
|
||||
are available on the specified CPU.
|
||||
are available on the specified CPU. At the moment, it is primarily
|
||||
used to disallow unavailable \e{encodings} of instructions, such as
|
||||
5-byte jumps on the 8086.
|
||||
|
||||
Options are:
|
||||
(If someone would volunteer to work through the database and add
|
||||
proper annotations to each instruction, this could be greatly
|
||||
improved. Please contact the developers to volunteer, see \k{contact}.)
|
||||
|
||||
\b\c{CPU 8086} Assemble only 8086 instruction set
|
||||
Current CPU keywords are:
|
||||
|
||||
\b\c{CPU 186} Assemble instructions up to the 80186 instruction set
|
||||
\b\c{CPU 8086} - Assemble only 8086 instruction set
|
||||
|
||||
\b\c{CPU 286} Assemble instructions up to the 286 instruction set
|
||||
\b\c{CPU 186} - Assemble instructions up to the 80186 instruction set
|
||||
|
||||
\b\c{CPU 386} Assemble instructions up to the 386 instruction set
|
||||
\b\c{CPU 286} - Assemble instructions up to the 286 instruction set
|
||||
|
||||
\b\c{CPU 486} 486 instruction set
|
||||
\b\c{CPU 386} - Assemble instructions up to the 386 instruction set
|
||||
|
||||
\b\c{CPU 586} Pentium instruction set
|
||||
\b\c{CPU 486} - 486 instruction set
|
||||
|
||||
\b\c{CPU PENTIUM} Same as 586
|
||||
\b\c{CPU 586} - Pentium instruction set
|
||||
|
||||
\b\c{CPU 686} P6 instruction set
|
||||
\b\c{CPU PENTIUM} - Same as 586
|
||||
|
||||
\b\c{CPU PPRO} Same as 686
|
||||
\b\c{CPU 686} - P6 instruction set
|
||||
|
||||
\b\c{CPU P2} Same as 686
|
||||
\b\c{CPU PPRO} - Same as 686
|
||||
|
||||
\b\c{CPU P3} Pentium III (Katmai) instruction sets
|
||||
\b\c{CPU P2} - Same as 686
|
||||
|
||||
\b\c{CPU KATMAI} Same as P3
|
||||
\b\c{CPU P3} - Pentium III (Katmai) instruction sets
|
||||
|
||||
\b\c{CPU P4} Pentium 4 (Willamette) instruction set
|
||||
\b\c{CPU KATMAI} - Same as P3
|
||||
|
||||
\b\c{CPU WILLAMETTE} Same as P4
|
||||
\b\c{CPU P4} - Pentium 4 (Willamette) instruction set
|
||||
|
||||
\b\c{CPU PRESCOTT} Prescott instruction set
|
||||
\b\c{CPU WILLAMETTE} - Same as P4
|
||||
|
||||
\b\c{CPU X64} x86-64 (x64/AMD64/Intel 64) instruction set
|
||||
\b\c{CPU PRESCOTT} - Prescott instruction set
|
||||
|
||||
\b\c{CPU IA64} IA64 CPU (in x86 mode) instruction set
|
||||
\b\c{CPU X64} - x86-64 (x64/AMD64/Intel 64) instruction set
|
||||
|
||||
All options are case insensitive. All instructions will be selected
|
||||
only if they apply to the selected CPU or lower. By default, all
|
||||
instructions are available.
|
||||
\b\c{CPU IA64} - IA64 CPU (in x86 mode) instruction set
|
||||
|
||||
\b\c{CPU DEFAULT} - All available instructions
|
||||
|
||||
\b\c{CPU ALL} - All available instructions \e{and flags}
|
||||
|
||||
All options are case insensitive.
|
||||
|
||||
In addition, optional flags can be specified to modify the instruction
|
||||
selections. These can be combined with a CPU declaration or specified
|
||||
alone. They can be prefixed by \c{+} (add flag, default), \c{-}
|
||||
(remove flag) or \c{*} (set flag to default); these prefixes are
|
||||
"sticky", so:
|
||||
|
||||
\c cpu -foo,bar
|
||||
|
||||
means remove both the \c{foo} and \c{bar} options.
|
||||
|
||||
If prefixed with \c{no}, it inverts the meaning of the flag, but this
|
||||
is not sticky, so:
|
||||
|
||||
\c cpu nofoo,bar
|
||||
|
||||
means remove the \c{foo} flag but add the \c{bar} flag.
|
||||
|
||||
Currently available flags are:
|
||||
|
||||
\b\c{EVEX} - Enable generation of EVEX (AVX-512) encoded instructions
|
||||
without an explicit \c{\{evex\}} prefix. Default on.
|
||||
|
||||
\b\c{VEX} - Enable generation of VEX (AVX) or XOP encoded
|
||||
instructions without an explicit \c{\{vex\}} prefix. Default on.
|
||||
|
||||
\b\c{LATEVEX} - Enable generation of VEX (AVX) encoding of
|
||||
instructions where the VEX instruction forms were introduced
|
||||
\e{after} the corresponding EVEX (AVX-512) instruction forms without
|
||||
requiring an explicit \c{\{vex\}} prefix. This is implicit if the
|
||||
\c{EVEX} flag is disabled and the \c{VEX} flag is enabled. Default
|
||||
off.
|
||||
|
||||
|
||||
\H{FLOAT} \i\c{FLOAT}: Handling of \I{floating-point, constants}floating-point constants
|
||||
@ -5643,19 +5683,19 @@ By default, floating-point constants are rounded to nearest, and IEEE
|
||||
denormals are supported. The following options can be set to alter
|
||||
this behaviour:
|
||||
|
||||
\b\c{FLOAT DAZ} Flush denormals to zero
|
||||
\b\c{FLOAT DAZ} - Flush denormals to zero
|
||||
|
||||
\b\c{FLOAT NODAZ} Do not flush denormals to zero (default)
|
||||
\b\c{FLOAT NODAZ} - Do not flush denormals to zero (default)
|
||||
|
||||
\b\c{FLOAT NEAR} Round to nearest (default)
|
||||
\b\c{FLOAT NEAR} - Round to nearest (default)
|
||||
|
||||
\b\c{FLOAT UP} Round up (toward +Infinity)
|
||||
\b\c{FLOAT UP} - Round up (toward +Infinity)
|
||||
|
||||
\b\c{FLOAT DOWN} Round down (toward -Infinity)
|
||||
\b\c{FLOAT DOWN} - Round down (toward -Infinity)
|
||||
|
||||
\b\c{FLOAT ZERO} Round toward zero
|
||||
\b\c{FLOAT ZERO} - Round toward zero
|
||||
|
||||
\b\c{FLOAT DEFAULT} Restore default settings
|
||||
\b\c{FLOAT DEFAULT} - Restore default settings
|
||||
|
||||
The standard macros \i\c{__?FLOAT_DAZ?__}, \i\c{__?FLOAT_ROUND?__}, and
|
||||
\i\c{__?FLOAT?__} contain the current state, as long as the programmer
|
||||
|
@ -1,7 +1,7 @@
|
||||
bits 64
|
||||
|
||||
%define YMMWORD yword
|
||||
|
||||
|
||||
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
|
||||
vpmadd52luq ymm16,ymm1,YMMWORD[32+rsi]
|
||||
vpmadd52luq ymm17,ymm1,YMMWORD[64+rsi]
|
||||
@ -30,4 +30,42 @@
|
||||
vpmadd52luq ymm17,ymm2,YMMWORD[64+rcx]
|
||||
vpmadd52luq ymm18,ymm2,YMMWORD[96+rcx]
|
||||
vpmadd52luq ymm19,ymm2,YMMWORD[128+rcx]
|
||||
|
||||
|
||||
cpu default
|
||||
|
||||
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
|
||||
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
|
||||
|
||||
cpu noevex
|
||||
|
||||
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
|
||||
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
|
||||
|
||||
%ifdef ERROR
|
||||
vpmadd52luq ymm19,ymm2,YMMWORD[128+rcx]
|
||||
%endif
|
||||
|
||||
cpu evex,novex,latevex
|
||||
|
||||
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
|
||||
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
|
||||
|
||||
cpu default
|
||||
|
||||
vaddps ymm3,ymm1,YMMWORD[rsi]
|
||||
vaddps ymm3,ymm2,YMMWORD[rcx]
|
||||
|
||||
cpu novex
|
||||
|
||||
vaddps ymm3,ymm1,YMMWORD[rsi]
|
||||
vaddps ymm3,ymm2,YMMWORD[rcx]
|
||||
|
||||
%ifdef ERROR
|
||||
cpu noevex
|
||||
|
||||
vaddps ymm3,ymm1,YMMWORD[rsi]
|
||||
vaddps ymm3,ymm2,YMMWORD[rcx]
|
||||
%endif
|
||||
|
||||
{vex} vaddps ymm3,ymm1,YMMWORD[rsi]
|
||||
{vex} vaddps ymm3,ymm2,YMMWORD[rcx]
|
||||
|
Loading…
Reference in New Issue
Block a user