From 49640ed315f08f7a709d8a07813409a5d5de36b5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 23 Jul 2024 12:47:25 -0700 Subject: [PATCH] x86: move the bytecode definition into a separate file in x86/ At least three files (asm/assemble.c, disasm/disasm.c, and x86/insns.pl) depend on the bytecode definitions. It makes a lot more sense for them to live in an explicit documentation file in the x86/ directory. Signed-off-by: H. Peter Anvin --- asm/assemble.c | 137 -------------------------------------------- disasm/disasm.c | 3 + x86/bytecode.txt | 145 +++++++++++++++++++++++++++++++++++++++++++++++ x86/insns.pl | 4 ++ 4 files changed, 152 insertions(+), 137 deletions(-) create mode 100644 x86/bytecode.txt diff --git a/asm/assemble.c b/asm/assemble.c index fd768089..72aaf1d4 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -34,143 +34,6 @@ /* * assemble.c code generation for the Netwide Assembler * - * Bytecode specification - * ---------------------- - * - * - * Codes Mnemonic Explanation - * - * \0 terminates the code. (Unless it's a literal of course.) 
- * \1..\4 that many literal bytes follow in the code stream - * \5 add 4 to the primary operand number (b, low octdigit) - * \6 add 4 to the secondary operand number (a, middle octdigit) - * \7 add 4 to both the primary and the secondary operand number - * \10..\13 a literal byte follows in the code stream, to be added - * to the register value of operand 0..3 - * \14..\17 the position of index register operand in MIB (BND insns) - * \20..\23 ib a byte immediate operand, from operand 0..3 - * \24..\27 ib,u a zero-extended byte immediate operand, from operand 0..3 - * \30..\33 iw a word immediate operand, from operand 0..3 - * \34..\37 iwd select between \3[0-3] and \4[0-3] depending on 16/32 bit - * assembly mode or the operand-size override on the operand - * \40..\43 id a long immediate operand, from operand 0..3 - * \44..\47 iwdq select between \3[0-3], \4[0-3] and \5[4-7] - * depending on the address size of the instruction. - * \50..\53 rel8 a byte relative operand, from operand 0..3 - * \54..\57 iq a qword immediate operand, from operand 0..3 - * \60..\63 rel16 a word relative operand, from operand 0..3 - * \64..\67 rel select between \6[0-3] and \7[0-3] depending on 16/32 bit - * assembly mode or the operand-size override on the operand - * \70..\73 rel32 a long relative operand, from operand 0..3 - * \74..\77 seg a word constant, from the _segment_ part of operand 0..3 - * \1ab /r a ModRM, calculated on EA in operand a, with the reg - * field the register value of operand b. - * \171\mab /mrb (e.g /3r0) a ModRM, with the reg field taken from operand a, and the m - * and b fields set to the specified values. - * \172\ab /is4 the register number from operand a in bits 7..4, with - * the 4-bit immediate from operand b in bits 3..0. - * \173\xab the register number from operand a in bits 7..4, with - * the value b in bits 3..0. - * \174..\177 the register number from operand 0..3 in bits 7..4, and - * an arbitrary value in bits 3..0 (assembled as zero.) 
- * \2ab /b a ModRM, calculated on EA in operand a, with the reg - * field equal to digit b. - * \240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the - * V field taken from operand 0..3. - * \250 this instruction uses EVEX rather than REX or VEX/XOP, with the - * V field set to 1111b. - * - * EVEX prefixes are followed by the sequence: - * \cm\wlp\tup where cm is: - * cc 00m mmm - * c = 2 for EVEX and mmmm is the M field (EVEX.P0[3:0]) - * and wlp is: - * 00 wwl lpp - * [l0] ll = 0 (.128, .lz) - * [l1] ll = 1 (.256) - * [l2] ll = 2 (.512) - * [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0) - * - * [w0] ww = 0 for W = 0 - * [w1] ww = 1 for W = 1 - * [wig] ww = 2 for W don't care (always assembled as 0) - * [ww] ww = 3 for W used as REX.W - * - * [p0] pp = 0 for no prefix - * [60] pp = 1 for legacy prefix 60 - * [f3] pp = 2 - * [f2] pp = 3 - * - * tup is tuple type for Disp8*N from %tuple_codes in insns.pl - * (compressed displacement encoding) - * - * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits. - * \260..\263 this instruction uses VEX/XOP rather than REX, with the - * V field taken from operand 0..3. - * \270 this instruction uses VEX/XOP rather than REX, with the - * V field set to 1111b. - * VEX/XOP prefixes are followed by the sequence: - * \tmm\wlp where mm is the M field; and wlp is: - * 00 wwl lpp - * [l0] ll = 0 for L = 0 (.128, .lz) - * [l1] ll = 1 for L = 1 (.256) - * [lig] ll = 2 for L don't care (always assembled as 0) - * - * [w0] ww = 0 for W = 0 - * [w1 ] ww = 1 for W = 1 - * [wig] ww = 2 for W don't care (always assembled as 0) - * [ww] ww = 3 for W used as REX.W - * - * t = 0 for VEX (C4/C5), t = 1 for XOP (8F). 
- * - * \271 hlexr instruction takes XRELEASE (F3) with or without lock - * \272 hlenl instruction takes XACQUIRE/XRELEASE with or without lock - * \273 hle instruction takes XACQUIRE/XRELEASE with lock only - * \274..\277 ib,s a byte immediate operand, from operand 0..3, sign-extended - * to the operand size (if o16/o32/o64 present) or the bit size - * \310 a16 indicates fixed 16-bit address size, i.e. optional 0x67. - * \311 a32 indicates fixed 32-bit address size, i.e. optional 0x67. - * \312 adf (disassembler only) invalid with non-default address size. - * \313 a64 indicates fixed 64-bit address size, 0x67 invalid. - * \314 norexb (disassembler only) invalid with REX.B - * \315 norexx (disassembler only) invalid with REX.X - * \316 norexr (disassembler only) invalid with REX.R - * \317 norexw (disassembler only) invalid with REX.W - * \320 o16 indicates fixed 16-bit operand size, i.e. optional 0x66. - * \321 o32 indicates fixed 32-bit operand size, i.e. optional 0x66. - * \322 odf indicates that this instruction is only valid when the - * operand size is the default (instruction to disassembler, - * generates no code in the assembler) - * \323 o64nw indicates fixed 64-bit operand size, REX on extensions only. - * \324 o64 indicates 64-bit operand size requiring REX prefix. - * \325 nohi instruction which always uses spl/bpl/sil/dil - * \326 nof3 instruction not valid with 0xF3 REP prefix. Hint for - disassembler only; for SSE instructions. - * \331 norep instruction not valid with REP prefix. Hint for - * disassembler only; for SSE instructions. - * \332 f2i REP prefix (0xF2 byte) used as opcode extension. - * \333 f3i REP prefix (0xF3 byte) used as opcode extension. - * \334 rex.l LOCK prefix used as REX.R (used in non-64-bit mode) - * \335 repe disassemble a rep (0xF3 byte) prefix as repe not rep. - * \336 mustrep force a REP(E) prefix (0xF3) even if not specified. - * \337 mustrepne force a REPNE prefix (0xF2) even if not specified. 
- * \336-\337 are still listed as prefixes in the disassembler. - * \340 resb reserve bytes of uninitialized storage. - * Operand 0 had better be a segmentless constant. - * \341 wait this instruction needs a WAIT "prefix" - * \360 np no SSE prefix (== \364\331) - * \361 66 SSE prefix (== \366\331) - * \364 !osp operand-size prefix (0x66) not permitted - * \365 !asp address-size prefix (0x67) not permitted - * \366 operand-size prefix (0x66) used as opcode extension - * \367 address-size prefix (0x67) used as opcode extension - * \370,\371 jcc8 match only if operand 0 meets byte jump criteria. - * jmp8 370 is used for Jcc, 371 is used for JMP. - * \373 jlen assemble 0x03 if bits==16, 0x05 if bits==32; - * used for conditional jump over longer jump - * \374 vsibx|vm32x|vm64x this instruction takes an XMM VSIB memory EA - * \375 vsiby|vm32y|vm64y this instruction takes an YMM VSIB memory EA - * \376 vsibz|vm32z|vm64z this instruction takes an ZMM VSIB memory EA */ #include "compiler.h" diff --git a/disasm/disasm.c b/disasm/disasm.c index ac2dc97f..6273f87a 100644 --- a/disasm/disasm.c +++ b/disasm/disasm.c @@ -33,6 +33,9 @@ /* * disasm.c where all the _work_ gets done in the Netwide Disassembler + * + * See x86/bytecode.txt for the definition of the instruction encoding + * byte codes. */ #include "compiler.h" diff --git a/x86/bytecode.txt b/x86/bytecode.txt new file mode 100644 index 00000000..b3b905a1 --- /dev/null +++ b/x86/bytecode.txt @@ -0,0 +1,145 @@ +Bytecode specification +---------------------- + +These are the bytecodes generated by x86/insns.pl into x86/insnsb.c +and consumed by asm/assemble.c and disasm/disasm.c. + +Values prefixed with \ are in octal, values prefixed with \x are in +hexadecimal. + +The mnemonics are the ones used in x86/insns.dat, where applicable. + + +Codes Mnemonic Explanation + +\0 terminates the code. (Unless it's a literal of course.) 
+\1..\4 that many literal bytes follow in the code stream +\5 add 4 to the primary operand number (b, low octdigit) +\6 add 4 to the secondary operand number (a, middle octdigit) +\7 add 4 to both the primary and the secondary operand number +\10..\13 a literal byte follows in the code stream, to be added + to the register value of operand 0..3 +\14..\17 the position of index register operand in MIB (BND insns) +\20..\23 ib a byte immediate operand, from operand 0..3 +\24..\27 ib,u a zero-extended byte immediate operand, from operand 0..3 +\30..\33 iw a word immediate operand, from operand 0..3 +\34..\37 iwd select between \3[0-3] and \4[0-3] depending on 16/32 bit + assembly mode or the operand-size override on the operand +\40..\43 id a long immediate operand, from operand 0..3 +\44..\47 iwdq select between \3[0-3], \4[0-3] and \5[4-7] + depending on the address size of the instruction. +\50..\53 rel8 a byte relative operand, from operand 0..3 +\54..\57 iq a qword immediate operand, from operand 0..3 +\60..\63 rel16 a word relative operand, from operand 0..3 +\64..\67 rel select between \6[0-3] and \7[0-3] depending on 16/32 bit + assembly mode or the operand-size override on the operand +\70..\73 rel32 a long relative operand, from operand 0..3 +\74..\77 seg a word constant, from the _segment_ part of operand 0..3 +\1ab /r a ModRM, calculated on EA in operand a, with the reg + field the register value of operand b. +\171\mab /mrb (e.g /3r0) a ModRM, with the reg field taken from operand a, and the m + and b fields set to the specified values. +\172\ab /is4 the register number from operand a in bits 7..4, with + the 4-bit immediate from operand b in bits 3..0. +\173\xab the register number from operand a in bits 7..4, with + the value b in bits 3..0. +\174..\177 the register number from operand 0..3 in bits 7..4, and + an arbitrary value in bits 3..0 (assembled as zero.) +\2ab /b a ModRM, calculated on EA in operand a, with the reg + field equal to digit b. 
+\240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the + V field taken from operand 0..3. +\250 this instruction uses EVEX rather than REX or VEX/XOP, with the + V field set to 1111b. + +EVEX prefixes are followed by the sequence: +\cm\wlp\tup where cm is: + cc 00m mmm + c = 2 for EVEX and mmmm is the M field (EVEX.P0[3:0]) + and wlp is: + 00 wwl lpp + [l0] ll = 0 (.128, .lz) + [l1] ll = 1 (.256) + [l2] ll = 2 (.512) + [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0) + + [w0] ww = 0 for W = 0 + [w1] ww = 1 for W = 1 + [wig] ww = 2 for W don't care (always assembled as 0) + [ww] ww = 3 for W used as REX.W + + [p0] pp = 0 for no prefix + [66] pp = 1 for legacy prefix 66 + [f3] pp = 2 for legacy prefix F3 + [f2] pp = 3 for legacy prefix F2 + + tup is tuple type for Disp8*N from %tuple_codes in insns.pl + (compressed displacement encoding) + +\254..\257 id,s a signed 32-bit operand to be extended to 64 bits. +\260..\263 this instruction uses VEX/XOP rather than REX, with the + V field taken from operand 0..3. +\270 this instruction uses VEX/XOP rather than REX, with the + V field set to 1111b. +VEX/XOP prefixes are followed by the sequence: +\tmm\wlp where mm is the M field; and wlp is: + 00 wwl lpp + [l0] ll = 0 for L = 0 (.128, .lz) + [l1] ll = 1 for L = 1 (.256) + [lig] ll = 2 for L don't care (always assembled as 0) + + [w0] ww = 0 for W = 0 + [w1] ww = 1 for W = 1 + [wig] ww = 2 for W don't care (always assembled as 0) + [ww] ww = 3 for W used as REX.W + +t = 0 for VEX (C4/C5), t = 1 for XOP (8F). + +\271 hlexr instruction takes XRELEASE (F3) with or without lock +\272 hlenl instruction takes XACQUIRE/XRELEASE with or without lock +\273 hle instruction takes XACQUIRE/XRELEASE with lock only +\274..\277 ib,s a byte immediate operand, from operand 0..3, sign-extended + to the operand size (if o16/o32/o64 present) or the bit size +\310 a16 indicates fixed 16-bit address size, i.e. optional 0x67. +\311 a32 indicates fixed 32-bit address size, i.e. optional 0x67. 
+\312 adf (disassembler only) invalid with non-default address size. +\313 a64 indicates fixed 64-bit address size, 0x67 invalid. +\314 norexb (disassembler only) invalid with REX.B +\315 norexx (disassembler only) invalid with REX.X +\316 norexr (disassembler only) invalid with REX.R +\317 norexw (disassembler only) invalid with REX.W +\320 o16 indicates fixed 16-bit operand size, i.e. optional 0x66. +\321 o32 indicates fixed 32-bit operand size, i.e. optional 0x66. +\322 odf indicates that this instruction is only valid when the + operand size is the default (instruction to disassembler, + generates no code in the assembler) +\323 o64nw indicates fixed 64-bit operand size, REX on extensions only. +\324 o64 indicates 64-bit operand size requiring REX prefix. +\325 nohi instruction which always uses spl/bpl/sil/dil +\326 nof3 instruction not valid with 0xF3 REP prefix. Hint for + disassembler only; for SSE instructions. +\331 norep instruction not valid with REP prefix. Hint for + disassembler only; for SSE instructions. +\332 f2i REP prefix (0xF2 byte) used as opcode extension. +\333 f3i REP prefix (0xF3 byte) used as opcode extension. +\334 rex.l LOCK prefix used as REX.R (used in non-64-bit mode) +\335 repe disassemble a rep (0xF3 byte) prefix as repe not rep. +\336 mustrep force a REP(E) prefix (0xF3) even if not specified. +\337 mustrepne force a REPNE prefix (0xF2) even if not specified. + \336-\337 are still listed as prefixes in the disassembler. +\340 resb reserve bytes of uninitialized storage. + Operand 0 had better be a segmentless constant. +\341 wait this instruction needs a WAIT "prefix" +\360 np no SSE prefix (== \364\331) +\361 66 SSE prefix (== \366\331) +\364 !osp operand-size prefix (0x66) not permitted +\365 !asp address-size prefix (0x67) not permitted +\366 operand-size prefix (0x66) used as opcode extension +\367 address-size prefix (0x67) used as opcode extension +\370,\371 jcc8 match only if operand 0 meets byte jump criteria. 
+ jmp8 \370 is used for Jcc, \371 is used for JMP. +\373 jlen assemble 0x03 if bits==16, 0x05 if bits==32; + used for conditional jump over longer jump +\374 vsibx|vm32x|vm64x this instruction takes an XMM VSIB memory EA +\375 vsiby|vm32y|vm64y this instruction takes a YMM VSIB memory EA +\376 vsibz|vm32z|vm64z this instruction takes a ZMM VSIB memory EA diff --git a/x86/insns.pl b/x86/insns.pl index 16634285..5c334c66 100755 --- a/x86/insns.pl +++ b/x86/insns.pl @@ -36,6 +36,10 @@ # insns.pl # # Parse insns.dat and produce generated source code files +# +# See x86/bytecode.txt for the definition of the byte code +# output to x86/insnsb.c. +# require 'x86/insns-iflags.ph';