Speed up the disassembler by allowing prefixed instruction tables

Modify the disassembler so that we can have separate instruction
tables for prefixed instructions.  As it was, all instructions which
started with 0F were linearly searched, and that is by now more than
half the instruction set.
This commit is contained in:
H. Peter Anvin 2007-09-18 15:08:20 -07:00
parent 0edc309505
commit 19e2010536
3 changed files with 139 additions and 36 deletions

View File

@ -671,9 +671,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
int32_t offset, int autosync, uint32_t prefer)
{
const struct itemplate * const *p, * const *best_p;
const struct disasm_index *ix;
uint8_t *dp;
int length, best_length = 0;
char *segover;
int i, slen, colon;
int i, slen, colon, n;
uint8_t *origdata;
int works;
insn tmp_ins, ins;
@ -728,7 +730,14 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
best_p = NULL;
best_pref = INT_MAX;
for (p = itable[*data]; *p; p++) {
dp = data;
ix = itable + *dp++;
while (ix->n == (size_t)-1) {
ix = (const struct disasm_index *)ix->p + *dp++;
}
p = (const struct itemplate * const *)ix->p;
for (n = ix->n; n; n--, p++) {
if ((length = matches(*p, data, &prefix, segsize, &tmp_ins))) {
works = TRUE;
/*

11
insns.h
View File

@ -26,9 +26,18 @@ struct itemplate {
uint32_t flags; /* some flags */
};
/* Disassembler table structure */
/* If n == -1, then p points to another table of 256
struct disasm_index, otherwise p points to a list of n
struct itemplates to consider. */
struct disasm_index {
const void *p;
int n;
};
/* Tables for the assembler and disassembler, respectively */
extern const struct itemplate * const nasm_instructions[];
extern const struct itemplate * const * const itable[];
extern const struct disasm_index itable[256];
/*
* this define is used to signify the end of an itemplate

151
insns.pl
View File

@ -7,6 +7,10 @@
# redistributable under the licence given in the file "Licence"
# distributed in the NASM archive.
# Opcode prefixes which need their own opcode tables
# LONGER PREFIXES FIRST!
@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0FC2 0F);
print STDERR "Reading insns.dat...\n";
@args = ();
@ -26,6 +30,8 @@ foreach $arg ( @ARGV ) {
$fname = "insns.dat" unless $fname = $args[0];
open (F, $fname) || die "unable to open $fname";
%dinstables = ();
$line = 0;
$insns = 0;
while (<F>) {
@ -50,9 +56,11 @@ while (<F>) {
}
if ($formatted && !$nd) {
push @big, $formatted;
foreach $i (&startbyte($_[2])) {
$aname = sprintf "dd_%02X",$i;
push @$aname, $#big;
foreach $i (startseq($_[2])) {
if (!defined($dinstables{$i})) {
$dinstables{$i} = [];
}
push(@{$dinstables{$i}}, $#big);
}
}
}
@ -106,23 +114,38 @@ if ( !defined($output) || $output eq 'd' ) {
foreach $j (@big) {
printf D " /* %4d */ %s\n", $n++, $j;
}
print D " ITEMPLATE_END\n};\n\n";
for ($c=0; $c<256; $c++) {
$h = sprintf "%02X", $c;
print D "static const struct itemplate * const itable_${h}[] = {\n";
$aname = "dd_$h";
foreach $j (@$aname) {
print D "};\n";
foreach $h (sort(keys(%dinstables))) {
print D "\nstatic const struct itemplate * const itable_${h}[] = {\n";
foreach $j (@{$dinstables{$h}}) {
print D " instrux + $j,\n";
}
print D " NULL\n};\n\n";
}
print D "const struct itemplate * const * const itable[] = {\n";
for ($c=0; $c<256; $c++) {
printf D " itable_%02X,\n", $c;
print D "};\n";
}
foreach $h (@disasm_prefixes, '') {
$is_prefix{$h} = 1;
print D "\n";
print D "static " unless ($h eq '');
print D "const struct disasm_index ";
print D ($h eq '') ? 'itable' : "itable_$h";
print D "[256] = {\n";
for ($c = 0; $c < 256; $c++) {
$nn = sprintf("%s%02X", $h, $c);
if ($is_prefix{$nn}) {
die "$0: ambiguous decoding of $nn\n"
if (defined($dinstables{$nn}));
printf D " { itable_%s, -1 },\n", $nn;
} elsif (defined($dinstables{$nn})) {
printf D " { itable_%s, %u },\n",
$nn, scalar(@{$dinstables{$nn}});
} else {
printf D " { NULL, 0 },\n";
}
}
print D "};\n";
}
close D;
}
@ -240,6 +263,17 @@ sub format {
("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
}
sub hexlist($$$) {
my($prefix, $start, $n) = @_;
my $i;
my @l = ();
for ($i = 0; $i < $n; $i++) {
push(@l, sprintf("%s%02X", $prefix, $start+$i));
}
return @l;
}
# Here we determine the range of possible starting bytes for a given
# instruction. We need only consider the codes:
# \1 \2 \3 mean literal bytes, of course
@ -248,24 +282,75 @@ sub format {
# \170 means byte zero
# \330 means byte plus condition code
# \0 or \340 mean give up and return empty set
sub startbyte {
my ($codes) = @_;
sub startseq($) {
my ($codestr) = @_;
my $word, @range;
my @codes = ();
my $c = $codestr;
my $c0, $c1, $i;
my $prefix = '';
while (1) {
die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/;
$word = $1, $codes = $2;
return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/;
return (0x07, 0x17, 0x1F) if $word eq "\\4";
return (0xA1, 0xA9) if $word eq "\\5";
return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6";
return (0xA0, 0xA8) if $word eq "\\7";
$start=hex $1, $r=8, last if $word =~ /^\\1[0123]$/ && $codes =~/^\\x(..)/;
return (0) if $word eq "\\170";
$start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/;
return () if $word eq "\\0" || $word eq "\\340";
# Although these are C-syntax strings, by convention they should have
# only octal escapes (for directives) and hexadecimal escapes
# (for verbatim bytes)
while ($c ne '') {
if ($c =~ /^\\x([0-9a-f]+)(.*)$/i) {
push(@codes, hex $1);
$c = $2;
next;
} elsif ($c =~ /^\\([0-7]{1,3})(.*)$/) {
push(@codes, oct $1);
$c = $2;
next;
} else {
die "$0: unknown code format in \"$codestr\"\n";
}
}
@range = ();
push @range, $start++ while ($r-- > 0);
@range;
while ($c0 = shift(@codes)) {
$c1 = $codes[0];
if ($c0 == 01 || $c0 == 02 || $c0 == 03 || $c0 == 0170) {
# Fixed byte string
my $fbs = $prefix;
while (1) {
if ($c0 == 01 || $c0 == 02 || $c0 == 03) {
while ($c0--) {
$fbs .= sprintf("%02X", shift(@codes));
}
} elsif ($c0 == 0170) {
$fbs .= '00';
} else {
last;
}
$c0 = shift(@codes);
}
foreach $pfx (@disasm_prefixes) {
if ($fbs =~ /^$pfx(.*)$/) {
$prefix = $pfx;
$fbs = $1;
last;
}
}
if ($fbs ne '') {
return ($prefix.substr($fbs,0,2));
}
} elsif ($c0 == 04) {
return ("07", "17", "1F");
} elsif ($c0 == 05) {
return ("A1", "A9");
} elsif ($c0 == 06) {
return ("06", "0E", "16", "1E");
} elsif ($c0 == 07) {
return ("A0", "A8");
} elsif ($c0 >= 010 && $c0 <= 013) {
return hexlist($prefix, $c1, 8);
} elsif ($c0 == 0330) {
return hexlist($prefix, $c1, 16);
} elsif ($c0 == 0 || $c0 == 0340) {
return ();
}
}
return ();
}