mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-01-24 16:35:40 +08:00
5253f58c36
Add a generic facility for generating perfect string hashes, where all that is needed is an enum and a string table. The existing mechanism using a custom Perl script wrapped around a module continues to be available for any use case where this particular approach isn't sophisticated enough. Much of this patch comes from renaming "enum directives" to "enum directive" as a result of the string hash generator expecting a set of uniform naming conventions. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
363 lines
10 KiB
Perl
Executable File
363 lines
10 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
## --------------------------------------------------------------------------
|
|
##
|
|
## Copyright 1996-2017 The NASM Authors - All Rights Reserved
|
|
## See the file AUTHORS included with the NASM distribution for
|
|
## the specific copyright holders.
|
|
##
|
|
## Redistribution and use in source and binary forms, with or without
|
|
## modification, are permitted provided that the following
|
|
## conditions are met:
|
|
##
|
|
## * Redistributions of source code must retain the above copyright
|
|
## notice, this list of conditions and the following disclaimer.
|
|
## * Redistributions in binary form must reproduce the above
|
|
## copyright notice, this list of conditions and the following
|
|
## disclaimer in the documentation and/or other materials provided
|
|
## with the distribution.
|
|
##
|
|
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
|
## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
|
## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
##
|
|
## --------------------------------------------------------------------------
|
|
|
|
#
|
|
# Generate a perfect hash for general case-insensitive string-to-enum
|
|
# lookup. This generates an enum and the corresponding hash, but
|
|
# relies on a common function to parse the hash.
|
|
#
|
|
# Usage:
|
|
# perfhash.pl h foohash.dat foohash.h (to generate C header)
|
|
# perfhash.pl c foohash.dat foohash.c (to generate C source)
|
|
#
|
|
|
|
use strict;
|
|
|
|
require 'phash.ph';
|
|
|
|
sub basename($) {
|
|
my($s) = @_;
|
|
$s =~ s/^.*[^-[:alnum:]_\.]//; # Remove path component as best we can
|
|
return $s;
|
|
}
|
|
|
|
sub intval($) {
|
|
my($s) = @_;
|
|
|
|
if ($s =~ /^0/) {
|
|
return oct($s); # Handles octal or hexadecimal
|
|
} elsif ($s =~ /^\-(0.*)$/) {
|
|
return -oct($1);
|
|
} else {
|
|
return $s + 0; # Forcibly convert to number
|
|
}
|
|
}
|
|
|
|
my($output, $infile, $outfile) = @ARGV;
|
|
my $me = basename($0);
|
|
|
|
# The following special things are allowed in the input file:
|
|
# #<space> or ; begins a comment
|
|
# #include filename
|
|
# #name str
|
|
# The name of the hash
|
|
# #prefix str
|
|
# Defines the prefix before enum
|
|
# #guard str
|
|
# Defines the header guard string
|
|
# #special str [= value]
|
|
# Generate an enum value without a corresponding string; not capitalized.
|
|
# #header str
|
|
# Indicates the name of the .h file to include from the .c file
|
|
# #errval str
|
|
# Define the value to be returned if a string is not found
|
|
# (defaults to -1). This can be any constant C expression,
|
|
# including one of the enum values.
|
|
#
|
|
# Regular lines are just str [= value]
|
|
#
|
|
# Enumeration is generated in the order listed in the file, just as in C
|
|
# specifying a value causes the values to increase by 1 from that point on
|
|
# unless specified.
|
|
|
|
my $name;
|
|
my $prefix;
|
|
my $guard;
|
|
my $hfile;
|
|
|
|
my %strings = ();
|
|
my %specials = ();
|
|
my $next_value = 0;
|
|
my $errval = '-1';
|
|
|
|
my @incstack = ();
|
|
my @filenames = ($infile);
|
|
my @linenums = (0);
|
|
my $dd = undef;
|
|
my $err = 0;
|
|
|
|
while (scalar(@filenames)) {
|
|
if (!defined($dd)) {
|
|
open($dd, '<', $filenames[-1])
|
|
or die "$0: cannot open: $filenames[-1]: $!\n";
|
|
}
|
|
|
|
my $line = <$dd>;
|
|
if (!defined($line)) {
|
|
close($dd);
|
|
$dd = pop @incstack;
|
|
pop @filenames;
|
|
pop @linenums;
|
|
next;
|
|
}
|
|
|
|
$linenums[-1]++;
|
|
|
|
chomp $line;
|
|
$line =~ s/\s*(|\;.*|\#\s.*|\#)$//; # Remove comments and trailing space
|
|
$line =~ s/^\s+//; # Remove leading space
|
|
if ($line eq '') {
|
|
# Do nothing
|
|
} elsif ($line =~ /^\#name\s+([[:alnum:]_]+)$/) {
|
|
$name = $1;
|
|
} elsif ($line =~ /^\#prefix\s+([[:alnum:]_]+)$/) {
|
|
$prefix = $1;
|
|
} elsif ($line =~ /^\#guard\s+([[:alnum:]_]+)$/) {
|
|
$guard = $1;
|
|
} elsif ($line =~ /^\#errval\s+(\S.*)$/) {
|
|
$errval = $1;
|
|
} elsif ($line =~ /^\#header\s+(\"(.+)\"|\S+)$/) {
|
|
$hfile = ($2 ne '') ? $2 : $1;
|
|
} elsif ($line =~ /^\#include\s+(\"(.+)\"|\S+)$/) {
|
|
push @incstack, $dd;
|
|
push @filenames, (($2 ne '') ? $2 : $1);
|
|
push @linenums, 0;
|
|
undef $dd; # Open a new file
|
|
} elsif ($line =~ /^(|\#special\s+)(\S+)\s*(|=\s*(\-?(0[Xx][[:xdigit:]]+|0[0-7]*|[0-9]+)))$/) {
|
|
$next_value = intval($4) if ($4 ne '');
|
|
if ($1 eq '') {
|
|
$strings{$2} = $next_value++;
|
|
} else {
|
|
$specials{$2} = $next_value++;
|
|
}
|
|
} else {
|
|
printf STDERR "%s:%d:%s syntax error: \"%s\"\n",
|
|
$filenames[-1], $linenums[-1],
|
|
(scalar(@incstack) == 1) ? '' : "(from $infile)", $line;
|
|
$err++;
|
|
}
|
|
}
|
|
|
|
exit 1 if ($err);
|
|
|
|
# Default name, prefix, and header guard name
|
|
if (!defined($name)) {
|
|
$name = basename($infile);
|
|
$name =~ s/(\..*)$//; # Strip extension, if any
|
|
}
|
|
if (!defined($prefix)) {
|
|
$prefix = "\U${name}\E_";
|
|
}
|
|
if (!defined($hfile)) {
|
|
$hfile = $outfile;
|
|
$hfile =~ s/\.c$/\.h/;
|
|
}
|
|
if (!defined($guard)) {
|
|
$guard = basename($hfile);
|
|
$guard =~ s/[^[:alnum:]_]/_/g;
|
|
$guard =~ s/__+/_/g;
|
|
$guard = "\U$guard";
|
|
}
|
|
|
|
# Verify input. We can't have more than one constant with the same
|
|
# enumeration value, nor the same enumeration string.
|
|
if (scalar(keys(%strings)) == 0) {
|
|
die "$0: $infile: no strings to hash!\n";
|
|
}
|
|
|
|
my %enums;
|
|
my %enumvals;
|
|
my %stringbyval;
|
|
my $max_enum;
|
|
my $tbllen = 0;
|
|
my $tbloffs;
|
|
foreach my $s (keys(%strings)) {
|
|
my $es = "${prefix}\U${s}";
|
|
$es =~ s/[^[:alnum:]_]/_/g;
|
|
$es =~ s/__+/_/g;
|
|
my $v = $strings{$s};
|
|
$stringbyval{$v} = $s;
|
|
if (defined($enums{$es})) {
|
|
printf STDERR "%s: string \"%s\" duplicates existing enum %s\n",
|
|
$infile, $s, $es;
|
|
$err++;
|
|
} else {
|
|
$enums{$es} = $v;
|
|
}
|
|
if (defined($enumvals{$v})) {
|
|
printf STDERR "%s: string \"%s\" duplicates existing enum constant %d\n", $v;
|
|
$err++;
|
|
} else {
|
|
$enumvals{$v} = $es;
|
|
}
|
|
$max_enum = $v if ($v > $max_enum || !defined($max_enum));
|
|
$tbloffs = $v if ($v < $tbloffs || !defined($tbloffs));
|
|
$tbllen = $v+1 if ($v >= $tbllen || !defined($tbllen));
|
|
}
|
|
foreach my $s (keys(%specials)) {
|
|
my $es = $prefix . $s; # No string mangling here
|
|
my $v = $specials{$s};
|
|
if (defined($enums{$es})) {
|
|
printf STDERR "%s: special \"%s\" duplicates existing enum %s\n",
|
|
$infile, $s, $es;
|
|
$err++;
|
|
} else {
|
|
$enums{$es} = $v;
|
|
}
|
|
if (defined ($enumvals{$v})) {
|
|
printf STDERR "%s: special \"%s\" duplicates existing enum constant %d\n", $v;
|
|
$err++;
|
|
} else {
|
|
$enumvals{$v} = $es;
|
|
}
|
|
$max_enum = $v if ($v > $max_enum || !defined($max_enum));
|
|
}
|
|
|
|
$tbllen -= $tbloffs;
|
|
if ($tbllen > 65536) {
|
|
printf STDERR "%s: span of enumeration values too large\n";
|
|
$err++;
|
|
}
|
|
|
|
exit 1 if ($err);
|
|
|
|
open(F, '>', $outfile)
|
|
or die "$0: cannot create: ${outfile}: $!\n";
|
|
|
|
if ($output eq 'h') {
|
|
print F "/*\n";
|
|
print F " * This file is generated from $infile\n";
|
|
print F " * by $me; do not edit.\n";
|
|
print F " */\n";
|
|
print F "\n";
|
|
|
|
print F "#ifndef $guard\n";
|
|
print F "#define $guard 1\n\n";
|
|
print F "#include \"perfhash.h\"\n\n";
|
|
|
|
my $c = '{';
|
|
$next_value = 0;
|
|
print F "enum ${name} ";
|
|
foreach my $v (sort { $a <=> $b } keys(%enumvals)) {
|
|
my $s = $enumvals{$v};
|
|
print F "$c\n $s";
|
|
print F " = $v" if ($v != $next_value);
|
|
$next_value = $v + 1;
|
|
$c = ',';
|
|
}
|
|
print F "\n};\n\n";
|
|
print F "extern const struct perfect_hash ${name}_hash;\n";
|
|
printf F "extern const char * const %s_tbl[%d];\n", $name, $tbllen;
|
|
|
|
print F "\nstatic inline enum ${name} ${name}_find(const char *str)\n";
|
|
print F "{\n";
|
|
print F " return perfhash_find(&${name}_hash, str);\n";
|
|
print F "}\n";
|
|
|
|
print F "\nstatic inline const char * ${name}_name(enum ${name} x)\n";
|
|
print F "{\n";
|
|
printf F " size_t ix = (size_t)x - (%d);\n", $tbloffs;
|
|
printf F " if (ix >= %d)\n", $tbllen;
|
|
print F " return NULL;\n";
|
|
print F " return ${name}_tbl[ix];\n";
|
|
print F "}\n";
|
|
|
|
print F "\nstatic inline const char * ${name}_dname(enum ${name} x)\n";
|
|
print F "{\n";
|
|
print F " const char *y = ${name}_name(x);\n";
|
|
print F " return y ? y : invalid_enum_str(x);\n";
|
|
print F "}\n";
|
|
|
|
print F "\n#endif /* $guard */\n";
|
|
} elsif ($output eq 'c') {
|
|
# The strings we hash must all be lower case, even if the string
|
|
# table doesn't contain them that way.
|
|
|
|
my %lcstrings;
|
|
foreach my $s (keys(%strings)) {
|
|
my $ls = "\L$s";
|
|
if (defined($lcstrings{$ls})) {
|
|
printf STDERR "%s: strings \"%s\" and \"%s\" differ only in case\n",
|
|
$infile, $s, $strings{$lcstrings{$s}};
|
|
} else {
|
|
$lcstrings{$ls} = $strings{$s} - $tbloffs;
|
|
}
|
|
}
|
|
|
|
my @hashinfo = gen_perfect_hash(\%lcstrings);
|
|
if (!@hashinfo) {
|
|
die "$0: no hash found\n";
|
|
}
|
|
|
|
# Paranoia...
|
|
verify_hash_table(\%lcstrings, \@hashinfo);
|
|
|
|
my ($n, $sv, $g) = @hashinfo;
|
|
|
|
die if ($n & ($n-1));
|
|
|
|
print F "/*\n";
|
|
print F " * This file is generated from $infile\n";
|
|
print F " * by $me; do not edit.\n";
|
|
print F " */\n";
|
|
print F "\n";
|
|
|
|
print F "#include \"$hfile\"\n\n";
|
|
|
|
printf F "const char * const %s_tbl[%d] = ", $name, $tbllen;
|
|
my $c = '{';
|
|
for (my $i = $tbloffs; $i < $tbloffs+$tbllen; $i++) {
|
|
printf F "%s\n %s", $c,
|
|
defined($stringbyval{$i}) ? '"'.$stringbyval{$i}.'"' : 'NULL';
|
|
$c = ',';
|
|
}
|
|
print F "\n};\n\n";
|
|
|
|
print F "#define UNUSED (65536/3)\n\n";
|
|
|
|
printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
|
|
$c = '{';
|
|
for (my $i = 0; $i < $n; $i++) {
|
|
my $h = ${$g}[$i*2+0];
|
|
print F "$c\n ", defined($h) ? $h : 'UNUSED';
|
|
$c = ',';
|
|
}
|
|
for (my $i = 0; $i < $n; $i++) {
|
|
my $h = ${$g}[$i*2+1];
|
|
print F "$c\n ", defined($h) ? $h : 'UNUSED';
|
|
$c = ',';
|
|
}
|
|
print F "\n};\n\n";
|
|
|
|
print F "const struct perfect_hash ${name}_hash = {\n";
|
|
printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
|
|
printf F " UINT32_C(0x%x),\n", $n-1; # hashmask
|
|
printf F " UINT32_C(%u),\n", $tbllen; # tbllen
|
|
printf F " %d,\n", $tbloffs; # tbloffs
|
|
printf F " (%s),\n", $errval; # errval
|
|
printf F " ${name}_hashvals,\n"; # hashvals
|
|
printf F " ${name}_tbl\n"; # strings
|
|
print F "};\n";
|
|
}
|