mirror of
https://github.com/netwide-assembler/nasm.git
synced 2025-03-31 18:20:22 +08:00
Perfect hash generator, as a perl script
Requires the CPAN Graph module.
This commit is contained in:
parent
3894519675
commit
8781c6a5f3
303
phash.pl
Executable file
303
phash.pl
Executable file
@ -0,0 +1,303 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
# Perfect Minimal Hash Generator written in Perl, which produces
|
||||
# C output.
|
||||
#
|
||||
# Requires the CPAN Graph module (tested against 0.83)
|
||||
|
||||
use Graph::Undirected;
|
||||
|
||||
|
||||
# Produce the same values every time, please...
|
||||
srand(0);
|
||||
|
||||
#
|
||||
# 32-bit rotate
|
||||
#
|
||||
sub rot($$) {
|
||||
my($v,$s) = @_;
|
||||
|
||||
return (($v << $s)+($v >> (32-$s))) & 0xffffffff;
|
||||
}
|
||||
|
||||
#
|
||||
# Compute the prehash for a key
|
||||
#
|
||||
# prehash(key, sv, N)
|
||||
#
|
||||
sub prehash($$$) {
|
||||
my($key, $n, $sv) = @_;
|
||||
my $c;
|
||||
my $k1 = 0, $k2 = 0;
|
||||
|
||||
foreach $c (unpack("C*", $key)) {
|
||||
$k1 = (rot($k1,$sv+2) + rot($k1, 5) + $c) & 0xffffffff;
|
||||
$k2 = (rot($k2,$sv) + rot($k2, 7) + $c) & 0xffffffff;
|
||||
}
|
||||
|
||||
return ($k1 % $n, $k2 % $n);
|
||||
}
|
||||
|
||||
#
|
||||
# Shuffle a list.
|
||||
#
|
||||
sub shuffle(@) {
|
||||
my(@l) = @_;
|
||||
my($i, $j);
|
||||
my $tmp;
|
||||
|
||||
for ($i = scalar(@l)-1; $i > 0; $i--) {
|
||||
$j = int(rand($i));
|
||||
|
||||
$tmp = $l[$j];
|
||||
$l[$j] = $l[$i];
|
||||
$l[$i] = $tmp;
|
||||
}
|
||||
|
||||
return @l;
|
||||
}
|
||||
|
||||
#
|
||||
# Pick a set of F-functions of length N.
|
||||
#
|
||||
# ffunc(N)
|
||||
#
|
||||
sub ffunc($) {
|
||||
my($n) = @_;
|
||||
|
||||
return shuffle(0..$n-1);
|
||||
}
|
||||
|
||||
#
|
||||
# Walk the assignment graph
|
||||
#
|
||||
sub walk_graph($$$) {
|
||||
my($gr,$n,$v) = @_;
|
||||
my $nx;
|
||||
|
||||
# print STDERR "Vertex $n value $v\n";
|
||||
$gr->set_vertex_attribute($n,"val",$v);
|
||||
|
||||
foreach $nx ($gr->neighbors($n)) {
|
||||
die unless ($gr->has_edge_attribute($n, $nx, "hash"));
|
||||
my $e = $gr->get_edge_attribute($n, $nx, "hash");
|
||||
|
||||
# print STDERR "Edge $n=$nx value $e: ";
|
||||
|
||||
if ($gr->has_vertex_attribute($nx, "val")) {
|
||||
die if ($v+$gr->get_vertex_attribute($nx, "val") != $e);
|
||||
# print STDERR "ok\n";
|
||||
} else {
|
||||
walk_graph($gr, $nx, $e-$v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
# Generate the function assuming a given N.
|
||||
#
|
||||
# gen_hash_n(N, sv, \%data)
|
||||
#
|
||||
sub gen_hash_n($$$) {
|
||||
my($n, $sv, $href) = @_;
|
||||
my @keys = keys(%{$href});
|
||||
my $i, $sv, @f1, @f2, @g;
|
||||
my $gr;
|
||||
my $k, $v;
|
||||
|
||||
@f1 = ffunc($n);
|
||||
@f2 = ffunc($n);
|
||||
|
||||
print STDERR "*** New graph ***\n";
|
||||
|
||||
$gr = Graph::Undirected->new;
|
||||
for ($i = 0; $i < $n; $i++) {
|
||||
$gr->add_vertex($i);
|
||||
}
|
||||
|
||||
foreach $k (@keys) {
|
||||
my ($p1, $p2) = prehash($k, $n, $sv);
|
||||
my $pf1 = $f1[$p1];
|
||||
my $pf2 = $f2[$p2];
|
||||
my $e = ${$href}{$k};
|
||||
|
||||
if ($gr->has_edge($pf1, $pf2)) {
|
||||
my $xkey = $gr->get_edge_attribute($pf1, $pf2, "key");
|
||||
my ($xp1, $xp2) = prehash($xkey, $n, $sv);
|
||||
print STDERR "Collision: $pf1=$pf2 $k ($p1,$p2) with ";
|
||||
print STDERR "$xkey ($xp1,$xp2)\n";
|
||||
return;
|
||||
}
|
||||
|
||||
# print STDERR "Edge $pf1=$pf2 value $e from $k ($p1,$p2)\n";
|
||||
|
||||
$gr->add_edge($pf1, $pf2);
|
||||
$gr->set_edge_attribute($pf1, $pf2, "hash", $e);
|
||||
$gr->set_edge_attribute($pf1, $pf2, "key", $k);
|
||||
}
|
||||
|
||||
# At this point, we're good if the graph is acyclic.
|
||||
if ($gr->is_cyclic) {
|
||||
print STDERR "Graph is cyclic\n";
|
||||
return;
|
||||
}
|
||||
|
||||
# print STDERR "Graph:\n$gr\n";
|
||||
|
||||
# Now we need to assign values to each vertex, so that for each
|
||||
# edge, the sum of the values for the two vertices give the value
|
||||
# for the edge (which is our hash index.) Since the graph is
|
||||
# acyclic, this is always doable.
|
||||
for ($i = 0; $i < $n; $i++) {
|
||||
if (!$gr->has_vertex_attribute($i, "val")) {
|
||||
walk_graph($gr,$i,0); # First vertex in a cluster
|
||||
}
|
||||
push(@g, $gr->get_vertex_attribute($i, "val"));
|
||||
}
|
||||
|
||||
# for ($i = 0; $i < $n; $i++) {
|
||||
# print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
|
||||
# }
|
||||
|
||||
print STDERR "Done: n = $n, sv = $sv\n";
|
||||
|
||||
return ($n, $sv, \@f1, \@f2, \@g);
|
||||
}
|
||||
|
||||
|
||||
#
|
||||
# Driver for generating the function
|
||||
#
|
||||
# gen_perfect_hash(\%data)
|
||||
#
|
||||
sub gen_perfect_hash($) {
|
||||
my($href) = @_;
|
||||
my @keys = keys(%{$href});
|
||||
my @hashinfo;
|
||||
my $n, $i, $j, $sv, $maxj;
|
||||
|
||||
# Minimal power of 2 value for N
|
||||
$n = 1;
|
||||
while ($n < scalar(@keys)) {
|
||||
$n <<= 1;
|
||||
}
|
||||
|
||||
$maxj = 256; # Number of times to try
|
||||
|
||||
for ($i = 0; $i < 4; $i++) {
|
||||
print STDERR "Trying n = $n...\n";
|
||||
for ($j = 0; $j < $maxj; $j++) {
|
||||
for ($sv = 0; $sv < 32; $sv++) {
|
||||
@hashinfo = gen_hash_n($n, $sv, $href);
|
||||
return @hashinfo if (defined(@hashinfo));
|
||||
}
|
||||
}
|
||||
$n <<= 1;
|
||||
$maxj >>= 1;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#
|
||||
# Read input file
|
||||
#
|
||||
sub read_input() {
|
||||
my $key,$val;
|
||||
my %out;
|
||||
my $x = 0;
|
||||
|
||||
while (defined($l = <STDIN>)) {
|
||||
chomp $l;
|
||||
$l =~ s/\s*(\#.*|)$//;
|
||||
|
||||
next if ($l eq '');
|
||||
|
||||
if ($l =~ /^([^=]+)\=([^=]+)$/) {
|
||||
$out{$1} = $2;
|
||||
$x = $2;
|
||||
} else {
|
||||
$out{$l} = $x;
|
||||
}
|
||||
$x++;
|
||||
}
|
||||
|
||||
return %out;
|
||||
}
|
||||
|
||||
#
|
||||
# Verify that the hash table is actually correct...
|
||||
#
|
||||
sub verify_hash_table($$)
|
||||
{
|
||||
my ($href, $hashinfo) = @_;
|
||||
my ($n, $sv, $f1, $f2, $g) = @{$hashinfo};
|
||||
my $k;
|
||||
my $err = 0;
|
||||
|
||||
print STDERR "Verify: n = $n, sv = $sv\n";
|
||||
|
||||
foreach $k (keys(%$href)) {
|
||||
my ($p1, $p2) = prehash($k, $n, $sv);
|
||||
my $pf1 = ${$f1}[$p1];
|
||||
my $pf2 = ${$f2}[$p2];
|
||||
my $g1 = ${$g}[$pf1];
|
||||
my $g2 = ${$g}[$pf2];
|
||||
|
||||
if ($g1+$g2 != ${$href}{$k}) {
|
||||
printf STDERR "%s(%d,%d): %d=%d, %d+%d = %d != %d\n",
|
||||
$k, $p1, $p2, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
|
||||
$err = 1;
|
||||
} else {
|
||||
printf STDERR "%s: %d+%d = %d ok\n",
|
||||
$k, $g1, $g2, $g1+$g2;
|
||||
}
|
||||
}
|
||||
|
||||
die "$0: hash validation error\n" if ($err);
|
||||
}
|
||||
|
||||
#
|
||||
# Main program
|
||||
#
|
||||
sub main() {
|
||||
my $n;
|
||||
my %data;
|
||||
my @hashinfo;
|
||||
my $x, $i;
|
||||
|
||||
%data = read_input();
|
||||
@hashinfo = gen_perfect_hash(\%data);
|
||||
|
||||
if (!defined(@hashinfo)) {
|
||||
die "$0: no hash found\n";
|
||||
}
|
||||
|
||||
verify_hash_table(\%data, \@hashinfo);
|
||||
|
||||
($n, $sv, $f1, $f2, $g) = @hashinfo;
|
||||
|
||||
print "static int HASHNAME_fg1[$n] =\n";
|
||||
print "{\n";
|
||||
for ($i = 0; $i < $n; $i++) {
|
||||
print "\t", ${$g}[${$f1}[$i]], "\n";
|
||||
}
|
||||
print "};\n\n";
|
||||
|
||||
print "static int HASHNAME_fg2[$n] =\n";
|
||||
print "{\n";
|
||||
for ($i = 0; $i < $n; $i++) {
|
||||
print "\t", ${$g}[${$f2}[$i]], "\n";
|
||||
}
|
||||
print "};\n\n";
|
||||
|
||||
print "struct p_hash HASHNAME =\n";
|
||||
print "{\n";
|
||||
print "\t$n\n";
|
||||
print "\t$sv\n";
|
||||
print "\tHASHNAME_fg1,\n";
|
||||
print "\tHASHNAME_fg2,\n";
|
||||
print "};\n";
|
||||
}
|
||||
|
||||
main();
|
Loading…
x
Reference in New Issue
Block a user