mirror of
https://github.com/netwide-assembler/nasm.git
synced 2024-11-21 03:14:19 +08:00
5255fd1f36
Combining arithmetic (add) and bitwise (xor) mixing seems to give better results than either alone. With the new prehash function, we find a valid hash much more quickly.
244 lines
5.0 KiB
Perl
244 lines
5.0 KiB
Perl
# -*- perl -*-
|
|
#
|
|
# Perfect Minimal Hash Generator written in Perl, which produces
|
|
# C output.
|
|
#
|
|
# Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
|
|
#
|
|
|
|
use Graph::Undirected;
|
|
require 'random_sv_vectors.ph';
|
|
|
|
#
|
|
# Truncate to 32-bit integer
|
|
#
|
|
sub int32($) {
    # Reduce a number to an unsigned 32-bit value: discard any fractional
    # part first, then take the remainder modulo 2**32.
    my $whole = int($_[0]);
    my $modulus = 4294967296;

    return $whole % $modulus;
}
|
|
|
|
#
|
|
# 32-bit rotate
|
|
#
|
|
sub rot($$) {
    # Rotate the low 32 bits of the first argument left by the number of
    # bit positions given in the second argument.
    my ($value, $count) = @_;

    # Start from a clean 32-bit word so no stray high bits get shifted in.
    my $word = int32($value);

    # Bits pushed out of the top by the left shift are brought back in at
    # the bottom by the complementary right shift.
    my $upper = $word << $count;
    my $lower = $word >> (32 - $count);

    return int32($upper | $lower);
}
|
|
|
|
#
|
|
# Compute the prehash for a key
|
|
#
|
|
# prehash(key, N, sv)
|
|
#
|
|
sub prehash($$$) {
    # Compute the pair of graph vertices that a key maps to.
    #
    # Arguments:
    #   $key - the string to hash; it is processed byte by byte
    #   $n   - number of vertices per partition; masked with ($n-1), so
    #          it is expected to be a power of two
    #   $sv  - reference to an array whose first four elements are the
    #          rotate counts used for mixing
    #
    # Returns a two-element list ($k1, $k2), where $k1 is always even and
    # $k2 is always odd.
    my($key, $n, $sv) = @_;
    my($s0, $s1, $s2, $s3) = @{$sv};

    # Note: "my $k1 = 0, $k2 = 0;" would only make $k1 lexical and would
    # silently assign to the package global $k2, hence the list form.
    my($k1, $k2) = (0, 0);

    foreach my $c (unpack("C*", $key)) {
	# Both new values are computed from the *previous* pair.
	my($ko1, $ko2) = ($k1, $k2);
	$k1 = int32(rot($ko1,$s0)^int32(rot($ko2, $s1)+$c));
	$k2 = int32(rot($ko2,$s2)^int32(rot($ko1, $s3)+$c));
    }

    # Create a bipartite graph: the first hash selects an even-numbered
    # vertex and the second an odd-numbered one, so an edge can never
    # connect a vertex to itself.
    $k1 = (($k1 & ($n-1)) << 1) + 0;
    $k2 = (($k2 & ($n-1)) << 1) + 1;

    return ($k1, $k2);
}
|
|
|
|
#
|
|
# Walk the assignment graph
|
|
#
|
|
sub walk_graph($$$) {
    # Assign the value $v to vertex $n of graph $gr, then recursively
    # assign values to every vertex reachable from it so that, for each
    # edge, the two endpoint values add up to the edge's "hash" attribute.
    # Dies if an edge lacks a "hash" attribute or if an already-assigned
    # neighbor is inconsistent with the edge value.
    my ($gr, $n, $v) = @_;

    # print STDERR "Vertex $n value $v\n";
    $gr->set_vertex_attribute($n, "val", $v);

    for my $peer ($gr->neighbors($n)) {
	$gr->has_edge_attribute($n, $peer, "hash") or die;
	my $target = $gr->get_edge_attribute($n, $peer, "hash");

	# print STDERR "Edge $n=$peer value $target: ";

	unless ($gr->has_vertex_attribute($peer, "val")) {
	    # Neighbor not visited yet: its value is whatever is left
	    # of the edge value after subtracting ours.
	    walk_graph($gr, $peer, $target - $v);
	    next;
	}

	# Neighbor already has a value; it must agree with this edge.
	die if ($v + $gr->get_vertex_attribute($peer, "val") != $target);
	# print STDERR "ok\n";
    }
}
|
|
|
|
#
|
|
# Generate the function assuming a given N.
|
|
#
|
|
# gen_hash_n(N, sv, \%data)
|
|
#
|
|
sub gen_hash_n($$$) {
    # Try to build a perfect hash for the data in %$href using N = $n
    # and the mixing vector $sv.
    #
    # Arguments:
    #   $n    - table size parameter passed on to prehash(); the graph
    #           gets 2*$n vertices
    #   $sv   - array reference, passed on to prehash()
    #   $href - reference to a hash mapping each key to its desired value
    #
    # Returns the list ($n, $sv, \@g) on success, where $g[i] is the
    # value assigned to vertex i (undef for vertices with no edges).
    # Returns an empty list (bare return) if two keys map to the same
    # edge or if the resulting graph is cyclic.
    my($n, $sv, $href) = @_;
    my @keys = keys(%{$href});

    # This must be a fresh lexical on every call.  Writing
    # "my $i, $sv, @g;" only declares $i and leaves @g as a package
    # global, which would make successive successful calls append to,
    # and share, one and the same array.
    my @g;
    my $gsize = 2*$n;

    my $gr = Graph::Undirected->new;
    for (my $i = 0; $i < $gsize; $i++) {
	$gr->add_vertex($i);
    }

    foreach my $k (@keys) {
	my ($pf1, $pf2) = prehash($k, $n, $sv);
	my $e = ${$href}{$k};

	if ($gr->has_edge($pf1, $pf2)) {
	    # Two keys landed on the same vertex pair; report which
	    # key already occupies this edge and give up on this ($n, $sv).
	    my $xkey = $gr->get_edge_attribute($pf1, $pf2, "key");
	    my ($xp1, $xp2) = prehash($xkey, $n, $sv);
	    print STDERR "Collision: $pf1=$pf2 $k with ";
	    print STDERR "$xkey ($xp1,$xp2)\n";
	    return;
	}

	# print STDERR "Edge $pf1=$pf2 value $e from $k\n";

	$gr->add_edge($pf1, $pf2);
	$gr->set_edge_attribute($pf1, $pf2, "hash", $e);
	$gr->set_edge_attribute($pf1, $pf2, "key", $k);
    }

    # At this point, we're good if the graph is acyclic.
    if ($gr->is_cyclic) {
	print STDERR "Graph is cyclic\n";
	return;
    }

    print STDERR "Graph OK, computing vertices...\n";

    # Now we need to assign values to each vertex, so that for each
    # edge, the sum of the values for the two vertices gives the value
    # for the edge (which is our hash index.)  Since the graph is
    # acyclic, this is always doable.
    for (my $i = 0; $i < $gsize; $i++) {
	if ($gr->degree($i)) {
	    # This vertex has neighbors (is used)
	    if (!$gr->has_vertex_attribute($i, "val")) {
		walk_graph($gr,$i,0); # First vertex in a cluster
	    }
	    push(@g, $gr->get_vertex_attribute($i, "val"));
	} else {
	    # Unused vertex
	    push(@g, undef);
	}
    }

    # for (my $i = 0; $i < $n; $i++) {
    #	print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
    # }

    print STDERR "Done: n = $n, sv = [", join(',', @$sv), "]\n";

    return ($n, $sv, \@g);
}
|
|
|
|
#
|
|
# Driver for generating the function
|
|
#
|
|
# gen_perfect_hash(\%data)
|
|
#
|
|
sub gen_perfect_hash($) {
    # Driver: search for a working (N, sv) combination for the data in
    # %$href.
    #
    # Argument:
    #   $href - reference to a hash mapping each key to its desired value
    #
    # Returns whatever gen_hash_n() returned for the first successful
    # attempt, or an empty list (bare return) if every attempt failed.
    my($href) = @_;
    my @keys = keys(%{$href});
    my @hashinfo;

    # Parenthesised so that all five names are lexical;
    # "my $n, $i, ..." would only declare $n.
    my($n, $i, $j, $sv, $maxj);

    # Minimal power of 2 value for N with enough wiggle room.
    # The scaling constant must be larger than 0.5 in order for the
    # algorithm to ever terminate.
    my $room = scalar(@keys)*0.7;
    $n = 1;
    while ($n < $room) {
	$n <<= 1;
    }

    # Number of times to try...
    # (@random_sv_vectors is a package variable not defined in this
    # file; presumably it comes from random_sv_vectors.ph.)
    $maxj = scalar @random_sv_vectors;

    # Up to four sizes are tried; each round doubles N and halves the
    # number of vectors attempted.
    for ($i = 0; $i < 4; $i++) {
	print STDERR "Trying n = $n...\n";
	for ($j = 0; $j < $maxj; $j++) {
	    $sv = $random_sv_vectors[$j];
	    @hashinfo = gen_hash_n($n, $sv, $href);

	    # gen_hash_n() returns an empty list on failure, so a plain
	    # emptiness test is what is wanted.  defined(@array) is a
	    # fatal compile-time error on Perl 5.22 and later.
	    return @hashinfo if (@hashinfo);
	}
	$n <<= 1;
	$maxj >>= 1;
    }

    return;
}
|
|
|
|
#
|
|
# Read input file
|
|
#
|
|
sub read_input() {
    # Read key definitions from STDIN and return them as a hash
    # (flattened key/value list).
    #
    # Each line has trailing whitespace and any "#" comment removed;
    # lines that are empty after that are skipped.  A line of the form
    # "key=value" stores that value and restarts the running counter
    # from it; any other line stores the current counter under the whole
    # line as key.  The counter is incremented after every stored line
    # and starts at 0.
    my %out;
    my $x = 0;

    # The line buffer is a lexical here; it was previously an undeclared
    # package global.
    while (defined(my $line = <STDIN>)) {
	chomp $line;
	$line =~ s/\s*(\#.*|)$//;

	next if ($line eq '');

	if ($line =~ /^([^=]+)\=([^=]+)$/) {
	    $out{$1} = $2;
	    $x = $2;
	} else {
	    $out{$line} = $x;
	}
	$x++;
    }

    return %out;
}
|
|
|
|
#
|
|
# Verify that the hash table is actually correct...
|
|
#
|
|
sub verify_hash_table($$)
{
    # Check a generated table against the original data.
    #
    #   $href     - reference to the hash of key => expected value
    #   $hashinfo - reference to an array holding (N, sv, g-table ref)
    #
    # Every key is run through prehash() and the two selected g-table
    # entries are added; any key whose sum differs from its expected
    # value is reported on STDERR.  Dies after checking all keys if at
    # least one mismatch was found.
    my ($href, $hashinfo) = @_;
    my ($n, $sv, $g) = @$hashinfo;
    my $failed = 0;

    for my $key (keys %{$href}) {
	my ($pf1, $pf2) = prehash($key, $n, $sv);
	my ($g1, $g2) = @{$g}[$pf1, $pf2];
	my $expected = $href->{$key};

	# Matching entries need no further attention.
	next if ($g1 + $g2 == $expected);

	printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
	    $key, $pf1, $pf2, $g1, $g2, $g1+$g2, $expected;
	$failed = 1;
    }

    die "$0: hash validation error\n" if ($failed);
}
|
|
|
|
1;
|