diff --git a/asm/pptok.pl b/asm/pptok.pl index 2e763b31..47924897 100755 --- a/asm/pptok.pl +++ b/asm/pptok.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl ## -------------------------------------------------------------------------- ## -## Copyright 1996-2019 The NASM Authors - All Rights Reserved +## Copyright 1996-2020 The NASM Authors - All Rights Reserved ## See the file AUTHORS included with the NASM distribution for ## the specific copyright holders. ## @@ -168,6 +168,36 @@ if ($what eq 'c') { print OUT "/* Do not edit */\n"; print OUT "\n"; + print OUT "#include \"compiler.h\"\n"; + print OUT "#include \"nctype.h\"\n"; + print OUT "#include \"nasmlib.h\"\n"; + print OUT "#include \"hashtbl.h\"\n"; + print OUT "#include \"preproc.h\"\n"; + print OUT "\n"; + + # Note that this is global. + printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok); + foreach $d (@pptok) { + if (defined($d) && $d !~ /[A-Z]/) { + print OUT " \"%$d\",\n"; + } else { + print OUT " NULL,\n"; + } + } + print OUT "};\n"; + + printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok); + foreach $d (@pptok) { + printf OUT " %d,\n", defined($d) ? length($d)+1 : 0; + } + print OUT "};\n"; + + # Put a large value in unused hash slots. This makes it extremely + # unlikely that any combination that involves unused slot will + # pass the range test. This speeds up rejection of unrecognized + # tokens, i.e. identifiers. + print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n"; + my %tokens = (); my @tokendata = (); @@ -196,54 +226,19 @@ if ($what eq 'c') { ($n, $sv, $g) = @hashinfo; die if ($n & ($n-1)); + $n <<= 1; - print OUT "#include \"compiler.h\"\n"; - print OUT "#include \"nctype.h\"\n"; - print OUT "#include \"nasmlib.h\"\n"; - print OUT "#include \"hashtbl.h\"\n"; - print OUT "#include \"preproc.h\"\n"; - print OUT "\n"; - - # Note that this is global. - printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok); - foreach $d (@pptok) { - if (defined($d) && $d !~ /[A-Z]/) { - print OUT " \"%$d\",\n"; - } else { - print OUT " NULL,\n"; - } - } - print OUT "};\n"; - - printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok); - foreach $d (@pptok) { - printf OUT " %d,\n", defined($d) ? length($d)+1 : 0; - } - print OUT "};\n"; - - # Put a large value in unused slots. This makes it extremely unlikely - # that any combination that involves unused slot will pass the range test. - # This speeds up rejection of unrecognized tokens, i.e. identifiers. - print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n"; print OUT "\n\n/* Primary preprocessor token hash */\n\n"; print OUT "enum preproc_token pp_token_hash(const char *token)\n"; print OUT "{\n"; - print OUT " static const int16_t hash1[$n] = {\n"; + print OUT " static const int16_t hashdata[$n] = {\n"; for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+0]; + my $h = ${$g}[$i]; print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; } print OUT " };\n"; - - print OUT " static const int16_t hash2[$n] = {\n"; - for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+1]; - print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; - } - print OUT " };\n"; - print OUT " uint32_t k1, k2;\n"; print OUT " uint64_t crc;\n"; # For correct overflow behavior, "ix" should be unsigned of the same @@ -253,10 +248,10 @@ if ($what eq 'c') { printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), token);\n", $$sv[0], $$sv[1]; - print OUT " k1 = (uint32_t)crc;\n"; - print OUT " k2 = (uint32_t)(crc >> 32);\n"; + printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2; + printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2; print OUT "\n"; - printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1; + print OUT " ix = hashdata[k1] + hashdata[k2];\n"; printf OUT " if (ix >= %d)\n", scalar(@pptok); print OUT " return PP_INVALID;\n"; print OUT "\n"; @@ -288,25 +283,18 @@ if ($what eq 'c') { ($n, $sv, $g) = @hashinfo; die if ($n & ($n-1)); + $n <<= 1; print OUT "\n\n/* TASM compatibility preprocessor token hash */\n"; print OUT "enum preproc_token pp_tasm_token_hash(const char *token)\n"; print OUT "{\n"; - print OUT " static const int16_t hash1[$n] = {\n"; + print OUT " static const int16_t hashdata[$n] = {\n"; for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+0]; + my $h = ${$g}[$i]; print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; } print OUT " };\n"; - - print OUT " static const int16_t hash2[$n] = {\n"; - for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+1]; - print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; - } - print OUT " };\n"; - print OUT " uint32_t k1, k2;\n"; print OUT " uint64_t crc;\n"; # For correct overflow behavior, "ix" should be unsigned of the same @@ -316,10 +304,12 @@ if ($what eq 'c') { printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), token);\n", $$sv[0], $$sv[1]; - print OUT " k1 = (uint32_t)crc;\n"; - print OUT " k2 = (uint32_t)(crc >> 32);\n"; + printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2; + printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2; print OUT "\n"; - printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1; + printf OUT " ix = hashdata[k1] + hashdata[k2];\n", $n-1, $n-1; + # Comparing to pptok here is correct, because this hash produces + # an enum preproc_token value directly. printf OUT " if (ix >= %d)\n", scalar(@pptok); print OUT " return PP_INVALID;\n"; print OUT "\n"; @@ -337,9 +327,8 @@ if ($what eq 'c') { if ($what eq 'ph') { print OUT "# Automatically generated from $in by $0\n"; print OUT "# Do not edit\n"; - print OUT "\n"; - print OUT "%pptok_hash = (\n"; + print OUT "\n\%pptok_hash = (\n"; $n = 0; foreach $tok (@pptok) { if (defined($tok)) { @@ -348,5 +337,12 @@ if ($what eq 'ph') { $n++; } print OUT ");\n"; - print OUT "1;\n"; + + print OUT "\n\@pptok_list = (\n"; + foreach $tok (@pptok) { + print OUT " ", (defined($tok) ? "'\%$tok'" : 'undef'), ",\n"; + } + print OUT ");\n"; + + print OUT "\n1;\n"; } diff --git a/asm/tokhash.pl b/asm/tokhash.pl index 2d6538fe..566a3aa3 100755 --- a/asm/tokhash.pl +++ b/asm/tokhash.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl ## -------------------------------------------------------------------------- ## -## Copyright 1996-2018 The NASM Authors - All Rights Reserved +## Copyright 1996-2020 The NASM Authors - All Rights Reserved ## See the file AUTHORS included with the NASM distribution for ## the specific copyright holders. ## @@ -201,6 +201,7 @@ if ($output eq 'h') { ($n, $sv, $g) = @hashinfo; die if ($n & ($n-1)); + $n <<= 1; print "/*\n"; print " * This file is generated from insns.dat, regs.dat and token.dat\n"; @@ -236,16 +237,9 @@ if ($output eq 'h') { # This speeds up rejection of unrecognized tokens, i.e. identifiers. print "#define INVALID_HASH_ENTRY (65535/3)\n"; - print " static const int16_t hash1[$n] = {\n"; + printf " static const int16_t hashdata[%d] = {\n", $n; for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+0]; - print " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; - } - print " };\n"; - - print " static const int16_t hash2[$n] = {\n"; - for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+1]; + my $h = ${$g}[$i]; print " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; } print " };\n"; @@ -275,10 +269,11 @@ if ($output eq 'h') { print " crc = crc64_byte(crc, c);\n"; print " };\n"; print "\n"; - print " k1 = (uint32_t)crc;\n"; - print " k2 = (uint32_t)(crc >> 32);\n"; + printf " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2; + printf " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2; print "\n"; - printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1; + printf " ix = hashdata[k1] + hashdata[k2];\n", + $n-2, $n-2; printf " if (ix >= %d)\n", scalar(@tokendata); print " goto notfound;\n"; print "\n"; diff --git a/macros/macros.pl b/macros/macros.pl index 1e9cc110..03a6486c 100755 --- a/macros/macros.pl +++ b/macros/macros.pl @@ -260,6 +260,7 @@ if (!@hashinfo) { verify_hash_table(\%pkg_number, \@hashinfo); my ($n, $sv, $g) = @hashinfo; die if ($n & ($n-1)); +$n <<= 1; printf OUT "const int use_package_count = %d;\n\n", $npkg; @@ -278,16 +279,9 @@ print OUT " };\n"; # This speeds up rejection of unrecognized tokens, i.e. identifiers. print OUT "#define INVALID_HASH_ENTRY (65535/3)\n"; -print OUT " static const int16_t hash1[$n] = {\n"; +print OUT " static const int16_t hashdata[$n] = {\n"; for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+0]; - print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; -} -print OUT " };\n"; - -print OUT " static const int16_t hash2[$n] = {\n"; -for ($i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+1]; + my $h = ${$g}[$i]; print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n"; } print OUT " };\n"; @@ -301,10 +295,10 @@ print OUT "\n"; printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), name);\n", $$sv[0], $$sv[1]; -print OUT " k1 = (uint32_t)crc;\n"; -print OUT " k2 = (uint32_t)(crc >> 32);\n"; +printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2; +printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2; print OUT "\n"; -printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1; +printf OUT " ix = hashdata[k1] + hashdata[k2];\n"; printf OUT " if (ix >= %d)\n", scalar(@pkg_list); print OUT " return NULL;\n"; print OUT "\n"; diff --git a/nasmlib/perfhash.c b/nasmlib/perfhash.c index 5cd6714e..acd3d289 100644 --- a/nasmlib/perfhash.c +++ b/nasmlib/perfhash.c @@ -42,9 +42,9 @@ int perfhash_find(const struct perfect_hash *hash, const char *str) crc = crc64i(hash->crcinit, str); k1 = (uint32_t)crc & hash->hashmask; - k2 = (uint32_t)(crc >> 32) & hash->hashmask; + k2 = ((uint32_t)(crc >> 32) & hash->hashmask) + 1; - ix = hash->hashvals[k1] + hash->hashvals[k2 + hash->hashmask + 1]; + ix = hash->hashvals[k1] + hash->hashvals[k2]; if (ix >= hash->tbllen || !hash->strings[ix] || diff --git a/nasmlib/perfhash.pl b/nasmlib/perfhash.pl index 036fdf1a..0c885e8f 100755 --- a/nasmlib/perfhash.pl +++ b/nasmlib/perfhash.pl @@ -338,13 +338,8 @@ if ($output eq 'h') { printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2; $c = '{'; - for (my $i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+0]; - print F "$c\n ", defined($h) ? $h : 'INVALID_HASH_ENTRY'; - $c = ','; - } - for (my $i = 0; $i < $n; $i++) { - my $h = ${$g}[$i*2+1]; + for (my $i = 0; $i < $n*2; $i++) { + my $h = ${$g}[$i]; print F "$c\n ", defined($h) ? $h : 'INVALID_HASH_ENTRY'; $c = ','; } @@ -352,7 +347,7 @@ if ($output eq 'h') { print F "const struct perfect_hash ${name}_hash = {\n"; printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit - printf F " UINT32_C(0x%x),\n", $n-1; # hashmask + printf F " UINT32_C(0x%x),\n", ($n-1) << 1; # hashmask printf F " UINT32_C(%u),\n", $tbllen; # tbllen printf F " %d,\n", $tbloffs; # tbloffs printf F " (%s),\n", $errval; # errval diff --git a/perllib/phash.ph b/perllib/phash.ph index 133a28b0..693909b8 100644 --- a/perllib/phash.ph +++ b/perllib/phash.ph @@ -15,10 +15,11 @@ require 'crc64.ph'; sub prehash($$$) { my($key, $n, $sv) = @_; my @c = crc64($sv, $key); + my $nmask = ($n << 1) - 2; # Create a bipartite graph... - $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word - $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word + $k1 = ($c[1] & $nmask) + 0; # low word + $k2 = ($c[0] & $nmask) + 1; # high word return ($k1, $k2); }