nasm/directives.pl

#!/usr/bin/perl
## --------------------------------------------------------------------------
##
##   Copyright 1996-2009 The NASM Authors - All Rights Reserved
##   See the file AUTHORS included with the NASM distribution for
##   the specific copyright holders.
##
##   Redistribution and use in source and binary forms, with or without
##   modification, are permitted provided that the following
##   conditions are met:
##
##   * Redistributions of source code must retain the above copyright
##     notice, this list of conditions and the following disclaimer.
##   * Redistributions in binary form must reproduce the above
##     copyright notice, this list of conditions and the following
##     disclaimer in the documentation and/or other materials provided
##     with the distribution.
##
##     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
##     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
##     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
##     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
##     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
##     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
##     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
##     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
##     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
##     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
##     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
##     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##
## --------------------------------------------------------------------------

#
# Generate a perfect hash for directive parsing
#
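# Usage: directives.pl {h|c} directives.dat outfile
#
# The first argument selects what is written to outfile:
#   h - the header (enum directives and declarations)
#   c - the C source (name table and find_directive())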
#

require 'phash.ph';

# Command line: <output type> <directives.dat> <output file>.
# The output type ('h' or 'c') is interpreted further down in this script.
my($output, $directives_dat, $outfile) = @ARGV;

# All three arguments are required.  Checking here also guarantees that an
# undefined file name is never handed to the three-argument open() below.
if (!defined($outfile)) {
    die "Usage: $0 {h|c} directives.dat outfile\n";
}

# Directive names in the order they appear in the input file.
my @directives = ();

# Three-argument open with a lexical handle: the file name is taken
# literally (no mode characters or whitespace are parsed out of it), so the
# old trailing-"\0" trick for two-argument open is no longer needed.
open(my $dd, '<', $directives_dat)
    or die "$0: cannot open: ${directives_dat}: $!\n";
while (defined(my $line = <$dd>)) {
    chomp $line;
    # Accept a line holding a single alphanumeric word, optionally followed
    # by a comment introduced by ';' or '#'.  Any other line (blank lines,
    # comment-only lines, anything else) is silently skipped.
    if ($line =~ /^\s*([[:alnum:]]+)\s*(|[\;\#].*)$/) {
	push(@directives, $1);
    }
}
close($dd);

# Without an output file name there is nothing to write.  Check explicitly
# so that an undefined name never reaches the three-argument open() calls
# below.
if (!defined($outfile)) {
    die "Usage: $0 {h|c} directives.dat outfile\n";
}

#
# Emit the requested file.  $output selects what is generated:
#   'h' - header: enum directives plus declarations
#   'c' - C source: directive name table and find_directive()
#
if ($output eq 'h') {
    open(my $out, '>', $outfile)
	or die "$0: cannot create: ${outfile}: $!\n";

    print {$out} "/*\n";
    print {$out} " * This file is generated from directives.dat\n";
    print {$out} " * by directives.pl; do not edit.\n";
    print {$out} " */\n";
    print {$out} "\n";

    print {$out} "#ifndef NASM_DIRECTIVES_H\n";
    print {$out} "#define NASM_DIRECTIVES_H\n";
    print {$out} "\n";

    # D_NONE comes first, followed by one upper-cased D_<NAME> enumerator
    # per directive, in input order.
    print {$out} "enum directives {\n";
    print {$out} "    D_NONE";
    foreach my $d (@directives) {
	print {$out} ",\n    D_\U$d";
    }
    print {$out} "\n};\n\n";

    # The name table has one extra slot for the leading D_NONE entry.
    printf {$out} "extern const char * const directives[%d];\n",
	scalar(@directives)+1;
    print {$out} "enum directives find_directive(const char *token);\n\n";
    print {$out} "#endif /* NASM_DIRECTIVES_H */\n";

    # Buffered write errors only surface at close time, so check it.
    close($out)
	or die "$0: error writing: ${outfile}: $!\n";
} elsif ($output eq 'c') {
    # Map each directive name to its position in @directives, rejecting
    # duplicates.
    my %directive = ();
    my $index = 0;
    foreach my $d (@directives) {
	if (exists($directive{$d})) {
	    die "$0: $directives_dat: duplicate directive: $d\n";
	}
	$directive{$d} = $index++;	# This is zero-based, unlike the enum!
    }

    # An empty list means no hash was produced.  (Testing the array itself
    # replaces defined(@array), which newer Perl versions reject outright.)
    my @hashinfo = gen_perfect_hash(\%directive);
    if (!@hashinfo) {
	die "$0: no hash found\n";
    }

    # Paranoia...
    verify_hash_table(\%directive, \@hashinfo);

    # $n  - number of entries in each generated hash table
    # $sv - reference to the two 32-bit words used as the CRC seed below
    # $g  - reference to the table values, hash1/hash2 entries interleaved
    my ($n, $sv, $g) = @hashinfo;

    # The generated lookup masks with $n-1, which is only a valid index
    # reduction when $n is a power of two.
    if ($n & ($n-1)) {
	die "$0: hash table size $n is not a power of two\n";
    }

    open(my $out, '>', $outfile)
	or die "$0: cannot create: ${outfile}: $!\n";

    print {$out} "/*\n";
    print {$out} " * This file is generated from directives.dat\n";
    print {$out} " * by directives.pl; do not edit.\n";
    print {$out} " */\n";
    print {$out} "\n";

    print {$out} "#include \"compiler.h\"\n";
    print {$out} "#include <string.h>\n";
    print {$out} "#include \"nasm.h\"\n";
    print {$out} "#include \"hashtbl.h\"\n";
    print {$out} "#include \"directives.h\"\n";
    print {$out} "\n";

    # Name table, indexed by enum value; slot 0 (D_NONE) is NULL.
    printf {$out} "const char * const directives[%d] = {\n",
	scalar(@directives)+1;
    print {$out} "    NULL";
    foreach my $d (@directives) {
	print {$out} ",\n    \"$d\"";
    }
    print {$out} "\n};\n\n";

    print {$out} "enum directives find_directive(const char *token)\n";
    print {$out} "{\n";

    # Put a large value in unused slots.  This makes it extremely unlikely
    # that any combination that involves unused slot will pass the range test.
    # This speeds up rejection of unrecognized tokens, i.e. identifiers.
    print {$out} "#define UNUSED 16383\n";

    # hash1 takes the even-numbered entries of @$g, hash2 the odd ones.
    foreach my $table (0, 1) {
	printf {$out} "    static const int16_t hash%d[%d] = {\n",
	    $table+1, $n;
	foreach my $i (0 .. $n-1) {
	    my $slot = ${$g}[$i*2+$table];
	    print {$out} "        ", defined($slot) ? $slot : 'UNUSED', ",\n";
	}
	print {$out} "    };\n";
    }

    print {$out}  "    uint32_t k1, k2;\n";
    print {$out}  "    uint64_t crc;\n";
    # For correct overflow behavior, "ix" should be unsigned of the same
    # width as the hash arrays.
    print {$out}  "    uint16_t ix;\n";
    print {$out}  "\n";
    printf {$out} "    crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
	$$sv[0], $$sv[1];
    print {$out}  "    k1 = (uint32_t)crc;\n";
    print {$out}  "    k2 = (uint32_t)(crc >> 32);\n";
    print {$out}  "\n";
    printf {$out} "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n",
	$n-1, $n-1;
    printf {$out} "    if (ix >= %d)\n", scalar(@directives);
    print {$out}  "        return D_NONE;\n";
    print {$out}  "\n";
    print {$out}  "    ix++;\n";	# Account for D_NONE
    print {$out}  "    if (nasm_stricmp(token, directives[ix]))\n";
    print {$out}  "        return D_NONE;\n";
    print {$out}  "\n";
    print {$out}  "    return ix;\n";
    print {$out}  "}\n";

    # Buffered write errors only surface at close time, so check it.
    close($out)
	or die "$0: error writing: ${outfile}: $!\n";
} else {
    # Previously an unrecognized output type silently produced no file.
    die "$0: unknown output type: $output (expected 'h' or 'c')\n";
}
directives: split global directives from backend-specific ones Split out the global directives into a separate section, that allows the switch() in the main code to be slightly faster. Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2009-07-14 02:54:31 +08:00			`#!/usr/bin/perl`
			`## --------------------------------------------------------------------------`
			`##`
			`## Copyright 1996-2009 The NASM Authors - All Rights Reserved`
			`## See the file AUTHORS included with the NASM distribution for`
			`## the specific copyright holders.`
			`##`
			`## Redistribution and use in source and binary forms, with or without`
			`## modification, are permitted provided that the following`
			`## conditions are met:`
			`##`
			`## * Redistributions of source code must retain the above copyright`
			`## notice, this list of conditions and the following disclaimer.`
			`## * Redistributions in binary form must reproduce the above`
			`## copyright notice, this list of conditions and the following`
			`## disclaimer in the documentation and/or other materials provided`
			`## with the distribution.`
			`##`
			`## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND`
			`## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,`
			`## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF`
			`## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE`
			`## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR`
			`## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT`
			`## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;`
			`## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)`
			`## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR`
			`## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,`
			`## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`##`
			`## --------------------------------------------------------------------------`

			`#`
			`# Generate a perfect hash for directive parsing`
			`#`
			`# Usage: directives.pl directives.dat directives.c directives.h`
			`#`

			`require 'phash.ph';`

			`my($output, $directives_dat, $outfile) = @ARGV;`

			`@directives = ();`

			`open(DD, "< ${directives_dat}\0")`
			`or die "$0: cannot open: ${directives_dat}: $!\n";`
			`while (defined($line = <DD>)) {`
			`chomp $line;`
			`if ($line =~ /^\s*([[:alnum:]]+)\s*(|[\;\#].*)$/) {`
			`push(@directives, $1);`
			`}`
			`}`
			`close(DD);`

			`if ($output eq 'h') {`
			`open(H, "> ${outfile}\0")`
			`or die "$0: cannot create: ${outfile}: $!\n";`

			`print H "/*\n";`
			`print H " * This file is generated from directives.dat\n";`
			`print H " * by directives.pl; do not edit.\n";`
			`print H " */\n";`
			`print H "\n";`

			`print H "#ifndef NASM_DIRECTIVES_H\n";`
			`print H "#define NASM_DIRECTIVES_H\n";`
			`print H "\n";`

			`print H "enum directives {\n";`
			`print H " D_NONE";`
			`foreach $d (@directives) {`
			`print H ",\n D_\U$d";`
			`}`
			`print H "\n};\n\n";`
			`printf H "extern const char * const directives[%d];\n",`
			`scalar(@directives)+1;`
			`print H "enum directives find_directive(const char *token);\n\n";`
			`print H "#endif /* NASM_DIRECTIVES_H */\n";`
			`} elsif ($output eq 'c') {`
			`%directive = ();`
			`$n = 0;`
			`foreach $d (@directives) {`
			`if (exists($directive{$d})) {`
			`die "$0: $directives_dat: duplicate directive: $d\n";`
			`}`
			`$directive{$d} = $n++; # This is zero-based, unlike the enum!`
			`}`

			`@hashinfo = gen_perfect_hash(\%directive);`
			`if (!defined(@hashinfo)) {`
			`die "$0: no hash found\n";`
			`}`

			`# Paranoia...`
			`verify_hash_table(\%directive, \@hashinfo);`

			`($n, $sv, $g) = @hashinfo;`
			`$sv2 = $sv+2;`

			`die if ($n & ($n-1));`

			`open(C, "> ${outfile}\0")`
			`or die "$0: cannot create: ${directives_c}: $!\n";`

			`print C "/*\n";`
			`print C " * This file is generated from directives.dat\n";`
			`print C " * by directives.pl; do not edit.\n";`
			`print C " */\n";`
			`print C "\n";`

			`print C "#include \"compiler.h\"\n";`
			`print C "#include <string.h>\n";`
			`print C "#include \"nasm.h\"\n";`
			`print C "#include \"hashtbl.h\"\n";`
			`print C "#include \"directives.h\"\n";`
			`print C "\n";`

			`printf C "const char * const directives[%d] = {\n",`
			`scalar(@directives)+1;`
			`print C " NULL";`
			`foreach $d (@directives) {`
			`print C ",\n \"$d\"";`
			`}`
			`print C "\n};\n\n";`

			`print C "enum directives find_directive(const char *token)\n";`
			`print C "{\n";`

			`# Put a large value in unused slots. This makes it extremely unlikely`
			`# that any combination that involves unused slot will pass the range test.`
			`# This speeds up rejection of unrecognized tokens, i.e. identifiers.`
			`print C "#define UNUSED 16383\n";`

			`print C " static const int16_t hash1[$n] = {\n";`
			`for ($i = 0; $i < $n; $i++) {`
			`my $h = ${$g}[$i*2+0];`
			`print C " ", defined($h) ? $h : 'UNUSED', ",\n";`
			`}`
			`print C " };\n";`

			`print C " static const int16_t hash2[$n] = {\n";`
			`for ($i = 0; $i < $n; $i++) {`
			`my $h = ${$g}[$i*2+1];`
			`print C " ", defined($h) ? $h : 'UNUSED', ",\n";`
			`}`
			`print C " };\n";`

			`print C " uint32_t k1, k2;\n";`
			`print C " uint64_t crc;\n";`
			`# For correct overflow behavior, "ix" should be unsigned of the same`
			`# width as the hash arrays.`
			`print C " uint16_t ix;\n";`
			`print C "\n";`
			`printf C " crc = crc64i(UINT64_C(0x%08x%08x), token);\n",`
			`$$sv[0], $$sv[1];`
			`print C " k1 = (uint32_t)crc;\n";`
			`print C " k2 = (uint32_t)(crc >> 32);\n";`
			`print C "\n";`
			`printf C " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;`
			`printf C " if (ix >= %d)\n", scalar(@directives);`
			`print C " return D_NONE;\n";`
			`print C "\n";`
			`print C " ix++;\n"; # Account for D_NONE`
			`print C " if (nasm_stricmp(token, directives[ix]))\n";`
			`print C " return D_NONE;\n";`
			`print C "\n";`
			`print C " return ix;\n";`
			`print C "}\n";`
			`}`