From cda816306d437d4b7c170afcd983263b5fb83c17 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sat, 21 Jun 2008 15:15:40 -0700
Subject: [PATCH] Drop the index tables from the canned macros

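Instead of an array of string pointers, emit each canned macro set as a
single character array: every macro line is NUL-terminated, and an extra
NUL marks the end of the set.  That reduces the size of the canned macros
by up to 30%, and we only ever accessed them sequentially anyway.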
---
 macros.pl | 55 +++++++++++++++++++++++++++++++++++++------------------
 preproc.c | 16 ++++++++--------
 preproc.h |  2 +-
 tables.h  |  6 +++---
 4 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/macros.pl b/macros.pl
index 18c77596..4fe775ea 100755
--- a/macros.pl
+++ b/macros.pl
@@ -10,11 +10,30 @@
 require 'phash.ph';
 require 'pptok.ph';
 
+use bytes;
+
 my $fname;
 my $line = 0;
 my $index      = 0;
 my $tasm_count = 0;
 
+# Emit the bytes of the joined arguments as C character constants, each
+# followed by a comma: 'c' for bytes 32..126 except " and \, else (char)NNN.
+# NOTE(review): ' (39) falls in the 'c' case and comes out as ''' -- verify.
+sub charcify(@) {
+    my $l = '';
+    my $c, $o;
+    foreach $o (unpack("C*", join('',@_))) {
+	$c = pack("C", $o);
+	if ($o < 32 || $o > 126 || $c eq '"' || $c eq "\\") {
+	    $l .= sprintf("(char)%3d,", $o);
+	} else {
+	    $l .= "\'".$c."\',";
+	}
+    }
+    return $l;
+}
+
 #
 # Generate macros.c
 #
@@ -22,14 +41,14 @@ open(OUT,">macros.c") or die "unable to open macros.c\n";
 
 print OUT "/*\n";
 print OUT " * Do not edit - this file auto-generated by macros.pl from:\n";
-print OUT " * ", join(' ', @ARGV), "\n";
+print OUT " *   ", join("\n *   ", @ARGV), "\n";
 print OUT " */\n";
 print OUT "\n";
 print OUT "#include \"tables.h\"\n";
 print OUT "#include \"nasmlib.h\"\n";
 print OUT "#include \"hashtbl.h\"\n";
 print OUT "\n";
-print OUT "const char * const nasm_stdmac[] = {";
+print OUT "const char nasm_stdmac[] = {";
 
 my $npkg = 0;
 my @pkg_list   = ();
@@ -38,6 +57,7 @@ my $pkg;
 my @out_list   = ();
 my $outfmt;
 my $lastname;
+my $z;
 
 foreach $fname ( @ARGV ) {
     open(INPUT,$fname) or die "unable to open $fname\n";
@@ -50,10 +70,10 @@ foreach $fname ( @ARGV ) {
 	} elsif (m/^OUT:\s*(.*\S)\s*$/) {
 	    undef $pkg;
 	    my @out_alias = split(/\s+/, $1);
-	    printf OUT "        /* %4d */ NULL\n", $index++;
+	    printf OUT "        /* %4d */ 0\n", $index++;
 	    print OUT "};\n";
 	    $index = 0;
-	    printf OUT "const char * const %s_stdmac[] = {\n", $out_alias[0];
+	    printf OUT "const char %s_stdmac[] = {\n", $out_alias[0];
 	    print  OUT "    /* From $fname */\n";
 	    $lastname = $fname;
 	    push(@out_list, $out_alias[0]);
@@ -63,23 +83,20 @@ foreach $fname ( @ARGV ) {
 	    if (defined($pkg_number{$pkg})) {
 		die "$0: $fname: duplicate package: $pkg\n";
 	    }
-	    printf OUT "        /* %4d */ NULL,\n", $index++;
+	    printf OUT "        /* %4d */ 0\n", $index++;
 	    print OUT "};\n";
 	    $index = 0;
-	    printf OUT "static const char * const nasm_stdmac_%s[] = {\n", $pkg;
+	    printf OUT "static const char nasm_stdmac_%s[] = {\n", $pkg;
 	    print  OUT "    /* From $fname */\n";
 	    $lastname = $fname;
 	    push(@pkg_list, $pkg);
 	    $pkg_number{$pkg} = $npkg++;
-	    $pkg_index{$pkg}  = $index;
-	    printf OUT "        /* %4d */ \"\\x%02x\"\"%s\",\n",
-	    	$index++, $pptok_hash{'%define'}+128, "__USE_\U$pkg\E__";
+	    $z = pack("C", $pptok_hash{'%define'}+128)."__USE_\U$pkg\E__";
+	    printf OUT "        /* %4d */ %s0,\n", $index, charcify($z);
+	    $index += length($z)+1;
 	} elsif (m/^\s*((\s*([^\"\';\s]+|\"[^\"]*\"|\'[^\']*\'))*)\s*(;.*)?$/) {
 	    my $s1, $s2, $pd, $ws;
 	    $s1 = $1;
-	    $s1 =~ s/(\s)\s+/$1/g;
-	    $s1 =~ s/\\/\\\\/g;
-	    $s1 =~ s/"/\\"/g;
 	    $s2 = '';
 	    while ($s1 =~ /(\%[a-zA-Z_][a-zA-Z0-9_]*)((\s+)(.*)|)$/) {
 		$s2 .= "$'";
@@ -88,7 +105,7 @@ foreach $fname ( @ARGV ) {
 		$s1 = $4;
 		if (defined($pptok_hash{$pd}) &&
 		    $pptok_hash{$pd} <= 127) {
-		    $s2 .= sprintf("\\x%02x\"\"", $pptok_hash{$pd}+128);
+		    $s2 .= pack("C", $pptok_hash{$pd}+128);
 		} else {
 		    $s2 .= $pd.$ws;
 		}
@@ -99,7 +116,9 @@ foreach $fname ( @ARGV ) {
 		    print OUT "\n    /* From $fname */\n";
 		    $lastname = $fname;
 		}	
-		printf OUT "        /* %4d */ \"%s\",\n", $index++, $s2;
+		printf OUT "        /* %4d */ %s0,\n",
+		    $index, charcify($s2);
+		$index += length($s2)+1;
 	    }
 	} else {
 	    die "$fname:$line:  error unterminated quote";
@@ -107,8 +126,8 @@ foreach $fname ( @ARGV ) {
     }
     close(INPUT);
 }
-printf OUT "        /* %4d */ NULL\n};\n\n", $index++;
-print OUT "const char * const * const nasm_stdmac_after_tasm = ",
+printf OUT "        /* %4d */ 0\n};\n\n", $index++;
+print OUT "const char * const nasm_stdmac_after_tasm = ",
     "&nasm_stdmac[$tasm_count];\n\n";
 
 my @hashinfo = gen_perfect_hash(\%pkg_number);
@@ -120,11 +139,11 @@ verify_hash_table(\%pkg_number, \@hashinfo);
 my ($n, $sv, $g) = @hashinfo;
 die if ($n & ($n-1));
 
-print OUT "const char * const *nasm_stdmac_find_package(const char *package)\n";
+print OUT "const char *nasm_stdmac_find_package(const char *package)\n";
 print OUT "{\n";
 print OUT "    static const struct {\n";
 print OUT "         const char *package;\n";
-print OUT "         const char * const *macros;\n";
+print OUT "         const char *macros;\n";
 print OUT "    } packages[$npkg] = {\n";
 foreach $pkg (@pkg_list) {
     printf OUT "        { \"%s\", nasm_stdmac_%s },\n",
diff --git a/preproc.c b/preproc.c
index 4fa0667c..b4eb54c6 100644
--- a/preproc.c
+++ b/preproc.c
@@ -362,13 +362,13 @@ static MMacro *defining;
  * The standard macro set: defined in macros.c in the array nasm_stdmac.
  * This gives our position in the macro set, when we're processing it.
  */
-static const char * const *stdmacpos;
+static const macros_t *stdmacpos;
 
 /*
  * The extra standard macros that come from the object format, if
  * any.
  */
-static const char * const *extrastdmac = NULL;
+static const macros_t *extrastdmac = NULL;
 static bool any_extrastdmac;
 
 /*
@@ -642,10 +642,9 @@ static char *read_line(void)
 
     if (stdmacpos) {
 	unsigned char c;
+	const char *p = stdmacpos;
 	char *ret, *q;
-	const char *smac = *stdmacpos++, *p;
 	size_t len = 0;
-	p = smac;
 	while ((c = *p++)) {
 	    if (c >= 0x80)
 		len += pp_directives_len[c-0x80]+1;
@@ -653,8 +652,8 @@ static char *read_line(void)
 		len++;
 	}
 	ret = nasm_malloc(len+1);
-	p = smac; q = ret;
-	while ((c = *p++)) {
+	q = ret;
+	while ((c = *stdmacpos++)) {
 	    if (c >= 0x80) {
 		memcpy(q, pp_directives[c-0x80], pp_directives_len[c-0x80]);
 		q += pp_directives_len[c-0x80];
@@ -663,6 +662,7 @@ static char *read_line(void)
 		*q++ = c;
 	    }
 	}
+	stdmacpos = p;
 	*q = '\0';
 
 	if (!*stdmacpos) {
@@ -2153,7 +2153,7 @@ static int do_directive(Token * tline)
 
     case PP_USE:
     {
-	static const char * const *use_pkg;
+	static const char *use_pkg;
 	const char *pkg_macro;
 
 	t = tline->next = expand_smacro(tline->next);
@@ -2175,7 +2175,7 @@ static int do_directive(Token * tline)
 	if (!use_pkg)
 	    error(ERR_NONFATAL, "unknown `%%use' package: %s", t->text);
 	/* The first string will be <%define>__USE_*__ */
-	pkg_macro = *use_pkg + 1;
+	pkg_macro = use_pkg + 1;
 	if (!smacro_defined(NULL, pkg_macro, 0, NULL, true)) {
 	    /* Not already included, go ahead and include it */
 	    stdmacpos = use_pkg;
diff --git a/preproc.h b/preproc.h
index a79ae277..1c526829 100644
--- a/preproc.h
+++ b/preproc.h
@@ -15,7 +15,7 @@ extern const char * const pp_directives[];
 extern const int pp_directives_len[];
 
 /* Pointer to a macro chain */
-typedef const char * const macros_t;
+typedef const char macros_t;
 
 enum preproc_token pp_token_hash(const char *token);
 void pp_include_path(char *);
diff --git a/tables.h b/tables.h
index c9a4c720..f0044a50 100644
--- a/tables.h
+++ b/tables.h
@@ -14,9 +14,9 @@
 /* --- From standard.mac via macros.pl: --- */
 
 /* macros.c */
-extern const char * const nasm_stdmac[];
-extern const char * const * const nasm_stdmac_after_tasm;
-const char * const *nasm_stdmac_find_package(const char *);
+extern const char nasm_stdmac[];
+extern const char * const nasm_stdmac_after_tasm;
+const char *nasm_stdmac_find_package(const char *);
 
 /* --- From insns.dat via insns.pl: --- */