From 19499bf99a962552e2e4acc2b69e25e9ba621415 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 29 Apr 2009 05:05:57 +0000
Subject: [PATCH] Add check_keywords.pl script to perform some basic sanity
 checks to the keyword lists in gram.y and kwlist.h. It checks that all lists
 are in alphabetical order, and that all keywords present in gram.y are listed
 in kwlist.h in the right category, and that all keywords in kwlist.h are also
 in gram.y. What's still missing is to check that all keywords defined with
 "%token <keyword>" in gram.y are present in one of the keyword lists in
 gram.y.

---
 src/tools/check_keywords.pl | 178 ++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100755 src/tools/check_keywords.pl
diff --git a/src/tools/check_keywords.pl b/src/tools/check_keywords.pl
new file mode 100755
index 0000000000..378aabef99
--- /dev/null
+++ b/src/tools/check_keywords.pl
@@ -0,0 +1,178 @@
+#!/usr/bin/perl
+
+# Check that the keyword lists in gram.y and kwlist.h are sane. Run from
+# the top directory, or pass a path to a top directory as argument.
+#
+# $PostgreSQL: pgsql/src/tools/check_keywords.pl,v 1.1 2009/04/29 05:05:57 heikki Exp $
+
+# The optional first argument is the path to the top of the source tree.
+$path = shift @ARGV if @ARGV;
+
+if ($path eq '') { $path = "."; }
+
+# Array indexes keep their default base of 0. Assigning a non-zero value to
+# $[ is a fatal error as of Perl 5.30, so nothing here may depend on it.
+$, = ' ';		# set output field separator
+$\ = "\n";		# set output record separator
+
+# Name of each keyword list in gram.y => category id used in kwlist.h
+$keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD';
+$keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD';
+$keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD';
+$keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD';
+
+# Read gram.y and collect the keywords of each list into %keywords, keyed by
+# list name, in the order in which they appear in the file.
+$gram_filename = "$path/src/backend/parser/gram.y";
+open(my $gram_fh, '<', $gram_filename)
+    || die("Could not open $gram_filename: $!");
+
+my $kcat_name = '';	# list being read, '' while outside of any list
+my $in_comment = 0;	# true while inside a /* ... */ comment within a list
+
+LINE: while (my $text = <$gram_fh>) {
+    chomp $text;	# strip record separator
+
+    # Put whitespace around braces and comment delimiters, so that each of
+    # them becomes a field of its own when the line is split
+    $text =~ s/\{/ { /g;
+    $text =~ s/\}/ } /g;
+    $text =~ s#/\*# /* #g;
+    $text =~ s#\*/# */ #g;
+
+    if (!$kcat_name) {
+	# Is this the beginning of a keyword list?
+	foreach my $k (keys %keyword_categories) {
+	    if ($text =~ m/^($k):/) {
+		$kcat_name = $k;
+		last;
+	    }
+	}
+	next LINE;
+    }
+
+    # Ok, we're in a keyword list. Go through each field in turn
+    foreach my $field (split(' ', $text)) {
+	if ($in_comment) {
+	    # skip everything up to and including the end of the comment
+	    $in_comment = 0 if $field eq '*/';
+	    next;
+	}
+
+	if ($field eq '/*') {
+	    # start of a multiline comment
+	    $in_comment = 1;
+	    next;
+	}
+
+	# a // comment hides the rest of the line
+	next LINE if $field eq '//';
+
+	if ($field eq ';') {
+	    # End of keyword list. Anything else on this line is not part of
+	    # the list, so don't go on and record it as a keyword.
+	    $kcat_name = '';
+	    next LINE;
+	}
+
+	# alternatives are separated by '|'
+	next if $field eq '|';
+
+	# Put this keyword into the right list
+	push @{$keywords{$kcat_name}}, $field;
+    }
+}
+close $gram_fh;
+
+# Each keyword list in gram.y must be in strictly ascending order.
+for my $list_name (keys %keyword_categories) {
+    my $previous = '';
+
+    for my $token (@{$keywords{$list_name}}) {
+	# Compare without the _P suffix that some keyword tokens carry.
+	(my $current = $token) =~ s/_P$//;
+
+	print "'$current' after '$previous' in $list_name list is misplaced"
+	    unless $current gt $previous;
+
+	$previous = $current;
+    }
+}
+
+# Turn every keyword list into a lookup hash.
+# %kwhashes is keyed by keyword category id, e.g. UNRESERVED_KEYWORD, and
+# each value is a reference to a hash keyed by keyword id, e.g. ABORT_P,
+# holding a dummy value of 1.
+foreach my $list_name (keys %keyword_categories) {
+    @arr = @{$keywords{$list_name}};
+
+    my $members;
+    $members->{$_} = 1 foreach @arr;
+
+    $kwhashes{$keyword_categories{$list_name}} = $members;
+}
+
+# Now read in kwlist.h
+
+$kwlist_filename = "$path/src/include/parser/kwlist.h";
+open(my $kwlist_fh, '<', $kwlist_filename)
+    || die("Could not open $kwlist_filename: $!");
+
+$prevkwstring = '';
+while (my $line = <$kwlist_fh>) {
+    # Only the PG_KEYWORD("string", NAME, CATEGORY) lines are of interest
+    next unless $line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*)\)/;
+
+    my $kwstring = $1;
+    my $kwname = $2;
+    my $kwcat_id = $3;
+
+    # Check that the list is in alphabetical order
+    if ($kwstring le $prevkwstring) {
+	print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced";
+    }
+    $prevkwstring = $kwstring;
+
+    # Check that the keyword string is valid: all lower-case ASCII chars
+    if ($kwstring !~ /^[a-z_]*$/) {
+	print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars";
+    }
+
+    # Check that the keyword name is valid: all upper-case ASCII chars
+    if ($kwname !~ /^[A-Z_]*$/) {
+	print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars";
+    }
+
+    # Check that the keyword string matches keyword name
+    (my $bare_kwname = $kwname) =~ s/_P$//;
+    if ($bare_kwname ne uc($kwstring)) {
+	print "keyword name '$kwname' doesn't match keyword string '$kwstring'";
+    }
+
+    # Category ids that have no entry in %kwhashes are silently ignored.
+    # Test for the entry itself, not for a non-empty hash: the hash shrinks
+    # as keywords are matched below, and a category that has been emptied
+    # must still report kwlist.h entries that are missing from gram.y.
+    next unless exists $kwhashes{$kwcat_id};
+
+    # Check that the keyword is present in the grammar. The hash is used
+    # through its reference, so it is not copied for every line.
+    if (!$kwhashes{$kwcat_id}->{$kwname}) {
+	print "'$kwname' not present in $kwcat_id section of gram.y";
+    }
+    else {
+	# Remove it from the hash, so that we can complain at the end
+	# if there's keywords left that were not found in kwlist.h
+	delete $kwhashes{$kwcat_id}->{$kwname};
+    }
+}
+close $kwlist_fh;
+
+# Anything still left in %kwhashes was never matched by a line in kwlist.h
+foreach my $list_name (keys %keyword_categories) {
+    %kwhash = %{$kwhashes{$keyword_categories{$list_name}}};
+
+    foreach my $leftover (keys %kwhash) {
+	print "'$leftover' found in gram.y $list_name category, but not in kwlist.h";
+    }
+}