Convert tab-complete's long else-if chain to a switch statement.

Rename tab-complete.c to tab-complete.in.c, create the preprocessor
script gen_tabcomplete.pl, and install Makefile/meson.build rules
to create tab-complete.c from tab-complete.in.c.  The preprocessor
converts match_previous_words' else-if chain into a switch and
populates tcpatterns[] with the data needed by the driver loop.

The initial HeadMatches/TailMatches/Matches test in each else-if arm
is now performed in a table-driven loop.  Where we get a match, the
corresponding switch case is invoked to see if the match succeeds.
(It might not, if there were additional conditions in the original
else-if test.)

The total number of string comparisons done is just about the
same as it was in the previous coding; however, now that we
have table-driven logic underlying the handmade rules, there
is room to improve that.  For now I haven't bothered because
tab completion is still plenty fast enough for human use.
If the number of rules keeps increasing, we might someday
need to do more in that area.

The immediate benefit of all this thrashing is that C compilers
frequently don't deal well with long else-if chains.  On gcc 8.5.0,
this reduces the compile time of tab-complete.c by about a factor of
four, while MSVC is reported to crash outright with the previous
coding.

Discussion: https://postgr.es/m/2208466.1720729502@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2024-10-07 12:22:10 -04:00
parent bd1276a3c9
commit f391d9dc93
5 changed files with 323 additions and 3 deletions

View File

@ -1,4 +1,5 @@
/psqlscanslash.c
/tab-complete.c
/sql_help.h
/sql_help.c
/psql

View File

@ -62,6 +62,9 @@ psqlscanslash.c: FLEXFLAGS = -Cfe -p -p
psqlscanslash.c: FLEX_NO_BACKUP=yes
psqlscanslash.c: FLEX_FIX_WARNING=yes
tab-complete.c: gen_tabcomplete.pl tab-complete.in.c
$(PERL) $^ --outfile $@
install: all installdirs
$(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)'
$(INSTALL_DATA) $(srcdir)/psqlrc.sample '$(DESTDIR)$(datadir)/psqlrc.sample'
@ -75,7 +78,7 @@ uninstall:
clean distclean:
rm -f psql$(X) $(OBJS) lex.backup
rm -rf tmp_check
rm -f sql_help.h sql_help.c psqlscanslash.c
rm -f sql_help.h sql_help.c psqlscanslash.c tab-complete.c
check:
$(prove_check)

View File

@ -0,0 +1,306 @@
#----------------------------------------------------------------------
#
# gen_tabcomplete.pl
# Perl script that transforms tab-complete.in.c to tab-complete.c.
#
# This script converts a C else-if chain into a switch statement.
# The else-if statements to be processed must appear at single-tab-stop
# indentation between lines reading
# /* BEGIN GEN_TABCOMPLETE */
# /* END GEN_TABCOMPLETE */
# The first clause in each if-condition must be a call of one of the
# functions Matches, HeadMatches, TailMatches, MatchesCS, HeadMatchesCS,
# or TailMatchesCS. Its argument(s) must be string literals or macros
# that expand to string literals or NULL. These clauses are removed from
# the code and replaced by "break; case N:", where N is a unique number
# for each such case label.
# The BEGIN GEN_TABCOMPLETE and END GEN_TABCOMPLETE lines are replaced
# by "switch (pattern_id) {" and "}" wrapping to make a valid switch.
# The remainder of the code is copied verbatim.
#
# An if-condition can also be an OR ("||") of several *Matches function
# calls, or it can be an AND ("&&") of a *Matches call with some other
# condition. For example,
#
# else if (HeadMatches("DROP", "DATABASE") && ends_with(prev_wd, '('))
#
# will be transformed to
#
# break; case N:
# if (ends_with(prev_wd, '('))
#
# In addition, there must be one input line that reads
# /* Insert tab-completion pattern data here. */
# This line is replaced in the output file by macro calls, one for each
# replaced match condition. The output for the above example would be
# TCPAT(N, HeadMatch, "DROP", "DATABASE"),
# where N is the replacement case label, "HeadMatch" is the original
# function name minus "es", and the rest are the function arguments.
# The tab-completion data line must appear before BEGIN GEN_TABCOMPLETE.
#
#
# Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/bin/psql/gen_tabcomplete.pl
#
#----------------------------------------------------------------------
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
# Parse the command line: an optional --outfile switch plus the input file.
# Any usage error is reported through usage(), which dies with the help text.
my $outfile = '';

GetOptions('outfile=s' => \$outfile) or usage();

# Only $ARGV[0] is read below.  Insist that it exists, so that a missing
# argument yields the usage text instead of a confusing failure inside open().
# Extra arguments are tolerated, since callers may pass the input file more
# than once.
usage() unless @ARGV;

open my $infh, '<', $ARGV[0]
  or die "$0: could not open input file '$ARGV[0]': $!\n";

# Write to the named output file if one was given, otherwise to stdout.
my $outfh;
if ($outfile)
{
	open $outfh, '>', $outfile
	  or die "$0: could not open output file '$outfile': $!\n";
}
else
{
	$outfh = *STDOUT;
}
# Opening boilerplate for output file.
# This is emitted with print, not printf: the text is a literal and must
# never be interpreted as a format string (a stray "%" would be mangled).
print $outfh <<EOM;
/*-------------------------------------------------------------------------
*
* tab-complete.c
* Preprocessed tab-completion code.
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* NOTES
* ******************************
* *** DO NOT EDIT THIS FILE! ***
* ******************************
*
* It has been GENERATED by src/bin/psql/gen_tabcomplete.pl
*
*-------------------------------------------------------------------------
*/
#define SWITCH_CONVERSION_APPLIED
#line 1 "tab-complete.in.c"
EOM
# Scan input file until we find the data-replacement label line.
# Dump what we scan directly into the output file.  The label line itself
# is consumed and not copied; the pattern table is written in its place later.
my $found_data_label = 0;
while (<$infh>)
{
	chomp;
	if (m|^\s*/\* Insert tab-completion pattern data here\. \*/\s*$|)
	{
		$found_data_label = 1;
		last;
	}
	print $outfh "$_\n";
}

# Without the label line there is nowhere to put the table, so give up
# rather than silently producing a useless output file.
die "$0: did not find the tab-completion pattern data line in '$ARGV[0]'\n"
  unless $found_data_label;
# $table_data collects what we will substitute for the "pattern data" line.
my $table_data = '';

# $output_code collects code that we can't emit till after $table_data.
my $output_code = '';

# last case label assigned
my $last_case_label = 0;

# We emit #line directives to keep the output file's line numbering in sync
# with the line numbering of the original, to simplify compiler error message
# reading and debugging.
my $next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n";

# Scan until we find the BEGIN GEN_TABCOMPLETE line.
# Add the scanned code to $output_code verbatim.
my $found_begin_marker = 0;
while (<$infh>)
{
	chomp;
	if (m|^\s*/\* BEGIN GEN_TABCOMPLETE \*/\s*$|)
	{
		$found_begin_marker = 1;
		last;
	}
	$output_code .= $_ . "\n";
}

# Reaching end of input without the marker means there is no else-if chain
# to convert; fail loudly instead of emitting a dangling switch.
die "$0: did not find the BEGIN GEN_TABCOMPLETE line in '$ARGV[0]'\n"
  unless $found_begin_marker;

# Emit the switch-starting lines.
$output_code .= "\tswitch (pattern_id)\n";
$output_code .= "\t{\n";

# Keep line numbering in sync.
$next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n";
# Scan input file, collecting outer-level else-if conditions
# to pass to process_else_if.
# Lines that aren't else-if conditions go to $output_code verbatim.

# True if we're handling a multiline else-if condition
my $in_else_if = 0;

# The accumulated condition text, and the line number where it started
my $else_if_line;
my $else_if_lineno;

# True once the END GEN_TABCOMPLETE line has been seen
my $found_end_marker = 0;

while (<$infh>)
{
	chomp;
	if (m|^\s*/\* END GEN_TABCOMPLETE \*/\s*$|)
	{
		$found_end_marker = 1;
		last;
	}

	if ($in_else_if)
	{
		# Continuation line: append it with leading whitespace collapsed.
		my $rest = $_;
		$rest =~ s/^\s+//;
		$else_if_line .= ' ' . $rest;
	}
	elsif (m/^\telse if \(/)
	{
		# First line of a new single-tab-stop else-if.
		$else_if_line = $_;
		$else_if_lineno = $.;
	}
	else
	{
		# Ordinary code: copy it through untouched.
		$output_code .= $_ . "\n";
		next;
	}

	# Double right paren is currently sufficient to detect completion
	if ($else_if_line =~ m/\)\)$/)
	{
		process_else_if($else_if_line, $else_if_lineno, $.);
		$in_else_if = 0;
	}
	else
	{
		$in_else_if = 1;
	}
}

die "unfinished else-if" if $in_else_if;

# If the END marker never showed up, the whole rest of the file was treated
# as part of the else-if chain, which cannot be right.
die "$0: did not find the END GEN_TABCOMPLETE line in '$ARGV[0]'\n"
  unless $found_end_marker;
# Emit the switch-ending lines.  The "break" terminates the last generated
# case; the default arm asserts, as no other pattern_id value is expected
# to reach the switch.
$output_code .= "\tbreak;\n";
$output_code .= "\tdefault:\n";
$output_code .= "\t\tAssert(false);\n";
$output_code .= "\t\tbreak;\n";
$output_code .= "\t}\n";

# Keep line numbering in sync.
$next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n";

# Scan the rest, adding it to $output_code verbatim.
while (<$infh>)
{
	chomp;
	$output_code .= $_ . "\n";
}

# Dump out the table data.
print $outfh $table_data;

# Dump out the modified code, and we're done!
print $outfh $output_code;

close($infh);

# Buffered write errors (e.g. a full disk) are only reported at close time,
# so check it: a truncated generated file must not look like success.
close($outfh)
  or die "$0: error writing output: $!\n";
# Take apart one complete "else if (...)" condition.
#
# Arguments are the condition text (beginning with "\telse if ("), the input
# line number on which the condition starts, and the line number on which it
# ends.  Every leading *Matches call is passed to process_match, which adds
# the table entry to $table_data and the case label to $output_code.  If an
# extra condition follows "&&", it is re-emitted into $output_code as an "if".
sub process_else_if
{
	my ($cond, $else_if_lineno, $end_lineno) = @_;

	# Drop the leading "else if (", which is known to be present.
	$cond =~ s/^\telse if \(//;

	# Only the first *Matches call of a condition opens a new case label.
	my $isfirst = 1;

	# Peel off each *Matches call that is followed by "||".
	while ($cond =~
		s/^(Head|Tail|)Matches(CS|)\((("[^"]*"|MatchAnyExcept\("[^"]*"\)|[A-Za-z,\s])+)\)\s*\|\|\s*//
	  )
	{
		my ($typ, $cs, $args) = ($1, $2, $3);
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
		$isfirst = 0;
	}

	if ($cond =~
		s/^(Head|Tail|)Matches(CS|)\((("[^"]*"|MatchAnyExcept\("[^"]*"\)|[A-Za-z,\s])+)\)\s*&&\s*//
	  )
	{
		# A *Matches call ANDed with some other condition.
		my ($typ, $cs, $args) = ($1, $2, $3);

		# A mix of "||" and "&&" is not handled; complain, but carry on.
		warn
		  "could not process OR/ANDed if condition at line $else_if_lineno\n"
		  unless $isfirst;
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);

		# Re-emit what is left of the condition, positioned (approximately)
		# at the line where the original condition ended.
		$output_code .=
		    "#line ${end_lineno} \"tab-complete.in.c\"\n"
		  . "\tif ($cond\n";
	}
	elsif ($cond =~
		s/^(Head|Tail|)Matches(CS|)\((("[^"]*"|MatchAnyExcept\("[^"]*"\)|[A-Za-z,\s])+)\)\)$//
	  )
	{
		# A lone *Matches call that closes the condition.
		my ($typ, $cs, $args) = ($1, $2, $3);
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
	}
	else
	{
		# Not a shape we understand: warn and pass the else-if through.
		warn
		  "could not process if condition at line $else_if_lineno: the rest looks like $cond\n";
		$output_code .= "\telse if ($cond\n";
	}

	# A multi-line condition was collapsed, so resynchronize line numbering.
	return if $end_lineno == $else_if_lineno;
	$output_code .=
	  sprintf("#line %s \"tab-complete.in.c\"\n", $end_lineno + 1);
	return;
}
# Record one *Matches call.
#
# $typ is "Head", "Tail" or empty, $cs is "CS" or empty, and $args is the
# argument text of the call.  $lineno is accepted but not used.  When
# $isfirst is true a new case label is allocated and written to $output_code;
# in every case a TCPAT() entry carrying the current label is appended to
# $table_data.
sub process_match
{
	my ($typ, $cs, $args, $lineno, $isfirst) = @_;

	# Assign a new case label only for the first pattern in an OR group.
	if ($isfirst)
	{
		my $label = ++$last_case_label;

		# "break;" and "case" deliberately share one output line, so both
		# carry the line number of the original "else if"'s first line.
		# This avoids misleading displays in, e.g., lcov.  The very first
		# case has no preceding case to break out of.
		my $leader = $label > 1 ? "break; " : "";
		$output_code .= "\t" . $leader . "case $label:\n";
	}

	$table_data .= sprintf("\tTCPAT(%s, %sMatch%s, %s),\n",
		$last_case_label, $typ, $cs, $args);
}
# Die with a short summary of the command-line syntax.
sub usage
{
	my $help_text = <<EOM;
Usage: gen_tabcomplete.pl [--outfile/-o <path>] input_file
--outfile Output file (default is stdout)
gen_tabcomplete.pl transforms tab-complete.in.c to tab-complete.c.
EOM
	die $help_text;
}

View File

@ -13,7 +13,6 @@ psql_sources = files(
'prompt.c',
'startup.c',
'stringutils.c',
'tab-complete.c',
'variables.c',
)
@ -24,6 +23,17 @@ psqlscanslash = custom_target('psqlscanslash',
generated_sources += psqlscanslash
psql_sources += psqlscanslash
tabcomplete = custom_target('tabcomplete',
input: 'tab-complete.in.c',
output: 'tab-complete.c',
command: [
perl, files('gen_tabcomplete.pl'), files('tab-complete.in.c'),
'--outfile', '@OUTPUT@', '@INPUT@',
],
)
generated_sources += tabcomplete
psql_sources += tabcomplete
sql_help = custom_target('psql_help',
output: ['sql_help.c', 'sql_help.h'],
depfile: 'sql_help.dep',

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2024, PostgreSQL Global Development Group
*
* src/bin/psql/tab-complete.c
* src/bin/psql/tab-complete.in.c
*
* Note: this will compile and work as-is if SWITCH_CONVERSION_APPLIED
* is not defined. However, the expected usage is that it's first run