From d0cfc018233b4cdcab28d460ee0e14dbf87ac4ce Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 29 Jan 2010 17:44:12 +0000 Subject: [PATCH] Allow psql variables to be interpolated with literal or identifier escaping. Loosely based on a patch by Pavel Stehule. --- doc/src/sgml/ref/psql-ref.sgml | 80 +++++++++++++++++++--------------- src/bin/psql/psqlscan.l | 80 +++++++++++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 5b8bb836b1..a15b6a8df0 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1,5 +1,5 @@ @@ -658,7 +658,12 @@ testdb=> If an unquoted argument begins with a colon (:), it is taken as a psql variable and the value of the - variable is used as the argument instead. + variable is used as the argument instead. If the variable name is + surrounded by single quotes (e.g. :'var'), it + will be escaped as an SQL literal and the result will be used as + the argument. If the variable name is surrounded by double quotes, + it will be escaped as an SQL identifier and the result will be used + as the argument. @@ -2711,18 +2716,35 @@ bar An additional useful feature of psql variables is that you can substitute (interpolate) - them into regular SQL statements. The syntax for - this is again to prepend the variable name with a colon + them into regular SQL statements. + psql provides special facilities for + ensuring that values used as SQL literals and identifiers are + properly escaped. The syntax for interpolating a value without + any special escaping is again to prepend the variable name with a colon (:): testdb=> \set foo 'my_table' testdb=> SELECT * FROM :foo; - would then query the table my_table. The value of - the variable is copied literally, so it can even contain unbalanced - quotes or backslash commands. You must make sure that it makes sense - where you put it. Variable interpolation will not be performed into - quoted SQL entities. + would then query the table my_table. Note that this + may be unsafe: the value of the variable is copied literally, so it can + even contain unbalanced quotes or backslash commands. You must make sure + that it makes sense where you put it. + + + + When a value is to be used as an SQL literal or identifier, it is + safest to arrange for it to be escaped. To escape the value of + a variable as an SQL literal, write a colon followed by the variable + name in single quotes. To escape the value an SQL identifier, write + a colon followed by the variable name in double quotes. The previous + example would be more safely written this way: + +testdb=> \set foo 'my_table' +testdb=> SELECT * FROM :"foo"; + + Variable interpolation will not be performed into quoted + SQL entities. @@ -2730,40 +2752,26 @@ testdb=> SELECT * FROM :foo; copy the contents of a file into a table column. First load the file into a variable and then proceed as above: -testdb=> \set content '''' `cat my_file.txt` '''' -testdb=> INSERT INTO my_table VALUES (:content); +testdb=> \set content `cat my_file.txt` +testdb=> INSERT INTO my_table VALUES (:'content'); - One problem with this approach is that my_file.txt - might contain single quotes. These need to be escaped so that - they don't cause a syntax error when the second line is processed. This - could be done with the program sed: - -testdb=> \set content '''' `sed -e "s/'/''/g" < my_file.txt` '''' - - If you are using non-standard-conforming strings then you'll also need - to double backslashes. This is a bit tricky: - -testdb=> \set content '''' `sed -e "s/'/''/g" -e 's/\\/\\\\/g' < my_file.txt` '''' - - Note the use of different shell quoting conventions so that neither - the single quote marks nor the backslashes are special to the shell. - Backslashes are still special to sed, however, so - we need to double them. (Perhaps - at one point you thought it was great that all Unix commands use the - same escape character.) + (Note that this still won't work if my_file.txt contains NUL bytes. + psql does not support embedded NUL bytes in variable values.) - Since colons can legally appear in SQL commands, the following rule - applies: the character sequence - :name is not changed unless name is the name - of a variable that is currently set. In any case you can escape - a colon with a backslash to protect it from substitution. (The - colon syntax for variables is standard SQL for + Since colons can legally appear in SQL commands, an apparent attempt + at interpolation (such as :name, + :'name', or :"name") is not + changed unless the named variable is currently set. In any case you + can escape a colon with a backslash to protect it from substitution. + (The colon syntax for variables is standard SQL for embedded query languages, such as ECPG. The colon syntax for array slices and type casts are PostgreSQL extensions, hence the - conflict.) + conflict. The colon syntax for escaping a variable's value as an + SQL literal or identifier is a psql + extension.) diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index 619f30ec3b..b416c67177 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -33,7 +33,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.31 2010/01/02 16:57:59 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.32 2010/01/29 17:44:12 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -118,6 +118,7 @@ static YY_BUFFER_STATE prepare_buffer(const char *txt, int len, char **txtcopy); static void emit(const char *txt, int len); static bool is_utf16_surrogate_first(uint32 c); +static void escape_variable(bool as_ident); #define ECHO emit(yytext, yyleng) @@ -707,6 +708,14 @@ other . } } +:'[A-Za-z0-9_]+' { + escape_variable(false); + } + +:\"[A-Za-z0-9_]+\" { + escape_variable(true); + } + /* * Back to backend-compatible rules. */ @@ -927,6 +936,27 @@ other . return LEXRES_OK; } +:'[A-Za-z0-9_]+' { + if (option_type == OT_VERBATIM) + ECHO; + else + { + escape_variable(false); + return LEXRES_OK; + } + } + + +:\"[A-Za-z0-9_]+\" { + if (option_type == OT_VERBATIM) + ECHO; + else + { + escape_variable(true); + return LEXRES_OK; + } + } + "|" { ECHO; if (option_type == OT_FILEPIPE) @@ -1740,3 +1770,51 @@ is_utf16_surrogate_first(uint32 c) { return (c >= 0xD800 && c <= 0xDBFF); } + +static void +escape_variable(bool as_ident) +{ + char saved_char; + const char *value; + + /* Variable lookup. */ + saved_char = yytext[yyleng - 1]; + yytext[yyleng - 1] = '\0'; + value = GetVariable(pset.vars, yytext + 2); + + /* Escaping. */ + if (value) + { + if (!pset.db) + psql_error("can't escape without active connection\n"); + else + { + char *escaped_value; + + if (as_ident) + escaped_value = + PQescapeIdentifier(pset.db, value, strlen(value)); + else + escaped_value = + PQescapeLiteral(pset.db, value, strlen(value)); + if (escaped_value == NULL) + { + const char *error = PQerrorMessage(pset.db); + psql_error("%s", error); + } + else + { + appendPQExpBufferStr(output_buf, escaped_value); + PQfreemem(escaped_value); + return; + } + } + } + + /* + * If we reach this point, some kind of error has occurred. Emit the + * original text into the output buffer. + */ + yytext[yyleng - 1] = saved_char; + emit(yytext, yyleng); +}