From d395aecffad7cc6bd043e2d81a1bed5b3fe2f5fa Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 26 Jun 2005 19:16:07 +0000 Subject: [PATCH] Code review for escape-strings patch. Sync psql and plpgsql lexers with main, avoid using a SQL-defined SQLSTATE for what is most definitely not a SQL-compatible error condition, fix documentation omissions, adhere to message style guidelines, don't use two GUC_REPORT variables when one is sufficient. Nothing done about pg_dump issues. --- doc/src/sgml/errcodes.sgml | 7 ++++- doc/src/sgml/libpq.sgml | 21 ++++++++++---- doc/src/sgml/protocol.sgml | 11 ++++--- doc/src/sgml/runtime.sgml | 41 ++++++++++---------------- doc/src/sgml/syntax.sgml | 8 +++--- src/backend/parser/scan.l | 56 ++++++++++++++++++++++++------------ src/backend/utils/misc/guc.c | 17 ++--------- src/bin/psql/psqlscan.l | 17 +++++++---- src/include/utils/errcodes.h | 3 +- src/pl/plpgsql/src/scan.l | 39 +++++++++++++++++++++---- 10 files changed, 136 insertions(+), 84 deletions(-) diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml index ad556d0151..71b4a8cd40 100644 --- a/doc/src/sgml/errcodes.sgml +++ b/doc/src/sgml/errcodes.sgml @@ -1,4 +1,4 @@ - + <productname>PostgreSQL</productname> Error Codes @@ -370,6 +370,11 @@ INVALID ESCAPE SEQUENCE + +22P06 +NONSTANDARD USE OF ESCAPE CHARACTER + + 22010 INVALID INDICATOR PARAMETER VALUE diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 9c1b94e2be..a5bde7fc76 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,5 +1,5 @@ @@ -286,7 +286,7 @@ PGconn *PQconnectdb(const char *conninfo); Kerberos service name to use when authenticating with Kerberos 4 or 5. This must match the service name specified in the server configuration for Kerberos authentication to succeed. (See also - .) + .) @@ -888,10 +888,13 @@ Parameters reported as of the current release include is_superuser, session_authorization, DateStyle, -TimeZone, and -integer_datetimes. +TimeZone, +integer_datetimes, and +standard_compliant_strings. (server_encoding, TimeZone, and -integer_datetimes were not reported by releases before 8.0.) +integer_datetimes were not reported by releases before 8.0; +standard_compliant_strings was not reported by releases +before 8.1.) Note that server_version, server_encoding and @@ -913,6 +916,14 @@ see also PQserverVersion, which returns the information in a numeric form that is much easier to compare against. + +If no value for standard_compliant_strings is reported, +applications may assume it is false, that is, backslashes +are treated as escapes in string literals. Also, the presence of this +parameter may be taken as an indication that the escape string syntax +(E'...') is accepted. + + Although the returned pointer is declared const, it in fact points to mutable storage associated with the PGconn structure. diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cd6fa0e94f..7ebcfc63a3 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1,4 +1,4 @@ - + Frontend/Backend Protocol @@ -1072,10 +1072,13 @@ is_superuser, session_authorization, DateStyle, - TimeZone, and - integer_datetimes. + TimeZone, + integer_datetimes, and + standard_compliant_strings. (server_encoding, TimeZone, and - integer_datetimes were not reported by releases before 8.0.) + integer_datetimes were not reported by releases before 8.0; + standard_compliant_strings was not reported by releases + before 8.1.) Note that server_version, server_encoding and diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 3841b0dee2..c0b3e65ba7 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ @@ -3766,13 +3766,11 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' When on, a warning is issued if a backslash - (\) appears in a ordinary, non-escape syntax - ('') string. To log the statement that generated the - warning, set log_min_error_statement to - error. The default is off. + (\) appears in an ordinary string literal + ('...' syntax). The default is off. - Escape string syntax (E'') should be used for + Escape string syntax (E'...') should be used for escapes, because in future versions of PostgreSQL ordinary strings will have the standard-compliant behavior of treating backslashes @@ -3988,22 +3986,7 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' - - escape_string_syntax (boolean) - stringsescape - - escape_string_syntax configuration parameter - - - - Reports whether escape string syntax (E'') is - supported. This variable is used by applications that need to - determine if escape string syntax can be used in their code. - - - - - + standard_compliant_strings (boolean) stringsescape @@ -4011,10 +3994,16 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' - Reports whether ordinary, non-escape syntax strings - ('') treat backslashes literally, as specified in - the SQL standard. This variable is used by applications that - need to know how ordinary strings are processed`. + Reports whether ordinary string literals + ('...') treat backslashes literally, as specified in + the SQL standard. The value is currently always false, + indicating that backslashes are treated as escapes. It is planned + that this will change to true in a future + PostgreSQL release when string literal + syntax changes to meet the standard. Applications may check this + parameter to determine how string literals will be processed. + The presence of this parameter can also be taken as an indication + that the escape string syntax (E'...') is supported. diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 3d8d457c56..0d3d7f19f1 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,5 +1,5 @@ @@ -249,7 +249,7 @@ UPDATE "my_table" SET "a" = 5; PostgreSQL also allows single quotes to be escaped with a backslash (\'). However, future versions of PostgreSQL will not - support this so applications using this should convert to the + allow this, so applications using backslashes should convert to the standard-compliant method outlined above. @@ -276,8 +276,8 @@ UPDATE "my_table" SET "a" = 5; eventually treat backslashes as literal characters to be standard-compliant. The proper way to specify escape processing is to use the escape string syntax to indicate that escape - processing is desired. Escape string syntax is specified by placing - the the letter E (upper or lower case) before + processing is desired. Escape string syntax is specified by writing + the letter E (upper or lower case) just before the string, e.g. E'\041'. This method will work in all future versions of PostgreSQL. diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 4c556a5fba..f9b0dbdb75 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.126 2005/06/26 03:03:38 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.127 2005/06/26 19:16:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,10 +48,19 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ -static char *dolqstart; /* current $foo$ quote start string */ -static bool warn_on_first_escape; +static char *dolqstart; /* current $foo$ quote start string */ + +/* + * GUC variable. This is a DIRECT violation of the warning given at the + * head of gram.y, ie flex/bison code must not depend on any GUC variables; + * as such, changing its value can induce very unintuitive behavior. + * But we shall have to live with it as a short-term thing until the switch + * to SQL-standard string syntax is complete. + */ bool escape_string_warning; +static bool warn_on_first_escape; + /* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal. Call startlit to reset buffer @@ -66,6 +75,7 @@ static int literalalloc; /* current allocated buffer size */ static void addlit(char *ytext, int yleng); static void addlitchar(unsigned char ychar); static char *litbufdup(void); +static int pg_err_position(void); static void check_escape_warning(void); /* @@ -188,9 +198,8 @@ xhinside [^']* /* National character */ xnstart [nN]{quote} -/* Quote string does not warn about escapes */ +/* Quoted string that allows backslash escapes */ xestart [eE]{quote} -xeinside [^']* /* Extended quote * xqdouble implements embedded quote, '''' @@ -446,17 +455,21 @@ other . { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\' in a normal string"), - errhint("Use '' to place quotes in strings, or use the escape string syntax (E'')."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\' in a string literal"), + errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else if (yytext[1] == '\\') { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\\\ in a normal string"), - errhint("Use the escape string syntax for backslashes, e.g. E'\\\\'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\\\ in a string literal"), + errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else check_escape_warning(); @@ -707,14 +720,20 @@ other . %% +static int +pg_err_position(void) +{ + const char *loc = token_start ? token_start : yytext; + + /* in multibyte encodings, return index in characters not bytes */ + return pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; +} + void yyerror(const char *message) { const char *loc = token_start ? token_start : yytext; - int cursorpos; - - /* in multibyte encodings, return index in characters not bytes */ - cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; + int cursorpos = pg_err_position(); if (*loc == YY_END_OF_BUFFER_CHAR) { @@ -852,8 +871,9 @@ check_escape_warning(void) { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of escapes in an ordinary string"), - errhint("Use the escape string syntax for escapes, e.g. E'\\r\\n'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of escape in a string literal"), + errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), + errposition(pg_err_position()))); warn_on_first_escape = false; /* warn only once per string */ } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 1738604942..0ab8e74233 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.269 2005/06/26 03:03:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.270 2005/06/26 19:16:06 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -190,7 +190,6 @@ static int max_index_keys; static int max_identifier_length; static int block_size; static bool integer_datetimes; -static bool escape_string_syntax; static bool standard_compliant_strings; /* should be static, but commands/variable.c needs to get at it */ @@ -877,26 +876,16 @@ static struct config_bool ConfigureNamesBool[] = { {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, - gettext_noop("Warn about backslash escapes in ordinary, non-escape-syntax strings."), + gettext_noop("Warn about backslash escapes in ordinary string literals."), NULL }, &escape_string_warning, false, NULL, NULL }, - { - {"escape_string_syntax", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("Escape string syntax (E'') is supported."), - NULL, - GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE - }, - &escape_string_syntax, - true, NULL, NULL - }, - { {"standard_compliant_strings", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("'' strings treat backslashes literally."), + gettext_noop("'...' strings treat backslashes literally."), NULL, GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index a093b3703f..f62fe6224f 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -33,7 +33,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.14 2005/06/02 17:45:19 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.15 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -235,17 +235,18 @@ quotefail {quote}{whitespace}*"-" xbstart [bB]{quote} xbinside [^']* -/* Hexadecimal number - */ +/* Hexadecimal number */ xhstart [xX]{quote} xhinside [^']* -/* National character - */ +/* National character */ xnstart [nN]{quote} +/* Quoted string that allows backslash escapes */ +xestart [eE]{quote} + /* Extended quote - * xqdouble implements embedded quote + * xqdouble implements embedded quote, '''' */ xqstart {quote} xqdouble {quote}{quote} @@ -450,6 +451,10 @@ other . BEGIN(xq); ECHO; } +{xestart} { + BEGIN(xq); + ECHO; + } {quotestop} | {quotefail} { yyless(1); diff --git a/src/include/utils/errcodes.h b/src/include/utils/errcodes.h index 646193def4..636c3f7a37 100644 --- a/src/include/utils/errcodes.h +++ b/src/include/utils/errcodes.h @@ -11,7 +11,7 @@ * * Copyright (c) 2003-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.17 2005/01/01 20:44:30 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.18 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,6 +126,7 @@ #define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9') #define ERRCODE_INVALID_ESCAPE_OCTET MAKE_SQLSTATE('2','2', '0','0','D') #define ERRCODE_INVALID_ESCAPE_SEQUENCE MAKE_SQLSTATE('2','2', '0','2','5') +#define ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', 'P','0','6') #define ERRCODE_INVALID_INDICATOR_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','1','0') #define ERRCODE_INVALID_LIMIT_VALUE MAKE_SQLSTATE('2','2', '0','2','0') #define ERRCODE_INVALID_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','2','3') diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l index 680a58fc01..e69c8f17b0 100644 --- a/src/pl/plpgsql/src/scan.l +++ b/src/pl/plpgsql/src/scan.l @@ -4,7 +4,7 @@ * procedural language * * IDENTIFICATION - * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.41 2005/06/22 01:35:02 neilc Exp $ + * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.42 2005/06/26 19:16:07 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -291,6 +291,12 @@ dump { return O_DUMP; } start_charpos = yytext; BEGIN(IN_STRING); } +[eE]' { + /* for now, treat the same as a regular literal */ + start_lineno = plpgsql_scanner_lineno(); + start_charpos = yytext; + BEGIN(IN_STRING); + } \\. { } \\ { /* can only happen with \ at EOF */ } '' { } @@ -563,18 +569,41 @@ plpgsql_get_string_value(void) memcpy(result, yytext + dolqlen, len); result[len] = '\0'; } - else + else if (*yytext == 'E' || *yytext == 'e') { - /* Token is a '...' string */ + /* Token is an E'...' string */ result = (char *) palloc(yyleng + 1); /* more than enough room */ len = 0; - for (cp = yytext; *cp; cp++) + for (cp = yytext + 2; *cp; cp++) { if (*cp == '\'') { if (cp[1] == '\'') result[len++] = *cp++; - /* else it must be string start or end quote */ + /* else it must be string end quote */ + } + else if (*cp == '\\') + { + if (cp[1] != '\0') /* just a paranoid check */ + result[len++] = *(++cp); + } + else + result[len++] = *cp; + } + result[len] = '\0'; + } + else + { + /* Token is a '...' string */ + result = (char *) palloc(yyleng + 1); /* more than enough room */ + len = 0; + for (cp = yytext + 1; *cp; cp++) + { + if (*cp == '\'') + { + if (cp[1] == '\'') + result[len++] = *cp++; + /* else it must be string end quote */ } else if (*cp == '\\') {