Revise plpgsql's scanner to process comments and string literals in a way
more nearly matching the core SQL scanner.  The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a
  non-E string literal is now correctly taken as an ordinary character;
  formerly it was misinterpreted as escaping the ending quote.  (Since the
  string also had to pass through the core scanner, this invariably led
  to syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as
  quoting the next character, regardless of mode.  Now, they are ordinary
  characters with standard_conforming_strings on, while with it off, they
  introduce the same set of escapes as in the core SQL scanner.  Also,
  escape_string_warning is now effective for RAISE format strings.  These
  changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core
scanner.  It would be a good idea to look into getting rid of plpgsql's
scanner entirely in favor of using the core scanner.  However, that involves
more change than I can justify making during beta --- in particular, the core
scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or
T_TRIGGER as a made-up first token.  That presumably had some value once upon
a time, but now it's just useless complication for both the scanner and the
grammar.
This commit is contained in:
Tom Lane 2009-04-19 18:52:58 +00:00
parent 7f2f798b30
commit 3a624e9200
8 changed files with 395 additions and 233 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->
<chapter id="plpgsql"> <chapter id="plpgsql">
<title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title> <title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
There are two types of comments in <application>PL/pgSQL</>. A double There are two types of comments in <application>PL/pgSQL</>. A double
dash (<literal>--</literal>) starts a comment that extends to the end of dash (<literal>--</literal>) starts a comment that extends to the end of
the line. A <literal>/*</literal> starts a block comment that extends to the line. A <literal>/*</literal> starts a block comment that extends to
the next occurrence of <literal>*/</literal>. Block comments cannot be the next occurrence of <literal>*/</literal>. Block comments nest,
nested, but double dash comments can be enclosed into a block comment and just as in ordinary SQL.
a double dash can hide the block comment delimiters <literal>/*</literal>
and <literal>*/</literal>.
</para> </para>
<para> <para>

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -62,6 +62,8 @@ static PLpgSQL_row *make_scalar_list1(const char *initial_name,
int lineno); int lineno);
static void check_sql_expr(const char *stmt); static void check_sql_expr(const char *stmt);
static void plpgsql_sql_error_callback(void *arg); static void plpgsql_sql_error_callback(void *arg);
static char *parse_string_token(const char *token);
static void plpgsql_string_error_callback(void *arg);
static char *check_label(const char *yytxt); static char *check_label(const char *yytxt);
static void check_labels(const char *start_label, static void check_labels(const char *start_label,
const char *end_label); const char *end_label);
@ -228,8 +230,6 @@ static List *read_raise_options(void);
/* /*
* Other tokens * Other tokens
*/ */
%token T_FUNCTION
%token T_TRIGGER
%token T_STRING %token T_STRING
%token T_NUMBER %token T_NUMBER
%token T_SCALAR /* a VAR, RECFIELD, or TRIGARG */ %token T_SCALAR /* a VAR, RECFIELD, or TRIGARG */
@ -244,13 +244,9 @@ static List *read_raise_options(void);
%% %%
pl_function : T_FUNCTION comp_optsect pl_block opt_semi pl_function : comp_optsect pl_block opt_semi
{ {
yylval.program = (PLpgSQL_stmt_block *)$3; yylval.program = (PLpgSQL_stmt_block *) $2;
}
| T_TRIGGER comp_optsect pl_block opt_semi
{
yylval.program = (PLpgSQL_stmt_block *)$3;
} }
; ;
@ -1403,7 +1399,7 @@ stmt_raise : K_RAISE lno
if (tok == T_STRING) if (tok == T_STRING)
{ {
/* old style message and parameters */ /* old style message and parameters */
new->message = plpgsql_get_string_value(); new->message = parse_string_token(yytext);
/* /*
* We expect either a semi-colon, which * We expect either a semi-colon, which
* indicates no parameters, or a comma that * indicates no parameters, or a comma that
@ -1435,7 +1431,7 @@ stmt_raise : K_RAISE lno
if (yylex() != T_STRING) if (yylex() != T_STRING)
yyerror("syntax error"); yyerror("syntax error");
sqlstatestr = plpgsql_get_string_value(); sqlstatestr = parse_string_token(yytext);
if (strlen(sqlstatestr) != 5) if (strlen(sqlstatestr) != 5)
yyerror("invalid SQLSTATE code"); yyerror("invalid SQLSTATE code");
@ -1778,7 +1774,7 @@ proc_condition : opt_lblname
/* next token should be a string literal */ /* next token should be a string literal */
if (yylex() != T_STRING) if (yylex() != T_STRING)
yyerror("syntax error"); yyerror("syntax error");
sqlstatestr = plpgsql_get_string_value(); sqlstatestr = parse_string_token(yytext);
if (strlen(sqlstatestr) != 5) if (strlen(sqlstatestr) != 5)
yyerror("invalid SQLSTATE code"); yyerror("invalid SQLSTATE code");
@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
errposition(0); errposition(0);
} }
/*
 * parse_string_token - turn a string-literal token into the string it denotes.
 *
 * The decoding is done by the core lexer.  That call is made from
 * plpgsql_parse_string_token() in pl_funcs.c rather than from here, so that
 * the core bison/flex definitions are not confused with our own.  All this
 * wrapper does is arrange the error context around that call, in the same
 * way check_sql_expr() does: a callback describing the string literal is
 * pushed in place of the current top-of-stack entry (it links to that
 * entry's predecessor), and the original stack is put back afterwards.
 *
 * token is the text of the literal as identified by the lexer; the return
 * value is whatever plpgsql_parse_string_token() produces for it.
 */
static char *
parse_string_token(const char *token)
{
	ErrorContextCallback *saved_stack;
	ErrorContextCallback literal_cb;
	char	   *value;

	/* See comments in check_sql_expr() */
	Assert(error_context_stack->callback == plpgsql_compile_error_callback);

	/* Remember the stack as it is now so it can be reinstated below */
	saved_stack = error_context_stack;

	/* Build our entry so that it replaces, rather than stacks on, the top */
	literal_cb.previous = saved_stack->previous;
	literal_cb.arg = (char *) token;
	literal_cb.callback = plpgsql_string_error_callback;
	error_context_stack = &literal_cb;

	value = plpgsql_parse_string_token(token);

	/* Restore former ereport callback */
	error_context_stack = saved_stack;

	return value;
}
/*
 * Error context callback that parse_string_token() installs while a string
 * literal is being decoded.
 *
 * It adds one context line built from plpgsql_error_funcname and
 * plpgsql_error_lineno, then calls errposition(0).  The callback argument
 * is not examined.
 */
static void
plpgsql_string_error_callback(void *arg)
{
	Assert(plpgsql_error_funcname);

	errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
			   plpgsql_error_funcname, plpgsql_error_lineno);

	/* representing the string literal as internalquery seems overkill */
	errposition(0);
}
static char * static char *
check_label(const char *yytxt) check_label(const char *yytxt)
{ {

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
bool forValidator) bool forValidator)
{ {
Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup); Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
int functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION; bool is_trigger = CALLED_AS_TRIGGER(fcinfo);
Datum prosrcdatum; Datum prosrcdatum;
bool isnull; bool isnull;
char *proc_source; char *proc_source;
@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
if (isnull) if (isnull)
elog(ERROR, "null prosrc"); elog(ERROR, "null prosrc");
proc_source = TextDatumGetCString(prosrcdatum); proc_source = TextDatumGetCString(prosrcdatum);
plpgsql_scanner_init(proc_source, functype); plpgsql_scanner_init(proc_source);
plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname)); plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
plpgsql_error_lineno = 0; plpgsql_error_lineno = 0;
@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
function->fn_oid = fcinfo->flinfo->fn_oid; function->fn_oid = fcinfo->flinfo->fn_oid;
function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data); function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
function->fn_tid = procTup->t_self; function->fn_tid = procTup->t_self;
function->fn_functype = functype; function->fn_is_trigger = is_trigger;
function->fn_cxt = func_cxt; function->fn_cxt = func_cxt;
function->out_param_varno = -1; /* set up for no OUT param */ function->out_param_varno = -1; /* set up for no OUT param */
switch (functype) switch (is_trigger)
{ {
case T_FUNCTION: case false:
/* /*
* Fetch info about the procedure's parameters. Allocations aren't * Fetch info about the procedure's parameters. Allocations aren't
@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
ReleaseSysCache(typeTup); ReleaseSysCache(typeTup);
break; break;
case T_TRIGGER: case true:
/* Trigger procedure's return type is unknown yet */ /* Trigger procedure's return type is unknown yet */
function->fn_rettype = InvalidOid; function->fn_rettype = InvalidOid;
function->fn_retbyval = false; function->fn_retbyval = false;
@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
break; break;
default: default:
elog(ERROR, "unrecognized function typecode: %u", functype); elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
break; break;
} }
@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
* Recognize tg_argv when compiling triggers * Recognize tg_argv when compiling triggers
* (XXX this sucks, it should be a regular variable in the namestack) * (XXX this sucks, it should be a regular variable in the namestack)
*/ */
if (plpgsql_curr_compile->fn_functype == T_TRIGGER) if (plpgsql_curr_compile->fn_is_trigger)
{ {
if (strcmp(cp[0], "tg_argv") == 0) if (strcmp(cp[0], "tg_argv") == 0)
{ {

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -17,6 +17,8 @@
#include <ctype.h> #include <ctype.h>
#include "parser/gramparse.h"
#include "parser/gram.h"
#include "parser/scansup.h" #include "parser/scansup.h"
@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
} }
/*
* plpgsql_parse_string_token - get the value represented by a string literal
*
* We do not make plpgsql's lexer produce the represented value, because
* in many cases we don't need it. Instead this function is invoked when
* we do need it. The input is the T_STRING token as identified by the lexer.
*
* The result is a palloc'd string.
*
* Note: this is called only from plpgsql's gram.y, but we can't just put it
* there because including parser/gram.h there would cause confusion.
*/
char *
plpgsql_parse_string_token(const char *token)
{
int ctoken;
/*
* We use the core lexer to do the dirty work. Aside from getting the
* right results for escape sequences and so on, this helps us produce
* appropriate warnings for escape_string_warning etc.
*/
scanner_init(token);
ctoken = base_yylex();
if (ctoken != SCONST)
elog(ERROR, "unexpected result from base lexer: %d", ctoken);
scanner_finish();
return base_yylval.str;
}
/* /*
* Statement type as a string, for use in error messages etc. * Statement type as a string, for use in error messages etc.
*/ */

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
Oid fn_oid; Oid fn_oid;
TransactionId fn_xmin; TransactionId fn_xmin;
ItemPointerData fn_tid; ItemPointerData fn_tid;
int fn_functype; bool fn_is_trigger;
PLpgSQL_func_hashkey *fn_hashkey; /* back-link to hashtable key */ PLpgSQL_func_hashkey *fn_hashkey; /* back-link to hashtable key */
MemoryContext fn_cxt; MemoryContext fn_cxt;
@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
* ---------- * ----------
*/ */
extern void plpgsql_convert_ident(const char *s, char **output, int numidents); extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
extern char *plpgsql_parse_string_token(const char *token);
extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt); extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
extern void plpgsql_dumptree(PLpgSQL_function *func); extern void plpgsql_dumptree(PLpgSQL_function *func);
@ -894,8 +895,7 @@ extern int plpgsql_yylex(void);
extern void plpgsql_push_back_token(int token); extern void plpgsql_push_back_token(int token);
extern void plpgsql_yyerror(const char *message); extern void plpgsql_yyerror(const char *message);
extern int plpgsql_scanner_lineno(void); extern int plpgsql_scanner_lineno(void);
extern void plpgsql_scanner_init(const char *str, int functype); extern void plpgsql_scanner_init(const char *str);
extern void plpgsql_scanner_finish(void); extern void plpgsql_scanner_finish(void);
extern char *plpgsql_get_string_value(void);
#endif /* PLPGSQL_H */ #endif /* PLPGSQL_H */

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -19,27 +19,31 @@
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
/* No reason to constrain amount of data slurped */
#define YY_READ_BUF_SIZE 16777216
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf #undef fprintf
#define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg))) #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
/*
* When we parse a token that requires multiple lexer rules to process,
* remember the token's starting position this way.
*/
#define SAVE_TOKEN_START() \
( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
/* Handles to the buffer that the lexer uses internally */ /* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle; static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf; static char *scanbuf;
static const char *scanstr; /* original input string */ static const char *scanstr; /* original input string */
static int scanner_functype;
static bool scanner_typereported;
static int pushback_token; static int pushback_token;
static bool have_pushback_token; static bool have_pushback_token;
static const char *cur_line_start; static const char *cur_line_start;
static int cur_line_num; static int cur_line_num;
static int xcdepth = 0; /* depth of nesting in slash-star comments */
static char *dolqstart; /* current $foo$ quote start string */ static char *dolqstart; /* current $foo$ quote start string */
static int dolqlen; /* signal to plpgsql_get_string_value */
extern bool standard_conforming_strings;
bool plpgsql_SpaceScanned = false; bool plpgsql_SpaceScanned = false;
%} %}
@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;
%option case-insensitive %option case-insensitive
/*
* Exclusive states are a subset of the core lexer's:
* <xc> extended C-style comments
* <xq> standard quoted strings
* <xe> extended quoted strings (support backslash escape sequences)
* <xdolq> $foo$ quoted strings
*/
%x IN_STRING %x xc
%x IN_COMMENT %x xe
%x IN_DOLLARQUOTE %x xq
%x xdolq
/*
* Definitions --- these generally must match the core lexer, but in some
* cases we can simplify, since we only care about identifying the token
* boundaries and not about deriving the represented value. Also, we
* aren't trying to lex multicharacter operators so their interactions
* with comments go away.
*/
space [ \t\n\r\f]
horiz_space [ \t\f]
newline [\n\r]
non_newline [^\n\r]
comment ("--"{non_newline}*)
whitespace ({space}+|{comment})
special_whitespace ({space}+|{comment}{newline})
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
quote '
quotestop {quote}{whitespace}*
quotecontinue {quote}{whitespace_with_newline}{quote}
quotefail {quote}{whitespace}*"-"
xestart [eE]{quote}
xeinside [^\\']+
xeescape [\\].
xqstart {quote}
xqdouble {quote}{quote}
xqinside [^']+
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
dolqfailed \${dolq_start}{dolq_cont}*
dolqinside [^$]+
xcstart \/\*
xcstop \*+\/
xcinside [^*/]+
digit [0-9] digit [0-9]
ident_start [A-Za-z\200-\377_] ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$] ident_cont [A-Za-z\200-\377_0-9\$]
/* This is a simpler treatment of quoted identifiers than the core uses */
quoted_ident (\"[^\"]*\")+ quoted_ident (\"[^\"]*\")+
identifier ({ident_start}{ident_cont}*|{quoted_ident}) identifier ({ident_start}{ident_cont}*|{quoted_ident})
param \${digit}+ param \${digit}+
space [ \t\n\r\f]
/* $foo$ style quotes ("dollar quoting")
* copied straight from the backend SQL parser
*/
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
dolqinside [^$]+
%% %%
/* ---------- /* ----------
* Local variables in scanner to remember where * Local variables in scanner to remember where
@ -95,17 +141,6 @@ dolqinside [^$]+
BEGIN(INITIAL); BEGIN(INITIAL);
plpgsql_SpaceScanned = false; plpgsql_SpaceScanned = false;
/* ----------
* On the first call to a new source report the
* function's type (T_FUNCTION or T_TRIGGER)
* ----------
*/
if (!scanner_typereported)
{
scanner_typereported = true;
return scanner_functype;
}
/* ---------- /* ----------
* The keyword rules * The keyword rules
* ---------- * ----------
@ -225,119 +260,134 @@ dump { return O_DUMP; }
{digit}+ { return T_NUMBER; } {digit}+ { return T_NUMBER; }
\". { \". { yyerror("unterminated quoted identifier"); }
plpgsql_error_lineno = plpgsql_scanner_lineno();
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("unterminated quoted identifier")));
}
/* ---------- /* ----------
* Ignore whitespaces but remember this happened * Ignore whitespace (including comments) but remember this happened
* ---------- * ----------
*/ */
{space}+ { plpgsql_SpaceScanned = true; } {whitespace} { plpgsql_SpaceScanned = true; }
/* ---------- /* ----------
* Eat up comments * Comment and literal handling is mostly copied from the core lexer
* ---------- * ----------
*/ */
--[^\r\n]* ; {xcstart} {
/* Set location in case of syntax error in comment */
SAVE_TOKEN_START();
xcdepth = 0;
BEGIN(xc);
plpgsql_SpaceScanned = true;
}
\/\* { start_lineno = plpgsql_scanner_lineno(); <xc>{xcstart} {
BEGIN(IN_COMMENT); xcdepth++;
} }
<IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
<IN_COMMENT>\n ;
<IN_COMMENT>. ;
<IN_COMMENT><<EOF>> {
plpgsql_error_lineno = start_lineno;
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("unterminated /* comment")));
}
/* ---------- <xc>{xcstop} {
* Collect anything inside of ''s and return one STRING token if (xcdepth <= 0)
* BEGIN(INITIAL);
* Hacking yytext/yyleng here lets us avoid using yymore(), which is else
* a win for performance. It's safe because we know the underlying xcdepth--;
* input buffer is not changing. }
* ----------
*/
' {
start_lineno = plpgsql_scanner_lineno();
start_charpos = yytext;
BEGIN(IN_STRING);
}
[eE]' {
/* for now, treat the same as a regular literal */
start_lineno = plpgsql_scanner_lineno();
start_charpos = yytext;
BEGIN(IN_STRING);
}
<IN_STRING>\\. { }
<IN_STRING>\\ { /* can only happen with \ at EOF */ }
<IN_STRING>'' { }
<IN_STRING>' {
/* tell plpgsql_get_string_value it's not a dollar quote */
dolqlen = 0;
/* adjust yytext/yyleng to describe whole string token */
yyleng += (yytext - start_charpos);
yytext = start_charpos;
BEGIN(INITIAL);
return T_STRING;
}
<IN_STRING>[^'\\]+ { }
<IN_STRING><<EOF>> {
plpgsql_error_lineno = start_lineno;
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("unterminated quoted string")));
}
{dolqdelim} { <xc>{xcinside} {
start_lineno = plpgsql_scanner_lineno(); /* ignore */
start_charpos = yytext; }
dolqstart = pstrdup(yytext);
BEGIN(IN_DOLLARQUOTE); <xc>\/+ {
} /* ignore */
<IN_DOLLARQUOTE>{dolqdelim} { }
if (strcmp(yytext, dolqstart) == 0)
{ <xc>\*+ {
pfree(dolqstart); /* ignore */
/* tell plpgsql_get_string_value it is a dollar quote */ }
dolqlen = yyleng;
<xc><<EOF>> { yyerror("unterminated /* comment"); }
{xqstart} {
SAVE_TOKEN_START();
if (standard_conforming_strings)
BEGIN(xq);
else
BEGIN(xe);
}
{xestart} {
SAVE_TOKEN_START();
BEGIN(xe);
}
<xq,xe>{quotestop} |
<xq,xe>{quotefail} {
yyless(1);
BEGIN(INITIAL);
/* adjust yytext/yyleng to describe whole string token */ /* adjust yytext/yyleng to describe whole string token */
yyleng += (yytext - start_charpos); yyleng += (yytext - start_charpos);
yytext = start_charpos; yytext = start_charpos;
BEGIN(INITIAL);
return T_STRING; return T_STRING;
} }
else <xq,xe>{xqdouble} {
{ }
/* <xq>{xqinside} {
* When we fail to match $...$ to dolqstart, transfer }
* the $... part to the output, but put back the final <xe>{xeinside} {
* $ for rescanning. Consider $delim$...$junk$delim$ }
*/ <xe>{xeescape} {
yyless(yyleng-1); }
} <xq,xe>{quotecontinue} {
} /* ignore */
<IN_DOLLARQUOTE>{dolqinside} { } }
<IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ } <xe>. {
<IN_DOLLARQUOTE><<EOF>> { /* This is only needed for \ just before EOF */
plpgsql_error_lineno = start_lineno; }
ereport(ERROR, <xq,xe><<EOF>> { yyerror("unterminated quoted string"); }
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("unterminated dollar-quoted string"))); {dolqdelim} {
} SAVE_TOKEN_START();
dolqstart = pstrdup(yytext);
BEGIN(xdolq);
}
{dolqfailed} {
/* throw back all but the initial "$" */
yyless(1);
/* and treat it as {other} */
return yytext[0];
}
<xdolq>{dolqdelim} {
if (strcmp(yytext, dolqstart) == 0)
{
pfree(dolqstart);
BEGIN(INITIAL);
/* adjust yytext/yyleng to describe whole string */
yyleng += (yytext - start_charpos);
yytext = start_charpos;
return T_STRING;
}
else
{
/*
* When we fail to match $...$ to dolqstart, transfer
* the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$
*/
yyless(yyleng-1);
}
}
<xdolq>{dolqinside} {
}
<xdolq>{dolqfailed} {
}
<xdolq>. {
/* This is only needed for $ inside the quoted text */
}
<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
/* ---------- /* ----------
* Any unmatched character is returned as is * Any unmatched character is returned as is
* ---------- * ----------
*/ */
. { return yytext[0]; } . {
return yytext[0];
}
%% %%
@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
* to cite in error messages. * to cite in error messages.
*/ */
void void
plpgsql_scanner_init(const char *str, int functype) plpgsql_scanner_init(const char *str)
{ {
Size slen; Size slen;
@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
/* Other setup */ /* Other setup */
scanstr = str; scanstr = str;
scanner_functype = functype;
scanner_typereported = false;
have_pushback_token = false; have_pushback_token = false;
cur_line_start = scanbuf; cur_line_start = scanbuf;
@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
yy_delete_buffer(scanbufhandle); yy_delete_buffer(scanbufhandle);
pfree(scanbuf); pfree(scanbuf);
} }
/*
 * plpgsql_get_string_value
 *		Return the decoded value of the T_STRING token just read, as a
 *		palloc'd string.
 *
 * This is a separate call because in many scenarios nobody needs the
 * decoded value.
 *
 * Three token shapes are handled, distinguished by dolqlen and the first
 * character of yytext:
 *	 $foo$...$foo$	the text between the delimiters is copied verbatim
 *	 E'...'			body starts after the two-character prefix
 *	 '...'			body starts after the opening quote
 * For both quoted forms, a doubled quote yields a single quote, a backslash
 * yields the character following it, and a lone quote is taken to be the
 * closing quote and is dropped.
 *
 * Note: the literal is expected to be the most recently lexed token.  This
 * would not work well if multiple-token pushback were supported or if
 * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
 */
char *
plpgsql_get_string_value(void)
{
	const char *src;
	char	   *value;
	int			len;

	if (dolqlen > 0)
	{
		/* Token is a $foo$...$foo$ string: strip one delimiter per end */
		len = yyleng - 2 * dolqlen;
		Assert(len >= 0);
		value = (char *) palloc(len + 1);
		memcpy(value, yytext + dolqlen, len);
		value[len] = '\0';
		return value;
	}

	/*
	 * Token is an E'...' or a '...' string.  The two are decoded the same
	 * way; only the number of leading characters to skip differs.
	 */
	if (*yytext == 'E' || *yytext == 'e')
		src = yytext + 2;
	else
		src = yytext + 1;

	value = (char *) palloc(yyleng + 1);	/* more than enough room */
	len = 0;

	while (*src != '\0')
	{
		if (*src == '\\')
		{
			/* emit the character after the backslash, if there is one */
			if (src[1] != '\0')		/* just a paranoid check */
				value[len++] = *(++src);
		}
		else if (*src == '\'')
		{
			/* '' stands for one quote; else it must be string end quote */
			if (src[1] == '\'')
				value[len++] = *src++;
		}
		else
			value[len++] = *src;

		src++;
	}
	value[len] = '\0';

	return value;
}

View File

@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
DROP FUNCTION leaker_1(bool); DROP FUNCTION leaker_1(bool);
DROP FUNCTION leaker_2(bool); DROP FUNCTION leaker_2(bool);
-- Test handling of string literals.
set standard_conforming_strings = off;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz';
return 'foo\\bar\041baz';
end
$$ language plpgsql;
WARNING: nonstandard use of \\ in a string literal
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
CONTEXT: string literal in PL/PgSQL function "strtest" near line 2
WARNING: nonstandard use of \\ in a string literal
LINE 1: SELECT 'foo\\bar\041baz'
^
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
QUERY: SELECT 'foo\\bar\041baz'
CONTEXT: SQL statement in PL/PgSQL function "strtest" near line 3
select strtest();
NOTICE: foo\bar!baz
WARNING: nonstandard use of \\ in a string literal
LINE 1: SELECT 'foo\\bar\041baz'
^
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
QUERY: SELECT 'foo\\bar\041baz'
CONTEXT: PL/pgSQL function "strtest" line 3 at RETURN
strtest
-------------
foo\bar!baz
(1 row)
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\bar!baz
strtest
-------------
foo\bar!baz
(1 row)
set standard_conforming_strings = on;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz\';
return 'foo\\bar\041baz\';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\\bar\041baz\
strtest
------------------
foo\\bar\041baz\
(1 row)
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\bar!baz
strtest
-------------
foo\bar!baz
(1 row)
drop function strtest();

View File

@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
DROP FUNCTION leaker_1(bool); DROP FUNCTION leaker_1(bool);
DROP FUNCTION leaker_2(bool); DROP FUNCTION leaker_2(bool);
-- Test handling of string literals.
set standard_conforming_strings = off;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz';
return 'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
set standard_conforming_strings = on;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz\';
return 'foo\\bar\041baz\';
end
$$ language plpgsql;
select strtest();
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
drop function strtest();