Revise plpgsql's scanner to process comments and string literals in a way
more nearly matching the core SQL scanner.  The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a
  non-E string literal is now correctly taken as an ordinary character;
  formerly it was misinterpreted as escaping the ending quote.  (Since the
  string also had to pass through the core scanner, this invariably led to
  syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as
  quoting the next character, regardless of mode.  Now, they are ordinary
  characters with standard_conforming_strings on, while with it off, they
  introduce the same set of escapes as in the core SQL scanner.  Also,
  escape_string_warning is now effective for RAISE format strings.  These
  changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core
scanner.  It would be a good idea to look into getting rid of plpgsql's
scanner entirely in favor of using the core scanner.  However, that involves
more change than I can justify making during beta --- in particular, the core
scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or
T_TRIGGER as a made-up first token.  That presumably had some value once upon
a time, but now it's just useless complication for both the scanner and the
grammar.
2024-12-21 08:29:39 +08:00 · 2009-04-19 18:52:58 +00:00 · 2009-04-19 18:52:58 +00:00 · 3a624e9200
commit 3a624e9200
parent 7f2f798b30
8 changed files with 395 additions and 233 deletions
--- a/doc/src/sgml/plpgsql.sgml
+++ b/doc/src/sgml/plpgsql.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->
 <chapter id="plpgsql">
  <title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
     There are two types of comments in <application>PL/pgSQL</>. A double
     dash (<literal>--</literal>) starts a comment that extends to the end of
     the line. A <literal>/*</literal> starts a block comment that extends to
-     the next occurrence of <literal>*/</literal>.  Block comments cannot be
+     the next occurrence of <literal>*/</literal>.  Block comments nest,
-     nested, but double dash comments can be enclosed into a block comment and
+     just as in ordinary SQL.
     a double dash can hide the block comment delimiters <literal>/*</literal>
     and <literal>*/</literal>.
    </para>
    <para>
--- a/src/pl/plpgsql/src/gram.y
+++ b/src/pl/plpgsql/src/gram.y
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -62,6 +62,8 @@ static PLpgSQL_row		*make_scalar_list1(const char *initial_name,
 										   int lineno);
 static	void			 check_sql_expr(const char *stmt);
 static	void			 plpgsql_sql_error_callback(void *arg);
 static	char			*parse_string_token(const char *token);
 static	void			 plpgsql_string_error_callback(void *arg);
 static	char			*check_label(const char *yytxt);
 static	void			 check_labels(const char *start_label,
 									  const char *end_label);
@ -228,8 +230,6 @@ static List				*read_raise_options(void);
 		/*
 		 * Other tokens
 		 */
 %token	T_FUNCTION
 %token	T_TRIGGER
 %token	T_STRING
 %token	T_NUMBER
 %token	T_SCALAR				/* a VAR, RECFIELD, or TRIGARG */
@ -244,13 +244,9 @@ static List				*read_raise_options(void);
 %%
-pl_function		: T_FUNCTION comp_optsect pl_block opt_semi
+pl_function		: comp_optsect pl_block opt_semi
 					{
-						yylval.program = (PLpgSQL_stmt_block *)$3;
+						yylval.program = (PLpgSQL_stmt_block *) $2;
 					}
 				| T_TRIGGER comp_optsect pl_block opt_semi
 					{
 						yylval.program = (PLpgSQL_stmt_block *)$3;
 					}
 				;
@ -1403,7 +1399,7 @@ stmt_raise		: K_RAISE lno
 							if (tok == T_STRING)
 							{
 								/* old style message and parameters */
-								new->message = plpgsql_get_string_value();
+								new->message = parse_string_token(yytext);
 								/*
 								 * We expect either a semi-colon, which
 								 * indicates no parameters, or a comma that
@ -1435,7 +1431,7 @@ stmt_raise		: K_RAISE lno
 									if (yylex() != T_STRING)
 										yyerror("syntax error");
-									sqlstatestr = plpgsql_get_string_value();
+									sqlstatestr = parse_string_token(yytext);
 									if (strlen(sqlstatestr) != 5)
 										yyerror("invalid SQLSTATE code");
@ -1778,7 +1774,7 @@ proc_condition	: opt_lblname
 							/* next token should be a string literal */
 							if (yylex() != T_STRING)
 								yyerror("syntax error");
-							sqlstatestr = plpgsql_get_string_value();
+							sqlstatestr = parse_string_token(yytext);
 							if (strlen(sqlstatestr) != 5)
 								yyerror("invalid SQLSTATE code");
@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
 	errposition(0);
 }
 /*
 * Convert a string-literal token to the represented string value.
 *
 * To do this, we need to invoke the core lexer.  To avoid confusion between
 * the core bison/flex definitions and our own, the actual invocation is in
 * pl_funcs.c.  Here we are only concerned with setting up the right errcontext
 * state, which is handled the same as in check_sql_expr().
 *
 * token: text of the string-literal token.  It is passed unchanged to
 *		plpgsql_parse_string_token() and is also stored as the error
 *		callback's arg.
 *
 * Returns whatever plpgsql_parse_string_token() returns for the token.
 */
 static char *
 parse_string_token(const char *token)
 {
 	char	   *result;
 	ErrorContextCallback  syntax_errcontext;
 	ErrorContextCallback *previous_errcontext;
 	/* See comments in check_sql_expr() */
 	Assert(error_context_stack->callback == plpgsql_compile_error_callback);
 	/* Remember the current stack top so it can be put back afterwards */
 	previous_errcontext = error_context_stack;
 	syntax_errcontext.callback = plpgsql_string_error_callback;
 	/* The const is cast away only to fit the callback's arg field */
 	syntax_errcontext.arg = (char *) token;
 	/*
 	 * Link to the entry below the current top, so that while the token is
 	 * being parsed our callback takes the place of the compile-error
 	 * callback asserted above rather than being stacked on top of it.
 	 */
 	syntax_errcontext.previous = error_context_stack->previous;
 	error_context_stack = &syntax_errcontext;
 	result = plpgsql_parse_string_token(token);
 	/* Restore former ereport callback */
 	error_context_stack = previous_errcontext;
 	return result;
 }
 /*
 * Error context callback installed by parse_string_token() while a
 * string-literal token is being converted.
 *
 * Adds a context line built from plpgsql_error_funcname and
 * plpgsql_error_lineno.  The arg pointer is not examined here.
 */
 static void
 plpgsql_string_error_callback(void *arg)
 {
 	/* A function name must have been set before this callback can fire */
 	Assert(plpgsql_error_funcname);
 	errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
 			   plpgsql_error_funcname, plpgsql_error_lineno);
 	/* representing the string literal as internalquery seems overkill */
 	errposition(0);
 }
 static char *
 check_label(const char *yytxt)
 {
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
 		   bool forValidator)
 {
 	Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
-	int			functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION;
+	bool		is_trigger = CALLED_AS_TRIGGER(fcinfo);
 	Datum		prosrcdatum;
 	bool		isnull;
 	char	   *proc_source;
@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
 	if (isnull)
 		elog(ERROR, "null prosrc");
 	proc_source = TextDatumGetCString(prosrcdatum);
-	plpgsql_scanner_init(proc_source, functype);
+	plpgsql_scanner_init(proc_source);
 	plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
 	plpgsql_error_lineno = 0;
@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
 	function->fn_oid = fcinfo->flinfo->fn_oid;
 	function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
 	function->fn_tid = procTup->t_self;
-	function->fn_functype = functype;
+	function->fn_is_trigger = is_trigger;
 	function->fn_cxt = func_cxt;
 	function->out_param_varno = -1;		/* set up for no OUT param */
-	switch (functype)
+	switch (is_trigger)
 	{
-		case T_FUNCTION:
+		case false:
 			/*
 			 * Fetch info about the procedure's parameters. Allocations aren't
@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
 			ReleaseSysCache(typeTup);
 			break;
-		case T_TRIGGER:
+		case true:
 			/* Trigger procedure's return type is unknown yet */
 			function->fn_rettype = InvalidOid;
 			function->fn_retbyval = false;
@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
 			break;
 		default:
-			elog(ERROR, "unrecognized function typecode: %u", functype);
+			elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
 			break;
 	}
@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
 	 * Recognize tg_argv when compiling triggers
 	 * (XXX this sucks, it should be a regular variable in the namestack)
 	 */
-	if (plpgsql_curr_compile->fn_functype == T_TRIGGER)
+	if (plpgsql_curr_compile->fn_is_trigger)
 	{
 		if (strcmp(cp[0], "tg_argv") == 0)
 		{
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -17,6 +17,8 @@
 #include <ctype.h>
 #include "parser/gramparse.h"
 #include "parser/gram.h"
 #include "parser/scansup.h"
@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 }
 /*
 * plpgsql_parse_string_token - get the value represented by a string literal
 *
 * We do not make plpgsql's lexer produce the represented value, because
 * in many cases we don't need it.  Instead this function is invoked when
 * we do need it.  The input is the T_STRING token as identified by the lexer.
 *
 * The result is a palloc'd string.
 *
 * Note: this is called only from plpgsql's gram.y, but we can't just put it
 * there because including parser/gram.h there would cause confusion.
 */
 char *
 plpgsql_parse_string_token(const char *token)
 {
 	int		ctoken;
 	/*
 	 * We use the core lexer to do the dirty work.  Aside from getting the
 	 * right results for escape sequences and so on, this helps us produce
 	 * appropriate warnings for escape_string_warning etc.
 	 */
 	scanner_init(token);
 	/* Only the first token the core lexer produces is examined */
 	ctoken = base_yylex();
 	/* Anything other than a string constant is reported as an error */
 	if (ctoken != SCONST)
 		elog(ERROR, "unexpected result from base lexer: %d", ctoken);
 	/*
 	 * NOTE(review): scanner_finish() is not reached when the elog(ERROR)
 	 * above fires -- presumably normal error recovery cleans up; confirm.
 	 */
 	scanner_finish();
 	/* The value is taken from base_yylval, presumably set by base_yylex() */
 	return base_yylval.str;
 }
 /*
 * Statement type as a string, for use in error messages etc.
 */
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
 	Oid			fn_oid;
 	TransactionId fn_xmin;
 	ItemPointerData fn_tid;
-	int			fn_functype;
+	bool		fn_is_trigger;
 	PLpgSQL_func_hashkey *fn_hashkey;	/* back-link to hashtable key */
 	MemoryContext fn_cxt;
@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
 * ----------
 */
 extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
 extern char *plpgsql_parse_string_token(const char *token);
 extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
 extern void plpgsql_dumptree(PLpgSQL_function *func);
@ -894,8 +895,7 @@ extern int	plpgsql_yylex(void);
 extern void plpgsql_push_back_token(int token);
 extern void plpgsql_yyerror(const char *message);
 extern int	plpgsql_scanner_lineno(void);
-extern void plpgsql_scanner_init(const char *str, int functype);
+extern void plpgsql_scanner_init(const char *str);
 extern void plpgsql_scanner_finish(void);
 extern char *plpgsql_get_string_value(void);
 #endif   /* PLPGSQL_H */
--- a/src/pl/plpgsql/src/scan.l
+++ b/src/pl/plpgsql/src/scan.l
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -19,27 +19,31 @@
 #include "mb/pg_wchar.h"
 /* No reason to constrain amount of data slurped */
 #define YY_READ_BUF_SIZE 16777216
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
 /*
 * When we parse a token that requires multiple lexer rules to process,
 * remember the token's starting position this way.
 */
 #define SAVE_TOKEN_START()  \
 	( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
 static const char *scanstr;		/* original input string */
 static int	scanner_functype;
 static bool	scanner_typereported;
 static int	pushback_token;
 static bool have_pushback_token;
 static const char *cur_line_start;
 static int	cur_line_num;
 static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
 static char    *dolqstart;      /* current $foo$ quote start string */
-static int	dolqlen;			/* signal to plpgsql_get_string_value */
+
 extern bool		standard_conforming_strings;
 bool plpgsql_SpaceScanned = false;
 %}
@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;
 %option case-insensitive
 /*
 * Exclusive states are a subset of the core lexer's:
 *  <xc> extended C-style comments
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 */
-%x	IN_STRING
+%x xc
-%x	IN_COMMENT
+%x xe
-%x	IN_DOLLARQUOTE
+%x xq
 %x xdolq
 /*
 * Definitions --- these generally must match the core lexer, but in some
 * cases we can simplify, since we only care about identifying the token
 * boundaries and not about deriving the represented value.  Also, we
 * aren't trying to lex multicharacter operators so their interactions
 * with comments go away.
 */
 space			[ \t\n\r\f]
 horiz_space		[ \t\f]
 newline			[\n\r]
 non_newline		[^\n\r]
 comment			("--"{non_newline}*)
 whitespace		({space}+|{comment})
 special_whitespace		({space}+|{comment}{newline})
 horiz_whitespace		({horiz_space}|{comment})
 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
 quote			'
 quotestop		{quote}{whitespace}*
 quotecontinue	{quote}{whitespace_with_newline}{quote}
 quotefail		{quote}{whitespace}*"-"
 xestart			[eE]{quote}
 xeinside		[^\\']+
 xeescape		[\\].
 xqstart			{quote}
 xqdouble		{quote}{quote}
 xqinside		[^']+
 dolq_start		[A-Za-z\200-\377_]
 dolq_cont		[A-Za-z\200-\377_0-9]
 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
 dolqfailed		\${dolq_start}{dolq_cont}*
 dolqinside		[^$]+
 xcstart			\/\*
 xcstop			\*+\/
 xcinside		[^*/]+
 digit			[0-9]
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]
 /* This is a simpler treatment of quoted identifiers than the core uses */
 quoted_ident	(\"[^\"]*\")+
 identifier		({ident_start}{ident_cont}*|{quoted_ident})
 param			\${digit}+
 space			[ \t\n\r\f]
 /* $foo$ style quotes ("dollar quoting")
 * copied straight from the backend SQL parser
 */
 dolq_start		[A-Za-z\200-\377_]
 dolq_cont		[A-Za-z\200-\377_0-9]
 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
 dolqinside		[^$]+
 %%
    /* ----------
     * Local variables in scanner to remember where
@ -95,17 +141,6 @@ dolqinside		[^$]+
    BEGIN(INITIAL);
    plpgsql_SpaceScanned = false;
    /* ----------
     * On the first call to a new source report the
     * function's type (T_FUNCTION or T_TRIGGER)
     * ----------
     */
 	if (!scanner_typereported)
 	{
 		scanner_typereported = true;
 		return scanner_functype;
 	}
    /* ----------
     * The keyword rules
     * ----------
@ -225,119 +260,134 @@ dump			{ return O_DUMP;			}
 {digit}+		{ return T_NUMBER;			}
-\".				{
+\".				{ yyerror("unterminated quoted identifier"); }
 				plpgsql_error_lineno = plpgsql_scanner_lineno();
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
 						 errmsg("unterminated quoted identifier")));
 			}
    /* ----------
-     * Ignore whitespaces but remember this happened
+     * Ignore whitespace (including comments) but remember this happened
     * ----------
     */
-{space}+		{ plpgsql_SpaceScanned = true;		}
+{whitespace}	{ plpgsql_SpaceScanned = true; }
    /* ----------
-     * Eat up comments
+     * Comment and literal handling is mostly copied from the core lexer
     * ----------
     */
--[^\r\n]*		;
+{xcstart}		{
 					/* Set location in case of syntax error in comment */
 					SAVE_TOKEN_START();
 					xcdepth = 0;
 					BEGIN(xc);
 					plpgsql_SpaceScanned = true;
 				}
-\/\*			{ start_lineno = plpgsql_scanner_lineno();
+<xc>{xcstart}	{
-			  BEGIN(IN_COMMENT);
+					xcdepth++;
-			}
+				}
 <IN_COMMENT>\*\/	{ BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
 <IN_COMMENT>\n		;
 <IN_COMMENT>.		;
 <IN_COMMENT><<EOF>>	{
 				plpgsql_error_lineno = start_lineno;
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
 						 errmsg("unterminated /* comment")));
 			}
-    /* ----------
+<xc>{xcstop}	{
-     * Collect anything inside of ''s and return one STRING token
+					if (xcdepth <= 0)
-	 *
+						BEGIN(INITIAL);
-	 * Hacking yytext/yyleng here lets us avoid using yymore(), which is
+					else
-	 * a win for performance.  It's safe because we know the underlying
+						xcdepth--;
-	 * input buffer is not changing.
+				}
     * ----------
     */
 '			{
 			  start_lineno = plpgsql_scanner_lineno();
 			  start_charpos = yytext;
 			  BEGIN(IN_STRING);
 			}
 [eE]'		{
 			  /* for now, treat the same as a regular literal */
 			  start_lineno = plpgsql_scanner_lineno();
 			  start_charpos = yytext;
 			  BEGIN(IN_STRING);
 			}
 <IN_STRING>\\.		{ }
 <IN_STRING>\\		{ /* can only happen with \ at EOF */ }
 <IN_STRING>''		{ }
 <IN_STRING>'		{
 			  /* tell plpgsql_get_string_value it's not a dollar quote */
 			  dolqlen = 0;
 			  /* adjust yytext/yyleng to describe whole string token */
 			  yyleng += (yytext - start_charpos);
 			  yytext = start_charpos;
 			  BEGIN(INITIAL);
 			  return T_STRING;
 			}
 <IN_STRING>[^'\\]+	{ }
 <IN_STRING><<EOF>>	{
 				plpgsql_error_lineno = start_lineno;
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
 						 errmsg("unterminated quoted string")));
 			}
-{dolqdelim}		{
+<xc>{xcinside}	{
-			  start_lineno = plpgsql_scanner_lineno();
+					/* ignore */
-			  start_charpos = yytext;
+				}
-			  dolqstart = pstrdup(yytext);
+
-			  BEGIN(IN_DOLLARQUOTE);
+<xc>\/+			{
-			}
+					/* ignore */
-<IN_DOLLARQUOTE>{dolqdelim} {
+				}
-			  if (strcmp(yytext, dolqstart) == 0)
+
-			  {
+<xc>\*+			{
-					pfree(dolqstart);
+					/* ignore */
-					/* tell plpgsql_get_string_value it is a dollar quote */
+				}
-					dolqlen = yyleng;
+
 <xc><<EOF>>		{ yyerror("unterminated /* comment"); }
 {xqstart}		{
 					SAVE_TOKEN_START();
 					if (standard_conforming_strings)
 						BEGIN(xq);
 					else
 						BEGIN(xe);
 				}
 {xestart}		{
 					SAVE_TOKEN_START();
 					BEGIN(xe);
 				}
 <xq,xe>{quotestop}	|
 <xq,xe>{quotefail} {
 					yyless(1);
 					BEGIN(INITIAL);
 					/* adjust yytext/yyleng to describe whole string token */
 					yyleng += (yytext - start_charpos);
 					yytext = start_charpos;
 					BEGIN(INITIAL);
 					return T_STRING;
-			  }
+				}
-			  else
+<xq,xe>{xqdouble} {
-			  {
+				}
-					/*
+<xq>{xqinside}  {
-					 * When we fail to match $...$ to dolqstart, transfer
+				}
-					 * the $... part to the output, but put back the final
+<xe>{xeinside}  {
-					 * $ for rescanning.  Consider $delim$...$junk$delim$
+				}
-					 */
+<xe>{xeescape}  {
-					yyless(yyleng-1);
+				}
-			  }
+<xq,xe>{quotecontinue} {
-			}
+					/* ignore */
-<IN_DOLLARQUOTE>{dolqinside} { }
+				}
-<IN_DOLLARQUOTE>.	{ /* needed for $ inside the quoted text */ }
+<xe>.			{
-<IN_DOLLARQUOTE><<EOF>>	{
+					/* This is only needed for \ just before EOF */
-				plpgsql_error_lineno = start_lineno;
+				}
-				ereport(ERROR,
+<xq,xe><<EOF>>		{ yyerror("unterminated quoted string"); }
-						(errcode(ERRCODE_DATATYPE_MISMATCH),
+
-						 errmsg("unterminated dollar-quoted string")));
+{dolqdelim}		{
-			}
+					SAVE_TOKEN_START();
 					dolqstart = pstrdup(yytext);
 					BEGIN(xdolq);
 				}
 {dolqfailed}	{
 					/* throw back all but the initial "$" */
 					yyless(1);
 					/* and treat it as {other} */
 					return yytext[0];
 				}
 <xdolq>{dolqdelim} {
 					if (strcmp(yytext, dolqstart) == 0)
 					{
 						pfree(dolqstart);
 						BEGIN(INITIAL);
 						/* adjust yytext/yyleng to describe whole string */
 						yyleng += (yytext - start_charpos);
 						yytext = start_charpos;
 						return T_STRING;
 					}
 					else
 					{
 						/*
 						 * When we fail to match $...$ to dolqstart, transfer
 						 * the $... part to the output, but put back the final
 						 * $ for rescanning.  Consider $delim$...$junk$delim$
 						 */
 						yyless(yyleng-1);
 					}
 				}
 <xdolq>{dolqinside} {
 				}
 <xdolq>{dolqfailed} {
 				}
 <xdolq>.		{
 					/* This is only needed for $ inside the quoted text */
 				}
 <xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); }
    /* ----------
     * Any unmatched character is returned as is
     * ----------
     */
-.			{ return yytext[0];			}
+.				{
 					return yytext[0];
 				}
 %%
@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
 * to cite in error messages.
 */
 void
-plpgsql_scanner_init(const char *str, int functype)
+plpgsql_scanner_init(const char *str)
 {
 	Size	slen;
@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
 	/* Other setup */
 	scanstr = str;
    scanner_functype = functype;
    scanner_typereported = false;
 	have_pushback_token = false;
 	cur_line_start = scanbuf;
@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
 	yy_delete_buffer(scanbufhandle);
 	pfree(scanbuf);
 }
 /*
 * Called after a T_STRING token is read to get the string literal's value
 * as a palloc'd string.  (We make this a separate call because in many
 * scenarios there's no need to get the decoded value.)
 *
 * Note: we expect the literal to be the most recently lexed token.  This
 * would not work well if we supported multiple-token pushback or if
 * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
 *
 * The token is read from yytext/yyleng; dolqlen selects dollar-quote
 * handling when it is greater than zero.  In both quoted forms a backslash
 * is dropped and the character after it is copied as-is, so no escape
 * sequence is interpreted here.
 */
 char *
 plpgsql_get_string_value(void)
 {
 	char	   *result;
 	const char *cp;
 	int			len;
 	if (dolqlen > 0)
 	{
 		/* Token is a $foo$...$foo$ string */
 		/* Body length is the token minus one delimiter at each end */
 		len = yyleng - 2 * dolqlen;
 		Assert(len >= 0);
 		result = (char *) palloc(len + 1);
 		/* Copy the body verbatim, starting just past the opening delimiter */
 		memcpy(result, yytext + dolqlen, len);
 		result[len] = '\0';
 	}
 	else if (*yytext == 'E' || *yytext == 'e')
 	{
 		/* Token is an E'...' string */
 		result = (char *) palloc(yyleng + 1);	/* more than enough room */
 		len = 0;
 		/* Start after the two-character E' prefix */
 		for (cp = yytext + 2; *cp; cp++)
 		{
 			if (*cp == '\'')
 			{
 				/* A doubled quote yields one quote; skip its partner */
 				if (cp[1] == '\'')
 					result[len++] = *cp++;
 				/* else it must be string end quote */
 			}
 			else if (*cp == '\\')
 			{
 				/* Drop the backslash and copy the next character literally */
 				if (cp[1] != '\0')	/* just a paranoid check */
 					result[len++] = *(++cp);
 			}
 			else
 				result[len++] = *cp;
 		}
 		result[len] = '\0';
 	}
 	else
 	{
 		/* Token is a '...' string */
 		result = (char *) palloc(yyleng + 1);	/* more than enough room */
 		len = 0;
 		/* Same loop as the E'...' case, but only the opening quote is skipped */
 		for (cp = yytext + 1; *cp; cp++)
 		{
 			if (*cp == '\'')
 			{
 				/* A doubled quote yields one quote; skip its partner */
 				if (cp[1] == '\'')
 					result[len++] = *cp++;
 				/* else it must be string end quote */
 			}
 			else if (*cp == '\\')
 			{
 				/* Backslash is treated exactly as in the E'...' case */
 				if (cp[1] != '\0')	/* just a paranoid check */
 					result[len++] = *(++cp);
 			}
 			else
 				result[len++] = *cp;
 		}
 		result[len] = '\0';
 	}
 	return result;
 }
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
 -- Test handling of string literals.
 set standard_conforming_strings = off;
 create or replace function strtest() returns text as $$
 begin
  raise notice 'foo\\bar\041baz';
  return 'foo\\bar\041baz';
 end
 $$ language plpgsql;
 WARNING:  nonstandard use of \\ in a string literal
 HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
 CONTEXT:  string literal in PL/PgSQL function "strtest" near line 2
 WARNING:  nonstandard use of \\ in a string literal
 LINE 1: SELECT  'foo\\bar\041baz'
                ^
 HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
 QUERY:  SELECT  'foo\\bar\041baz'
 CONTEXT:  SQL statement in PL/PgSQL function "strtest" near line 3
 select strtest();
 NOTICE:  foo\bar!baz
 WARNING:  nonstandard use of \\ in a string literal
 LINE 1: SELECT  'foo\\bar\041baz'
                ^
 HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
 QUERY:  SELECT  'foo\\bar\041baz'
 CONTEXT:  PL/pgSQL function "strtest" line 3 at RETURN
   strtest   
 -------------
 foo\bar!baz
 (1 row)
 create or replace function strtest() returns text as $$
 begin
  raise notice E'foo\\bar\041baz';
  return E'foo\\bar\041baz';
 end
 $$ language plpgsql;
 select strtest();
 NOTICE:  foo\bar!baz
   strtest   
 -------------
 foo\bar!baz
 (1 row)
 set standard_conforming_strings = on;
 create or replace function strtest() returns text as $$
 begin
  raise notice 'foo\\bar\041baz\';
  return 'foo\\bar\041baz\';
 end
 $$ language plpgsql;
 select strtest();
 NOTICE:  foo\\bar\041baz\
     strtest      
 ------------------
 foo\\bar\041baz\
 (1 row)
 create or replace function strtest() returns text as $$
 begin
  raise notice E'foo\\bar\041baz';
  return E'foo\\bar\041baz';
 end
 $$ language plpgsql;
 select strtest();
 NOTICE:  foo\bar!baz
   strtest   
 -------------
 foo\bar!baz
 (1 row)
 drop function strtest();
--- a/src/test/regress/sql/plpgsql.sql
+++ b/src/test/regress/sql/plpgsql.sql
@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
 -- Test handling of string literals.
 set standard_conforming_strings = off;
 create or replace function strtest() returns text as $$
 begin
  raise notice 'foo\\bar\041baz';
  return 'foo\\bar\041baz';
 end
 $$ language plpgsql;
 select strtest();
 create or replace function strtest() returns text as $$
 begin
  raise notice E'foo\\bar\041baz';
  return E'foo\\bar\041baz';
 end
 $$ language plpgsql;
 select strtest();
 set standard_conforming_strings = on;
 create or replace function strtest() returns text as $$
 begin
  raise notice 'foo\\bar\041baz\';
  return 'foo\\bar\041baz\';
 end
 $$ language plpgsql;
 select strtest();
 create or replace function strtest() returns text as $$
 begin
  raise notice E'foo\\bar\041baz';
  return E'foo\\bar\041baz';
 end
 $$ language plpgsql;
 select strtest();
 drop function strtest();