diff --git a/src/backend/nls.mk b/src/backend/nls.mk index 8b5504bb97..45c51b3b73 100644 --- a/src/backend/nls.mk +++ b/src/backend/nls.mk @@ -1,8 +1,8 @@ -# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.27 2009/06/26 19:33:43 petere Exp $ +# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.28 2009/07/13 02:02:19 tgl Exp $ CATALOG_NAME := postgres AVAIL_LANGUAGES := de es fr ja pt_BR tr GETTEXT_FILES := + gettext-files -GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror +GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror parser_yyerror gettext-files: distprep find $(srcdir)/ $(srcdir)/../port/ -name '*.c' -print >$@ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index dd71071efd..f4b795db45 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.667 2009/07/12 17:12:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.668 2009/07/13 02:02:20 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -92,10 +92,6 @@ #define YYMALLOC palloc #define YYFREE pfree -extern List *parsetree; /* final parse result is delivered here */ - -static bool QueryIsRule = FALSE; - /* Private struct for the result of privilege_target production */ typedef struct PrivTarget { @@ -103,14 +99,14 @@ typedef struct PrivTarget List *objs; } PrivTarget; -/* - * If you need access to certain yacc-generated variables and find that - * they're static by default, uncomment the next line. (this is not a - * problem, yet.) 
- */ -/*#define __YYSCLASS*/ -static Node *makeColumnRef(char *colname, List *indirection, int location); +#define parser_yyerror(msg) scanner_yyerror(msg, yyscanner) +#define parser_errposition(pos) scanner_errposition(pos, yyscanner) + +static void base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, + const char *msg); +static Node *makeColumnRef(char *colname, List *indirection, + int location, base_yyscan_t yyscanner); static Node *makeTypeCast(Node *arg, TypeName *typename, int location); static Node *makeStringConst(char *str, int location); static Node *makeStringConstCast(char *str, int location, TypeName *typename); @@ -120,16 +116,18 @@ static Node *makeBitStringConst(char *str, int location); static Node *makeNullAConst(int location); static Node *makeAConst(Value *v, int location); static Node *makeBoolAConst(bool state, int location); -static FuncCall *makeOverlaps(List *largs, List *rargs, int location); -static void check_qualified_name(List *names); -static List *check_func_name(List *names); -static List *check_indirection(List *indirection); +static FuncCall *makeOverlaps(List *largs, List *rargs, + int location, base_yyscan_t yyscanner); +static void check_qualified_name(List *names, base_yyscan_t yyscanner); +static List *check_func_name(List *names, base_yyscan_t yyscanner); +static List *check_indirection(List *indirection, base_yyscan_t yyscanner); static List *extractArgTypes(List *parameters); static SelectStmt *findLeftmostSelect(SelectStmt *node); static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, - WithClause *withClause); + WithClause *withClause, + base_yyscan_t yyscanner); static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); @@ -141,10 +139,14 @@ static TypeName *TableFuncTypeName(List *columns); %} +%pure-parser %expect 0 %name-prefix="base_yy" 
%locations +%parse-param {base_yyscan_t yyscanner} +%lex-param {base_yyscan_t yyscanner} + %union { int ival; @@ -576,26 +578,29 @@ static TypeName *TableFuncTypeName(List *columns); %% /* - * Handle comment-only lines, and ;; SELECT * FROM pg_class ;;; - * psql already handles such cases, but other interfaces don't. - * bjm 1999/10/05 + * The target production for the whole parse. */ -stmtblock: stmtmulti { parsetree = $1; } +stmtblock: stmtmulti + { + pg_yyget_extra(yyscanner)->parsetree = $1; + } ; /* the thrashing around here is to discard "empty" statements... */ stmtmulti: stmtmulti ';' stmt - { if ($3 != NULL) - $$ = lappend($1, $3); - else - $$ = $1; + { + if ($3 != NULL) + $$ = lappend($1, $3); + else + $$ = $1; } | stmt - { if ($1 != NULL) + { + if ($1 != NULL) $$ = list_make1($1); - else + else $$ = NIL; - } + } ; stmt : @@ -1190,7 +1195,7 @@ set_rest: /* Generic SET syntaxes: */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("current database cannot be changed"), - scanner_errposition(@2))); + parser_errposition(@2))); $$ = NULL; /*not reached*/ } | SCHEMA Sconst @@ -1305,7 +1310,7 @@ zone_value: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("time zone interval must be HOUR or HOUR TO MINUTE"), - scanner_errposition(@3))); + parser_errposition(@3))); } t->typmods = $3; $$ = makeStringConstCast($2, @2, t); @@ -1320,12 +1325,12 @@ zone_value: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("time zone interval must be HOUR or HOUR TO MINUTE"), - scanner_errposition(@6))); + parser_errposition(@6))); if (list_length($6) != 1) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("interval precision specified twice"), - scanner_errposition(@1))); + parser_errposition(@1))); t->typmods = lappend($6, makeIntConst($3, @3)); } else @@ -2428,7 +2433,7 @@ key_match: MATCH FULL ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("MATCH PARTIAL not yet implemented"), - scanner_errposition(@1))); + parser_errposition(@1))); $$ 
= FKCONSTR_MATCH_PARTIAL; } | MATCH SIMPLE @@ -2521,7 +2526,7 @@ CreateAsStmt: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("CREATE TABLE AS cannot specify INTO"), - scanner_errposition(exprLocation((Node *) n->intoClause)))); + parser_errposition(exprLocation((Node *) n->intoClause)))); $4->rel->istemp = $2; n->intoClause = $4; /* Implement WITH NO DATA by forcing top-level LIMIT 0 */ @@ -3174,7 +3179,7 @@ TriggerEvents: | TriggerEvents OR TriggerOneEvent { if ($1 & $3) - yyerror("duplicate trigger events specified"); + parser_yyerror("duplicate trigger events specified"); $$ = $1 | $3; } ; @@ -3245,7 +3250,7 @@ ConstraintAttributeSpec: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("constraint declared INITIALLY DEFERRED must be DEFERRABLE"), - scanner_errposition(@1))); + parser_errposition(@1))); $$ = $1 | $2; } | ConstraintTimeSpec @@ -3261,7 +3266,7 @@ ConstraintAttributeSpec: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("constraint declared INITIALLY DEFERRED must be DEFERRABLE"), - scanner_errposition(@1))); + parser_errposition(@1))); $$ = $1 | $2; } | /*EMPTY*/ @@ -3434,7 +3439,7 @@ DefineStmt: (errcode(ERRCODE_SYNTAX_ERROR), errmsg("improper qualified name (too many dotted names): %s", NameListToString($3)), - scanner_errposition(@3))); + parser_errposition(@3))); break; } r->location = @3; @@ -3638,7 +3643,7 @@ opt_recheck: RECHECK (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("RECHECK is no longer required"), errhint("Update your data type."), - scanner_errposition(@1))); + parser_errposition(@1))); $$ = TRUE; } | /*EMPTY*/ { $$ = FALSE; } @@ -5043,7 +5048,7 @@ oper_argtypes: (errcode(ERRCODE_SYNTAX_ERROR), errmsg("missing argument"), errhint("Use NONE to denote the missing argument of a unary operator."), - scanner_errposition(@3))); + parser_errposition(@3))); } | '(' Typename ',' Typename ')' { $$ = list_make2($2, $4); } @@ -5600,7 +5605,7 @@ AlterOwnerStmt: ALTER AGGREGATE func_name aggr_args OWNER TO RoleId 
*****************************************************************************/ RuleStmt: CREATE opt_or_replace RULE name AS - { QueryIsRule=TRUE; } + { pg_yyget_extra(yyscanner)->QueryIsRule = TRUE; } ON event TO qualified_name where_clause DO opt_instead RuleActionList { @@ -5613,7 +5618,7 @@ RuleStmt: CREATE opt_or_replace RULE name AS n->instead = $13; n->actions = $14; $$ = (Node *)n; - QueryIsRule=FALSE; + pg_yyget_extra(yyscanner)->QueryIsRule = FALSE; } ; @@ -6605,7 +6610,7 @@ insert_column_item: { $$ = makeNode(ResTarget); $$->name = $1; - $$->indirection = check_indirection($2); + $$->indirection = check_indirection($2, yyscanner); $$->val = NULL; $$->location = @1; } @@ -6735,7 +6740,7 @@ multiple_set_clause: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("number of columns does not match number of values"), - scanner_errposition(@1))); + parser_errposition(@1))); forboth(col_cell, $2, val_cell, $5) { ResTarget *res_col = (ResTarget *) lfirst(col_cell); @@ -6753,7 +6758,7 @@ set_target: { $$ = makeNode(ResTarget); $$->name = $1; - $$->indirection = check_indirection($2); + $$->indirection = check_indirection($2, yyscanner); $$->val = NULL; /* upper production sets this */ $$->location = @1; } @@ -6863,49 +6868,56 @@ select_no_parens: | select_clause sort_clause { insertSelectOptions((SelectStmt *) $1, $2, NIL, - NULL, NULL, NULL); + NULL, NULL, NULL, + yyscanner); $$ = $1; } | select_clause opt_sort_clause for_locking_clause opt_select_limit { insertSelectOptions((SelectStmt *) $1, $2, $3, list_nth($4, 0), list_nth($4, 1), - NULL); + NULL, + yyscanner); $$ = $1; } | select_clause opt_sort_clause select_limit opt_for_locking_clause { insertSelectOptions((SelectStmt *) $1, $2, $4, list_nth($3, 0), list_nth($3, 1), - NULL); + NULL, + yyscanner); $$ = $1; } | with_clause select_clause { insertSelectOptions((SelectStmt *) $2, NULL, NIL, NULL, NULL, - $1); + $1, + yyscanner); $$ = $2; } | with_clause select_clause sort_clause { 
insertSelectOptions((SelectStmt *) $2, $3, NIL, NULL, NULL, - $1); + $1, + yyscanner); $$ = $2; } | with_clause select_clause opt_sort_clause for_locking_clause opt_select_limit { insertSelectOptions((SelectStmt *) $2, $3, $4, list_nth($5, 0), list_nth($5, 1), - $1); + $1, + yyscanner); $$ = $2; } | with_clause select_clause opt_sort_clause select_limit opt_for_locking_clause { insertSelectOptions((SelectStmt *) $2, $3, $5, list_nth($4, 0), list_nth($4, 1), - $1); + $1, + yyscanner); $$ = $2; } ; @@ -7160,7 +7172,7 @@ select_limit: (errcode(ERRCODE_SYNTAX_ERROR), errmsg("LIMIT #,# syntax is not supported"), errhint("Use separate LIMIT and OFFSET clauses."), - scanner_errposition(@1))); + parser_errposition(@1))); } /* SQL:2008 syntax variants */ | OFFSET select_offset_value2 row_or_rows @@ -7382,13 +7394,13 @@ table_ref: relation_expr (errcode(ERRCODE_SYNTAX_ERROR), errmsg("VALUES in FROM must have an alias"), errhint("For example, FROM (VALUES ...) [AS] foo."), - scanner_errposition(@1))); + parser_errposition(@1))); else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("subquery in FROM must have an alias"), errhint("For example, FROM (SELECT ...) 
[AS] foo."), - scanner_errposition(@1))); + parser_errposition(@1))); $$ = NULL; } | select_with_parens alias_clause @@ -7743,7 +7755,7 @@ SimpleTypename: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("interval precision specified twice"), - scanner_errposition(@1))); + parser_errposition(@1))); $$->typmods = lappend($5, makeIntConst($3, @3)); } else @@ -7869,7 +7881,7 @@ opt_float: '(' Iconst ')' ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("precision for type float must be at least 1 bit"), - scanner_errposition(@2))); + parser_errposition(@2))); else if ($2 <= 24) $$ = SystemTypeName("float4"); else if ($2 <= 53) @@ -7878,7 +7890,7 @@ opt_float: '(' Iconst ')' ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("precision for type float must be less than 54 bits"), - scanner_errposition(@2))); + parser_errposition(@2))); } | /*EMPTY*/ { @@ -8394,7 +8406,7 @@ a_expr: c_expr { $$ = $1; } } | row OVERLAPS row { - $$ = (Node *)makeOverlaps($1, $3, @2); + $$ = (Node *)makeOverlaps($1, $3, @2, yyscanner); } | a_expr IS TRUE_P { @@ -8574,7 +8586,7 @@ a_expr: c_expr { $$ = $1; } ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("UNIQUE predicate is not yet implemented"), - scanner_errposition(@1))); + parser_errposition(@1))); } | a_expr IS DOCUMENT_P %prec IS { @@ -8681,7 +8693,7 @@ c_expr: columnref { $$ = $1; } { A_Indirection *n = makeNode(A_Indirection); n->arg = (Node *) p; - n->indirection = check_indirection($2); + n->indirection = check_indirection($2, yyscanner); $$ = (Node *) n; } else @@ -8693,7 +8705,7 @@ c_expr: columnref { $$ = $1; } { A_Indirection *n = makeNode(A_Indirection); n->arg = $2; - n->indirection = check_indirection($4); + n->indirection = check_indirection($4, yyscanner); $$ = (Node *)n; } else @@ -9413,12 +9425,12 @@ frame_extent: frame_bound ereport(ERROR, (errcode(ERRCODE_WINDOWING_ERROR), errmsg("frame start cannot be UNBOUNDED FOLLOWING"), - scanner_errposition(@1))); + 
parser_errposition(@1))); if ($1 & FRAMEOPTION_START_CURRENT_ROW) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("frame start at CURRENT ROW is not implemented"), - scanner_errposition(@1))); + parser_errposition(@1))); $$ = $1 | FRAMEOPTION_END_CURRENT_ROW; } | BETWEEN frame_bound AND frame_bound @@ -9428,17 +9440,17 @@ frame_extent: frame_bound ereport(ERROR, (errcode(ERRCODE_WINDOWING_ERROR), errmsg("frame start cannot be UNBOUNDED FOLLOWING"), - scanner_errposition(@2))); + parser_errposition(@2))); if ($2 & FRAMEOPTION_START_CURRENT_ROW) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("frame start at CURRENT ROW is not implemented"), - scanner_errposition(@2))); + parser_errposition(@2))); if ($4 & FRAMEOPTION_START_UNBOUNDED_PRECEDING) ereport(ERROR, (errcode(ERRCODE_WINDOWING_ERROR), errmsg("frame end cannot be UNBOUNDED PRECEDING"), - scanner_errposition(@4))); + parser_errposition(@4))); /* shift converts START_ options to END_ options */ $$ = FRAMEOPTION_BETWEEN | $2 | ($4 << 1); } @@ -9742,11 +9754,11 @@ case_arg: a_expr { $$ = $1; } */ columnref: relation_name { - $$ = makeColumnRef($1, NIL, @1); + $$ = makeColumnRef($1, NIL, @1, yyscanner); } | relation_name indirection { - $$ = makeColumnRef($1, $2, @1); + $$ = makeColumnRef($1, $2, @1, yyscanner); } ; @@ -9912,7 +9924,7 @@ qualified_name: } | relation_name indirection { - check_qualified_name($2); + check_qualified_name($2, yyscanner); $$ = makeNode(RangeVar); switch (list_length($2)) { @@ -9931,7 +9943,7 @@ qualified_name: (errcode(ERRCODE_SYNTAX_ERROR), errmsg("improper qualified name (too many dotted names): %s", NameListToString(lcons(makeString($1), $2))), - scanner_errposition(@1))); + parser_errposition(@1))); break; } $$->location = @1; @@ -9970,7 +9982,10 @@ file_name: Sconst { $$ = $1; }; func_name: type_function_name { $$ = list_make1(makeString($1)); } | relation_name indirection - { $$ = check_func_name(lcons(makeString($1), $2)); } + { + $$ = 
check_func_name(lcons(makeString($1), $2), + yyscanner); + } ; @@ -10036,7 +10051,7 @@ AexprConst: Iconst ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("interval precision specified twice"), - scanner_errposition(@1))); + parser_errposition(@1))); t->typmods = lappend($6, makeIntConst($3, @3)); } else @@ -10552,30 +10567,42 @@ reserved_keyword: SpecialRuleRelation: OLD { - if (QueryIsRule) + if (pg_yyget_extra(yyscanner)->QueryIsRule) $$ = "*OLD*"; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("OLD used in query that is not in a rule"), - scanner_errposition(@1))); + parser_errposition(@1))); } | NEW { - if (QueryIsRule) + if (pg_yyget_extra(yyscanner)->QueryIsRule) $$ = "*NEW*"; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("NEW used in query that is not in a rule"), - scanner_errposition(@1))); + parser_errposition(@1))); } ; %% +/* + * The signature of this function is required by bison. However, we + * ignore the passed yylloc and instead use the last token position + * available from the scanner. 
+ */ +static void +base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, const char *msg) +{ + parser_yyerror(msg); +} + static Node * -makeColumnRef(char *colname, List *indirection, int location) +makeColumnRef(char *colname, List *indirection, + int location, base_yyscan_t yyscanner) { /* * Generate a ColumnRef node, with an A_Indirection node added if there @@ -10598,13 +10625,14 @@ makeColumnRef(char *colname, List *indirection, int location) { /* easy case - all indirection goes to A_Indirection */ c->fields = list_make1(makeString(colname)); - i->indirection = check_indirection(indirection); + i->indirection = check_indirection(indirection, yyscanner); } else { /* got to split the list in two */ i->indirection = check_indirection(list_copy_tail(indirection, - nfields)); + nfields), + yyscanner); indirection = list_truncate(indirection, nfields); c->fields = lcons(makeString(colname), indirection); } @@ -10615,7 +10643,7 @@ makeColumnRef(char *colname, List *indirection, int location) { /* We only allow '*' at the end of a ColumnRef */ if (lnext(l) != NULL) - yyerror("improper use of \"*\""); + parser_yyerror("improper use of \"*\""); } nfields++; } @@ -10744,7 +10772,7 @@ makeBoolAConst(bool state, int location) * Create and populate a FuncCall node to support the OVERLAPS operator. 
*/ static FuncCall * -makeOverlaps(List *largs, List *rargs, int location) +makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner) { FuncCall *n = makeNode(FuncCall); @@ -10755,14 +10783,14 @@ makeOverlaps(List *largs, List *rargs, int location) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("wrong number of parameters on left side of OVERLAPS expression"), - scanner_errposition(location))); + parser_errposition(location))); if (list_length(rargs) == 1) rargs = lappend(rargs, rargs); else if (list_length(rargs) != 2) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("wrong number of parameters on right side of OVERLAPS expression"), - scanner_errposition(location))); + parser_errposition(location))); n->args = list_concat(largs, rargs); n->agg_star = FALSE; n->agg_distinct = FALSE; @@ -10778,14 +10806,14 @@ makeOverlaps(List *largs, List *rargs, int location) * subscripts and '*', which we then must reject here. */ static void -check_qualified_name(List *names) +check_qualified_name(List *names, base_yyscan_t yyscanner) { ListCell *i; foreach(i, names) { if (!IsA(lfirst(i), String)) - yyerror("syntax error"); + parser_yyerror("syntax error"); } } @@ -10795,14 +10823,14 @@ check_qualified_name(List *names) * and '*', which we then must reject here. */ static List * -check_func_name(List *names) +check_func_name(List *names, base_yyscan_t yyscanner) { ListCell *i; foreach(i, names) { if (!IsA(lfirst(i), String)) - yyerror("syntax error"); + parser_yyerror("syntax error"); } return names; } @@ -10813,7 +10841,7 @@ check_func_name(List *names) * in the grammar, so do it here. 
*/ static List * -check_indirection(List *indirection) +check_indirection(List *indirection, base_yyscan_t yyscanner) { ListCell *l; @@ -10822,7 +10850,7 @@ check_indirection(List *indirection) if (IsA(lfirst(l), A_Star)) { if (lnext(l) != NULL) - yyerror("improper use of \"*\""); + parser_yyerror("improper use of \"*\""); } } return indirection; @@ -10871,7 +10899,8 @@ static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, - WithClause *withClause) + WithClause *withClause, + base_yyscan_t yyscanner) { Assert(IsA(stmt, SelectStmt)); @@ -10885,7 +10914,7 @@ insertSelectOptions(SelectStmt *stmt, ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("multiple ORDER BY clauses not allowed"), - scanner_errposition(exprLocation((Node *) sortClause)))); + parser_errposition(exprLocation((Node *) sortClause)))); stmt->sortClause = sortClause; } /* We can handle multiple locking clauses, though */ @@ -10896,7 +10925,7 @@ insertSelectOptions(SelectStmt *stmt, ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("multiple OFFSET clauses not allowed"), - scanner_errposition(exprLocation(limitOffset)))); + parser_errposition(exprLocation(limitOffset)))); stmt->limitOffset = limitOffset; } if (limitCount) @@ -10905,7 +10934,7 @@ insertSelectOptions(SelectStmt *stmt, ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("multiple LIMIT clauses not allowed"), - scanner_errposition(exprLocation(limitCount)))); + parser_errposition(exprLocation(limitCount)))); stmt->limitCount = limitCount; } if (withClause) @@ -10914,7 +10943,7 @@ insertSelectOptions(SelectStmt *stmt, ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("multiple WITH clauses not allowed"), - scanner_errposition(exprLocation((Node *) withClause)))); + parser_errposition(exprLocation((Node *) withClause)))); stmt->withClause = withClause; } } @@ -11046,9 +11075,10 @@ makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args, * 
Initialize to parse one query string */ void -parser_init(void) +parser_init(base_yy_extra_type *yyext) { - QueryIsRule = FALSE; + yyext->parsetree = NIL; /* in case grammar forgets to set it */ + yyext->QueryIsRule = FALSE; } /* @@ -11102,4 +11132,9 @@ TableFuncTypeName(List *columns) */ #undef base_yylex +/* Undefine some other stuff that would conflict in scan.c, too */ +#undef yyerror +#undef yylval +#undef yylloc + #include "scan.c" diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 1e7f93af53..cb8ff8a339 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -14,7 +14,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.79 2009/07/12 17:12:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,14 +25,6 @@ #include "parser/parser.h" -List *parsetree; /* result of parsing is left here */ - -static bool have_lookahead; /* is lookahead info valid? */ -static int lookahead_token; /* one-token lookahead */ -static YYSTYPE lookahead_yylval; /* yylval for lookahead token */ -static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ - - /* * raw_parser * Given a query in string form, do lexical and grammatical analysis. 
@@ -42,22 +34,29 @@ static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ List * raw_parser(const char *str) { + base_yyscan_t yyscanner; + base_yy_extra_type yyextra; int yyresult; - parsetree = NIL; /* in case grammar forgets to set it */ - have_lookahead = false; + /* initialize the flex scanner */ + yyscanner = scanner_init(str, &yyextra); - scanner_init(str); - parser_init(); + /* filtered_base_yylex() only needs this much initialization */ + yyextra.have_lookahead = false; - yyresult = base_yyparse(); + /* initialize the bison parser */ + parser_init(&yyextra); - scanner_finish(); + /* Parse! */ + yyresult = base_yyparse(yyscanner); + + /* Clean up (release memory) */ + scanner_finish(yyscanner); if (yyresult) /* error */ return NIL; - return parsetree; + return yyextra.parsetree; } @@ -69,25 +68,27 @@ raw_parser(const char *str) * passed string does represent one single string literal. * * We export this function to avoid having plpgsql depend on internal details - * of the core grammar (such as the token code assigned to SCONST). Note - * that since the scanner isn't presently re-entrant, this cannot be used - * during use of the main parser/scanner. + * of the core grammar (such as the token code assigned to SCONST). */ char * pg_parse_string_token(const char *token) { + base_yyscan_t yyscanner; + base_yy_extra_type yyextra; int ctoken; + YYSTYPE yylval; + YYLTYPE yylloc; - scanner_init(token); + yyscanner = scanner_init(token, &yyextra); - ctoken = base_yylex(); + ctoken = base_yylex(&yylval, &yylloc, yyscanner); if (ctoken != SCONST) /* caller error */ elog(ERROR, "expected string constant, got token code %d", ctoken); - scanner_finish(); + scanner_finish(yyscanner); - return base_yylval.str; + return yylval.str; } @@ -105,23 +106,24 @@ pg_parse_string_token(const char *token) * layer does. 
*/ int -filtered_base_yylex(void) +filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) { + base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner); int cur_token; int next_token; YYSTYPE cur_yylval; YYLTYPE cur_yylloc; /* Get next token --- we might already have it */ - if (have_lookahead) + if (yyextra->have_lookahead) { - cur_token = lookahead_token; - base_yylval = lookahead_yylval; - base_yylloc = lookahead_yylloc; - have_lookahead = false; + cur_token = yyextra->lookahead_token; + *lvalp = yyextra->lookahead_yylval; + *llocp = yyextra->lookahead_yylloc; + yyextra->have_lookahead = false; } else - cur_token = base_yylex(); + cur_token = base_yylex(lvalp, llocp, yyscanner); /* Do we need to look ahead for a possible multiword token? */ switch (cur_token) @@ -131,9 +133,9 @@ filtered_base_yylex(void) /* * NULLS FIRST and NULLS LAST must be reduced to one token */ - cur_yylval = base_yylval; - cur_yylloc = base_yylloc; - next_token = base_yylex(); + cur_yylval = *lvalp; + cur_yylloc = *llocp; + next_token = base_yylex(lvalp, llocp, yyscanner); switch (next_token) { case FIRST_P: @@ -144,13 +146,13 @@ filtered_base_yylex(void) break; default: /* save the lookahead token for next time */ - lookahead_token = next_token; - lookahead_yylval = base_yylval; - lookahead_yylloc = base_yylloc; - have_lookahead = true; + yyextra->lookahead_token = next_token; + yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylloc = *llocp; + yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - base_yylval = cur_yylval; - base_yylloc = cur_yylloc; + *lvalp = cur_yylval; + *llocp = cur_yylloc; break; } break; @@ -160,9 +162,9 @@ filtered_base_yylex(void) /* * WITH TIME must be reduced to one token */ - cur_yylval = base_yylval; - cur_yylloc = base_yylloc; - next_token = base_yylex(); + cur_yylval = *lvalp; + cur_yylloc = *llocp; + next_token = base_yylex(lvalp, llocp, yyscanner); switch (next_token) { case TIME: @@ -170,13 +172,13 
@@ filtered_base_yylex(void) break; default: /* save the lookahead token for next time */ - lookahead_token = next_token; - lookahead_yylval = base_yylval; - lookahead_yylloc = base_yylloc; - have_lookahead = true; + yyextra->lookahead_token = next_token; + yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylloc = *llocp; + yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - base_yylval = cur_yylval; - base_yylloc = cur_yylloc; + *lvalp = cur_yylval; + *llocp = cur_yylloc; break; } break; diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 1483627510..06e9f6d8c1 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.154 2009/07/12 17:12:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.155 2009/07/13 02:02:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,9 +43,6 @@ #undef fprintf #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg))) -static int xcdepth = 0; /* depth of nesting in slash-star comments */ -static char *dolqstart; /* current $foo$ quote start string */ - /* * GUC variables. This is a DIRECT violation of the warning given at the * head of gram.y, ie flex/bison code must not depend on any GUC variables; @@ -57,29 +54,11 @@ int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; bool escape_string_warning = true; bool standard_conforming_strings = false; -static bool warn_on_first_escape; -static bool saw_non_ascii = false; - /* - * literalbuf is used to accumulate literal values when multiple rules - * are needed to parse a single literal. Call startlit to reset buffer - * to empty, addlit to add text. Note that the buffer is palloc'd and - * starts life afresh on every parse cycle. + * Set the type of yyextra. 
All state variables used by the scanner should + * be in yyextra, *not* statically allocated. */ -static char *literalbuf; /* expandable buffer */ -static int literallen; /* actual current length */ -static int literalalloc; /* current allocated buffer size */ - -#define startlit() (literalbuf[0] = '\0', literallen = 0) -static void addlit(char *ytext, int yleng); -static void addlitchar(unsigned char ychar); -static char *litbufdup(void); -static char *litbuf_udeescape(unsigned char escape); - -#define lexer_errposition() scanner_errposition(yylloc) - -static void check_escape_warning(void); -static void check_string_escape_warning(unsigned char ychar); +#define YY_EXTRA_TYPE base_yy_extra_type * /* * Each call to yylex must set yylloc to the location of the found token @@ -88,22 +67,51 @@ static void check_string_escape_warning(unsigned char ychar); * this should be done in the first such rule, else yylloc will point * into the middle of the token. */ -#define SET_YYLLOC() (yylloc = yytext - scanbuf) +#define SET_YYLLOC() (*(yylloc) = yytext - yyextra->scanbuf) -/* Handles to the buffer that the lexer uses internally */ -static YY_BUFFER_STATE scanbufhandle; -static char *scanbuf; +/* + * Advance yylloc by the given number of bytes. 
+ */ +#define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) -static unsigned char unescape_single_char(unsigned char c); +#define startlit() ( yyextra->literallen = 0 ) +static void addlit(char *ytext, int yleng, base_yyscan_t yyscanner); +static void addlitchar(unsigned char ychar, base_yyscan_t yyscanner); +static char *litbufdup(base_yyscan_t yyscanner); +static char *litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner); +static unsigned char unescape_single_char(unsigned char c, base_yyscan_t yyscanner); + +#define yyerror(msg) scanner_yyerror(msg, yyscanner) + +#define lexer_errposition() scanner_errposition(*(yylloc), yyscanner) + +static void check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner); +static void check_escape_warning(base_yyscan_t yyscanner); + +/* + * Work around a bug in flex 2.5.35: it emits a couple of functions that + * it forgets to emit declarations for. Since we use -Wmissing-prototypes, + * this would cause warnings. Providing our own declarations should be + * harmless even when the bug gets fixed. + */ +extern int base_yyget_column(yyscan_t yyscanner); +extern void base_yyset_column(int column_no, yyscan_t yyscanner); %} +%option reentrant +%option bison-bridge +%option bison-locations %option 8bit %option never-interactive %option nodefault %option noinput %option nounput %option noyywrap +%option noyyalloc +%option noyyrealloc +%option noyyfree +%option warn %option prefix="base_yy" /* @@ -350,23 +358,23 @@ other . {xcstart} { /* Set location in case of syntax error in comment */ SET_YYLLOC(); - xcdepth = 0; + yyextra->xcdepth = 0; BEGIN(xc); /* Put back any characters past slash-star; see above */ yyless(2); } {xcstart} { - xcdepth++; + (yyextra->xcdepth)++; /* Put back any characters past slash-star; see above */ yyless(2); } {xcstop} { - if (xcdepth <= 0) + if (yyextra->xcdepth <= 0) BEGIN(INITIAL); else - xcdepth--; + (yyextra->xcdepth)--; } {xcinside} { @@ -393,18 +401,18 @@ other . 
SET_YYLLOC(); BEGIN(xb); startlit(); - addlitchar('b'); + addlitchar('b', yyscanner); } {quotestop} | {quotefail} { yyless(1); BEGIN(INITIAL); - yylval.str = litbufdup(); + yylval->str = litbufdup(yyscanner); return BCONST; } {xhinside} | {xbinside} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } {quotecontinue} | {quotecontinue} { @@ -422,13 +430,13 @@ other . SET_YYLLOC(); BEGIN(xh); startlit(); - addlitchar('x'); + addlitchar('x', yyscanner); } {quotestop} | {quotefail} { yyless(1); BEGIN(INITIAL); - yylval.str = litbufdup(); + yylval->str = litbufdup(yyscanner); return XCONST; } <> { yyerror("unterminated hexadecimal string literal"); } @@ -445,13 +453,13 @@ other . /* nchar had better be a keyword! */ keyword = ScanKeywordLookup("nchar"); Assert(keyword != NULL); - yylval.keyword = keyword->name; + yylval->keyword = keyword->name; return keyword->value; } {xqstart} { - warn_on_first_escape = true; - saw_non_ascii = false; + yyextra->warn_on_first_escape = true; + yyextra->saw_non_ascii = false; SET_YYLLOC(); if (standard_conforming_strings) BEGIN(xq); @@ -460,8 +468,8 @@ other . startlit(); } {xestart} { - warn_on_first_escape = false; - saw_non_ascii = false; + yyextra->warn_on_first_escape = false; + yyextra->saw_non_ascii = false; SET_YYLLOC(); BEGIN(xe); startlit(); @@ -485,31 +493,33 @@ other . * check that the data remains valid if it might have been * made invalid by unescaping any chars. 
*/ - if (saw_non_ascii) - pg_verifymbstr(literalbuf, literallen, false); - yylval.str = litbufdup(); + if (yyextra->saw_non_ascii) + pg_verifymbstr(yyextra->literalbuf, + yyextra->literallen, + false); + yylval->str = litbufdup(yyscanner); return SCONST; } {xusstop1} { /* throw back all but the quote */ yyless(1); BEGIN(INITIAL); - yylval.str = litbuf_udeescape('\\'); + yylval->str = litbuf_udeescape('\\', yyscanner); return SCONST; } {xusstop2} { BEGIN(INITIAL); - yylval.str = litbuf_udeescape(yytext[yyleng-2]); + yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner); return SCONST; } {xqdouble} { - addlitchar('\''); + addlitchar('\'', yyscanner); } {xqinside} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } {xeinside} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } {xeescape} { if (yytext[1] == '\'') @@ -523,37 +533,38 @@ other . errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."), lexer_errposition())); } - check_string_escape_warning(yytext[1]); - addlitchar(unescape_single_char(yytext[1])); + check_string_escape_warning(yytext[1], yyscanner); + addlitchar(unescape_single_char(yytext[1], yyscanner), + yyscanner); } {xeoctesc} { unsigned char c = strtoul(yytext+1, NULL, 8); - check_escape_warning(); - addlitchar(c); + check_escape_warning(yyscanner); + addlitchar(c, yyscanner); if (c == '\0' || IS_HIGHBIT_SET(c)) - saw_non_ascii = true; + yyextra->saw_non_ascii = true; } {xehexesc} { unsigned char c = strtoul(yytext+2, NULL, 16); - check_escape_warning(); - addlitchar(c); + check_escape_warning(yyscanner); + addlitchar(c, yyscanner); if (c == '\0' || IS_HIGHBIT_SET(c)) - saw_non_ascii = true; + yyextra->saw_non_ascii = true; } {quotecontinue} { /* ignore */ } . 
{ /* This is only needed for \ just before EOF */ - addlitchar(yytext[0]); + addlitchar(yytext[0], yyscanner); } <> { yyerror("unterminated quoted string"); } {dolqdelim} { SET_YYLLOC(); - dolqstart = pstrdup(yytext); + yyextra->dolqstart = pstrdup(yytext); BEGIN(xdolq); startlit(); } @@ -565,11 +576,12 @@ other . return yytext[0]; } {dolqdelim} { - if (strcmp(yytext, dolqstart) == 0) + if (strcmp(yytext, yyextra->dolqstart) == 0) { - pfree(dolqstart); + pfree(yyextra->dolqstart); + yyextra->dolqstart = NULL; BEGIN(INITIAL); - yylval.str = litbufdup(); + yylval->str = litbufdup(yyscanner); return SCONST; } else @@ -579,19 +591,19 @@ other . * the $... part to the output, but put back the final * $ for rescanning. Consider $delim$...$junk$delim$ */ - addlit(yytext, yyleng-1); + addlit(yytext, yyleng-1, yyscanner); yyless(yyleng-1); } } {dolqinside} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } {dolqfailed} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } . { /* This is only needed for $ inside the quoted text */ - addlitchar(yytext[0]); + addlitchar(yytext[0], yyscanner); } <> { yyerror("unterminated dollar-quoted string"); } @@ -609,24 +621,24 @@ other . 
char *ident; BEGIN(INITIAL); - if (literallen == 0) + if (yyextra->literallen == 0) yyerror("zero-length delimited identifier"); - ident = litbufdup(); - if (literallen >= NAMEDATALEN) - truncate_identifier(ident, literallen, true); - yylval.str = ident; + ident = litbufdup(yyscanner); + if (yyextra->literallen >= NAMEDATALEN) + truncate_identifier(ident, yyextra->literallen, true); + yylval->str = ident; return IDENT; } {xuistop1} { char *ident; BEGIN(INITIAL); - if (literallen == 0) + if (yyextra->literallen == 0) yyerror("zero-length delimited identifier"); - ident = litbuf_udeescape('\\'); - if (literallen >= NAMEDATALEN) - truncate_identifier(ident, literallen, true); - yylval.str = ident; + ident = litbuf_udeescape('\\', yyscanner); + if (yyextra->literallen >= NAMEDATALEN) + truncate_identifier(ident, yyextra->literallen, true); + yylval->str = ident; /* throw back all but the quote */ yyless(1); return IDENT; @@ -635,19 +647,19 @@ other . char *ident; BEGIN(INITIAL); - if (literallen == 0) + if (yyextra->literallen == 0) yyerror("zero-length delimited identifier"); - ident = litbuf_udeescape(yytext[yyleng - 2]); - if (literallen >= NAMEDATALEN) - truncate_identifier(ident, literallen, true); - yylval.str = ident; + ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); + if (yyextra->literallen >= NAMEDATALEN) + truncate_identifier(ident, yyextra->literallen, true); + yylval->str = ident; return IDENT; } {xddouble} { - addlitchar('"'); + addlitchar('"', yyscanner); } {xdinside} { - addlit(yytext, yyleng); + addlit(yytext, yyleng, yyscanner); } <<EOF>> { yyerror("unterminated quoted identifier"); } @@ -659,7 +671,7 @@ other . yyless(1); /* and treat it as {identifier} */ ident = downcase_truncate_identifier(yytext, yyleng, true); - yylval.str = ident; + yylval->str = ident; return IDENT; } @@ -747,15 +759,15 @@ other . 
/* Convert "!=" operator to "<>" for compatibility */ if (strcmp(yytext, "!=") == 0) - yylval.str = pstrdup("<>"); + yylval->str = pstrdup("<>"); else - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return Op; } {param} { SET_YYLLOC(); - yylval.ival = atol(yytext + 1); + yylval->ival = atol(yytext + 1); return PARAM; } @@ -774,20 +786,20 @@ other . ) { /* integer too large, treat it as a float */ - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return FCONST; } - yylval.ival = val; + yylval->ival = val; return ICONST; } {decimal} { SET_YYLLOC(); - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return FCONST; } {real} { SET_YYLLOC(); - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return FCONST; } {realfail1} { @@ -799,14 +811,14 @@ other . */ yyless(yyleng-1); SET_YYLLOC(); - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return FCONST; } {realfail2} { /* throw back the [Ee][+-], and proceed as above */ yyless(yyleng-2); SET_YYLLOC(); - yylval.str = pstrdup(yytext); + yylval->str = pstrdup(yytext); return FCONST; } @@ -821,7 +833,7 @@ other . keyword = ScanKeywordLookup(yytext); if (keyword != NULL) { - yylval.keyword = keyword->name; + yylval->keyword = keyword->name; return keyword->value; } @@ -830,7 +842,7 @@ other . * if necessary. */ ident = downcase_truncate_identifier(yytext, yyleng, true); - yylval.str = ident; + yylval->str = ident; return IDENT; } @@ -846,6 +858,22 @@ other . %% +/* + * Arrange access to yyextra for subroutines of the main yylex() function. + * We expect each subroutine to have a yyscanner parameter. Rather than + * use the yyget_xxx functions, which might or might not get inlined by the + * compiler, we cheat just a bit and cast yyscanner to the right type. + */ +#undef yyextra +#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r) + +/* Likewise for a couple of other things we need. 
*/ +#undef yylloc +#define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r) +#undef yyleng +#define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r) + + /* * scanner_errposition * Report a lexer or grammar error cursor position, if possible. @@ -854,38 +882,39 @@ other . * is a dummy (always 0, in fact). * * Note that this can only be used for messages emitted during raw parsing - * (essentially, scan.l and gram.y), since it requires scanbuf to still be - * valid. + * (essentially, scan.l and gram.y), since it requires the yyscanner struct + * to still be available. */ int -scanner_errposition(int location) +scanner_errposition(int location, base_yyscan_t yyscanner) { int pos; - Assert(scanbuf != NULL); /* else called from wrong place */ if (location < 0) return 0; /* no-op if location is unknown */ /* Convert byte offset to character number */ - pos = pg_mbstrlen_with_len(scanbuf, location) + 1; + pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1; /* And pass it to the ereport mechanism */ return errposition(pos); } /* - * yyerror + * scanner_yyerror * Report a lexer or grammar error. * - * The message's cursor position identifies the most recently lexed token. + * The message's cursor position is whatever YYLLOC was last set to, + * ie, the start of the current token if called within yylex(), or the + * most recently lexed token if called from the grammar. * This is OK for syntax error messages from the Bison parser, because Bison * parsers report error as soon as the first unparsable token is reached. * Beware of using yyerror for other purposes, as the cursor position might * be misleading! 
*/ void -yyerror(const char *message) +scanner_yyerror(const char *message, base_yyscan_t yyscanner) { - const char *loc = scanbuf + yylloc; + const char *loc = yyextra->scanbuf + *yylloc; if (*loc == YY_END_OF_BUFFER_CHAR) { @@ -909,31 +938,32 @@ yyerror(const char *message) /* * Called before any actual parsing is done */ -void -scanner_init(const char *str) +base_yyscan_t +scanner_init(const char *str, base_yy_extra_type *yyext) { - Size slen = strlen(str); + Size slen = strlen(str); + yyscan_t scanner; - /* - * Might be left over after ereport() - */ - if (YY_CURRENT_BUFFER) - yy_delete_buffer(YY_CURRENT_BUFFER); + if (yylex_init(&scanner) != 0) + elog(ERROR, "yylex_init() failed: %m"); + + base_yyset_extra(yyext, scanner); /* * Make a scan buffer with special termination needed by flex. */ - scanbuf = palloc(slen + 2); - memcpy(scanbuf, str, slen); - scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; - scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); + yyext->scanbuf = (char *) palloc(slen + 2); + yyext->scanbuflen = slen; + memcpy(yyext->scanbuf, str, slen); + yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; + yy_scan_buffer(yyext->scanbuf, slen + 2, scanner); /* initialize literal buffer to a reasonable but expansible size */ - literalalloc = 1024; - literalbuf = (char *) palloc(literalalloc); - startlit(); + yyext->literalalloc = 1024; + yyext->literalbuf = (char *) palloc(yyext->literalalloc); + yyext->literallen = 0; - BEGIN(INITIAL); + return scanner; } @@ -941,60 +971,71 @@ scanner_init(const char *str) * Called after parsing is done to clean up after scanner_init() */ void -scanner_finish(void) +scanner_finish(base_yyscan_t yyscanner) { - yy_delete_buffer(scanbufhandle); - pfree(scanbuf); - scanbuf = NULL; + /* + * We don't bother to call yylex_destroy(), because all it would do + * is pfree a small amount of control storage. It's cheaper to leak + * the storage until the parsing context is destroyed. 
The amount of + * space involved is usually negligible compared to the output parse + * tree anyway. + * + * We do bother to pfree the scanbuf and literal buffer, but only if they + * represent a nontrivial amount of space. The 8K cutoff is arbitrary. + */ + if (yyextra->scanbuflen >= 8192) + pfree(yyextra->scanbuf); + if (yyextra->literalalloc >= 8192) + pfree(yyextra->literalbuf); } static void -addlit(char *ytext, int yleng) +addlit(char *ytext, int yleng, base_yyscan_t yyscanner) { /* enlarge buffer if needed */ - if ((literallen+yleng) >= literalalloc) + if ((yyextra->literallen + yleng) >= yyextra->literalalloc) { do { - literalalloc *= 2; - } while ((literallen+yleng) >= literalalloc); - literalbuf = (char *) repalloc(literalbuf, literalalloc); + yyextra->literalalloc *= 2; + } while ((yyextra->literallen + yleng) >= yyextra->literalalloc); + yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, + yyextra->literalalloc); } - /* append new data, add trailing null */ - memcpy(literalbuf+literallen, ytext, yleng); - literallen += yleng; - literalbuf[literallen] = '\0'; + /* append new data */ + memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng); + yyextra->literallen += yleng; } static void -addlitchar(unsigned char ychar) +addlitchar(unsigned char ychar, base_yyscan_t yyscanner) { /* enlarge buffer if needed */ - if ((literallen+1) >= literalalloc) + if ((yyextra->literallen + 1) >= yyextra->literalalloc) { - literalalloc *= 2; - literalbuf = (char *) repalloc(literalbuf, literalalloc); + yyextra->literalalloc *= 2; + yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, + yyextra->literalalloc); } - /* append new data, add trailing null */ - literalbuf[literallen] = ychar; - literallen += 1; - literalbuf[literallen] = '\0'; + /* append new data */ + yyextra->literalbuf[yyextra->literallen] = ychar; + yyextra->literallen += 1; } /* - * One might be tempted to write pstrdup(literalbuf) instead of this, - * but for long literals this 
is much faster because the length is - * already known. + * Create a palloc'd copy of literalbuf, adding a trailing null. */ static char * -litbufdup(void) +litbufdup(base_yyscan_t yyscanner) { - char *new; + int llen = yyextra->literallen; + char *new; - new = palloc(literallen + 1); - memcpy(new, literalbuf, literallen+1); + new = palloc(llen + 1); + memcpy(new, yyextra->literalbuf, llen); + new[llen] = '\0'; return new; } @@ -1012,23 +1053,23 @@ hexval(unsigned char c) } static void -check_unicode_value(pg_wchar c, char * loc) +check_unicode_value(pg_wchar c, char *loc, base_yyscan_t yyscanner) { if (GetDatabaseEncoding() == PG_UTF8) return; if (c > 0x7F) { - yylloc += (char *) loc - literalbuf + 3; /* 3 for U&" */ + ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */ yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); } } static char * -litbuf_udeescape(unsigned char escape) +litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner) { char *new; - char *in, *out; + char *litbuf, *in, *out; if (isxdigit(escape) || escape == '+' @@ -1036,17 +1077,21 @@ litbuf_udeescape(unsigned char escape) || escape == '"' || scanner_isspace(escape)) { - yylloc += literallen + yyleng + 1; + ADVANCE_YYLLOC(yyextra->literallen + yyleng + 1); yyerror("invalid Unicode escape character"); } + /* Make literalbuf null-terminated to simplify the scanning loop */ + litbuf = yyextra->literalbuf; + litbuf[yyextra->literallen] = '\0'; + /* * This relies on the subtle assumption that a UTF-8 expansion * cannot be longer than its escaped representation. 
*/ - new = palloc(literallen + 1); + new = palloc(yyextra->literallen + 1); - in = literalbuf; + in = litbuf; out = new; while (*in) { @@ -1060,7 +1105,7 @@ litbuf_udeescape(unsigned char escape) else if (isxdigit(in[1]) && isxdigit(in[2]) && isxdigit(in[3]) && isxdigit(in[4])) { pg_wchar unicode = hexval(in[1]) * 16*16*16 + hexval(in[2]) * 16*16 + hexval(in[3]) * 16 + hexval(in[4]); - check_unicode_value(unicode, in); + check_unicode_value(unicode, in, yyscanner); unicode_to_utf8(unicode, (unsigned char *) out); in += 5; out += pg_mblen(out); @@ -1072,14 +1117,14 @@ litbuf_udeescape(unsigned char escape) { pg_wchar unicode = hexval(in[2]) * 16*16*16*16*16 + hexval(in[3]) * 16*16*16*16 + hexval(in[4]) * 16*16*16 + hexval(in[5]) * 16*16 + hexval(in[6]) * 16 + hexval(in[7]); - check_unicode_value(unicode, in); + check_unicode_value(unicode, in, yyscanner); unicode_to_utf8(unicode, (unsigned char *) out); in += 8; out += pg_mblen(out); } else { - yylloc += in - literalbuf + 3; /* 3 for U&" */ + ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode escape value"); } } @@ -1098,7 +1143,7 @@ litbuf_udeescape(unsigned char escape) } static unsigned char -unescape_single_char(unsigned char c) +unescape_single_char(unsigned char c, base_yyscan_t yyscanner) { switch (c) { @@ -1115,47 +1160,74 @@ unescape_single_char(unsigned char c) default: /* check for backslash followed by non-7-bit-ASCII */ if (c == '\0' || IS_HIGHBIT_SET(c)) - saw_non_ascii = true; + yyextra->saw_non_ascii = true; return c; } } static void -check_string_escape_warning(unsigned char ychar) +check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner) { if (ychar == '\'') { - if (warn_on_first_escape && escape_string_warning) + if (yyextra->warn_on_first_escape && escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of \\' in a string literal"), errhint("Use '' to write quotes in strings, or use the escape 
string syntax (E'...')."), lexer_errposition())); - warn_on_first_escape = false; /* warn only once per string */ + yyextra->warn_on_first_escape = false; /* warn only once per string */ } else if (ychar == '\\') { - if (warn_on_first_escape && escape_string_warning) + if (yyextra->warn_on_first_escape && escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of \\\\ in a string literal"), errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), lexer_errposition())); - warn_on_first_escape = false; /* warn only once per string */ + yyextra->warn_on_first_escape = false; /* warn only once per string */ } else - check_escape_warning(); + check_escape_warning(yyscanner); } static void -check_escape_warning(void) +check_escape_warning(base_yyscan_t yyscanner) { - if (warn_on_first_escape && escape_string_warning) + if (yyextra->warn_on_first_escape && escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of escape in a string literal"), errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), lexer_errposition())); - warn_on_first_escape = false; /* warn only once per string */ + yyextra->warn_on_first_escape = false; /* warn only once per string */ +} + +/* + * Interface functions to make flex use palloc() instead of malloc(). + * It'd be better to make these static, but flex insists otherwise. 
+ */ + +void * +base_yyalloc(size_t bytes, base_yyscan_t yyscanner) +{ + return palloc(bytes); +} + +void * +base_yyrealloc(void *ptr, size_t bytes, base_yyscan_t yyscanner) +{ + if (ptr) + return repalloc(ptr, bytes); + else + return palloc(bytes); +} + +void +base_yyfree(void *ptr, base_yyscan_t yyscanner) +{ + if (ptr) + pfree(ptr); } diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index 31290a270c..a8c2f407e4 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.45 2009/07/12 17:12:34 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.46 2009/07/13 02:02:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,19 +36,85 @@ */ #include "parser/gram.h" +/* + * The YY_EXTRA data that a flex scanner allows us to pass around. Private + * state needed for raw parsing/lexing goes here. + */ +typedef struct base_yy_extra_type +{ + /* + * The string the lexer is physically scanning. We keep this mainly so + * that we can cheaply compute the offset of the current token (yytext). + */ + char *scanbuf; + Size scanbuflen; + + /* + * literalbuf is used to accumulate literal values when multiple rules + * are needed to parse a single literal. Call startlit() to reset buffer + * to empty, addlit() to add text. NOTE: the string in literalbuf is + * NOT necessarily null-terminated, but there always IS room to add a + * trailing null at offset literallen. We store a null only when we + * need it. 
+ */ + char *literalbuf; /* palloc'd expandable buffer */ + int literallen; /* actual current string length */ + int literalalloc; /* current allocated buffer size */ + + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* state variables for literal-lexing warnings */ + bool warn_on_first_escape; + bool saw_non_ascii; + + /* + * State variables for filtered_base_yylex(). + */ + bool have_lookahead; /* is lookahead info valid? */ + int lookahead_token; /* one-token lookahead */ + YYSTYPE lookahead_yylval; /* yylval for lookahead token */ + YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ + + /* + * State variables that belong to the grammar, not the lexer. It's + * simpler to keep these here than to invent a separate structure. + * These fields are unused/undefined if the lexer is invoked on its own. + */ + + List *parsetree; /* final parse result is delivered here */ + + bool QueryIsRule; /* signals we are parsing CREATE RULE */ +} base_yy_extra_type; + +/* + * The type of yyscanner is opaque outside scan.l. + */ +typedef void *base_yyscan_t; + +/* + * In principle we should use yyget_extra() to fetch the yyextra field + * from a yyscanner struct. However, flex always puts that field first, + * and this is sufficiently performance-critical to make it seem worth + * cheating a bit to use an inline macro. 
+ */ +#define pg_yyget_extra(yyscanner) (*((base_yy_extra_type **) (yyscanner))) + /* from parser.c */ -extern int filtered_base_yylex(void); +extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, + base_yyscan_t yyscanner); /* from scan.l */ -extern void scanner_init(const char *str); -extern void scanner_finish(void); -extern int base_yylex(void); -extern int scanner_errposition(int location); -extern void base_yyerror(const char *message); +extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext); +extern void scanner_finish(base_yyscan_t yyscanner); +extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, + base_yyscan_t yyscanner); +extern int scanner_errposition(int location, base_yyscan_t yyscanner); +extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner); /* from gram.y */ -extern void parser_init(void); -extern int base_yyparse(void); +extern void parser_init(base_yy_extra_type *yyext); +extern int base_yyparse(base_yyscan_t yyscanner); #endif /* GRAMPARSE_H */