diff --git a/contrib/cube/cubescan.l b/contrib/cube/cubescan.l index 49cb6992165..a30fbfc3111 100644 --- a/contrib/cube/cubescan.l +++ b/contrib/cube/cubescan.l @@ -63,7 +63,7 @@ NaN [nN][aA][nN] \( cube_yylval = "("; return O_PAREN; \) cube_yylval = ")"; return C_PAREN; \, cube_yylval = ","; return COMMA; -[ \t\n\r\f]+ /* discard spaces */ +[ \t\n\r\f\v]+ /* discard spaces */ . return yytext[0]; /* alert parser of the garbage */ %% diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out index 44058244132..bbc885a181a 100644 --- a/contrib/hstore/expected/hstore_utf8.out +++ b/contrib/hstore/expected/hstore_utf8.out @@ -34,3 +34,34 @@ SELECT 'keyąfoo=>valueą'::hstore; "keyąfoo"=>"valueą" (1 row) +-- More patterns that may depend on isspace() and locales, all discarded. +SELECT E'key\u000A=>value\u000A'::hstore; -- \n + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u0009=>value\u0009'::hstore; -- \t + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000D=>value\u000D'::hstore; -- \r + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000B=>value\u000B'::hstore; -- \v + hstore +---------------- + "key"=>"value" +(1 row) + +SELECT E'key\u000C=>value\u000C'::hstore; -- \f + hstore +---------------- + "key"=>"value" +(1 row) + diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql index face878324c..38c9481ee64 100644 --- a/contrib/hstore/sql/hstore_utf8.sql +++ b/contrib/hstore/sql/hstore_utf8.sql @@ -17,3 +17,10 @@ SELECT E'key\u0105=>value\u0105'::hstore; SELECT 'keyą=>valueą'::hstore; SELECT 'ą=>ą'::hstore; SELECT 'keyąfoo=>valueą'::hstore; + +-- More patterns that may depend on isspace() and locales, all discarded. +SELECT E'key\u000A=>value\u000A'::hstore; -- \n +SELECT E'key\u0009=>value\u0009'::hstore; -- \t +SELECT E'key\u000D=>value\u000D'::hstore; -- \r +SELECT E'key\u000B=>value\u000B'::hstore; -- \v +SELECT E'key\u000C=>value\u000C'::hstore; -- \f diff --git a/contrib/seg/segscan.l b/contrib/seg/segscan.l index a1e9e9937ef..4ad529eccc4 100644 --- a/contrib/seg/segscan.l +++ b/contrib/seg/segscan.l @@ -59,7 +59,7 @@ float ({integer}|{real})([eE]{integer})? \< seg_yylval.text = "<"; return EXTENSION; \> seg_yylval.text = ">"; return EXTENSION; \~ seg_yylval.text = "~"; return EXTENSION; -[ \t\n\r\f]+ /* discard spaces */ +[ \t\n\r\f\v]+ /* discard spaces */ . return yytext[0]; /* alert parser of the garbage */ %% diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c index be75dc6ab07..63b4e969624 100644 --- a/src/backend/parser/parse_type.c +++ b/src/backend/parser/parse_type.c @@ -742,7 +742,7 @@ typeStringToTypeName(const char *str, Node *escontext) ErrorContextCallback ptserrcontext; /* make sure we give useful error for empty input */ - if (strspn(str, " \t\n\r\f") == strlen(str)) + if (strspn(str, " \t\n\r\f\v") == strlen(str)) goto fail; /* diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index b2216a9eacd..0708ba65405 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -213,16 +213,16 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); * versions of Postgres failed to recognize -- as a comment if the input * did not end with a newline. * - * XXX perhaps \f (formfeed) should be treated as a newline as well? + * non_newline_space tracks all the other space characters except newlines. * * XXX if you change the set of whitespace characters, fix scanner_isspace() * to agree. */ -space [ \t\n\r\f] -horiz_space [ \t\f] -newline [\n\r] -non_newline [^\n\r] +space [ \t\n\r\f\v] +non_newline_space [ \t\f\v] +newline [\n\r] +non_newline [^\n\r] comment ("--"{non_newline}*) @@ -236,8 +236,8 @@ whitespace ({space}+|{comment}) */ special_whitespace ({space}+|{comment}{newline}) -horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) +non_newline_whitespace ({non_newline_space}|{comment}) +whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*) quote ' /* If we see {quote} then {quotecontinue}, the quoted string continues */ @@ -1414,6 +1414,8 @@ unescape_single_char(unsigned char c, core_yyscan_t yyscanner) return '\r'; case 't': return '\t'; + case 'v': + return '\v'; default: /* check for backslash followed by non-7-bit-ASCII */ if (c == '\0' || IS_HIGHBIT_SET(c)) diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c index ed67f5f5fe2..4f0005a114e 100644 --- a/src/backend/parser/scansup.c +++ b/src/backend/parser/scansup.c @@ -121,6 +121,7 @@ scanner_isspace(char ch) ch == '\t' || ch == '\n' || ch == '\r' || + ch == '\v' || ch == '\f') return true; return false; diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index cb467ca46f7..1cc7fb858cd 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -73,7 +73,7 @@ static void addlitchar(unsigned char ychar); %x xd %x xq -space [ \t\n\r\f] +space [ \t\n\r\f\v] quote ' quotestop {quote} diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 9000f83a836..4359dbd83df 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -24,6 +24,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" +#include "parser/scansup.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/arrayaccess.h" @@ -89,7 +90,6 @@ typedef struct ArrayIteratorData int current_item; /* the item # we're at in the array */ } ArrayIteratorData; -static bool array_isspace(char ch); static int ArrayCount(const char *str, int *dim, char typdelim, Node *escontext); static bool ReadArrayStr(char *arrayStr, const char *origStr, @@ -254,7 +254,7 @@ array_in(PG_FUNCTION_ARGS) * Note: we currently allow whitespace between, but not within, * dimension items. */ - while (array_isspace(*p)) + while (scanner_isspace(*p)) p++; if (*p != '[') break; /* no more dimension items */ @@ -338,7 +338,7 @@ array_in(PG_FUNCTION_ARGS) errdetail("Missing \"%s\" after array dimensions.", ASSGN))); p += strlen(ASSGN); - while (array_isspace(*p)) + while (scanner_isspace(*p)) p++; /* @@ -434,27 +434,6 @@ array_in(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(retval); } -/* - * array_isspace() --- a non-locale-dependent isspace() - * - * We used to use isspace() for parsing array values, but that has - * undesirable results: an array value might be silently interpreted - * differently depending on the locale setting. Now we just hard-wire - * the traditional ASCII definition of isspace(). - */ -static bool -array_isspace(char ch) -{ - if (ch == ' ' || - ch == '\t' || - ch == '\n' || - ch == '\r' || - ch == '\v' || - ch == '\f') - return true; - return false; -} - /* * ArrayCount * Determines the dimensions for an array string. @@ -654,7 +633,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) itemdone = true; nelems[nest_level - 1]++; } - else if (!array_isspace(*ptr)) + else if (!scanner_isspace(*ptr)) { /* * Other non-space characters must be after a @@ -684,7 +663,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) /* only whitespace is allowed after the closing brace */ while (*ptr) { - if (!array_isspace(*ptr++)) + if (!scanner_isspace(*ptr++)) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), @@ -884,7 +863,7 @@ ReadArrayStr(char *arrayStr, indx[ndim - 1]++; srcptr++; } - else if (array_isspace(*srcptr)) + else if (scanner_isspace(*srcptr)) { /* * If leading space, drop it immediately. Else, copy @@ -1176,7 +1155,7 @@ array_out(PG_FUNCTION_ARGS) overall_length += 1; } else if (ch == '{' || ch == '}' || ch == typdelim || - array_isspace(ch)) + scanner_isspace(ch)) needquote = true; } } diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l index 5c020f30b90..1461fa3d3e6 100644 --- a/src/bin/psql/psqlscanslash.l +++ b/src/bin/psql/psqlscanslash.l @@ -108,7 +108,7 @@ extern void slash_yyset_column(int column_no, yyscan_t yyscanner); /* * Assorted character class definitions that should match psqlscan.l. */ -space [ \t\n\r\f] +space [ \t\n\r\f\v] quote ' xeoctesc [\\][0-7]{1,3} xehexesc [\\]x[0-9A-Fa-f]{1,2} diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index 84754aca4a9..5dc6fc2fb9e 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -149,16 +149,16 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); * versions of Postgres failed to recognize -- as a comment if the input * did not end with a newline. * - * XXX perhaps \f (formfeed) should be treated as a newline as well? + * non_newline_space tracks all space characters except newlines. * * XXX if you change the set of whitespace characters, fix scanner_isspace() * to agree. */ -space [ \t\n\r\f] -horiz_space [ \t\f] -newline [\n\r] -non_newline [^\n\r] +space [ \t\n\r\f\v] +non_newline_space [ \t\f\v] +newline [\n\r] +non_newline [^\n\r] comment ("--"{non_newline}*) @@ -172,8 +172,8 @@ whitespace ({space}+|{comment}) */ special_whitespace ({space}+|{comment}{newline}) -horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) +non_newline_whitespace ({non_newline_space}|{comment}) +whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*) quote ' /* If we see {quote} then {quotecontinue}, the quoted string continues */ diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index 0429a72bfe2..58b21c4d6a8 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -761,7 +761,7 @@ appendPGArray(PQExpBuffer buffer, const char *value) if (ch == '"' || ch == '\\' || ch == '{' || ch == '}' || ch == ',' || - /* these match array_isspace(): */ + /* these match scanner_isspace(): */ ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index dcd567e8c3a..77bdf4f82ff 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -180,16 +180,16 @@ static struct _if_value * versions of Postgres failed to recognize -- as a comment if the input * did not end with a newline. * - * XXX perhaps \f (formfeed) should be treated as a newline as well? + * non_newline_space tracks all space characters except newlines. * * XXX if you change the set of whitespace characters, fix ecpg_isspace() * to agree. */ -space [ \t\n\r\f] -horiz_space [ \t\f] -newline [\n\r] -non_newline [^\n\r] +space [ \t\n\r\f\v] +non_newline_space [ \t\f\v] +newline [\n\r] +non_newline [^\n\r] comment ("--"{non_newline}*) @@ -202,8 +202,8 @@ whitespace ({space}+|{comment}) * it, whereas {whitespace} should generally have a * after it... */ -horiz_whitespace ({horiz_space}|{comment}) -whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*) +non_newline_whitespace ({non_newline_space}|{comment}) +whitespace_with_newline ({non_newline_whitespace}*{newline}{whitespace}*) quote ' /* If we see {quote} then {quotecontinue}, the quoted string continues */ @@ -1721,7 +1721,8 @@ ecpg_isspace(char ch) ch == '\t' || ch == '\n' || ch == '\r' || - ch == '\f') + ch == '\f' || + ch == '\v') return true; return false; }