Tighten checks for whitespace in functions that parse identifiers etc.

This patch replaces isspace() calls with scanner_isspace() in functions
that are likely to be presented with non-ASCII input.  isspace() has
the small advantage that it will correctly recognize no-break space
in single-byte encodings (such as LATIN1); but it examines only one byte at
a time, so it cannot work correctly for any multibyte character, and
depending on platform it might return false-positive results for some
fragments of multibyte characters.  That's
disastrous for functions that are trying to discard whitespace between
valid strings, as noted in bug #14662 from Justin Muise.  Even treating
no-break space as whitespace is pretty questionable for the usages touched
here, because the core scanner would think it is an identifier character.

Affected functions are parse_ident(), parseNameAndArgTypes() (underlying
regprocedurein() and siblings), SplitIdentifierString() (used for parsing
GUCs and options that are qualified names or lists of names), and
SplitDirectoriesString() (used for parsing GUCs that are lists of
directories).

All the functions adjusted here are parsing SQL identifiers and similar
constructs, so it's reasonable to insist that their definition of
whitespace match the core scanner's.  So we can hope that this won't cause
many backwards-compatibility problems.  I've left alone isspace() calls
in places that aren't really expecting any non-ASCII input characters,
such as float8in().

Back-patch to all supported branches.

Discussion: https://postgr.es/m/10129.1495302480@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2017-05-24 15:28:34 -04:00
parent f61bd73993
commit 9ae2661fe1
3 changed files with 15 additions and 14 deletions

View File

@ -770,7 +770,7 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = qualname_str;
/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;
for (;;)
@ -858,14 +858,14 @@ parse_ident(PG_FUNCTION_ARGS)
text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;
}
else if (*nextp == '\0')

View File

@ -32,6 +32,7 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
@ -1769,7 +1770,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
ptr2 = ptr + strlen(ptr);
while (--ptr2 > ptr)
{
if (!isspace((unsigned char) *ptr2))
if (!scanner_isspace(*ptr2))
break;
}
if (*ptr2 != ')')
@ -1786,7 +1787,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
for (;;)
{
/* allow leading whitespace */
while (isspace((unsigned char) *ptr))
while (scanner_isspace(*ptr))
ptr++;
if (*ptr == '\0')
{
@ -1842,7 +1843,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
/* Lop off trailing whitespace */
while (--ptr2 >= typename)
{
if (!isspace((unsigned char) *ptr2))
if (!scanner_isspace(*ptr2))
break;
*ptr2 = '\0';
}

View File

@ -3252,7 +3252,7 @@ SplitIdentifierString(char *rawstring, char separator,
*namelist = NIL;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace */
if (*nextp == '\0')
@ -3290,7 +3290,7 @@ SplitIdentifierString(char *rawstring, char separator,
curname = nextp;
while (*nextp && *nextp != separator &&
!isspace((unsigned char) *nextp))
!scanner_isspace(*nextp))
nextp++;
endp = nextp;
if (curname == nextp)
@ -3312,13 +3312,13 @@ SplitIdentifierString(char *rawstring, char separator,
pfree(downname);
}
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip trailing whitespace */
if (*nextp == separator)
{
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace for next */
/* we expect another name, so done remains false */
}
@ -3377,7 +3377,7 @@ SplitDirectoriesString(char *rawstring, char separator,
*namelist = NIL;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace */
if (*nextp == '\0')
@ -3414,7 +3414,7 @@ SplitDirectoriesString(char *rawstring, char separator,
while (*nextp && *nextp != separator)
{
/* trailing whitespace should not be included in name */
if (!isspace((unsigned char) *nextp))
if (!scanner_isspace(*nextp))
endp = nextp + 1;
nextp++;
}
@ -3422,13 +3422,13 @@ SplitDirectoriesString(char *rawstring, char separator,
return false; /* empty unquoted name not allowed */
}
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip trailing whitespace */
if (*nextp == separator)
{
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace for next */
/* we expect another name, so done remains false */
}