mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Fix de-escaping checks so that we will reject \000 as well as other invalidly
encoded sequences. Per discussion of a couple of days ago.
This commit is contained in:
parent
c1c40e580a
commit
22c922269f
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.307 2009/03/31 22:12:46 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.308 2009/04/19 21:08:54 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -2718,7 +2718,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
|
||||
char *start_ptr;
|
||||
char *end_ptr;
|
||||
int input_len;
|
||||
bool saw_high_bit = false;
|
||||
bool saw_non_ascii = false;
|
||||
|
||||
/* Make sure space remains in fieldvals[] */
|
||||
if (fieldno >= maxfields)
|
||||
@@ -2783,8 +2783,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
|
||||
}
|
||||
}
|
||||
c = val & 0377;
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
if (c == '\0' || IS_HIGHBIT_SET(c))
|
||||
saw_non_ascii = true;
|
||||
}
|
||||
break;
|
||||
case 'x':
|
||||
@@ -2808,8 +2808,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
|
||||
}
|
||||
}
|
||||
c = val & 0xff;
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
if (c == '\0' || IS_HIGHBIT_SET(c))
|
||||
saw_non_ascii = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -2847,11 +2847,11 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
|
||||
*output_ptr++ = '\0';
|
||||
|
||||
/*
|
||||
* If we de-escaped a char with the high bit set, make sure we still
|
||||
* If we de-escaped a non-7-bit-ASCII char, make sure we still
|
||||
* have valid data for the db encoding. Avoid calling strlen here for
|
||||
* the sake of efficiency.
|
||||
*/
|
||||
if (saw_high_bit)
|
||||
if (saw_non_ascii)
|
||||
{
|
||||
char *fld = fieldvals[fieldno];
|
||||
|
||||
|
@@ -24,7 +24,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.150 2009/04/14 22:18:47 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.151 2009/04/19 21:08:54 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -60,7 +60,7 @@ bool escape_string_warning = true;
|
||||
bool standard_conforming_strings = false;
|
||||
|
||||
static bool warn_on_first_escape;
|
||||
static bool saw_high_bit = false;
|
||||
static bool saw_non_ascii = false;
|
||||
|
||||
/*
|
||||
* literalbuf is used to accumulate literal values when multiple rules
|
||||
@@ -453,7 +453,7 @@ other .
|
||||
|
||||
{xqstart} {
|
||||
warn_on_first_escape = true;
|
||||
saw_high_bit = false;
|
||||
saw_non_ascii = false;
|
||||
SET_YYLLOC();
|
||||
if (standard_conforming_strings)
|
||||
BEGIN(xq);
|
||||
@@ -463,7 +463,7 @@ other .
|
||||
}
|
||||
{xestart} {
|
||||
warn_on_first_escape = false;
|
||||
saw_high_bit = false;
|
||||
saw_non_ascii = false;
|
||||
SET_YYLLOC();
|
||||
BEGIN(xe);
|
||||
startlit();
|
||||
@@ -477,10 +477,11 @@ other .
|
||||
<xq,xe>{quotefail} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
/* check that the data remains valid if it might have been
|
||||
/*
|
||||
* check that the data remains valid if it might have been
|
||||
* made invalid by unescaping any chars.
|
||||
*/
|
||||
if (saw_high_bit)
|
||||
if (saw_non_ascii)
|
||||
pg_verifymbstr(literalbuf, literallen, false);
|
||||
yylval.str = litbufdup();
|
||||
return SCONST;
|
||||
@@ -526,16 +527,16 @@ other .
|
||||
|
||||
check_escape_warning();
|
||||
addlitchar(c);
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
if (c == '\0' || IS_HIGHBIT_SET(c))
|
||||
saw_non_ascii = true;
|
||||
}
|
||||
<xe>{xehexesc} {
|
||||
unsigned char c = strtoul(yytext+2, NULL, 16);
|
||||
|
||||
check_escape_warning();
|
||||
addlitchar(c);
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
if (c == '\0' || IS_HIGHBIT_SET(c))
|
||||
saw_non_ascii = true;
|
||||
}
|
||||
<xq,xe,xus>{quotecontinue} {
|
||||
/* ignore */
|
||||
@@ -1083,6 +1084,11 @@ litbuf_udeescape(unsigned char escape)
|
||||
}
|
||||
|
||||
*out = '\0';
|
||||
/*
|
||||
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
|
||||
* codes; but it's probably not worth the trouble, since this isn't
|
||||
* likely to be a performance-critical path.
|
||||
*/
|
||||
pg_verifymbstr(new, out - new, false);
|
||||
return new;
|
||||
}
|
||||
@@ -1090,14 +1096,6 @@ litbuf_udeescape(unsigned char escape)
|
||||
static unsigned char
|
||||
unescape_single_char(unsigned char c)
|
||||
{
|
||||
/* Normally we wouldn't expect to see \n where n has its high bit set
|
||||
* but we set the flag to check the string if we do get it, so
|
||||
* that this doesn't become a way of getting around the coding validity
|
||||
* checks.
|
||||
*/
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case 'b':
|
||||
@@ -1111,6 +1109,10 @@ unescape_single_char(unsigned char c)
|
||||
case 't':
|
||||
return '\t';
|
||||
default:
|
||||
/* check for backslash followed by non-7-bit-ASCII */
|
||||
if (c == '\0' || IS_HIGHBIT_SET(c))
|
||||
saw_non_ascii = true;
|
||||
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user