mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-30 19:00:29 +08:00
Don't downcase non-ascii identifier chars in multi-byte encodings.
Long-standing code has called tolower() on identifier bytes with the high bit set. This is clearly an error and produces junk output when the encoding is multi-byte, since such a byte is then only part of a character. This patch therefore restricts the tolower() call to cases where the byte has the high bit set AND the database encoding is single-byte. There have been numerous gripes about this, most recently from Martin Schäfer. Backpatch to all live releases.
This commit is contained in:
parent
94e3311b97
commit
d535136b5d
@ -132,8 +132,10 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
|
|||||||
{
|
{
|
||||||
char *result;
|
char *result;
|
||||||
int i;
|
int i;
|
||||||
|
bool enc_is_single_byte;
|
||||||
|
|
||||||
result = palloc(len + 1);
|
result = palloc(len + 1);
|
||||||
|
enc_is_single_byte = pg_database_encoding_max_length() == 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SQL99 specifies Unicode-aware case normalization, which we don't yet
|
* SQL99 specifies Unicode-aware case normalization, which we don't yet
|
||||||
@ -141,8 +143,8 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
|
|||||||
* locale-aware translation. However, there are some locales where this
|
* locale-aware translation. However, there are some locales where this
|
||||||
* is not right either (eg, Turkish may do strange things with 'i' and
|
* is not right either (eg, Turkish may do strange things with 'i' and
|
||||||
* 'I'). Our current compromise is to use tolower() for characters with
|
* 'I'). Our current compromise is to use tolower() for characters with
|
||||||
* the high bit set, and use an ASCII-only downcasing for 7-bit
|
* the high bit set, as long as they aren't part of a multi-byte character,
|
||||||
* characters.
|
* and use an ASCII-only downcasing for 7-bit characters.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < len; i++)
|
for (i = 0; i < len; i++)
|
||||||
{
|
{
|
||||||
@ -150,7 +152,7 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
|
|||||||
|
|
||||||
if (ch >= 'A' && ch <= 'Z')
|
if (ch >= 'A' && ch <= 'Z')
|
||||||
ch += 'a' - 'A';
|
ch += 'a' - 'A';
|
||||||
else if (IS_HIGHBIT_SET(ch) && isupper(ch))
|
else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
|
||||||
ch = tolower(ch);
|
ch = tolower(ch);
|
||||||
result[i] = (char) ch;
|
result[i] = (char) ch;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user