mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Allow multibyte characters as escape in SIMILAR TO and SUBSTRING.
Previously, only a single-byte character was allowed as an escape. This patch allows it to be a multi-byte character, though it still must be a single character. Reviewed by Heikki Linnakangas and Tom Lane.
This commit is contained in:
parent
1c9701cfe5
commit
8167a3883a
@@ -688,12 +688,17 @@ similar_escape(PG_FUNCTION_ARGS)
|
||||
elen = VARSIZE_ANY_EXHDR(esc_text);
|
||||
if (elen == 0)
|
||||
e = NULL; /* no escape character */
|
||||
else if (elen != 1)
|
||||
else
|
||||
{
|
||||
int escape_mblen = pg_mbstrlen_with_len(e, elen);
|
||||
|
||||
if (escape_mblen > 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
||||
errmsg("invalid escape string"),
|
||||
errhint("Escape string must be empty or one character.")));
|
||||
}
|
||||
}
|
||||
|
||||
/*----------
|
||||
* We surround the transformed input string with
|
||||
@@ -724,6 +729,54 @@ similar_escape(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char pchar = *p;
|
||||
|
||||
/*
|
||||
* If both the escape character and the current character from the
|
||||
* pattern are multi-byte, we need to take the slow path.
|
||||
*
|
||||
* But if one of them is single-byte, we can process the pattern one
|
||||
* byte at a time, ignoring multi-byte characters. (This works
|
||||
* because all server-encodings have the property that a valid
|
||||
* multi-byte character representation cannot contain the
|
||||
* representation of a valid single-byte character.)
|
||||
*/
|
||||
|
||||
if (elen > 1)
|
||||
{
|
||||
int mblen = pg_mblen(p);
|
||||
if (mblen > 1)
|
||||
{
|
||||
/* slow, multi-byte path */
|
||||
if (afterescape)
|
||||
{
|
||||
*r++ = '\\';
|
||||
memcpy(r, p, mblen);
|
||||
r += mblen;
|
||||
afterescape = false;
|
||||
}
|
||||
else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
|
||||
{
|
||||
/* SQL99 escape character; do not send to output */
|
||||
afterescape = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We know it's a multi-byte character, so we don't need
|
||||
* to do all the comparisons to single-byte characters
|
||||
* that we do below.
|
||||
*/
|
||||
memcpy(r, p, mblen);
|
||||
r += mblen;
|
||||
}
|
||||
|
||||
p += mblen;
|
||||
plen -= mblen;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* fast path */
|
||||
if (afterescape)
|
||||
{
|
||||
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
|
||||
|
Loading…
Reference in New Issue
Block a user