mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
Allow multi-byte characters as escape in SIMILAR TO and SUBSTRING.
Previously, only a single-byte character was allowed as an escape. This patch allows it to be a multi-byte character, though it still must be a single character. Reviewed by Heikki Linnakangas and Tom Lane.
This commit is contained in:
parent
1c9701cfe5
commit
8167a3883a
@ -688,12 +688,17 @@ similar_escape(PG_FUNCTION_ARGS)
|
|||||||
elen = VARSIZE_ANY_EXHDR(esc_text);
|
elen = VARSIZE_ANY_EXHDR(esc_text);
|
||||||
if (elen == 0)
|
if (elen == 0)
|
||||||
e = NULL; /* no escape character */
|
e = NULL; /* no escape character */
|
||||||
else if (elen != 1)
|
else
|
||||||
|
{
|
||||||
|
int escape_mblen = pg_mbstrlen_with_len(e, elen);
|
||||||
|
|
||||||
|
if (escape_mblen > 1)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
||||||
errmsg("invalid escape string"),
|
errmsg("invalid escape string"),
|
||||||
errhint("Escape string must be empty or one character.")));
|
errhint("Escape string must be empty or one character.")));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*----------
|
/*----------
|
||||||
* We surround the transformed input string with
|
* We surround the transformed input string with
|
||||||
@ -724,6 +729,54 @@ similar_escape(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
char pchar = *p;
|
char pchar = *p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If both the escape character and the current character from the
|
||||||
|
* pattern are multi-byte, we need to take the slow path.
|
||||||
|
*
|
||||||
|
* But if one of them is single-byte, we can process the pattern one
|
||||||
|
* byte at a time, ignoring multi-byte characters. (This works
|
||||||
|
* because all server-encodings have the property that a valid
|
||||||
|
* multi-byte character representation cannot contain the
|
||||||
|
* representation of a valid single-byte character.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (elen > 1)
|
||||||
|
{
|
||||||
|
int mblen = pg_mblen(p);
|
||||||
|
if (mblen > 1)
|
||||||
|
{
|
||||||
|
/* slow, multi-byte path */
|
||||||
|
if (afterescape)
|
||||||
|
{
|
||||||
|
*r++ = '\\';
|
||||||
|
memcpy(r, p, mblen);
|
||||||
|
r += mblen;
|
||||||
|
afterescape = false;
|
||||||
|
}
|
||||||
|
else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
|
||||||
|
{
|
||||||
|
/* SQL99 escape character; do not send to output */
|
||||||
|
afterescape = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We know it's a multi-byte character, so we don't need
|
||||||
|
* to do all the comparisons to single-byte characters
|
||||||
|
* that we do below.
|
||||||
|
*/
|
||||||
|
memcpy(r, p, mblen);
|
||||||
|
r += mblen;
|
||||||
|
}
|
||||||
|
|
||||||
|
p += mblen;
|
||||||
|
plen -= mblen;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fast path */
|
||||||
if (afterescape)
|
if (afterescape)
|
||||||
{
|
{
|
||||||
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
|
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
|
||||||
|
Loading…
Reference in New Issue
Block a user