mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
Teach regex_fixed_prefix() the correct handling of advanced regex
escapes --- they aren't simply quoted characters. Problem noted by Antti Salmela. Also fix problem with incorrect handling of multibyte characters when followed by a quantifier.
This commit is contained in:
parent
4e91824b94
commit
76e8a87f15
@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.168 2004/12/02 02:45:07 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -3459,6 +3459,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
char *match;
|
||||
int pos,
|
||||
match_pos,
|
||||
prev_pos,
|
||||
prev_match_pos,
|
||||
paren_depth;
|
||||
char *patt;
|
||||
char *rest;
|
||||
@ -3519,11 +3521,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
|
||||
/* OK, allocate space for pattern */
|
||||
match = palloc(strlen(patt) + 1);
|
||||
match_pos = 0;
|
||||
prev_match_pos = match_pos = 0;
|
||||
|
||||
/* note start at pos 1 to skip leading ^ */
|
||||
for (pos = 1; patt[pos]; pos++)
|
||||
for (prev_pos = pos = 1; patt[pos]; )
|
||||
{
|
||||
int len;
|
||||
|
||||
/*
|
||||
* Check for characters that indicate multiple possible matches
|
||||
* here. XXX I suspect isalpha() is not an adequately
|
||||
@ -3537,6 +3541,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
(case_insensitive && isalpha((unsigned char) patt[pos])))
|
||||
break;
|
||||
|
||||
/*
|
||||
* In AREs, backslash followed by alphanumeric is an escape, not
|
||||
* a quoted character. Must treat it as having multiple possible
|
||||
* matches.
|
||||
*/
|
||||
if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
|
||||
break;
|
||||
|
||||
/*
|
||||
* Check for quantifiers. Except for +, this means the preceding
|
||||
* character is optional, so we must remove it from the prefix
|
||||
@ -3546,14 +3558,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
patt[pos] == '?' ||
|
||||
patt[pos] == '{')
|
||||
{
|
||||
if (match_pos > 0)
|
||||
match_pos--;
|
||||
pos--;
|
||||
match_pos = prev_match_pos;
|
||||
pos = prev_pos;
|
||||
break;
|
||||
}
|
||||
if (patt[pos] == '+')
|
||||
{
|
||||
pos--;
|
||||
pos = prev_pos;
|
||||
break;
|
||||
}
|
||||
if (patt[pos] == '\\')
|
||||
@ -3563,7 +3574,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
||||
if (patt[pos] == '\0')
|
||||
break;
|
||||
}
|
||||
match[match_pos++] = patt[pos];
|
||||
/* save position in case we need to back up on next loop cycle */
|
||||
prev_match_pos = match_pos;
|
||||
prev_pos = pos;
|
||||
/* must use encoding-aware processing here */
|
||||
len = pg_mblen(&patt[pos]);
|
||||
memcpy(&match[match_pos], &patt[pos], len);
|
||||
match_pos += len;
|
||||
pos += len;
|
||||
}
|
||||
|
||||
match[match_pos] = '\0';
|
||||
|
Loading…
Reference in New Issue
Block a user