Fix similar_escape() to convert parentheses to non-capturing style.

This is needed to avoid unwanted interference with SUBSTRING behavior,
as per bug #5257 from Roman Kononov.  Also, add some basic intelligence
about character classes (bracket expressions) since we now have several
behaviors that aren't appropriate inside a character class.

As with the previous patch in this area, I'm reluctant to back-patch
since it might affect applications that are relying on the prior
behavior.
This commit is contained in:
Tom Lane 2010-01-02 20:59:16 +00:00
parent 2b59274c09
commit e15d53e7a4

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.85 2010/01/02 16:57:55 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.86 2010/01/02 20:59:16 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@ -640,6 +640,7 @@ similar_escape(PG_FUNCTION_ARGS)
int plen,
elen;
bool afterescape = false;
bool incharclass = false;
int nquotes = 0;
/* This function is not strict, so must test explicitly */
@ -682,10 +683,10 @@ similar_escape(PG_FUNCTION_ARGS)
*/
/*
* We need room for the prefix/postfix plus as many as 2 output bytes per
* input byte
* We need room for the prefix/postfix plus as many as 3 output bytes per
* input byte; since the input is at most 1GB this can't overflow
*/
result = (text *) palloc(VARHDRSZ + 6 + 2 * plen);
result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
r = VARDATA(result);
*r++ = '^';
@ -699,7 +700,7 @@ similar_escape(PG_FUNCTION_ARGS)
if (afterescape)
{
if (pchar == '"') /* for SUBSTRING patterns */
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
else
{
@ -713,6 +714,19 @@ similar_escape(PG_FUNCTION_ARGS)
/* SQL99 escape character; do not send to output */
afterescape = true;
}
else if (incharclass)
{
if (pchar == '\\')
*r++ = '\\';
*r++ = pchar;
if (pchar == ']')
incharclass = false;
}
else if (pchar == '[')
{
*r++ = pchar;
incharclass = true;
}
else if (pchar == '%')
{
*r++ = '.';
@ -720,6 +734,13 @@ similar_escape(PG_FUNCTION_ARGS)
}
else if (pchar == '_')
*r++ = '.';
else if (pchar == '(')
{
/* convert to non-capturing parenthesis */
*r++ = '(';
*r++ = '?';
*r++ = ':';
}
else if (pchar == '\\' || pchar == '.' ||
pchar == '^' || pchar == '$')
{