mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-03-01 19:45:33 +08:00
Fix LIKE's special-case code for % followed by _. I'm not entirely sure that
this case is worth a special code path, but a special code path that gets the boundary condition wrong is definitely no good. Per bug #4821 from Andrew Gierth. In passing, clean up some minor code formatting issues (excess parentheses and blank lines in odd places). Back-patch to 8.3, where the bug was introduced.
This commit is contained in:
parent
8e4862b422
commit
6b4a3a7700
@ -9,7 +9,7 @@
|
||||
* UTF8 is a special case because we can use a much more efficient version
|
||||
* of NextChar than can be used for other multi-byte encodings.
|
||||
*
|
||||
* Before the inclusion, we need to define following macros:
|
||||
* Before the inclusion, we need to define the following macros:
|
||||
*
|
||||
* NextChar
|
||||
* MatchText - to name of function wanted
|
||||
@ -19,47 +19,46 @@
|
||||
* Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.20.2.2 2008/09/27 16:57:43 adunstan Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.20.2.3 2009/05/24 18:10:47 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
|
||||
** Rich $alz is now <rsalz@bbn.com>.
|
||||
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
|
||||
**
|
||||
** This code was shamelessly stolen from the "pql" code by myself and
|
||||
** slightly modified :)
|
||||
**
|
||||
** All references to the word "star" were replaced by "percent"
|
||||
** All references to the word "wild" were replaced by "like"
|
||||
**
|
||||
** All the nice shell RE matching stuff was replaced by just "_" and "%"
|
||||
**
|
||||
** As I don't have a copy of the SQL standard handy I wasn't sure whether
|
||||
** to leave in the '\' escape character handling.
|
||||
**
|
||||
** Keith Parks. <keith@mtcc.demon.co.uk>
|
||||
**
|
||||
** SQL92 lets you specify the escape character by saying
|
||||
** LIKE <pattern> ESCAPE <escape character>. We are a small operation
|
||||
** so we force you to use '\'. - ay 7/95
|
||||
**
|
||||
** Now we have the like_escape() function that converts patterns with
|
||||
** any specified escape character (or none at all) to the internal
|
||||
** default escape character, which is still '\'. - tgl 9/2000
|
||||
**
|
||||
** The code is rewritten to avoid requiring null-terminated strings,
|
||||
** which in turn allows us to leave out some memcpy() operations.
|
||||
** This code should be faster and take less memory, but no promises...
|
||||
** - thomas 2000-08-06
|
||||
**
|
||||
*/
|
||||
* Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
|
||||
* Rich $alz is now <rsalz@bbn.com>.
|
||||
* Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
|
||||
*
|
||||
* This code was shamelessly stolen from the "pql" code by myself and
|
||||
* slightly modified :)
|
||||
*
|
||||
* All references to the word "star" were replaced by "percent"
|
||||
* All references to the word "wild" were replaced by "like"
|
||||
*
|
||||
* All the nice shell RE matching stuff was replaced by just "_" and "%"
|
||||
*
|
||||
* As I don't have a copy of the SQL standard handy I wasn't sure whether
|
||||
* to leave in the '\' escape character handling.
|
||||
*
|
||||
* Keith Parks. <keith@mtcc.demon.co.uk>
|
||||
*
|
||||
* SQL92 lets you specify the escape character by saying
|
||||
* LIKE <pattern> ESCAPE <escape character>. We are a small operation
|
||||
* so we force you to use '\'. - ay 7/95
|
||||
*
|
||||
* Now we have the like_escape() function that converts patterns with
|
||||
* any specified escape character (or none at all) to the internal
|
||||
* default escape character, which is still '\'. - tgl 9/2000
|
||||
*
|
||||
* The code is rewritten to avoid requiring null-terminated strings,
|
||||
* which in turn allows us to leave out some memcpy() operations.
|
||||
* This code should be faster and take less memory, but no promises...
|
||||
* - thomas 2000-08-06
|
||||
*/
|
||||
|
||||
|
||||
/*--------------------
|
||||
* Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
|
||||
* Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
|
||||
*
|
||||
* LIKE_TRUE: they match
|
||||
* LIKE_FALSE: they don't match
|
||||
@ -80,19 +79,18 @@ static int
|
||||
MatchText(char *t, int tlen, char *p, int plen)
|
||||
{
|
||||
/* Fast path for match-everything pattern */
|
||||
if ((plen == 1) && (*p == '%'))
|
||||
if (plen == 1 && *p == '%')
|
||||
return LIKE_TRUE;
|
||||
|
||||
/*
|
||||
* In this loop, we advance by char when matching wildcards (and thus on
|
||||
* recursive entry to this function we are properly char-synced). On other
|
||||
* occasions it is safe to advance by byte, as the text and pattern will
|
||||
* be in lockstep. This allows us to perform all comparisons between the
|
||||
* be in lockstep. This allows us to perform all comparisons between the
|
||||
* text and pattern on a byte by byte basis, even for multi-byte
|
||||
* encodings.
|
||||
*/
|
||||
|
||||
while ((tlen > 0) && (plen > 0))
|
||||
while (tlen > 0 && plen > 0)
|
||||
{
|
||||
if (*p == '\\')
|
||||
{
|
||||
@ -111,7 +109,7 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
|
||||
/* %% is the same as % according to the SQL standard */
|
||||
/* Advance past all %'s */
|
||||
while ((plen > 0) && (*p == '%'))
|
||||
while (plen > 0 && *p == '%')
|
||||
NextByte(p, plen);
|
||||
/* Trailing percent matches everything. */
|
||||
if (plen <= 0)
|
||||
@ -122,22 +120,24 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
* rest of the pattern.
|
||||
*/
|
||||
if (*p == '_')
|
||||
|
||||
{
|
||||
/* %_ is the same as _% - avoid matching _ repeatedly */
|
||||
|
||||
NextChar(t, tlen);
|
||||
NextByte(p, plen);
|
||||
|
||||
if (tlen <= 0)
|
||||
do
|
||||
{
|
||||
return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
|
||||
}
|
||||
else if (plen <= 0)
|
||||
{
|
||||
return LIKE_FALSE;
|
||||
}
|
||||
NextChar(t, tlen);
|
||||
NextByte(p, plen);
|
||||
} while (tlen > 0 && plen > 0 && *p == '_');
|
||||
|
||||
/*
|
||||
* If we are at the end of the pattern, succeed: % followed
|
||||
* by n _'s matches any string of at least n characters, and
|
||||
* we have now found there are at least n characters.
|
||||
*/
|
||||
if (plen <= 0)
|
||||
return LIKE_TRUE;
|
||||
|
||||
/* Look for a place that matches the rest of the pattern */
|
||||
while (tlen > 0)
|
||||
{
|
||||
int matched = MatchText(t, tlen, p, plen);
|
||||
@ -150,7 +150,6 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
char firstpat = TCHAR(*p);
|
||||
|
||||
if (*p == '\\')
|
||||
@ -175,7 +174,6 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
}
|
||||
|
||||
NextChar(t, tlen);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,20 +185,20 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
}
|
||||
else if (*p == '_')
|
||||
{
|
||||
/* _ matches any single character, and we know there is one */
|
||||
NextChar(t, tlen);
|
||||
NextByte(p, plen);
|
||||
continue;
|
||||
}
|
||||
else if (TCHAR(*t) != TCHAR(*p))
|
||||
else if (TCHAR(*p) != TCHAR(*t))
|
||||
{
|
||||
/*
|
||||
* Not the single-character wildcard and no explicit match? Then
|
||||
* time to quit...
|
||||
*/
|
||||
/* non-wildcard pattern char fails to match text char */
|
||||
return LIKE_FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pattern and text match, so advance.
|
||||
*
|
||||
* It is safe to use NextByte instead of NextChar here, even for
|
||||
* multi-byte character sets, because we are not following immediately
|
||||
* after a wildcard character. If we are in the middle of a multibyte
|
||||
@ -217,9 +215,8 @@ MatchText(char *t, int tlen, char *p, int plen)
|
||||
if (tlen > 0)
|
||||
return LIKE_FALSE; /* end of pattern, but not of text */
|
||||
|
||||
/* End of input string. Do we have matching pattern remaining? */
|
||||
while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of
|
||||
* pattern */
|
||||
/* End of text string. Do we have matching pattern remaining? */
|
||||
while (plen > 0 && *p == '%') /* allow multiple %'s at end of pattern */
|
||||
NextByte(p, plen);
|
||||
|
||||
if (plen <= 0)
|
||||
@ -349,5 +346,4 @@ do_like_escape(text *pat, text *esc)
|
||||
|
||||
#ifdef MATCH_LOWER
|
||||
#undef MATCH_LOWER
|
||||
|
||||
#endif
|
||||
|
@ -826,6 +826,33 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
|
||||
f
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- test %/_ combination cases, cf bug #4821
|
||||
--
|
||||
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
||||
t | t | f
|
||||
---+---+---
|
||||
t | t | f
|
||||
(1 row)
|
||||
|
||||
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
|
||||
t | t | f
|
||||
---+---+---
|
||||
t | t | f
|
||||
(1 row)
|
||||
|
||||
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
|
||||
t | t | f
|
||||
---+---+---
|
||||
t | t | f
|
||||
(1 row)
|
||||
|
||||
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
|
||||
t | t | f
|
||||
---+---+---
|
||||
t | t | f
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- test implicit type conversion
|
||||
--
|
||||
|
@ -252,6 +252,17 @@ SELECT 'hawkeye' NOT ILIKE 'H%Eye' AS "false";
|
||||
SELECT 'Hawkeye' ILIKE 'h%' AS "true";
|
||||
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
|
||||
|
||||
--
|
||||
-- test %/_ combination cases, cf bug #4821
|
||||
--
|
||||
|
||||
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
||||
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
|
||||
|
||||
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
|
||||
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
|
||||
|
||||
|
||||
--
|
||||
-- test implicit type conversion
|
||||
--
|
||||
|
Loading…
Reference in New Issue
Block a user