mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-27 07:21:09 +08:00
Fix bogus handling of XQuery regex option flags.
The SQL spec defers to XQuery to define what the option flags are for LIKE_REGEX patterns. XQuery says that:

* 's' allows the dot character to match newlines, which by default it will not;
* 'm' allows ^ and $ to match at newlines, not only at the start/end of the whole string.

Thus, these are *not* inverses as they are for the similarly-named POSIX options, and neither one corresponds to the POSIX 'n' option. Fortunately, Spencer's library does expose these two behaviors as separately twiddlable flags, so we just have to fix the mapping from JSP flag bits to REG flag bits. I also chose to rename the symbol for 's' to DOTALL, to make it clearer that it's not the inverse of MLINE.

Also, XQuery says that if the 'q' flag "is used together with the m, s, or x flag, that flag has no effect". I read this as saying that 'q' overrides the other flags; whoever wrote our code seems to have read it backwards.

Lastly, while XQuery's 'x' flag is related to what Spencer's code does for REG_EXPANDED, it's not the same or a subset. It seems best to treat XQuery's 'x' as unimplemented for now. Maybe later we can expand our regex code to offer 'x'-style parsing as a separate option.

While at it, refactor the jsonpath code so that (a) there's only one copy of the flag transformation logic not two, and (b) the processing of flags is independent of the order in which the flags are written.

We need some documentation updates to go with this, but I'll tackle that separately.

Back-patch to v12 where this code originated.

Discussion: https://postgr.es/m/CAPpHfdvDci4iqNF9fhRkTqhe-5_8HmzeLt56drH%2B_Rv2rNRqfg@mail.gmail.com
Reference: https://www.w3.org/TR/2017/REC-xpath-functions-31-20170321/#flags
This commit is contained in:
parent
a25221f53c
commit
d5b90cd648
@ -557,7 +557,7 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
|
||||
|
||||
if (v->content.like_regex.flags & JSP_REGEX_ICASE)
|
||||
appendStringInfoChar(buf, 'i');
|
||||
if (v->content.like_regex.flags & JSP_REGEX_SLINE)
|
||||
if (v->content.like_regex.flags & JSP_REGEX_DOTALL)
|
||||
appendStringInfoChar(buf, 's');
|
||||
if (v->content.like_regex.flags & JSP_REGEX_MLINE)
|
||||
appendStringInfoChar(buf, 'm');
|
||||
|
@ -1646,34 +1646,10 @@ executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
|
||||
/* Cache regex text and converted flags. */
|
||||
if (!cxt->regex)
|
||||
{
|
||||
uint32 flags = jsp->content.like_regex.flags;
|
||||
|
||||
cxt->regex =
|
||||
cstring_to_text_with_len(jsp->content.like_regex.pattern,
|
||||
jsp->content.like_regex.patternlen);
|
||||
|
||||
/* Convert regex flags. */
|
||||
cxt->cflags = REG_ADVANCED;
|
||||
|
||||
if (flags & JSP_REGEX_ICASE)
|
||||
cxt->cflags |= REG_ICASE;
|
||||
if (flags & JSP_REGEX_MLINE)
|
||||
cxt->cflags |= REG_NEWLINE;
|
||||
if (flags & JSP_REGEX_SLINE)
|
||||
cxt->cflags &= ~REG_NEWLINE;
|
||||
if (flags & JSP_REGEX_WSPACE)
|
||||
cxt->cflags |= REG_EXPANDED;
|
||||
|
||||
/*
|
||||
* 'q' flag can work together only with 'i'. When other is specified,
|
||||
* then 'q' has no effect.
|
||||
*/
|
||||
if ((flags & JSP_REGEX_QUOTE) &&
|
||||
!(flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE)))
|
||||
{
|
||||
cxt->cflags &= ~REG_ADVANCED;
|
||||
cxt->cflags |= REG_QUOTE;
|
||||
}
|
||||
cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags);
|
||||
}
|
||||
|
||||
if (RE_compile_and_execute(cxt->regex, str->val.string.val,
|
||||
|
@ -481,42 +481,32 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
|
||||
{
|
||||
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
|
||||
int i;
|
||||
int cflags = REG_ADVANCED;
|
||||
int cflags;
|
||||
|
||||
v->value.like_regex.expr = expr;
|
||||
v->value.like_regex.pattern = pattern->val;
|
||||
v->value.like_regex.patternlen = pattern->len;
|
||||
v->value.like_regex.flags = 0;
|
||||
|
||||
/* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
|
||||
v->value.like_regex.flags = 0;
|
||||
for (i = 0; flags && i < flags->len; i++)
|
||||
{
|
||||
switch (flags->val[i])
|
||||
{
|
||||
case 'i':
|
||||
v->value.like_regex.flags |= JSP_REGEX_ICASE;
|
||||
cflags |= REG_ICASE;
|
||||
break;
|
||||
case 's':
|
||||
v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
|
||||
v->value.like_regex.flags |= JSP_REGEX_SLINE;
|
||||
cflags |= REG_NEWLINE;
|
||||
v->value.like_regex.flags |= JSP_REGEX_DOTALL;
|
||||
break;
|
||||
case 'm':
|
||||
v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
|
||||
v->value.like_regex.flags |= JSP_REGEX_MLINE;
|
||||
cflags &= ~REG_NEWLINE;
|
||||
break;
|
||||
case 'x':
|
||||
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
|
||||
cflags |= REG_EXPANDED;
|
||||
break;
|
||||
case 'q':
|
||||
v->value.like_regex.flags |= JSP_REGEX_QUOTE;
|
||||
if (!(v->value.like_regex.flags & (JSP_REGEX_MLINE | JSP_REGEX_SLINE | JSP_REGEX_WSPACE)))
|
||||
{
|
||||
cflags &= ~REG_ADVANCED;
|
||||
cflags |= REG_QUOTE;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ereport(ERROR,
|
||||
@ -528,6 +518,9 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert flags to what RE_compile_and_cache needs */
|
||||
cflags = jspConvertRegexFlags(v->value.like_regex.flags);
|
||||
|
||||
/* check regex validity */
|
||||
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
|
||||
pattern->len),
|
||||
@ -536,6 +529,49 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert from XQuery regex flags to those recognized by our regex library.
|
||||
*
|
||||
* xflags is a bitmask of JSP_REGEX_* bits; the result is a bitmask of REG_*
|
||||
* bits, starting from REG_ADVANCED. 'i' maps to REG_ICASE. If 'q' is set,
|
||||
* REG_QUOTE replaces REG_ADVANCED and the 'm', 's', 'x' bits are not examined
|
||||
* (so 'x' combined with 'q' does not raise an error). Otherwise, REG_NLSTOP
|
||||
* is set unless 's' was given, REG_NLANCH is set if 'm' was given, and 'x'
|
||||
* is rejected with ERRCODE_FEATURE_NOT_SUPPORTED.
|
||||
*/
|
||||
int
|
||||
jspConvertRegexFlags(uint32 xflags)
|
||||
{
|
||||
/* By default, XQuery is very nearly the same as Spencer's AREs */
|
||||
int cflags = REG_ADVANCED;
|
||||
|
||||
/* Ignore-case means the same thing, too, modulo locale issues */
|
||||
if (xflags & JSP_REGEX_ICASE)
|
||||
cflags |= REG_ICASE;
|
||||
|
||||
/* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored; REG_QUOTE replaces REG_ADVANCED rather than being added to it */
|
||||
if (xflags & JSP_REGEX_QUOTE)
|
||||
{
|
||||
cflags &= ~REG_ADVANCED;
|
||||
|
||||
cflags |= REG_QUOTE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Note that dotall mode is the default in POSIX, so REG_NLSTOP is requested unless 's' was given */
|
||||
if (!(xflags & JSP_REGEX_DOTALL))
|
||||
cflags |= REG_NLSTOP;
|
||||
if (xflags & JSP_REGEX_MLINE)
|
||||
cflags |= REG_NLANCH;
|
||||
|
||||
/*
|
||||
* XQuery's 'x' mode is related to Spencer's expanded mode, but it's
|
||||
* not really enough alike to justify treating JSP_REGEX_WSPACE as
|
||||
* REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
|
||||
* future we'll modify the regex library to have an option for
|
||||
* XQuery-style ignore-whitespace mode.
|
||||
*/
|
||||
if (xflags & JSP_REGEX_WSPACE)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
|
||||
}
|
||||
|
||||
return cflags;
|
||||
}
|
||||
|
||||
/*
|
||||
* jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
|
||||
* unavoidable because jsonpath_gram does not create a .h file to export its
|
||||
|
@ -88,9 +88,9 @@ typedef enum JsonPathItemType
|
||||
|
||||
/* XQuery regex mode flags for LIKE_REGEX predicate */
|
||||
#define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */
|
||||
#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */
|
||||
#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */
|
||||
#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */
|
||||
#define JSP_REGEX_DOTALL 0x02 /* s flag, dot matches newline */
|
||||
#define JSP_REGEX_MLINE 0x04 /* m flag, ^/$ match at newlines */
|
||||
#define JSP_REGEX_WSPACE 0x08 /* x flag, ignore whitespace in pattern */
|
||||
#define JSP_REGEX_QUOTE 0x10 /* q flag, no special characters */
|
||||
|
||||
/*
|
||||
@ -245,4 +245,6 @@ typedef struct JsonPathParseResult
|
||||
|
||||
extern JsonPathParseResult *parsejsonpath(const char *str, int len);
|
||||
|
||||
extern int jspConvertRegexFlags(uint32 xflags);
|
||||
|
||||
#endif
|
||||
|
@ -1592,14 +1592,14 @@ select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts wit
|
||||
1
|
||||
(2 rows)
|
||||
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
|
||||
jsonb_path_query
|
||||
------------------
|
||||
"abc"
|
||||
"abdacb"
|
||||
(2 rows)
|
||||
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
|
||||
jsonb_path_query
|
||||
------------------
|
||||
"abc"
|
||||
@ -1607,7 +1607,7 @@ select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "
|
||||
"abdacb"
|
||||
(3 rows)
|
||||
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
|
||||
jsonb_path_query
|
||||
------------------
|
||||
"abc"
|
||||
@ -1615,12 +1615,13 @@ select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "
|
||||
"adc\nabc"
|
||||
(3 rows)
|
||||
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
|
||||
jsonb_path_query
|
||||
------------------
|
||||
"abc"
|
||||
"abdacb"
|
||||
(2 rows)
|
||||
"ab\nadc"
|
||||
(3 rows)
|
||||
|
||||
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")');
|
||||
jsonb_path_query
|
||||
|
@ -442,17 +442,15 @@ select '$ ? (@ like_regex "pattern" flag "is")'::jsonpath;
|
||||
(1 row)
|
||||
|
||||
select '$ ? (@ like_regex "pattern" flag "isim")'::jsonpath;
|
||||
jsonpath
|
||||
--------------------------------------
|
||||
$?(@ like_regex "pattern" flag "im")
|
||||
jsonpath
|
||||
---------------------------------------
|
||||
$?(@ like_regex "pattern" flag "ism")
|
||||
(1 row)
|
||||
|
||||
select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath;
|
||||
jsonpath
|
||||
--------------------------------------
|
||||
$?(@ like_regex "pattern" flag "sx")
|
||||
(1 row)
|
||||
|
||||
ERROR: XQuery "x" flag (expanded regular expressions) is not implemented
|
||||
LINE 1: select '$ ? (@ like_regex "pattern" flag "xsms")'::jsonpath;
|
||||
^
|
||||
select '$ ? (@ like_regex "pattern" flag "q")'::jsonpath;
|
||||
jsonpath
|
||||
-------------------------------------
|
||||
@ -466,9 +464,9 @@ select '$ ? (@ like_regex "pattern" flag "iq")'::jsonpath;
|
||||
(1 row)
|
||||
|
||||
select '$ ? (@ like_regex "pattern" flag "smixq")'::jsonpath;
|
||||
jsonpath
|
||||
----------------------------------------
|
||||
$?(@ like_regex "pattern" flag "imxq")
|
||||
jsonpath
|
||||
-----------------------------------------
|
||||
$?(@ like_regex "pattern" flag "ismxq")
|
||||
(1 row)
|
||||
|
||||
select '$ ? (@ like_regex "pattern" flag "a")'::jsonpath;
|
||||
|
@ -335,10 +335,10 @@ select jsonb_path_query('[[null, 1, "abc", "abcabc"]]', 'lax $ ? (@[*] starts wi
|
||||
select jsonb_path_query('[[null, 1, "abd", "abdabc"]]', 'lax $ ? ((@[*] starts with "abc") is unknown)');
|
||||
select jsonb_path_query('[null, 1, "abd", "abdabc"]', 'lax $[*] ? ((@ starts with "abc") is unknown)');
|
||||
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^a b.* c " flag "ix")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "adc\nabc", "babc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "i")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "m")');
|
||||
select jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c" flag "s")');
|
||||
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "q")');
|
||||
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "a\\b" flag "")');
|
||||
select jsonb_path_query('[null, 1, "a\b", "a\\b", "^a\\b$"]', 'lax $[*] ? (@ like_regex "^a\\b$" flag "q")');
|
||||
|
Loading…
Reference in New Issue
Block a user