mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-09 08:10:09 +08:00
Make some minor improvements in the regex code.
Push some hopefully-uncontroversial bits extracted from an upcoming patch series, to remove non-relevant clutter from the main patches. In compact(), return immediately after setting REG_ASSERT error; continuing the loop would just lead to assertion failure below. (Ask me how I know.) In parseqatom(), remove assertion that moresubs() did its job. When moresubs actually did its job, this is redundant with that function's final assert; but when it failed on OOM, this is an assertion crash. We could avoid the crash by adding a NOERR() check before the assertion, but it seems better to subtract code than add it. (Note that there's a NOERR exit a few lines further down, and nothing else between here and there requires moresubs to have succeeded. So we don't really need an extra error exit.) This is a live bug in assert-enabled builds, but given the very low likelihood of OOM in moresub's tiny allocation, I don't think it's worth back-patching. On the other hand, it seems worthwhile to add an assertion that our intended v->subs[subno] target is still null by the time we are ready to insert into it, since there's a recursion in between. In pg_regexec, ensure we fflush any debug output on the way out, and try to make MDEBUG messages more uniform and helpful. (In particular, ensure that all of them are prefixed with the subre's id number, so one can match up entry and exit reports.) Add some test cases in test_regex to improve coverage of lookahead and lookbehind constraints. Adding these now is mainly to establish that this is indeed the existing behavior. Discussion: https://postgr.es/m/1340281.1613018383@sss.pgh.pa.us
This commit is contained in:
parent
f40c6969d0
commit
4e703d6719
@ -2902,7 +2902,7 @@ compact(struct nfa *nfa,
|
||||
break;
|
||||
default:
|
||||
NERR(REG_ASSERT);
|
||||
break;
|
||||
return;
|
||||
}
|
||||
carcsort(first, ca - first);
|
||||
ca->co = COLORLESS;
|
||||
|
@ -942,7 +942,6 @@ parseqatom(struct vars *v,
|
||||
subno = v->nsubexp;
|
||||
if ((size_t) subno >= v->nsubs)
|
||||
moresubs(v, subno);
|
||||
assert((size_t) subno < v->nsubs);
|
||||
}
|
||||
else
|
||||
atomtype = PLAIN; /* something that's not '(' */
|
||||
@ -960,6 +959,7 @@ parseqatom(struct vars *v,
|
||||
NOERR();
|
||||
if (cap)
|
||||
{
|
||||
assert(v->subs[subno] == NULL);
|
||||
v->subs[subno] = atom;
|
||||
t = subre(v, '(', atom->flags | CAP, lp, rp);
|
||||
NOERR();
|
||||
|
@ -324,6 +324,11 @@ cleanup:
|
||||
if (v->lblastcp != NULL)
|
||||
FREE(v->lblastcp);
|
||||
|
||||
#ifdef REG_DEBUG
|
||||
if (v->eflags & (REG_FTRACE | REG_MTRACE))
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
return st;
|
||||
}
|
||||
|
||||
@ -668,7 +673,7 @@ subset(struct vars *v,
|
||||
if ((size_t) n >= v->nmatch)
|
||||
return;
|
||||
|
||||
MDEBUG(("setting %d\n", n));
|
||||
MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
|
||||
v->pmatch[n].rm_so = OFF(begin);
|
||||
v->pmatch[n].rm_eo = OFF(end);
|
||||
}
|
||||
@ -697,7 +702,7 @@ cdissect(struct vars *v,
|
||||
int er;
|
||||
|
||||
assert(t != NULL);
|
||||
MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
|
||||
MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
|
||||
|
||||
/* handy place to check for operation cancel */
|
||||
if (CANCEL_REQUESTED(v->re))
|
||||
@ -779,14 +784,14 @@ ccondissect(struct vars *v,
|
||||
NOERR();
|
||||
d2 = getsubdfa(v, t->right);
|
||||
NOERR();
|
||||
MDEBUG(("cconcat %d\n", t->id));
|
||||
MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
|
||||
|
||||
/* pick a tentative midpoint */
|
||||
mid = longest(v, d, begin, end, (int *) NULL);
|
||||
NOERR();
|
||||
if (mid == NULL)
|
||||
return REG_NOMATCH;
|
||||
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
|
||||
MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
|
||||
|
||||
/* iterate until satisfaction or failure */
|
||||
for (;;)
|
||||
@ -801,7 +806,7 @@ ccondissect(struct vars *v,
|
||||
if (er == REG_OKAY)
|
||||
{
|
||||
/* satisfaction */
|
||||
MDEBUG(("successful\n"));
|
||||
MDEBUG(("%d: successful\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
}
|
||||
@ -814,7 +819,7 @@ ccondissect(struct vars *v,
|
||||
if (mid == begin)
|
||||
{
|
||||
/* all possibilities exhausted */
|
||||
MDEBUG(("%d no midpoint\n", t->id));
|
||||
MDEBUG(("%d: no midpoint\n", t->id));
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
mid = longest(v, d, begin, mid - 1, (int *) NULL);
|
||||
@ -822,7 +827,7 @@ ccondissect(struct vars *v,
|
||||
if (mid == NULL)
|
||||
{
|
||||
/* failed to find a new one */
|
||||
MDEBUG(("%d failed midpoint\n", t->id));
|
||||
MDEBUG(("%d: failed midpoint\n", t->id));
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
|
||||
@ -857,14 +862,14 @@ crevcondissect(struct vars *v,
|
||||
NOERR();
|
||||
d2 = getsubdfa(v, t->right);
|
||||
NOERR();
|
||||
MDEBUG(("crevcon %d\n", t->id));
|
||||
MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
|
||||
|
||||
/* pick a tentative midpoint */
|
||||
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
|
||||
NOERR();
|
||||
if (mid == NULL)
|
||||
return REG_NOMATCH;
|
||||
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
|
||||
MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
|
||||
|
||||
/* iterate until satisfaction or failure */
|
||||
for (;;)
|
||||
@ -879,7 +884,7 @@ crevcondissect(struct vars *v,
|
||||
if (er == REG_OKAY)
|
||||
{
|
||||
/* satisfaction */
|
||||
MDEBUG(("successful\n"));
|
||||
MDEBUG(("%d: successful\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
}
|
||||
@ -892,7 +897,7 @@ crevcondissect(struct vars *v,
|
||||
if (mid == end)
|
||||
{
|
||||
/* all possibilities exhausted */
|
||||
MDEBUG(("%d no midpoint\n", t->id));
|
||||
MDEBUG(("%d: no midpoint\n", t->id));
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
|
||||
@ -900,7 +905,7 @@ crevcondissect(struct vars *v,
|
||||
if (mid == NULL)
|
||||
{
|
||||
/* failed to find a new one */
|
||||
MDEBUG(("%d failed midpoint\n", t->id));
|
||||
MDEBUG(("%d: failed midpoint\n", t->id));
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
|
||||
@ -935,7 +940,8 @@ cbrdissect(struct vars *v,
|
||||
assert(n >= 0);
|
||||
assert((size_t) n < v->nmatch);
|
||||
|
||||
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
|
||||
MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
|
||||
LOFF(begin), LOFF(end)));
|
||||
|
||||
/* get the backreferenced string */
|
||||
if (v->pmatch[n].rm_so == -1)
|
||||
@ -952,7 +958,7 @@ cbrdissect(struct vars *v,
|
||||
*/
|
||||
if (begin == end && min <= max)
|
||||
{
|
||||
MDEBUG(("cbackref matched trivially\n"));
|
||||
MDEBUG(("%d: backref matched trivially\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
return REG_NOMATCH;
|
||||
@ -962,7 +968,7 @@ cbrdissect(struct vars *v,
|
||||
/* matches only if zero repetitions are okay */
|
||||
if (min == 0)
|
||||
{
|
||||
MDEBUG(("cbackref matched trivially\n"));
|
||||
MDEBUG(("%d: backref matched trivially\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
return REG_NOMATCH;
|
||||
@ -989,7 +995,7 @@ cbrdissect(struct vars *v,
|
||||
p += brlen;
|
||||
}
|
||||
|
||||
MDEBUG(("cbackref matched\n"));
|
||||
MDEBUG(("%d: backref matched\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
|
||||
@ -1011,13 +1017,13 @@ caltdissect(struct vars *v,
|
||||
assert(t->op == '|');
|
||||
assert(t->left != NULL && t->left->cnfa.nstates > 0);
|
||||
|
||||
MDEBUG(("calt n%d\n", t->id));
|
||||
MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
|
||||
|
||||
d = getsubdfa(v, t->left);
|
||||
NOERR();
|
||||
if (longest(v, d, begin, end, (int *) NULL) == end)
|
||||
{
|
||||
MDEBUG(("calt matched\n"));
|
||||
MDEBUG(("%d: caltdissect matched\n", t->id));
|
||||
er = cdissect(v, t->left, begin, end);
|
||||
if (er != REG_NOMATCH)
|
||||
return er;
|
||||
@ -1054,6 +1060,8 @@ citerdissect(struct vars *v,
|
||||
assert(!(t->left->flags & SHORTER));
|
||||
assert(begin <= end);
|
||||
|
||||
MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
|
||||
|
||||
/*
|
||||
* For the moment, assume the minimum number of matches is 1. If zero
|
||||
* matches are allowed, and the target string is empty, we are allowed to
|
||||
@ -1092,7 +1100,6 @@ citerdissect(struct vars *v,
|
||||
FREE(endpts);
|
||||
return v->err;
|
||||
}
|
||||
MDEBUG(("citer %d\n", t->id));
|
||||
|
||||
/*
|
||||
* Our strategy is to first find a set of sub-match endpoints that are
|
||||
@ -1182,7 +1189,7 @@ citerdissect(struct vars *v,
|
||||
if (i > k)
|
||||
{
|
||||
/* satisfaction */
|
||||
MDEBUG(("%d successful\n", t->id));
|
||||
MDEBUG(("%d: successful\n", t->id));
|
||||
FREE(endpts);
|
||||
return REG_OKAY;
|
||||
}
|
||||
@ -1223,11 +1230,11 @@ backtrack:
|
||||
*/
|
||||
if (t->min == 0 && begin == end)
|
||||
{
|
||||
MDEBUG(("%d allowing zero matches\n", t->id));
|
||||
MDEBUG(("%d: allowing zero matches\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
|
||||
MDEBUG(("%d failed\n", t->id));
|
||||
MDEBUG(("%d: failed\n", t->id));
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
|
||||
@ -1255,6 +1262,8 @@ creviterdissect(struct vars *v,
|
||||
assert(t->left->flags & SHORTER);
|
||||
assert(begin <= end);
|
||||
|
||||
MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
|
||||
|
||||
/*
|
||||
* If zero matches are allowed, and target string is empty, just declare
|
||||
* victory. OTOH, if target string isn't empty, zero matches can't work
|
||||
@ -1264,7 +1273,10 @@ creviterdissect(struct vars *v,
|
||||
if (min_matches <= 0)
|
||||
{
|
||||
if (begin == end)
|
||||
{
|
||||
MDEBUG(("%d: allowing zero matches\n", t->id));
|
||||
return REG_OKAY;
|
||||
}
|
||||
min_matches = 1;
|
||||
}
|
||||
|
||||
@ -1293,7 +1305,6 @@ creviterdissect(struct vars *v,
|
||||
FREE(endpts);
|
||||
return v->err;
|
||||
}
|
||||
MDEBUG(("creviter %d\n", t->id));
|
||||
|
||||
/*
|
||||
* Our strategy is to first find a set of sub-match endpoints that are
|
||||
@ -1389,7 +1400,7 @@ creviterdissect(struct vars *v,
|
||||
if (i > k)
|
||||
{
|
||||
/* satisfaction */
|
||||
MDEBUG(("%d successful\n", t->id));
|
||||
MDEBUG(("%d: successful\n", t->id));
|
||||
FREE(endpts);
|
||||
return REG_OKAY;
|
||||
}
|
||||
@ -1415,7 +1426,7 @@ backtrack:
|
||||
}
|
||||
|
||||
/* all possibilities exhausted */
|
||||
MDEBUG(("%d failed\n", t->id));
|
||||
MDEBUG(("%d: failed\n", t->id));
|
||||
FREE(endpts);
|
||||
return REG_NOMATCH;
|
||||
}
|
||||
|
@ -3315,6 +3315,21 @@ select * from test_regex('(?=b)b', 'a', 'HP');
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
(1 row)
|
||||
|
||||
-- expectMatch 23.9 HP ...(?!.) abcde cde
|
||||
select * from test_regex('...(?!.)', 'abcde', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
{cde}
|
||||
(2 rows)
|
||||
|
||||
-- expectNomatch 23.10 HP ...(?=.) abc
|
||||
select * from test_regex('...(?=.)', 'abc', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
(1 row)
|
||||
|
||||
-- Postgres addition: lookbehind constraints
|
||||
-- expectMatch 23.11 HPN (?<=a)b* ab b
|
||||
select * from test_regex('(?<=a)b*', 'ab', 'HPN');
|
||||
@ -3376,6 +3391,39 @@ select * from test_regex('(?<=b)b', 'b', 'HP');
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
(1 row)
|
||||
|
||||
-- expectMatch 23.19 HP (?<=.).. abcde bc
|
||||
select * from test_regex('(?<=.)..', 'abcde', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
{bc}
|
||||
(2 rows)
|
||||
|
||||
-- expectMatch 23.20 HP (?<=..)a* aaabb a
|
||||
select * from test_regex('(?<=..)a*', 'aaabb', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
{a}
|
||||
(2 rows)
|
||||
|
||||
-- expectMatch 23.21 HP (?<=..)b* aaabb {}
|
||||
-- Note: empty match here is correct, it matches after the first 2 characters
|
||||
select * from test_regex('(?<=..)b*', 'aaabb', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
{""}
|
||||
(2 rows)
|
||||
|
||||
-- expectMatch 23.22 HP (?<=..)b+ aaabb bb
|
||||
select * from test_regex('(?<=..)b+', 'aaabb', 'HP');
|
||||
test_regex
|
||||
-----------------------------------
|
||||
{0,REG_ULOOKAROUND,REG_UNONPOSIX}
|
||||
{bb}
|
||||
(2 rows)
|
||||
|
||||
-- doing 24 "non-greedy quantifiers"
|
||||
-- expectMatch 24.1 PT ab+? abb ab
|
||||
select * from test_regex('ab+?', 'abb', 'PT');
|
||||
|
@ -1049,6 +1049,10 @@ select * from test_regex('a(?!b)b*', 'a', 'HP');
|
||||
select * from test_regex('(?=b)b', 'b', 'HP');
|
||||
-- expectNomatch 23.8 HP (?=b)b a
|
||||
select * from test_regex('(?=b)b', 'a', 'HP');
|
||||
-- expectMatch 23.9 HP ...(?!.) abcde cde
|
||||
select * from test_regex('...(?!.)', 'abcde', 'HP');
|
||||
-- expectNomatch 23.10 HP ...(?=.) abc
|
||||
select * from test_regex('...(?=.)', 'abc', 'HP');
|
||||
|
||||
-- Postgres addition: lookbehind constraints
|
||||
|
||||
@ -1068,6 +1072,15 @@ select * from test_regex('a(?<!b)b*', 'a', 'HP');
|
||||
select * from test_regex('(?<=b)b', 'bb', 'HP');
|
||||
-- expectNomatch 23.18 HP (?<=b)b b
|
||||
select * from test_regex('(?<=b)b', 'b', 'HP');
|
||||
-- expectMatch 23.19 HP (?<=.).. abcde bc
|
||||
select * from test_regex('(?<=.)..', 'abcde', 'HP');
|
||||
-- expectMatch 23.20 HP (?<=..)a* aaabb a
|
||||
select * from test_regex('(?<=..)a*', 'aaabb', 'HP');
|
||||
-- expectMatch 23.21 HP (?<=..)b* aaabb {}
|
||||
-- Note: empty match here is correct, it matches after the first 2 characters
|
||||
select * from test_regex('(?<=..)b*', 'aaabb', 'HP');
|
||||
-- expectMatch 23.22 HP (?<=..)b+ aaabb bb
|
||||
select * from test_regex('(?<=..)b+', 'aaabb', 'HP');
|
||||
|
||||
-- doing 24 "non-greedy quantifiers"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user