mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
58640f37d9
The t_iseq() macro does not need to be guarded by a character length check (at least when the comparison value is an ASCII character, as its documentation requires). Some portions of contrib/ltree hadn't read that memo, so simplify them. The last change in gettoken_query, - else if (charlen == 1 && !t_iseq(state->buf, ' ')) + else if (!t_iseq(state->buf, ' ')) looks like it's actually a bug fix: I doubt that the intention was to silently ignore multibyte characters as if they were whitespace. I'm not tempted to back-patch though, because this will have the effect of tightening what is allowed in ltxtquery strings. Discussion: https://postgr.es/m/2548310.1664999615@sss.pgh.pa.us
282 lines
6.0 KiB
C
282 lines
6.0 KiB
C
/*
|
|
* op function for ltree and lquery
|
|
* Teodor Sigaev <teodor@stack.net>
|
|
* contrib/ltree/lquery_op.c
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "catalog/pg_collation.h"
|
|
#include "ltree.h"
|
|
#include "miscadmin.h"
|
|
#include "utils/array.h"
|
|
#include "utils/formatting.h"
|
|
|
|
PG_FUNCTION_INFO_V1(ltq_regex);
|
|
PG_FUNCTION_INFO_V1(ltq_rregex);
|
|
|
|
PG_FUNCTION_INFO_V1(lt_q_regex);
|
|
PG_FUNCTION_INFO_V1(lt_q_rregex);
|
|
|
|
#define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
|
|
|
|
static char *
|
|
getlexeme(char *start, char *end, int *len)
|
|
{
|
|
char *ptr;
|
|
|
|
while (start < end && t_iseq(start, '_'))
|
|
start += pg_mblen(start);
|
|
|
|
ptr = start;
|
|
if (ptr >= end)
|
|
return NULL;
|
|
|
|
while (ptr < end && !t_iseq(ptr, '_'))
|
|
ptr += pg_mblen(ptr);
|
|
|
|
*len = ptr - start;
|
|
return start;
|
|
}
|
|
|
|
bool
|
|
compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
|
|
{
|
|
char *endt = t->name + t->len;
|
|
char *endq = qn + len;
|
|
char *tn;
|
|
int lent,
|
|
lenq;
|
|
bool isok;
|
|
|
|
while ((qn = getlexeme(qn, endq, &lenq)) != NULL)
|
|
{
|
|
tn = t->name;
|
|
isok = false;
|
|
while ((tn = getlexeme(tn, endt, &lent)) != NULL)
|
|
{
|
|
if ((lent == lenq || (lent > lenq && anyend)) &&
|
|
(*cmpptr) (qn, tn, lenq) == 0)
|
|
{
|
|
|
|
isok = true;
|
|
break;
|
|
}
|
|
tn += lent;
|
|
}
|
|
|
|
if (!isok)
|
|
return false;
|
|
qn += lenq;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int
|
|
ltree_strncasecmp(const char *a, const char *b, size_t s)
|
|
{
|
|
char *al = str_tolower(a, s, DEFAULT_COLLATION_OID);
|
|
char *bl = str_tolower(b, s, DEFAULT_COLLATION_OID);
|
|
int res;
|
|
|
|
res = strncmp(al, bl, s);
|
|
|
|
pfree(al);
|
|
pfree(bl);
|
|
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
* See if an lquery_level matches an ltree_level
|
|
*
|
|
* This accounts for all flags including LQL_NOT, but does not
|
|
* consider repetition counts.
|
|
*/
|
|
static bool
|
|
checkLevel(lquery_level *curq, ltree_level *curt)
|
|
{
|
|
lquery_variant *curvar = LQL_FIRST(curq);
|
|
bool success;
|
|
|
|
success = (curq->flag & LQL_NOT) ? false : true;
|
|
|
|
/* numvar == 0 means '*' which matches anything */
|
|
if (curq->numvar == 0)
|
|
return success;
|
|
|
|
for (int i = 0; i < curq->numvar; i++)
|
|
{
|
|
int (*cmpptr) (const char *, const char *, size_t);
|
|
|
|
cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
|
|
|
|
if (curvar->flag & LVAR_SUBLEXEME)
|
|
{
|
|
if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
|
|
(curvar->flag & LVAR_ANYEND)))
|
|
return success;
|
|
}
|
|
else if ((curvar->len == curt->len ||
|
|
(curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
|
|
(*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
|
|
return success;
|
|
|
|
curvar = LVAR_NEXT(curvar);
|
|
}
|
|
return !success;
|
|
}
|
|
|
|
/*
|
|
* Try to match an lquery (of qlen items) to an ltree (of tlen items)
|
|
*/
|
|
static bool
|
|
checkCond(lquery_level *curq, int qlen,
|
|
ltree_level *curt, int tlen)
|
|
{
|
|
/* Since this function recurses, it could be driven to stack overflow */
|
|
check_stack_depth();
|
|
|
|
/* Pathological patterns could take awhile, too */
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
/* Loop while we have query items to consider */
|
|
while (qlen > 0)
|
|
{
|
|
int low,
|
|
high;
|
|
lquery_level *nextq;
|
|
|
|
/*
|
|
* Get min and max repetition counts for this query item, dealing with
|
|
* the backwards-compatibility hack that the low/high fields aren't
|
|
* meaningful for non-'*' items unless LQL_COUNT is set.
|
|
*/
|
|
if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
|
|
low = curq->low, high = curq->high;
|
|
else
|
|
low = high = 1;
|
|
|
|
/*
|
|
* We may limit "high" to the remaining text length; this avoids
|
|
* separate tests below.
|
|
*/
|
|
if (high > tlen)
|
|
high = tlen;
|
|
|
|
/* Fail if a match of required number of items is impossible */
|
|
if (high < low)
|
|
return false;
|
|
|
|
/*
|
|
* Recursively check the rest of the pattern against each possible
|
|
* start point following some of this item's match(es).
|
|
*/
|
|
nextq = LQL_NEXT(curq);
|
|
qlen--;
|
|
|
|
for (int matchcnt = 0; matchcnt < high; matchcnt++)
|
|
{
|
|
/*
|
|
* If we've consumed an acceptable number of matches of this item,
|
|
* and the rest of the pattern matches beginning here, we're good.
|
|
*/
|
|
if (matchcnt >= low && checkCond(nextq, qlen, curt, tlen))
|
|
return true;
|
|
|
|
/*
|
|
* Otherwise, try to match one more text item to this query item.
|
|
*/
|
|
if (!checkLevel(curq, curt))
|
|
return false;
|
|
|
|
curt = LEVEL_NEXT(curt);
|
|
tlen--;
|
|
}
|
|
|
|
/*
|
|
* Once we've consumed "high" matches, we can succeed only if the rest
|
|
* of the pattern matches beginning here. Loop around (if you prefer,
|
|
* think of this as tail recursion).
|
|
*/
|
|
curq = nextq;
|
|
}
|
|
|
|
/*
|
|
* Once we're out of query items, we match only if there's no remaining
|
|
* text either.
|
|
*/
|
|
return (tlen == 0);
|
|
}
|
|
|
|
Datum
|
|
ltq_regex(PG_FUNCTION_ARGS)
|
|
{
|
|
ltree *tree = PG_GETARG_LTREE_P(0);
|
|
lquery *query = PG_GETARG_LQUERY_P(1);
|
|
bool res;
|
|
|
|
res = checkCond(LQUERY_FIRST(query), query->numlevel,
|
|
LTREE_FIRST(tree), tree->numlevel);
|
|
|
|
PG_FREE_IF_COPY(tree, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_BOOL(res);
|
|
}
|
|
|
|
Datum
|
|
ltq_rregex(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall2(ltq_regex,
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(0)
|
|
));
|
|
}
|
|
|
|
Datum
|
|
lt_q_regex(PG_FUNCTION_ARGS)
|
|
{
|
|
ltree *tree = PG_GETARG_LTREE_P(0);
|
|
ArrayType *_query = PG_GETARG_ARRAYTYPE_P(1);
|
|
lquery *query = (lquery *) ARR_DATA_PTR(_query);
|
|
bool res = false;
|
|
int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
|
|
|
|
if (ARR_NDIM(_query) > 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array must be one-dimensional")));
|
|
if (array_contains_nulls(_query))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
|
errmsg("array must not contain nulls")));
|
|
|
|
while (num > 0)
|
|
{
|
|
if (DatumGetBool(DirectFunctionCall2(ltq_regex,
|
|
PointerGetDatum(tree), PointerGetDatum(query))))
|
|
{
|
|
|
|
res = true;
|
|
break;
|
|
}
|
|
num--;
|
|
query = NEXTVAL(query);
|
|
}
|
|
|
|
PG_FREE_IF_COPY(tree, 0);
|
|
PG_FREE_IF_COPY(_query, 1);
|
|
PG_RETURN_BOOL(res);
|
|
}
|
|
|
|
Datum
|
|
lt_q_rregex(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall2(lt_q_regex,
|
|
PG_GETARG_DATUM(1),
|
|
PG_GETARG_DATUM(0)
|
|
));
|
|
}
|