postgresql/contrib/ltree/ltree_op.c

667 lines
13 KiB
C
Raw Normal View History

/*
 * op function for ltree
 * Teodor Sigaev <teodor@stack.net>
 * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.16 2007/02/28 22:44:38 tgl Exp $
 */
#include "ltree.h"
#include <ctype.h>
#include "catalog/pg_statistic.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
PG_MODULE_MAGIC;

/*
 * Version-1 call-convention registrations for every SQL-callable function
 * defined in this file, each followed by its prototype where this file
 * declares one.
 */

/* compare functions */
PG_FUNCTION_INFO_V1(ltree_cmp);
Datum		ltree_cmp(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_lt);
Datum		ltree_lt(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_le);
Datum		ltree_le(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_eq);
Datum		ltree_eq(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_ne);
Datum		ltree_ne(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_ge);
Datum		ltree_ge(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_gt);
Datum		ltree_gt(PG_FUNCTION_ARGS);

/* structural queries */
PG_FUNCTION_INFO_V1(nlevel);
Datum		nlevel(PG_FUNCTION_ARGS);

/*
 * ltree_isparent / ltree_risparent are registered here but get no prototype
 * in this file (presumably declared in ltree.h -- confirm).
 */
PG_FUNCTION_INFO_V1(ltree_isparent);
PG_FUNCTION_INFO_V1(ltree_risparent);

PG_FUNCTION_INFO_V1(subltree);
Datum		subltree(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(subpath);
Datum		subpath(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_index);
Datum		ltree_index(PG_FUNCTION_ARGS);

/* concatenation */
PG_FUNCTION_INFO_V1(ltree_addltree);
Datum		ltree_addltree(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_addtext);
Datum		ltree_addtext(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree_textadd);
Datum		ltree_textadd(PG_FUNCTION_ARGS);

/* common ancestor, text conversion, selectivity estimation */
PG_FUNCTION_INFO_V1(lca);
Datum		lca(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltree2text);
Datum		ltree2text(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(text2ltree);
Datum		text2ltree(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ltreeparentsel);
Datum		ltreeparentsel(PG_FUNCTION_ARGS);
int
2002-09-05 04:31:48 +08:00
ltree_compare(const ltree * a, const ltree * b)
{
ltree_level *al = LTREE_FIRST(a);
ltree_level *bl = LTREE_FIRST(b);
2002-09-05 04:31:48 +08:00
int an = a->numlevel;
int bn = b->numlevel;
int res = 0;
while (an > 0 && bn > 0)
{
if ((res = strncmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
2002-09-05 04:31:48 +08:00
{
if (al->len != bl->len)
return (al->len - bl->len) * 10 * (an + 1);
}
else
return res * 10 * (an + 1);
an--;
bn--;
al = LEVEL_NEXT(al);
bl = LEVEL_NEXT(bl);
}
2002-09-05 04:31:48 +08:00
return (a->numlevel - b->numlevel) * 10 * (an + 1);
}
2002-09-05 04:31:48 +08:00
#define RUNCMP \
ltree *a = PG_GETARG_LTREE(0); \
ltree *b = PG_GETARG_LTREE(1); \
int res = ltree_compare(a,b); \
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1); \
Datum
2002-09-05 04:31:48 +08:00
ltree_cmp(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_INT32(res);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_lt(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res < 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_le(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res <= 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_eq(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res == 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_ge(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res >= 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_gt(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res > 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_ne(PG_FUNCTION_ARGS)
{
RUNCMP
2003-08-04 08:43:34 +08:00
PG_RETURN_BOOL((res != 0) ? true : false);
}
Datum
2002-09-05 04:31:48 +08:00
nlevel(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE(0);
int res = a->numlevel;
PG_FREE_IF_COPY(a, 0);
PG_RETURN_INT32(res);
}
bool
2002-09-05 04:31:48 +08:00
inner_isparent(const ltree * c, const ltree * p)
{
ltree_level *cl = LTREE_FIRST(c);
ltree_level *pl = LTREE_FIRST(p);
2002-09-05 04:31:48 +08:00
int pn = p->numlevel;
2002-09-05 04:31:48 +08:00
if (pn > c->numlevel)
return false;
2002-09-05 04:31:48 +08:00
while (pn > 0)
{
if (cl->len != pl->len)
return false;
2002-09-05 04:31:48 +08:00
if (strncmp(cl->name, pl->name, cl->len))
return false;
pn--;
2002-09-05 04:31:48 +08:00
cl = LEVEL_NEXT(cl);
pl = LEVEL_NEXT(pl);
}
return true;
}
2002-09-05 04:31:48 +08:00
/*
 * ltree_isparent - SQL-callable ancestor test.
 *
 * Argument 0 is the candidate ancestor and argument 1 the candidate
 * descendant; the result is inner_isparent(descendant, ancestor).
 */
Datum
ltree_isparent(PG_FUNCTION_ARGS)
{
	ltree	   *child = PG_GETARG_LTREE(1);
	ltree	   *parent = PG_GETARG_LTREE(0);
	bool		is_ancestor = inner_isparent(child, parent);

	PG_FREE_IF_COPY(child, 1);
	PG_FREE_IF_COPY(parent, 0);
	PG_RETURN_BOOL(is_ancestor);
}
/*
 * ltree_risparent - SQL-callable ancestor test with the arguments reversed.
 *
 * Argument 0 is the candidate descendant and argument 1 the candidate
 * ancestor; the result is inner_isparent(descendant, ancestor).
 */
Datum
ltree_risparent(PG_FUNCTION_ARGS)
{
	ltree	   *child = PG_GETARG_LTREE(0);
	ltree	   *parent = PG_GETARG_LTREE(1);
	bool		is_ancestor = inner_isparent(child, parent);

	PG_FREE_IF_COPY(child, 0);
	PG_FREE_IF_COPY(parent, 1);
	PG_RETURN_BOOL(is_ancestor);
}
/*
 * inner_subltree - build a new ltree from levels [startpos, endpos) of 't'.
 *
 * Positions are zero-based level indexes.  endpos values beyond the number
 * of levels are clamped to it.  Raises ERROR "invalid positions" when either
 * position is negative, startpos is not an existing level, or
 * startpos > endpos.
 *
 * The result is freshly palloc'd; 't' is not modified.  When
 * startpos == endpos the result is an empty ltree holding the header only
 * (no level bytes are copied).
 */
static ltree *
inner_subltree(ltree * t, int4 startpos, int4 endpos)
{
	ltree_level *ptr = LTREE_FIRST(t);
	char	   *start;
	char	   *end;
	ltree	   *res;
	int			i;

	if (startpos < 0 || endpos < 0 || startpos >= t->numlevel || startpos > endpos)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid positions")));

	if (endpos > t->numlevel)
		endpos = t->numlevel;

	/* skip the levels in front of the requested range */
	for (i = 0; i < startpos; i++)
		ptr = LEVEL_NEXT(ptr);
	start = (char *) ptr;

	/*
	 * Step over the requested levels.  Starting this walk from 'start'
	 * (rather than from the first level) guarantees end == start for an
	 * empty range, so no stale leading levels end up in the result.
	 */
	for (; i < endpos; i++)
		ptr = LEVEL_NEXT(ptr);
	end = (char *) ptr;

	res = (ltree *) palloc(LTREE_HDRSIZE + (end - start));
	SET_VARSIZE(res, LTREE_HDRSIZE + (end - start));
	res->numlevel = endpos - startpos;

	memcpy(LTREE_FIRST(res), start, end - start);

	return res;
}
Datum
2002-09-05 04:31:48 +08:00
subltree(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE(0);
ltree *res = inner_subltree(t, PG_GETARG_INT32(1), PG_GETARG_INT32(2));
2002-09-05 04:31:48 +08:00
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
2002-09-05 04:31:48 +08:00
Datum
subpath(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE(0);
int4 start = PG_GETARG_INT32(1);
int4 len = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int4 end;
ltree *res;
end = start + len;
if (start < 0)
{
start = t->numlevel + start;
2002-09-05 04:31:48 +08:00
end = start + len;
}
2002-09-05 04:31:48 +08:00
if (start < 0)
{ /* start > t->numlevel */
start = t->numlevel + start;
2002-09-05 04:31:48 +08:00
end = start + len;
}
2002-09-05 04:31:48 +08:00
if (len < 0)
end = t->numlevel + len;
2002-09-05 04:31:48 +08:00
else if (len == 0)
end = (fcinfo->nargs == 3) ? start : 0xffff;
2002-09-05 04:31:48 +08:00
res = inner_subltree(t, start, end);
2002-09-05 04:31:48 +08:00
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
2002-09-05 04:31:48 +08:00
/*
 * ltree_concat - return a newly palloc'd ltree holding the levels of 'a'
 * followed by the levels of 'b'.  Neither input is modified or freed.
 */
static ltree *
ltree_concat(ltree * a, ltree * b)
{
	size_t		a_bytes = VARSIZE(a) - LTREE_HDRSIZE;	/* level data of a */
	size_t		b_bytes = VARSIZE(b) - LTREE_HDRSIZE;	/* level data of b */
	size_t		total = LTREE_HDRSIZE + a_bytes + b_bytes;
	ltree	   *joined = (ltree *) palloc(total);
	char	   *dst;

	SET_VARSIZE(joined, total);
	joined->numlevel = a->numlevel + b->numlevel;

	dst = (char *) LTREE_FIRST(joined);
	memcpy(dst, LTREE_FIRST(a), a_bytes);
	memcpy(dst + a_bytes, LTREE_FIRST(b), b_bytes);

	return joined;
}
/*
 * ltree_addltree - SQL-callable concatenation of two ltree values
 * (argument 0 followed by argument 1).
 */
Datum
ltree_addltree(PG_FUNCTION_ARGS)
{
	ltree	   *left = PG_GETARG_LTREE(0);
	ltree	   *right = PG_GETARG_LTREE(1);
	ltree	   *joined = ltree_concat(left, right);

	PG_FREE_IF_COPY(left, 0);
	PG_FREE_IF_COPY(right, 1);
	PG_RETURN_POINTER(joined);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_addtext(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE(0);
text *b = PG_GETARG_TEXT_P(1);
char *s;
ltree *r,
*tmp;
s = (char *) palloc(VARSIZE(b) - VARHDRSZ + 1);
memcpy(s, VARDATA(b), VARSIZE(b) - VARHDRSZ);
s[VARSIZE(b) - VARHDRSZ] = '\0';
2002-09-05 04:31:48 +08:00
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(
ltree_in,
PointerGetDatum(s)
));
pfree(s);
2002-09-05 04:31:48 +08:00
r = ltree_concat(a, tmp);
2002-09-05 04:31:48 +08:00
pfree(tmp);
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_POINTER(r);
}
Datum
ltree_index(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE(0);
ltree *b = PG_GETARG_LTREE(1);
2003-08-04 08:43:34 +08:00
int start = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int i,
j;
ltree_level *startptr,
*aptr,
*bptr;
bool found = false;
if (start < 0)
{
if (-start >= a->numlevel)
start = 0;
else
start = (int) (a->numlevel) + start;
}
2003-08-04 08:43:34 +08:00
if (a->numlevel - start < b->numlevel || a->numlevel == 0 || b->numlevel == 0)
{
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(-1);
}
2003-08-04 08:43:34 +08:00
startptr = LTREE_FIRST(a);
for (i = 0; i <= a->numlevel - b->numlevel; i++)
{
if (i >= start)
{
aptr = startptr;
bptr = LTREE_FIRST(b);
for (j = 0; j < b->numlevel; j++)
{
if (!(aptr->len == bptr->len && strncmp(aptr->name, bptr->name, aptr->len) == 0))
break;
aptr = LEVEL_NEXT(aptr);
bptr = LEVEL_NEXT(bptr);
}
2003-08-04 08:43:34 +08:00
if (j == b->numlevel)
{
found = true;
break;
}
}
2003-08-04 08:43:34 +08:00
startptr = LEVEL_NEXT(startptr);
}
2003-08-04 08:43:34 +08:00
if (!found)
i = -1;
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(i);
}
Datum
2002-09-05 04:31:48 +08:00
ltree_textadd(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE(1);
text *b = PG_GETARG_TEXT_P(0);
char *s;
ltree *r,
*tmp;
s = (char *) palloc(VARSIZE(b) - VARHDRSZ + 1);
memcpy(s, VARDATA(b), VARSIZE(b) - VARHDRSZ);
s[VARSIZE(b) - VARHDRSZ] = '\0';
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(
ltree_in,
PointerGetDatum(s)
));
pfree(s);
r = ltree_concat(tmp, a);
pfree(tmp);
PG_FREE_IF_COPY(a, 1);
PG_FREE_IF_COPY(b, 0);
PG_RETURN_POINTER(r);
}
2002-09-05 04:31:48 +08:00
/*
 * lca_inner - longest common ancestor of an array of ltree values.
 *
 * 'a' points to 'len' ltree pointers.  The candidate ancestor starts as the
 * first path minus its last level and is shortened while comparing against
 * every other path; for each of those, at most its level count minus one
 * leading levels are considered, so the result is always a proper prefix of
 * every input.
 *
 * Returns NULL when any input has zero levels; otherwise a newly palloc'd
 * ltree (possibly with zero levels) copied from the first input.
 */
ltree *
lca_inner(ltree ** a, int len)
{
	ltree	   *first = *a;
	ltree	  **cur;
	ltree_level *src,
			   *other,
			   *dst;
	ltree	   *res;
	int			num;			/* levels of 'first' still common to all */
	int			limit;
	int			i;
	int			reslen;

	if (first->numlevel == 0)
		return NULL;

	num = first->numlevel - 1;

	for (cur = a + 1; cur - a < len; cur++)
	{
		if ((*cur)->numlevel == 0)
			return NULL;

		if ((*cur)->numlevel == 1)
		{
			/* a one-level path has no proper prefix to share */
			num = 0;
			continue;
		}

		/* count the matching leading levels, up to the current bound */
		limit = Min(num, (*cur)->numlevel - 1);
		num = 0;
		src = LTREE_FIRST(first);
		other = LTREE_FIRST(*cur);
		for (i = 0; i < limit; i++)
		{
			if (src->len != other->len ||
				strncmp(src->name, other->name, src->len) != 0)
				break;

			num = i + 1;
			src = LEVEL_NEXT(src);
			other = LEVEL_NEXT(other);
		}
	}

	/* size of the result: header plus the first 'num' levels of 'first' */
	reslen = LTREE_HDRSIZE;
	src = LTREE_FIRST(first);
	for (i = 0; i < num; i++)
	{
		reslen += MAXALIGN(src->len + LEVEL_HDRSIZE);
		src = LEVEL_NEXT(src);
	}

	res = (ltree *) palloc(reslen);
	SET_VARSIZE(res, reslen);
	res->numlevel = num;

	/* copy those levels one by one */
	src = LTREE_FIRST(first);
	dst = LTREE_FIRST(res);
	for (i = 0; i < num; i++)
	{
		memcpy(dst, src, MAXALIGN(src->len + LEVEL_HDRSIZE));
		src = LEVEL_NEXT(src);
		dst = LEVEL_NEXT(dst);
	}

	return res;
}
Datum
2002-09-05 04:31:48 +08:00
lca(PG_FUNCTION_ARGS)
{
int i;
ltree **a,
*res;
a = (ltree **) palloc(sizeof(ltree *) * fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
a[i] = PG_GETARG_LTREE(i);
2002-09-05 04:31:48 +08:00
res = lca_inner(a, (int) fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
PG_FREE_IF_COPY(a[i], i);
pfree(a);
2002-09-05 04:31:48 +08:00
if (res)
PG_RETURN_POINTER(res);
else
PG_RETURN_NULL();
}
/*
 * text2ltree - SQL-callable conversion from text to ltree.
 *
 * Calls ltree_in on a NUL-terminated copy of the text contents and returns
 * its result.
 */
Datum
text2ltree(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(0);
	int			textlen = VARSIZE(in) - VARHDRSZ;
	char	   *cstr = (char *) palloc(textlen + 1);
	ltree	   *out;

	memcpy(cstr, VARDATA(in), textlen);
	cstr[textlen] = '\0';

	out = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
													PointerGetDatum(cstr)));
	pfree(cstr);

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}
Datum
ltree2text(PG_FUNCTION_ARGS)
{
ltree *in = PG_GETARG_LTREE(0);
2003-08-04 08:43:34 +08:00
char *ptr;
int i;
ltree_level *curlevel;
2003-08-04 08:43:34 +08:00
text *out;
out = (text *) palloc(VARSIZE(in) + VARHDRSZ);
2003-08-04 08:43:34 +08:00
ptr = VARDATA(out);
curlevel = LTREE_FIRST(in);
2003-08-04 08:43:34 +08:00
for (i = 0; i < in->numlevel; i++)
{
if (i != 0)
{
*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
ptr += curlevel->len;
curlevel = LEVEL_NEXT(curlevel);
}
2003-08-04 08:43:34 +08:00
SET_VARSIZE(out, ptr - ((char *) out));
PG_FREE_IF_COPY(in, 0);
2003-08-04 08:43:34 +08:00
PG_RETURN_POINTER(out);
}
/* selectivity assumed when no usable statistics or constant are available */
#define DEFAULT_PARENT_SEL 0.001

/*
 *	ltreeparentsel - Selectivity of parent relationship for ltree data types.
 *
 * Arguments follow the restriction-estimator convention used here: planner
 * info, operator OID, argument list and varRelid.  Returns a float8
 * selectivity clamped by CLAMP_PROBABILITY.
 */
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	double		selec;

	/*
	 * Unless the expression has the shape "variable <@ something" or
	 * "something <@ variable", give up and report the default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);

	if (!IsA(other, Const))
	{
		/* not compared to a constant: nothing better than the default */
		selec = DEFAULT_PARENT_SEL;
	}
	else if (((Const *) other)->constisnull)
	{
		/*
		 * A NULL constant: assume the operator is strict, so it can never
		 * return TRUE.
		 */
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	else
	{
		/* Variable is being compared to a known non-null constant */
		Datum		constval = ((Const *) other)->constvalue;
		FmgrInfo	contproc;
		double		mcvsum;
		double		mcvsel;
		double		nullfrac;

		fmgr_info(get_opcode(operator), &contproc);

		/*
		 * Is the constant "<@" to any of the column's most common values?
		 */
		mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
								 &mcvsum);

		/*
		 * If the histogram is large enough, see what fraction of it the
		 * constant is "<@" to, and assume that's representative of the
		 * non-MCV population.  Otherwise (negative result) use the default
		 * selectivity for the non-MCV population.  Extremely small or
		 * large histogram estimates are not believed and get bounded.
		 */
		selec = histogram_selectivity(&vardata, &contproc,
									  constval, varonleft,
									  100, 1);
		if (selec < 0)
			selec = DEFAULT_PARENT_SEL;
		else if (selec < 0.0001)
			selec = 0.0001;
		else if (selec > 0.9999)
			selec = 0.9999;

		nullfrac = HeapTupleIsValid(vardata.statsTuple)
			? ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac
			: 0.0;

		/*
		 * Merge the MCV and histogram results, keeping in mind that the
		 * histogram covers only the non-null values that are not listed in
		 * the MCV list.
		 */
		selec *= 1.0 - nullfrac - mcvsum;
		selec += mcvsel;
	}

	ReleaseVariableStats(vardata);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}