postgresql/contrib/ltree/_ltree_gist.c
Tom Lane fbdf9712af Allow zero-dimensional (ie, empty) arrays in contrib/ltree operations.
The main motivation for changing this is bug #4921, in which it's pointed out
that it's no longer safe to apply ltree operations to the result of
ARRAY(SELECT ...) if the sub-select might return no rows.  Before 8.3,
the ARRAY() construct would return NULL, which might or might not be helpful
but at least it wouldn't result in an error.  Now it returns an empty array
which results in a failure for no good reason, since the ltree operations
are all perfectly capable of dealing with zero-element arrays.

As far as I can find, these ltree functions are the only places where zero
array dimensionality is rejected unnecessarily.

Back-patch to 8.3 to prevent behavioral regression of queries that worked
in older releases.
2010-02-24 18:02:36 +00:00

586 lines
12 KiB
C

/*
* GiST support for ltree[]
* Teodor Sigaev <teodor@stack.net>
*/
#include "ltree.h"
#include "access/gist.h"
#include "access/skey.h"
#include "utils/array.h"
#include "crc32.h"
PG_FUNCTION_INFO_V1(_ltree_compress);
Datum _ltree_compress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_same);
Datum _ltree_same(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_union);
Datum _ltree_union(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_penalty);
Datum _ltree_penalty(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_picksplit);
Datum _ltree_picksplit(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_consistent);
Datum _ltree_consistent(PG_FUNCTION_ARGS);
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
static void
hashing(BITVECP sign, ltree * t)
{
int tlen = t->numlevel;
ltree_level *cur = LTREE_FIRST(t);
int hash;
while (tlen > 0)
{
hash = ltree_crc32_sz(cur->name, cur->len);
AHASH(sign, hash);
cur = LEVEL_NEXT(cur);
tlen--;
}
}
Datum
_ltree_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{ /* ltree */
ltree_gist *key;
ArrayType *val = DatumGetArrayTypeP(entry->key);
int4 len = LTG_HDRSIZE + ASIGLEN;
int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
ltree *item = (ltree *) ARR_DATA_PTR(val);
if (ARR_NDIM(val) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array must be one-dimensional")));
if (ARR_HASNULL(val))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
key = (ltree_gist *) palloc(len);
SET_VARSIZE(key, len);
key->flag = 0;
MemSet(LTG_SIGN(key), 0, ASIGLEN);
while (num > 0)
{
hashing(LTG_SIGN(key), item);
num--;
item = NEXTVAL(item);
}
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, FALSE);
}
else if (!LTG_ISALLTRUE(entry->key))
{
int4 i,
len;
ltree_gist *key;
BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
ALOOPBYTE
{
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
}
len = LTG_HDRSIZE;
key = (ltree_gist *) palloc(len);
SET_VARSIZE(key, len);
key->flag = LTG_ALLTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, FALSE);
}
PG_RETURN_POINTER(retval);
}
Datum
_ltree_same(PG_FUNCTION_ARGS)
{
ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
*result = true;
else if (LTG_ISALLTRUE(a))
*result = false;
else if (LTG_ISALLTRUE(b))
*result = false;
else
{
int4 i;
BITVECP sa = LTG_SIGN(a),
sb = LTG_SIGN(b);
*result = true;
ALOOPBYTE
{
if (sa[i] != sb[i])
{
*result = false;
break;
}
}
}
PG_RETURN_POINTER(result);
}
static int4
unionkey(BITVECP sbase, ltree_gist * add)
{
int4 i;
BITVECP sadd = LTG_SIGN(add);
if (LTG_ISALLTRUE(add))
return 1;
ALOOPBYTE
sbase[i] |= sadd[i];
return 0;
}
Datum
_ltree_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int *size = (int *) PG_GETARG_POINTER(1);
ABITVEC base;
int4 i,
len;
int4 flag = 0;
ltree_gist *result;
MemSet((void *) base, 0, sizeof(ABITVEC));
for (i = 0; i < entryvec->n; i++)
{
if (unionkey(base, GETENTRY(entryvec, i)))
{
flag = LTG_ALLTRUE;
break;
}
}
len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
result = (ltree_gist *) palloc(len);
SET_VARSIZE(result, len);
result->flag = flag;
if (!LTG_ISALLTRUE(result))
memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
*size = len;
PG_RETURN_POINTER(result);
}
static int4
sizebitvec(BITVECP sign)
{
int4 size = 0,
i;
ALOOPBYTE
size += number_of_ones[(unsigned char) sign[i]];
return size;
}
static int
hemdistsign(BITVECP a, BITVECP b)
{
int i,
diff,
dist = 0;
ALOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
dist += number_of_ones[diff];
}
return dist;
}
static int
hemdist(ltree_gist * a, ltree_gist * b)
{
if (LTG_ISALLTRUE(a))
{
if (LTG_ISALLTRUE(b))
return 0;
else
return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
}
else if (LTG_ISALLTRUE(b))
return ASIGLENBIT - sizebitvec(LTG_SIGN(a));
return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
}
Datum
_ltree_penalty(PG_FUNCTION_ARGS)
{
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(origval, newval);
PG_RETURN_POINTER(penalty);
}
typedef struct
{
OffsetNumber pos;
int4 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
}
Datum
_ltree_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber k,
j;
ltree_gist *datum_l,
*datum_r;
BITVECP union_l,
union_r;
int4 size_alpha,
size_beta;
int4 size_waste,
waste = -1;
int4 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
OffsetNumber maxoff;
BITVECP ptr;
int i;
SPLITCOST *costvector;
ltree_gist *_k,
*_j;
maxoff = entryvec->n - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
_k = GETENTRY(entryvec, k);
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(_k, GETENTRY(entryvec, j));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
{
datum_l = (ltree_gist *) palloc(LTG_HDRSIZE);
SET_VARSIZE(datum_l, LTG_HDRSIZE);
datum_l->flag = LTG_ALLTRUE;
}
else
{
datum_l = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
datum_l->flag = 0;
memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
}
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
{
datum_r = (ltree_gist *) palloc(LTG_HDRSIZE);
SET_VARSIZE(datum_r, LTG_HDRSIZE);
datum_r->flag = LTG_ALLTRUE;
}
else
{
datum_r = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
datum_r->flag = 0;
memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
}
maxoff = OffsetNumberNext(maxoff);
/* sort before ... */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
costvector[j - 1].cost = Abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
union_l = LTG_SIGN(datum_l);
union_r = LTG_SIGN(datum_r);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
{
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_l))
MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_l[i] |= ptr[i];
}
*left++ = j;
v->spl_nleft++;
}
else
{
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_r))
MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_r[i] |= ptr[i];
}
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}
static bool
gist_te(ltree_gist * key, ltree * query)
{
ltree_level *curq = LTREE_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
unsigned int hv;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
hv = ltree_crc32_sz(curq->name, curq->len);
if (!GETBIT(sign, AHASHVAL(hv)))
return false;
curq = LEVEL_NEXT(curq);
qlen--;
}
return true;
}
static bool
checkcondition_bit(void *checkval, ITEM * val)
{
return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
}
static bool
gist_qtxt(ltree_gist * key, ltxtquery * query)
{
if (LTG_ISALLTRUE(key))
return true;
return ltree_execute(
GETQUERY(query),
(void *) LTG_SIGN(key), false,
checkcondition_bit
);
}
static bool
gist_qe(ltree_gist * key, lquery * query)
{
lquery_level *curq = LQUERY_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
if (curq->numvar && LQL_CANLOOKSIGN(curq))
{
bool isexist = false;
int vlen = curq->numvar;
lquery_variant *curv = LQL_FIRST(curq);
while (vlen > 0)
{
if (GETBIT(sign, AHASHVAL(curv->val)))
{
isexist = true;
break;
}
curv = LVAR_NEXT(curv);
vlen--;
}
if (!isexist)
return false;
}
curq = LQL_NEXT(curq);
qlen--;
}
return true;
}
static bool
_arrq_cons(ltree_gist * key, ArrayType *_query)
{
lquery *query = (lquery *) ARR_DATA_PTR(_query);
int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
if (ARR_NDIM(_query) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array must be one-dimensional")));
if (ARR_HASNULL(_query))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
while (num > 0)
{
if (gist_qe(key, query))
return true;
num--;
query = (lquery *) NEXTVAL(query);
}
return false;
}
Datum
_ltree_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
char *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
bool res = false;
switch (strategy)
{
case 10:
case 11:
res = gist_te(key, (ltree *) query);
break;
case 12:
case 13:
res = gist_qe(key, (lquery *) query);
break;
case 14:
case 15:
res = gist_qtxt(key, (ltxtquery *) query);
break;
case 16:
case 17:
res = _arrq_cons(key, (ArrayType *) query);
break;
default:
/* internal error */
elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
}
PG_FREE_IF_COPY(query, 1);
PG_RETURN_BOOL(res);
}