2008-05-12 08:00:54 +08:00
|
|
|
/*
|
2010-09-21 04:08:53 +08:00
|
|
|
* contrib/hstore/hstore_gin.c
|
2008-05-12 08:00:54 +08:00
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
#include "access/gin.h"
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 08:16:24 +08:00
|
|
|
#include "access/skey.h"
|
2009-10-01 03:50:22 +08:00
|
|
|
#include "catalog/pg_type.h"
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2008-05-12 08:00:54 +08:00
|
|
|
#include "hstore.h"
|
|
|
|
|
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
/*
|
|
|
|
* When using a GIN index for hstore, we choose to index both keys and values.
|
|
|
|
* The storage format is "text" values, with K, V, or N prepended to the string
|
2011-04-10 23:42:00 +08:00
|
|
|
* to indicate key, value, or null values. (As of 9.1 it might be better to
|
2011-01-10 05:43:56 +08:00
|
|
|
* store null values as nulls, but we'll keep it this way for on-disk
|
|
|
|
* compatibility.)
|
|
|
|
*/
|
2007-11-16 05:14:46 +08:00
|
|
|
#define KEYFLAG 'K'
|
|
|
|
#define VALFLAG 'V'
|
|
|
|
#define NULLFLAG 'N'
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(gin_extract_hstore);
|
2007-11-16 05:14:46 +08:00
|
|
|
Datum gin_extract_hstore(PG_FUNCTION_ARGS);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Build an indexable text value */
|
2007-11-16 05:14:46 +08:00
|
|
|
static text *
|
2011-01-10 05:43:56 +08:00
|
|
|
makeitem(char *str, int len, char flag)
|
2007-03-14 22:21:53 +08:00
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
text *item;
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
item = (text *) palloc(VARHDRSZ + len + 1);
|
2007-03-14 22:21:53 +08:00
|
|
|
SET_VARSIZE(item, VARHDRSZ + len + 1);
|
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
*VARDATA(item) = flag;
|
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
if (str && len > 0)
|
|
|
|
memcpy(VARDATA(item) + 1, str, len);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
return item;
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
gin_extract_hstore(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
HStore *hs = PG_GETARG_HS(0);
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
Datum *entries = NULL;
|
2010-02-26 10:01:40 +08:00
|
|
|
HEntry *hsent = ARRPTR(hs);
|
|
|
|
char *ptr = STRPTR(hs);
|
|
|
|
int count = HS_COUNT(hs);
|
|
|
|
int i;
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2009-10-01 03:50:22 +08:00
|
|
|
*nentries = 2 * count;
|
|
|
|
if (count)
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * 2 * count);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2009-10-01 03:50:22 +08:00
|
|
|
for (i = 0; i < count; ++i)
|
2007-03-14 22:21:53 +08:00
|
|
|
{
|
2009-10-01 03:50:22 +08:00
|
|
|
text *item;
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
item = makeitem(HS_KEY(hsent, ptr, i), HS_KEYLEN(hsent, i),
|
|
|
|
KEYFLAG);
|
2010-02-26 10:01:40 +08:00
|
|
|
entries[2 * i] = PointerGetDatum(item);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2010-02-26 10:01:40 +08:00
|
|
|
if (HS_VALISNULL(hsent, i))
|
2011-01-10 05:43:56 +08:00
|
|
|
item = makeitem(NULL, 0, NULLFLAG);
|
2009-10-01 03:50:22 +08:00
|
|
|
else
|
2011-01-10 05:43:56 +08:00
|
|
|
item = makeitem(HS_VAL(hsent, ptr, i), HS_VALLEN(hsent, i),
|
|
|
|
VALFLAG);
|
2010-02-26 10:01:40 +08:00
|
|
|
entries[2 * i + 1] = PointerGetDatum(item);
|
2007-03-14 22:21:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(gin_extract_hstore_query);
|
2007-11-16 05:14:46 +08:00
|
|
|
Datum gin_extract_hstore_query(PG_FUNCTION_ARGS);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
Datum
|
|
|
|
gin_extract_hstore_query(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
2007-03-14 22:21:53 +08:00
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
2011-01-10 05:43:56 +08:00
|
|
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
|
|
|
Datum *entries;
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
if (strategy == HStoreContainsStrategyNumber)
|
2007-03-14 22:21:53 +08:00
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Query is an hstore, so just apply gin_extract_hstore... */
|
|
|
|
entries = (Datum *)
|
|
|
|
DatumGetPointer(DirectFunctionCall2(gin_extract_hstore,
|
|
|
|
PG_GETARG_DATUM(0),
|
|
|
|
PointerGetDatum(nentries)));
|
|
|
|
/* ... except that "contains {}" requires a full index scan */
|
|
|
|
if (entries == NULL)
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
2007-03-14 22:21:53 +08:00
|
|
|
}
|
2007-11-16 05:14:46 +08:00
|
|
|
else if (strategy == HStoreExistsStrategyNumber)
|
2007-03-14 22:21:53 +08:00
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
text *query = PG_GETARG_TEXT_PP(0);
|
|
|
|
text *item;
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
*nentries = 1;
|
2007-11-16 05:14:46 +08:00
|
|
|
entries = (Datum *) palloc(sizeof(Datum));
|
2011-01-10 05:43:56 +08:00
|
|
|
item = makeitem(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query), KEYFLAG);
|
2007-03-14 22:21:53 +08:00
|
|
|
entries[0] = PointerGetDatum(item);
|
|
|
|
}
|
2009-10-01 03:50:22 +08:00
|
|
|
else if (strategy == HStoreExistsAnyStrategyNumber ||
|
|
|
|
strategy == HStoreExistsAllStrategyNumber)
|
|
|
|
{
|
2010-02-26 10:01:40 +08:00
|
|
|
ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
|
|
|
|
Datum *key_datums;
|
|
|
|
bool *key_nulls;
|
|
|
|
int key_count;
|
|
|
|
int i,
|
|
|
|
j;
|
|
|
|
text *item;
|
2009-10-01 03:50:22 +08:00
|
|
|
|
|
|
|
deconstruct_array(query,
|
|
|
|
TEXTOID, -1, false, 'i',
|
|
|
|
&key_datums, &key_nulls, &key_count);
|
|
|
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * key_count);
|
|
|
|
|
|
|
|
for (i = 0, j = 0; i < key_count; ++i)
|
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Nulls in the array are ignored, cf hstoreArrayToPairs */
|
2009-10-01 03:50:22 +08:00
|
|
|
if (key_nulls[i])
|
|
|
|
continue;
|
2011-01-10 05:43:56 +08:00
|
|
|
item = makeitem(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ, KEYFLAG);
|
2009-10-01 03:50:22 +08:00
|
|
|
entries[j++] = PointerGetDatum(item);
|
|
|
|
}
|
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
*nentries = j;
|
|
|
|
/* ExistsAll with no keys should match everything */
|
|
|
|
if (j == 0 && strategy == HStoreExistsAllStrategyNumber)
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
2009-10-01 03:50:22 +08:00
|
|
|
}
|
2007-03-14 22:21:53 +08:00
|
|
|
else
|
2011-01-10 05:43:56 +08:00
|
|
|
{
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
entries = NULL; /* keep compiler quiet */
|
|
|
|
}
|
2007-03-14 22:21:53 +08:00
|
|
|
|
2011-01-10 05:43:56 +08:00
|
|
|
PG_RETURN_POINTER(entries);
|
2007-03-14 22:21:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(gin_consistent_hstore);
|
2007-11-16 05:14:46 +08:00
|
|
|
Datum gin_consistent_hstore(PG_FUNCTION_ARGS);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
Datum
|
|
|
|
gin_consistent_hstore(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2008-04-15 01:05:34 +08:00
|
|
|
bool *check = (bool *) PG_GETARG_POINTER(0);
|
2007-03-14 22:21:53 +08:00
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
2010-02-26 10:01:40 +08:00
|
|
|
|
2009-10-01 03:50:22 +08:00
|
|
|
/* HStore *query = PG_GETARG_HS(2); */
|
|
|
|
int32 nkeys = PG_GETARG_INT32(3);
|
2010-02-26 10:01:40 +08:00
|
|
|
|
2009-03-26 06:19:02 +08:00
|
|
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
|
|
|
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
2007-11-16 05:14:46 +08:00
|
|
|
bool res = true;
|
2011-01-10 05:43:56 +08:00
|
|
|
int32 i;
|
2009-10-01 03:50:22 +08:00
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
if (strategy == HStoreContainsStrategyNumber)
|
2007-03-14 22:21:53 +08:00
|
|
|
{
|
2008-04-15 01:05:34 +08:00
|
|
|
/*
|
2011-01-10 05:43:56 +08:00
|
|
|
* Index doesn't have information about correspondence of keys and
|
2011-04-10 23:42:00 +08:00
|
|
|
* values, so we need recheck. However, if not all the keys are
|
2011-01-10 05:43:56 +08:00
|
|
|
* present, we can fail at once.
|
2008-04-15 01:05:34 +08:00
|
|
|
*/
|
|
|
|
*recheck = true;
|
2011-01-10 05:43:56 +08:00
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
{
|
|
|
|
if (!check[i])
|
|
|
|
{
|
2007-03-14 22:21:53 +08:00
|
|
|
res = false;
|
2011-01-10 05:43:56 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2007-03-14 22:21:53 +08:00
|
|
|
}
|
2007-11-16 05:14:46 +08:00
|
|
|
else if (strategy == HStoreExistsStrategyNumber)
|
2008-04-15 01:05:34 +08:00
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Existence of key is guaranteed in default search mode */
|
|
|
|
*recheck = false;
|
2007-03-14 22:21:53 +08:00
|
|
|
res = true;
|
2008-04-15 01:05:34 +08:00
|
|
|
}
|
2009-10-01 03:50:22 +08:00
|
|
|
else if (strategy == HStoreExistsAnyStrategyNumber)
|
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Existence of key is guaranteed in default search mode */
|
|
|
|
*recheck = false;
|
2009-10-01 03:50:22 +08:00
|
|
|
res = true;
|
|
|
|
}
|
|
|
|
else if (strategy == HStoreExistsAllStrategyNumber)
|
|
|
|
{
|
2011-01-10 05:43:56 +08:00
|
|
|
/* Testing for all the keys being present gives an exact result */
|
|
|
|
*recheck = false;
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
{
|
2009-10-01 03:50:22 +08:00
|
|
|
if (!check[i])
|
2011-01-10 05:43:56 +08:00
|
|
|
{
|
2009-10-01 03:50:22 +08:00
|
|
|
res = false;
|
2011-01-10 05:43:56 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2009-10-01 03:50:22 +08:00
|
|
|
}
|
2007-03-14 22:21:53 +08:00
|
|
|
else
|
2011-01-10 05:43:56 +08:00
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
2007-03-14 22:21:53 +08:00
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|