2008-05-17 09:28:26 +08:00
|
|
|
/*
 * contrib/pg_trgm/trgm_gin.c
 */
|
2010-12-04 13:16:21 +08:00
|
|
|
#include "postgres.h"
|
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
#include "trgm.h"
|
|
|
|
|
|
|
|
#include "access/gin.h"
|
2011-02-01 10:33:55 +08:00
|
|
|
#include "access/skey.h"
|
2007-03-14 22:15:40 +08:00
|
|
|
|
2010-12-04 13:16:21 +08:00
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
/* PG_FUNCTION_INFO_V1 declarations for the four Datum functions in this file */
PG_FUNCTION_INFO_V1(gin_extract_trgm);
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
|
|
|
|
2011-01-10 07:04:20 +08:00
|
|
|
/*
|
2011-02-01 10:33:55 +08:00
|
|
|
* This function can only be called if a pre-9.1 version of the GIN operator
|
|
|
|
* class definition is present in the catalogs (probably as a consequence
|
2011-02-18 04:03:30 +08:00
|
|
|
* of upgrade-in-place). Cope.
|
2011-01-10 07:04:20 +08:00
|
|
|
*/
|
2007-03-14 22:15:40 +08:00
|
|
|
Datum
|
|
|
|
gin_extract_trgm(PG_FUNCTION_ARGS)
|
2011-02-01 10:33:55 +08:00
|
|
|
{
|
2011-02-18 04:03:30 +08:00
|
|
|
if (PG_NARGS() == 3)
|
|
|
|
return gin_extract_value_trgm(fcinfo);
|
|
|
|
if (PG_NARGS() == 7)
|
|
|
|
return gin_extract_query_trgm(fcinfo);
|
|
|
|
elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
|
2011-02-01 10:33:55 +08:00
|
|
|
PG_RETURN_NULL();
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
gin_extract_value_trgm(PG_FUNCTION_ARGS)
|
2007-03-14 22:15:40 +08:00
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
Datum *entries = NULL;
|
|
|
|
TRGM *trg;
|
2011-01-10 07:04:20 +08:00
|
|
|
int32 trglen;
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
*nentries = 0;
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
|
|
trglen = ARRNELEM(trg);
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
if (trglen > 0)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
trgm *ptr;
|
2011-01-10 07:04:20 +08:00
|
|
|
int32 i;
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2011-01-10 07:04:20 +08:00
|
|
|
*nentries = trglen;
|
2007-03-14 22:15:40 +08:00
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
|
|
|
|
|
|
|
ptr = GETARR(trg);
|
2011-01-10 07:04:20 +08:00
|
|
|
for (i = 0; i < trglen; i++)
|
2007-03-14 22:15:40 +08:00
|
|
|
{
|
2011-04-10 23:42:00 +08:00
|
|
|
int32 item = trgm2int(ptr);
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2011-01-10 07:04:20 +08:00
|
|
|
entries[i] = Int32GetDatum(item);
|
2007-03-14 22:15:40 +08:00
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
}
|
|
|
|
|
2011-02-01 10:33:55 +08:00
|
|
|
Datum
|
|
|
|
gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
2011-04-10 23:42:00 +08:00
|
|
|
|
|
|
|
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
|
2013-04-09 13:05:55 +08:00
|
|
|
Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
|
|
|
|
|
2011-04-10 23:42:00 +08:00
|
|
|
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
|
|
|
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
2011-02-01 10:33:55 +08:00
|
|
|
Datum *entries = NULL;
|
|
|
|
TRGM *trg;
|
|
|
|
int32 trglen;
|
|
|
|
trgm *ptr;
|
2013-04-09 13:05:55 +08:00
|
|
|
TrgmPackedGraph *graph;
|
2011-02-01 10:33:55 +08:00
|
|
|
int32 i;
|
|
|
|
|
|
|
|
switch (strategy)
|
|
|
|
{
|
|
|
|
case SimilarityStrategyNumber:
|
|
|
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
|
|
break;
|
|
|
|
case ILikeStrategyNumber:
|
|
|
|
#ifndef IGNORECASE
|
|
|
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
|
|
|
#endif
|
|
|
|
/* FALL THRU */
|
|
|
|
case LikeStrategyNumber:
|
2011-04-10 23:42:00 +08:00
|
|
|
|
2011-02-01 10:33:55 +08:00
|
|
|
/*
|
|
|
|
* For wildcard search we extract all the trigrams that every
|
|
|
|
* potentially-matching string must include.
|
|
|
|
*/
|
|
|
|
trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
|
|
|
break;
|
2013-04-09 13:05:55 +08:00
|
|
|
case RegExpICaseStrategyNumber:
|
|
|
|
#ifndef IGNORECASE
|
|
|
|
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
|
|
|
|
#endif
|
|
|
|
/* FALL THRU */
|
|
|
|
case RegExpStrategyNumber:
|
2013-04-11 01:30:14 +08:00
|
|
|
trg = createTrgmNFA(val, PG_GET_COLLATION(),
|
|
|
|
&graph, CurrentMemoryContext);
|
2013-04-09 13:05:55 +08:00
|
|
|
if (trg && ARRNELEM(trg) > 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Successful regex processing: store NFA-like graph as
|
2014-05-07 00:12:18 +08:00
|
|
|
* extra_data. GIN API requires an array of nentries
|
2013-04-09 13:05:55 +08:00
|
|
|
* Pointers, but we just put the same value in each element.
|
|
|
|
*/
|
|
|
|
trglen = ARRNELEM(trg);
|
|
|
|
*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
|
|
|
|
for (i = 0; i < trglen; i++)
|
|
|
|
(*extra_data)[i] = (Pointer) graph;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* No result: have to do full index scan. */
|
|
|
|
*nentries = 0;
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
}
|
|
|
|
break;
|
2011-02-01 10:33:55 +08:00
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
2011-04-10 23:42:00 +08:00
|
|
|
trg = NULL; /* keep compiler quiet */
|
2011-02-01 10:33:55 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
trglen = ARRNELEM(trg);
|
|
|
|
*nentries = trglen;
|
|
|
|
|
|
|
|
if (trglen > 0)
|
|
|
|
{
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
|
|
|
ptr = GETARR(trg);
|
|
|
|
for (i = 0; i < trglen; i++)
|
|
|
|
{
|
2011-04-10 23:42:00 +08:00
|
|
|
int32 item = trgm2int(ptr);
|
2011-02-01 10:33:55 +08:00
|
|
|
|
|
|
|
entries[i] = Int32GetDatum(item);
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If no trigram was extracted then we have to scan all the index.
|
|
|
|
*/
|
|
|
|
if (trglen == 0)
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
}
|
|
|
|
|
2007-03-14 22:15:40 +08:00
|
|
|
Datum
|
|
|
|
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
bool *check = (bool *) PG_GETARG_POINTER(0);
|
2011-02-01 10:33:55 +08:00
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
2011-04-10 23:42:00 +08:00
|
|
|
|
2009-06-11 22:49:15 +08:00
|
|
|
/* text *query = PG_GETARG_TEXT_P(2); */
|
2011-01-10 07:04:20 +08:00
|
|
|
int32 nkeys = PG_GETARG_INT32(3);
|
2013-04-09 13:05:55 +08:00
|
|
|
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
|
2009-03-26 06:19:02 +08:00
|
|
|
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
2011-02-01 10:33:55 +08:00
|
|
|
bool res;
|
2011-01-10 07:04:20 +08:00
|
|
|
int32 i,
|
2011-02-01 10:33:55 +08:00
|
|
|
ntrue;
|
2007-11-16 05:14:46 +08:00
|
|
|
|
2008-04-15 01:05:34 +08:00
|
|
|
/* All cases served by this function are inexact */
|
|
|
|
*recheck = true;
|
|
|
|
|
2011-02-01 10:33:55 +08:00
|
|
|
switch (strategy)
|
2011-01-10 07:04:20 +08:00
|
|
|
{
|
2011-02-01 10:33:55 +08:00
|
|
|
case SimilarityStrategyNumber:
|
|
|
|
/* Count the matches */
|
|
|
|
ntrue = 0;
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
{
|
|
|
|
if (check[i])
|
|
|
|
ntrue++;
|
|
|
|
}
|
2007-03-14 22:15:40 +08:00
|
|
|
#ifdef DIVUNION
|
2011-02-01 10:33:55 +08:00
|
|
|
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
2007-03-14 22:15:40 +08:00
|
|
|
#else
|
2011-02-01 10:33:55 +08:00
|
|
|
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
2007-03-14 22:15:40 +08:00
|
|
|
#endif
|
2011-02-01 10:33:55 +08:00
|
|
|
break;
|
|
|
|
case ILikeStrategyNumber:
|
|
|
|
#ifndef IGNORECASE
|
|
|
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
|
|
|
#endif
|
|
|
|
/* FALL THRU */
|
|
|
|
case LikeStrategyNumber:
|
|
|
|
/* Check if all extracted trigrams are presented. */
|
|
|
|
res = true;
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
{
|
|
|
|
if (!check[i])
|
|
|
|
{
|
|
|
|
res = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2013-04-09 13:05:55 +08:00
|
|
|
case RegExpICaseStrategyNumber:
|
|
|
|
#ifndef IGNORECASE
|
|
|
|
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
|
|
|
|
#endif
|
|
|
|
/* FALL THRU */
|
|
|
|
case RegExpStrategyNumber:
|
|
|
|
if (nkeys < 1)
|
|
|
|
{
|
|
|
|
/* Regex processing gave no result: do full index scan */
|
|
|
|
res = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
|
|
|
|
check);
|
|
|
|
break;
|
2011-02-01 10:33:55 +08:00
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
res = false; /* keep compiler quiet */
|
|
|
|
break;
|
|
|
|
}
|
2007-03-14 22:15:40 +08:00
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|