mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-15 08:20:16 +08:00
0645663e6c
1 Comparison operation for tsquery 2 Btree index on tsquery 3 numnode(tsquery) - returns 'length' of tsquery 4 tsquery @ tsquery, tsquery ~ tsquery - contains, contained for tsquery. Note: They don't guarantee an exact result, only MAY BE, so they are useful only for speeding up rewrite functions 5 GiST index support for @,~ 6 rewrite(): select rewrite(orig, what, to); select rewrite(ARRAY[orig, what, to]) from tsquery_table; select rewrite(orig, 'select what, to from tsquery_table;'); 7 significantly improve cover algorithm
789 lines
16 KiB
C
789 lines
16 KiB
C
/*
|
|
 * Relevance (ranking) functions
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
*/
|
|
#include "postgres.h"
|
|
#include <math.h>
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "utils/builtins.h"
|
|
#include "fmgr.h"
|
|
#include "funcapi.h"
|
|
#include "storage/bufpage.h"
|
|
#include "executor/spi.h"
|
|
#include "commands/trigger.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "catalog/namespace.h"
|
|
|
|
#include "utils/array.h"
|
|
|
|
#include "tsvector.h"
|
|
#include "query.h"
|
|
#include "common.h"
|
|
|
|
PG_FUNCTION_INFO_V1(rank);
|
|
Datum rank(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_def);
|
|
Datum rank_def(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_cd);
|
|
Datum rank_cd(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(rank_cd_def);
|
|
Datum rank_cd_def(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(get_covers);
|
|
Datum get_covers(PG_FUNCTION_ARGS);
|
|
|
|
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
|
|
|
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
|
|
|
|
#define DEF_NORM_METHOD 0
|
|
|
|
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
|
|
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
|
|
|
|
/*
|
|
* Returns a weight of a word collocation
|
|
*/
|
|
static float4
|
|
word_distance(int4 w)
|
|
{
|
|
if (w > 100)
|
|
return 1e-30;
|
|
|
|
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
|
}
|
|
|
|
static int
|
|
cnt_length(tsvector * t)
|
|
{
|
|
WordEntry *ptr = ARRPTR(t),
|
|
*end = (WordEntry *) STRPTR(t);
|
|
int len = 0,
|
|
clen;
|
|
|
|
while (ptr < end)
|
|
{
|
|
if ((clen = POSDATALEN(t, ptr)) == 0)
|
|
len += 1;
|
|
else
|
|
len += clen;
|
|
ptr++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static int4
|
|
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
|
|
{
|
|
if (ptr->len == item->length)
|
|
return strncmp(
|
|
eval + ptr->pos,
|
|
qval + item->distance,
|
|
item->length);
|
|
|
|
return (ptr->len > item->length) ? 1 : -1;
|
|
}
|
|
|
|
static WordEntry *
|
|
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
|
|
{
|
|
WordEntry *StopLow = ARRPTR(t);
|
|
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
|
WordEntry *StopMiddle;
|
|
int difference;
|
|
|
|
/* Loop invariant: StopLow <= item < StopHigh */
|
|
|
|
while (StopLow < StopHigh)
|
|
{
|
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
|
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
|
if (difference == 0)
|
|
return StopMiddle;
|
|
else if (difference < 0)
|
|
StopLow = StopMiddle + 1;
|
|
else
|
|
StopHigh = StopMiddle;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static char *SortAndUniqOperand = NULL;
|
|
|
|
static int
|
|
compareITEM(const void *a, const void *b)
|
|
{
|
|
if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
|
|
return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
|
|
SortAndUniqOperand + (*(ITEM **) b)->distance,
|
|
(*(ITEM **) b)->length);
|
|
|
|
return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
|
|
}
|
|
|
|
/*
 * Collects the VAL items of a query into a sorted array without duplicates.
 *
 * operand - base of the query's operand area (used by compareITEM)
 * item    - first element of the query's item array
 * size    - in: number of elements in "item"; out: number of pointers
 *           stored in the result
 *
 * Returns a palloc'd array of pointers into "item"; the caller pfree()s it.
 * The array is always allocated for the incoming *size elements.
 */
static ITEM **
SortAndUniqItems(char *operand, ITEM * item, int *size)
{
	ITEM	  **res;
	int			nitems = *size;
	int			nvals = 0;
	int			last;
	int			i;

	res = (ITEM **) palloc(sizeof(ITEM *) * nitems);

	/* keep only value items; operators are skipped */
	for (i = 0; i < nitems; i++)
	{
		if (item[i].type == VAL)
			res[nvals++] = &item[i];
	}

	*size = nvals;
	if (nvals < 2)
		return res;

	/* the comparator reads the operand area through this file-level variable */
	SortAndUniqOperand = operand;

	/* the elements are ITEM *, so size them from the array itself */
	qsort(res, nvals, sizeof(res[0]), compareITEM);

	/* squeeze out adjacent duplicates in place */
	last = 0;
	for (i = 1; i < nvals; i++)
	{
		if (compareITEM((void *) &res[i], (void *) &res[last]) != 0)
			res[++last] = res[i];
	}

	*size = last + 1;
	return res;
}
|
|
|
|
/*
 * Stand-in position data for entries that carry no positions.  Its users
 * overwrite the first uint16 with a count (lengthof(POSNULL) - 1) and read
 * the position data from POSNULL + 1.
 */
static WordEntryPos POSNULL[] = {0, 0};
|
|
|
|
static float
|
|
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
|
|
{
|
|
uint16 **pos;
|
|
int i,
|
|
k,
|
|
l,
|
|
p;
|
|
WordEntry *entry;
|
|
WordEntryPos *post,
|
|
*ct;
|
|
int4 dimt,
|
|
lenct,
|
|
dist;
|
|
float res = -1.0;
|
|
ITEM **item;
|
|
int size = q->size;
|
|
|
|
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
|
if (size < 2)
|
|
{
|
|
pfree(item);
|
|
return calc_rank_or(w, t, q);
|
|
}
|
|
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
|
memset(pos, 0, sizeof(uint16 *) * q->size);
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
entry = find_wordentry(t, q, item[i]);
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
|
else
|
|
pos[i] = (uint16 *) POSNULL;
|
|
|
|
|
|
dimt = *(uint16 *) (pos[i]);
|
|
post = (WordEntryPos *) (pos[i] + 1);
|
|
for (k = 0; k < i; k++)
|
|
{
|
|
if (!pos[k])
|
|
continue;
|
|
lenct = *(uint16 *) (pos[k]);
|
|
ct = (WordEntryPos *) (pos[k] + 1);
|
|
for (l = 0; l < dimt; l++)
|
|
{
|
|
for (p = 0; p < lenct; p++)
|
|
{
|
|
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
|
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
|
{
|
|
float curw;
|
|
|
|
if (!dist)
|
|
dist = MAXENTRYPOS;
|
|
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
|
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
pfree(pos);
|
|
pfree(item);
|
|
return res;
|
|
}
|
|
|
|
static float
|
|
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
|
|
{
|
|
WordEntry *entry;
|
|
WordEntryPos *post;
|
|
int4 dimt,
|
|
j,
|
|
i;
|
|
float res = 0.0;
|
|
ITEM **item;
|
|
int size = q->size;
|
|
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
float resj,wjm;
|
|
int4 jm;
|
|
entry = find_wordentry(t, q, item[i]);
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
{
|
|
dimt = POSDATALEN(t, entry);
|
|
post = POSDATAPTR(t, entry);
|
|
}
|
|
else
|
|
{
|
|
dimt = *(uint16 *) POSNULL;
|
|
post = POSNULL + 1;
|
|
}
|
|
|
|
resj = 0.0;
|
|
wjm = -1.0;
|
|
jm = 0;
|
|
for (j = 0; j < dimt; j++)
|
|
{
|
|
resj = resj + wpos(post[j])/((j+1)*(j+1));
|
|
if ( wpos(post[j]) > wjm ) {
|
|
wjm = wpos(post[j]);
|
|
jm = j;
|
|
}
|
|
}
|
|
/*
|
|
limit (sum(i/i^2),i->inf) = pi^2/6
|
|
resj = sum(wi/i^2),i=1,noccurence,
|
|
wi - should be sorted desc,
|
|
don't sort for now, just choose maximum weight. This should be corrected
|
|
Oleg Bartunov
|
|
*/
|
|
res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685;
|
|
}
|
|
if ( size > 0 )
|
|
res = res /size;
|
|
pfree(item);
|
|
return res;
|
|
}
|
|
|
|
static float
|
|
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
|
{
|
|
ITEM *item = GETQUERY(q);
|
|
float res = 0.0;
|
|
int len;
|
|
|
|
if (!t->size || !q->size)
|
|
return 0.0;
|
|
|
|
res = (item->type != VAL && item->val == (int4) '&') ?
|
|
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
|
|
|
if (res < 0)
|
|
res = 1e-20;
|
|
|
|
switch (method)
|
|
{
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
res /= log((float) (cnt_length(t) + 1)) / log(2.0);
|
|
break;
|
|
case 2:
|
|
len = cnt_length(t);
|
|
if (len > 0)
|
|
res /= (float) len;
|
|
break;
|
|
default:
|
|
/* internal error */
|
|
elog(ERROR, "unrecognized normalization method: %d", method);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
Datum
|
|
rank(PG_FUNCTION_ARGS)
|
|
{
|
|
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
|
int method = DEF_NORM_METHOD;
|
|
float res = 0.0;
|
|
float ws[lengthof(weights)];
|
|
int i;
|
|
|
|
if (ARR_NDIM(win) != 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array of weight must be one-dimensional")));
|
|
|
|
if (ARRNELEMS(win) < lengthof(weights))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
|
errmsg("array of weight is too short")));
|
|
|
|
for (i = 0; i < lengthof(weights); i++)
|
|
{
|
|
ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i];
|
|
if (ws[i] > 1.0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("weight out of range")));
|
|
}
|
|
|
|
if (PG_NARGS() == 4)
|
|
method = PG_GETARG_INT32(3);
|
|
|
|
res = calc_rank(ws, txt, query, method);
|
|
|
|
PG_FREE_IF_COPY(win, 0);
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
Datum
|
|
rank_def(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
float res = 0.0;
|
|
int method = DEF_NORM_METHOD;
|
|
|
|
if (PG_NARGS() == 3)
|
|
method = PG_GETARG_INT32(2);
|
|
|
|
res = calc_rank(weights, txt, query, method);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
|
|
typedef struct
|
|
{
|
|
ITEM **item;
|
|
int16 nitem;
|
|
bool needfree;
|
|
int32 pos;
|
|
} DocRepresentation;
|
|
|
|
static int
|
|
compareDocR(const void *a, const void *b)
|
|
{
|
|
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
|
return 0;
|
|
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
|
}
|
|
|
|
static bool
|
|
checkcondition_ITEM(void *checkval, ITEM * val) {
|
|
return (bool)(val->istrue);
|
|
}
|
|
|
|
static void
|
|
reset_istrue_flag(QUERYTYPE *query) {
|
|
ITEM *item = GETQUERY(query);
|
|
int i;
|
|
|
|
/* reset istrue flag */
|
|
for(i = 0; i < query->size; i++) {
|
|
if ( item->type == VAL )
|
|
item->istrue = 0;
|
|
item++;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
|
|
{
|
|
DocRepresentation *ptr;
|
|
int lastpos = *pos;
|
|
int i;
|
|
bool found=false;
|
|
|
|
reset_istrue_flag(query);
|
|
|
|
*p = 0x7fffffff;
|
|
*q = 0;
|
|
ptr = doc + *pos;
|
|
|
|
/* find upper bound of cover from current position, move up */
|
|
while (ptr - doc < len) {
|
|
for(i=0;i<ptr->nitem;i++)
|
|
ptr->item[i]->istrue = 1;
|
|
if ( TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM) ) {
|
|
if (ptr->pos > *q) {
|
|
*q = ptr->pos;
|
|
lastpos = ptr - doc;
|
|
found = true;
|
|
}
|
|
break;
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
if (!found)
|
|
return false;
|
|
|
|
reset_istrue_flag(query);
|
|
|
|
ptr = doc + lastpos;
|
|
|
|
/* find lower bound of cover from founded upper bound, move down */
|
|
while (ptr >= doc ) {
|
|
for(i=0;i<ptr->nitem;i++)
|
|
ptr->item[i]->istrue = 1;
|
|
if ( TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM) ) {
|
|
if (ptr->pos < *p)
|
|
*p = ptr->pos;
|
|
break;
|
|
}
|
|
ptr--;
|
|
}
|
|
|
|
if ( *p <= *q ) {
|
|
/* set position for next try to next lexeme after begining of founded cover */
|
|
*pos= (ptr-doc) + 1;
|
|
return true;
|
|
}
|
|
|
|
(*pos)++;
|
|
return Cover( doc, len, query, pos, p, q );
|
|
}
|
|
|
|
static DocRepresentation *
|
|
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
|
{
|
|
ITEM *item = GETQUERY(query);
|
|
WordEntry *entry;
|
|
WordEntryPos *post;
|
|
int4 dimt,
|
|
j,
|
|
i;
|
|
int len = query->size * 4,
|
|
cur = 0;
|
|
DocRepresentation *doc;
|
|
|
|
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
|
SortAndUniqOperand = GETOPERAND(query);
|
|
reset_istrue_flag(query);
|
|
|
|
for (i = 0; i < query->size; i++)
|
|
{
|
|
if (item[i].type != VAL || item[i].istrue)
|
|
continue;
|
|
|
|
entry = find_wordentry(txt, query, &(item[i]));
|
|
if (!entry)
|
|
continue;
|
|
|
|
if (entry->haspos)
|
|
{
|
|
dimt = POSDATALEN(txt, entry);
|
|
post = POSDATAPTR(txt, entry);
|
|
}
|
|
else
|
|
{
|
|
dimt = *(uint16 *) POSNULL;
|
|
post = POSNULL + 1;
|
|
}
|
|
|
|
while (cur + dimt >= len)
|
|
{
|
|
len *= 2;
|
|
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
|
}
|
|
|
|
for (j = 0; j < dimt; j++)
|
|
{
|
|
if ( j == 0 ) {
|
|
ITEM *kptr, *iptr = item+i;
|
|
int k;
|
|
|
|
doc[cur].needfree = false;
|
|
doc[cur].nitem = 0;
|
|
doc[cur].item = (ITEM**)palloc( sizeof(ITEM*) * query->size );
|
|
|
|
for(k=0; k < query->size; k++) {
|
|
kptr = item+k;
|
|
if ( k==i || ( item[k].type == VAL && compareITEM( &kptr, &iptr ) == 0 ) ) {
|
|
doc[cur].item[ doc[cur].nitem ] = item+k;
|
|
doc[cur].nitem++;
|
|
kptr->istrue = 1;
|
|
}
|
|
}
|
|
} else {
|
|
doc[cur].needfree = false;
|
|
doc[cur].nitem = doc[cur-1].nitem;
|
|
doc[cur].item = doc[cur-1].item;
|
|
}
|
|
doc[cur].pos = WEP_GETPOS(post[j]);
|
|
cur++;
|
|
}
|
|
}
|
|
|
|
*doclen = cur;
|
|
|
|
if (cur > 0)
|
|
{
|
|
if (cur > 1)
|
|
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
|
return doc;
|
|
}
|
|
|
|
pfree(doc);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
Datum
|
|
rank_cd(PG_FUNCTION_ARGS)
|
|
{
|
|
int K = PG_GETARG_INT32(0);
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
|
|
int method = DEF_NORM_METHOD;
|
|
DocRepresentation *doc;
|
|
float res = 0.0;
|
|
int p = 0,
|
|
q = 0,
|
|
len,
|
|
cur,
|
|
i,
|
|
doclen=0;
|
|
|
|
doc = get_docrep(txt, query, &doclen);
|
|
if (!doc)
|
|
{
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
PG_RETURN_FLOAT4(0.0);
|
|
}
|
|
|
|
cur = 0;
|
|
if (K <= 0)
|
|
K = 4;
|
|
while (Cover(doc, doclen, query, &cur, &p, &q))
|
|
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
|
|
|
|
if (PG_NARGS() == 4)
|
|
method = PG_GETARG_INT32(3);
|
|
|
|
switch (method)
|
|
{
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
res /= log((float) (cnt_length(txt) + 1));
|
|
break;
|
|
case 2:
|
|
len = cnt_length(txt);
|
|
if (len > 0)
|
|
res /= (float) len;
|
|
break;
|
|
default:
|
|
/* internal error */
|
|
elog(ERROR, "unrecognized normalization method: %d", method);
|
|
}
|
|
|
|
for(i=0;i<doclen;i++)
|
|
if ( doc[i].needfree )
|
|
pfree( doc[i].item );
|
|
pfree(doc);
|
|
PG_FREE_IF_COPY(txt, 1);
|
|
PG_FREE_IF_COPY(query, 2);
|
|
|
|
PG_RETURN_FLOAT4(res);
|
|
}
|
|
|
|
|
|
Datum
|
|
rank_cd_def(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(
|
|
rank_cd,
|
|
Int32GetDatum(-1),
|
|
PG_GETARG_DATUM(0),
|
|
PG_GETARG_DATUM(1),
|
|
(PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
|
));
|
|
}
|
|
|
|
/**************debug*************/
|
|
|
|
typedef struct
|
|
{
|
|
char *w;
|
|
int2 len;
|
|
int2 pos;
|
|
int2 start;
|
|
int2 finish;
|
|
} DocWord;
|
|
|
|
static int
|
|
compareDocWord(const void *a, const void *b)
|
|
{
|
|
if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
|
|
return 0;
|
|
return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
|
|
}
|
|
|
|
|
|
Datum
|
|
get_covers(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
|
|
WordEntry *pptr = ARRPTR(txt);
|
|
int i,
|
|
dlen = 0,
|
|
j,
|
|
cur = 0,
|
|
len = 0,
|
|
rlen;
|
|
DocWord *dw,
|
|
*dwptr;
|
|
text *out;
|
|
char *cptr;
|
|
DocRepresentation *doc;
|
|
int pos = 0,
|
|
p,
|
|
q,
|
|
olddwpos = 0;
|
|
int ncover = 1;
|
|
|
|
doc = get_docrep(txt, query, &rlen);
|
|
|
|
if (!doc)
|
|
{
|
|
out = palloc(VARHDRSZ);
|
|
VARATT_SIZEP(out) = VARHDRSZ;
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
for (i = 0; i < txt->size; i++)
|
|
{
|
|
if (!pptr[i].haspos)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("no pos info")));
|
|
dlen += POSDATALEN(txt, &(pptr[i]));
|
|
}
|
|
|
|
dwptr = dw = palloc(sizeof(DocWord) * dlen);
|
|
memset(dw, 0, sizeof(DocWord) * dlen);
|
|
|
|
for (i = 0; i < txt->size; i++)
|
|
{
|
|
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
|
|
|
|
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
|
|
{
|
|
dw[cur].w = STRPTR(txt) + pptr[i].pos;
|
|
dw[cur].len = pptr[i].len;
|
|
dw[cur].pos = WEP_GETPOS(posdata[j]);
|
|
cur++;
|
|
}
|
|
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
|
|
}
|
|
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
|
|
|
while (Cover(doc, rlen, query, &pos, &p, &q))
|
|
{
|
|
dwptr = dw + olddwpos;
|
|
while (dwptr->pos < p && dwptr - dw < dlen)
|
|
dwptr++;
|
|
olddwpos = dwptr - dw;
|
|
dwptr->start = ncover;
|
|
while (dwptr->pos < q + 1 && dwptr - dw < dlen)
|
|
dwptr++;
|
|
(dwptr - 1)->finish = ncover;
|
|
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
|
|
ncover++;
|
|
}
|
|
|
|
out = palloc(VARHDRSZ + len);
|
|
cptr = ((char *) out) + VARHDRSZ;
|
|
dwptr = dw;
|
|
|
|
while (dwptr - dw < dlen)
|
|
{
|
|
if (dwptr->start)
|
|
{
|
|
sprintf(cptr, "{%d ", dwptr->start);
|
|
cptr = strchr(cptr, '\0');
|
|
}
|
|
memcpy(cptr, dwptr->w, dwptr->len);
|
|
cptr += dwptr->len;
|
|
*cptr = ' ';
|
|
cptr++;
|
|
if (dwptr->finish)
|
|
{
|
|
sprintf(cptr, "}%d ", dwptr->finish);
|
|
cptr = strchr(cptr, '\0');
|
|
}
|
|
dwptr++;
|
|
}
|
|
|
|
VARATT_SIZEP(out) = cptr - ((char *) out);
|
|
|
|
pfree(dw);
|
|
for(i=0;i<rlen;i++)
|
|
if ( doc[i].needfree )
|
|
pfree( doc[i].item );
|
|
pfree(doc);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
PG_FREE_IF_COPY(query, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|