mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-06 15:24:56 +08:00
234a02b2a8
Get rid of VARATT_SIZE and VARATT_DATA, which were simply redundant with VARSIZE and VARDATA, and as a consequence almost no code was using the longer names. Rename the length fields of struct varlena and various derived structures to catch anyplace that was accessing them directly; and clean up various places so caught. In itself this patch doesn't change any behavior at all, but it is necessary infrastructure if we hope to play any games with the representation of varlena headers. Greg Stark and Tom Lane
1107 lines
25 KiB
C
1107 lines
25 KiB
C
/*
|
|
* In/Out definitions for tsvector type
|
|
* Internal structure:
|
|
* string of values, array of lexeme positions in the string and their lengths
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "catalog/namespace.h"
|
|
#include "commands/trigger.h"
|
|
#include "executor/spi.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "storage/bufpage.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/pg_locale.h"
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include <ctype.h>
|
|
#include "tsvector.h"
|
|
#include "query.h"
|
|
#include "ts_cfg.h"
|
|
#include "common.h"
|
|
|
|
PG_FUNCTION_INFO_V1(tsvector_in);
|
|
Datum tsvector_in(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(tsvector_out);
|
|
Datum tsvector_out(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsvector);
|
|
Datum to_tsvector(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsvector_current);
|
|
Datum to_tsvector_current(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(to_tsvector_name);
|
|
Datum to_tsvector_name(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(tsearch2);
|
|
Datum tsearch2(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(tsvector_length);
|
|
Datum tsvector_length(PG_FUNCTION_ARGS);
|
|
|
|
/*
|
|
* in/out text index type
|
|
*/
|
|
static int
|
|
comparePos(const void *a, const void *b)
|
|
{
|
|
if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
|
|
return 0;
|
|
return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
|
|
}
|
|
|
|
static int
|
|
uniquePos(WordEntryPos * a, int4 l)
|
|
{
|
|
WordEntryPos *ptr,
|
|
*res;
|
|
|
|
res = a;
|
|
if (l == 1)
|
|
return l;
|
|
|
|
qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
|
|
|
|
ptr = a + 1;
|
|
while (ptr - a < l)
|
|
{
|
|
if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
|
|
{
|
|
res++;
|
|
*res = *ptr;
|
|
if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1)
|
|
break;
|
|
}
|
|
else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
|
|
WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
|
|
ptr++;
|
|
}
|
|
return res + 1 - a;
|
|
}
|
|
|
|
static int
|
|
compareentry(const void *a, const void *b, void *arg)
|
|
{
|
|
char *BufferStr = (char *) arg;
|
|
|
|
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
|
|
{
|
|
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
|
|
&BufferStr[((WordEntryIN *) b)->entry.pos],
|
|
((WordEntryIN *) a)->entry.len);
|
|
}
|
|
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
|
|
}
|
|
|
|
static int
|
|
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
|
{
|
|
WordEntryIN *ptr,
|
|
*res;
|
|
|
|
res = a;
|
|
if (l == 1)
|
|
{
|
|
if (a->entry.haspos)
|
|
{
|
|
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
|
|
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
|
|
}
|
|
return l;
|
|
}
|
|
|
|
ptr = a + 1;
|
|
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
|
|
|
|
while (ptr - a < l)
|
|
{
|
|
if (!(ptr->entry.len == res->entry.len &&
|
|
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
|
|
{
|
|
if (res->entry.haspos)
|
|
{
|
|
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
|
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
|
}
|
|
*outbuflen += SHORTALIGN(res->entry.len);
|
|
res++;
|
|
memcpy(res, ptr, sizeof(WordEntryIN));
|
|
}
|
|
else if (ptr->entry.haspos)
|
|
{
|
|
if (res->entry.haspos)
|
|
{
|
|
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
|
|
|
|
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
|
|
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
|
|
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
|
|
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
|
|
pfree(ptr->pos);
|
|
}
|
|
else
|
|
{
|
|
res->entry.haspos = 1;
|
|
res->pos = ptr->pos;
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
if (res->entry.haspos)
|
|
{
|
|
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
|
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
|
}
|
|
*outbuflen += SHORTALIGN(res->entry.len);
|
|
|
|
return res + 1 - a;
|
|
}
|
|
|
|
/*
 * States of the gettoken_tsvector() scanner.
 */
#define WAITWORD		1		/* looking for the start of a lexeme */
#define WAITENDWORD		2		/* inside an unquoted lexeme */
#define WAITNEXTCHAR	3		/* after a backslash: next char is literal */
#define WAITENDCMPLX	4		/* inside a quoted lexeme */
#define WAITPOSINFO		5		/* after a quoted lexeme: ':' may follow */
#define INPOSINFO		6		/* a position number must start here */
#define WAITPOSDELIM	7		/* after a position: ',', weight or end */
#define WAITCHARCMPLX	8		/* quote seen inside a quoted lexeme */

/*
 * Double the scanner's word buffer when fewer than one maximum-length
 * character of room is left, keeping curpos at the same offset.
 * Expects a TI_IN_STATE pointer named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
	if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
	{ \
		int4 used = state->curpos - state->word; \
		state->len *= 2; \
		state->word = (char*)repalloc( (void*)state->word, state->len ); \
		state->curpos = state->word + used; \
	} \
} while (0)
|
|
|
|
|
|
int4
|
|
gettoken_tsvector(TI_IN_STATE * state)
|
|
{
|
|
int4 oldstate = 0;
|
|
|
|
state->curpos = state->word;
|
|
state->state = WAITWORD;
|
|
state->alen = 0;
|
|
|
|
while (1)
|
|
{
|
|
if (state->state == WAITWORD)
|
|
{
|
|
if (*(state->prsbuf) == '\0')
|
|
return 0;
|
|
else if (t_iseq(state->prsbuf, '\''))
|
|
state->state = WAITENDCMPLX;
|
|
else if (t_iseq(state->prsbuf, '\\'))
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDWORD;
|
|
}
|
|
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
else if (!t_isspace(state->prsbuf))
|
|
{
|
|
COPYCHAR(state->curpos, state->prsbuf);
|
|
state->curpos += pg_mblen(state->prsbuf);
|
|
state->state = WAITENDWORD;
|
|
}
|
|
}
|
|
else if (state->state == WAITNEXTCHAR)
|
|
{
|
|
if (*(state->prsbuf) == '\0')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("there is no escaped character")));
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
COPYCHAR(state->curpos, state->prsbuf);
|
|
state->curpos += pg_mblen(state->prsbuf);
|
|
state->state = oldstate;
|
|
}
|
|
}
|
|
else if (state->state == WAITENDWORD)
|
|
{
|
|
if (t_iseq(state->prsbuf, '\\'))
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDWORD;
|
|
}
|
|
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
|
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
|
|
{
|
|
RESIZEPRSBUF;
|
|
if (state->curpos == state->word)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
*(state->curpos) = '\0';
|
|
return 1;
|
|
}
|
|
else if (t_iseq(state->prsbuf, ':'))
|
|
{
|
|
if (state->curpos == state->word)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
*(state->curpos) = '\0';
|
|
if (state->oprisdelim)
|
|
return 1;
|
|
else
|
|
state->state = INPOSINFO;
|
|
}
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
COPYCHAR(state->curpos, state->prsbuf);
|
|
state->curpos += pg_mblen(state->prsbuf);
|
|
}
|
|
}
|
|
else if (state->state == WAITENDCMPLX)
|
|
{
|
|
if (t_iseq(state->prsbuf, '\''))
|
|
{
|
|
state->state = WAITCHARCMPLX;
|
|
}
|
|
else if (t_iseq(state->prsbuf, '\\'))
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDCMPLX;
|
|
}
|
|
else if (*(state->prsbuf) == '\0')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
COPYCHAR(state->curpos, state->prsbuf);
|
|
state->curpos += pg_mblen(state->prsbuf);
|
|
}
|
|
}
|
|
else if (state->state == WAITCHARCMPLX)
|
|
{
|
|
if (t_iseq(state->prsbuf, '\''))
|
|
{
|
|
RESIZEPRSBUF;
|
|
COPYCHAR(state->curpos, state->prsbuf);
|
|
state->curpos += pg_mblen(state->prsbuf);
|
|
state->state = WAITENDCMPLX;
|
|
}
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
*(state->curpos) = '\0';
|
|
if (state->curpos == state->word)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
if (state->oprisdelim)
|
|
{
|
|
/* state->prsbuf+=pg_mblen(state->prsbuf); */
|
|
return 1;
|
|
}
|
|
else
|
|
state->state = WAITPOSINFO;
|
|
continue; /* recheck current character */
|
|
}
|
|
}
|
|
else if (state->state == WAITPOSINFO)
|
|
{
|
|
if (t_iseq(state->prsbuf, ':'))
|
|
state->state = INPOSINFO;
|
|
else
|
|
return 1;
|
|
}
|
|
else if (state->state == INPOSINFO)
|
|
{
|
|
if (t_isdigit(state->prsbuf))
|
|
{
|
|
if (state->alen == 0)
|
|
{
|
|
state->alen = 4;
|
|
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
|
|
*(uint16 *) (state->pos) = 0;
|
|
}
|
|
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
|
|
{
|
|
state->alen *= 2;
|
|
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
|
|
}
|
|
(*(uint16 *) (state->pos))++;
|
|
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
|
|
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("wrong position info")));
|
|
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
|
|
state->state = WAITPOSDELIM;
|
|
}
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
}
|
|
else if (state->state == WAITPOSDELIM)
|
|
{
|
|
if (t_iseq(state->prsbuf, ','))
|
|
state->state = INPOSINFO;
|
|
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
|
|
{
|
|
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
|
|
}
|
|
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
|
|
{
|
|
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
|
|
}
|
|
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
|
|
{
|
|
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
|
|
}
|
|
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
|
|
{
|
|
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
|
|
}
|
|
else if (t_isspace(state->prsbuf) ||
|
|
*(state->prsbuf) == '\0')
|
|
return 1;
|
|
else if (!t_isdigit(state->prsbuf))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("syntax error")));
|
|
}
|
|
else
|
|
/* internal error */
|
|
elog(ERROR, "internal error");
|
|
|
|
/* get next char */
|
|
state->prsbuf += pg_mblen(state->prsbuf);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
Datum
|
|
tsvector_in(PG_FUNCTION_ARGS)
|
|
{
|
|
char *buf = PG_GETARG_CSTRING(0);
|
|
TI_IN_STATE state;
|
|
WordEntryIN *arr;
|
|
WordEntry *inarr;
|
|
int4 len = 0,
|
|
totallen = 64;
|
|
tsvector *in;
|
|
char *tmpbuf,
|
|
*cur;
|
|
int4 i,
|
|
buflen = 256;
|
|
|
|
SET_FUNCOID();
|
|
|
|
pg_verifymbstr(buf, strlen(buf), false);
|
|
state.prsbuf = buf;
|
|
state.len = 32;
|
|
state.word = (char *) palloc(state.len);
|
|
state.oprisdelim = false;
|
|
|
|
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
|
|
cur = tmpbuf = (char *) palloc(buflen);
|
|
while (gettoken_tsvector(&state))
|
|
{
|
|
if (len >= totallen)
|
|
{
|
|
totallen *= 2;
|
|
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
|
|
}
|
|
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
|
|
{
|
|
int4 dist = cur - tmpbuf;
|
|
|
|
buflen *= 2;
|
|
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
|
|
cur = tmpbuf + dist;
|
|
}
|
|
if (state.curpos - state.word >= MAXSTRLEN)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("word is too long")));
|
|
arr[len].entry.len = state.curpos - state.word;
|
|
if (cur - tmpbuf > MAXSTRPOS)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("too long value")));
|
|
arr[len].entry.pos = cur - tmpbuf;
|
|
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
|
|
cur += arr[len].entry.len;
|
|
if (state.alen)
|
|
{
|
|
arr[len].entry.haspos = 1;
|
|
arr[len].pos = state.pos;
|
|
}
|
|
else
|
|
arr[len].entry.haspos = 0;
|
|
len++;
|
|
}
|
|
pfree(state.word);
|
|
|
|
if (len > 0)
|
|
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
|
else
|
|
buflen = 0;
|
|
totallen = CALCDATASIZE(len, buflen);
|
|
in = (tsvector *) palloc0(totallen);
|
|
SET_VARSIZE(in, totallen);
|
|
in->size = len;
|
|
cur = STRPTR(in);
|
|
inarr = ARRPTR(in);
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
|
|
arr[i].entry.pos = cur - STRPTR(in);
|
|
cur += SHORTALIGN(arr[i].entry.len);
|
|
if (arr[i].entry.haspos)
|
|
{
|
|
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
|
|
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
|
|
pfree(arr[i].pos);
|
|
}
|
|
memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
|
|
}
|
|
pfree(tmpbuf);
|
|
pfree(arr);
|
|
PG_RETURN_POINTER(in);
|
|
}
|
|
|
|
Datum
|
|
tsvector_length(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
int4 ret = in->size;
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_INT32(ret);
|
|
}
|
|
|
|
Datum
|
|
tsvector_out(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
char *outbuf;
|
|
int4 i,
|
|
lenbuf = 0,
|
|
pp;
|
|
WordEntry *ptr = ARRPTR(out);
|
|
char *curbegin,
|
|
*curin,
|
|
*curout;
|
|
|
|
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
|
|
for (i = 0; i < out->size; i++)
|
|
{
|
|
lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
|
|
if (ptr[i].haspos)
|
|
lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
|
|
}
|
|
|
|
curout = outbuf = (char *) palloc(lenbuf);
|
|
for (i = 0; i < out->size; i++)
|
|
{
|
|
curbegin = curin = STRPTR(out) + ptr->pos;
|
|
if (i != 0)
|
|
*curout++ = ' ';
|
|
*curout++ = '\'';
|
|
while (curin - curbegin < ptr->len)
|
|
{
|
|
int len = pg_mblen(curin);
|
|
|
|
if (t_iseq(curin, '\''))
|
|
{
|
|
int4 pos = curout - outbuf;
|
|
|
|
outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
|
|
curout = outbuf + pos;
|
|
*curout++ = '\'';
|
|
}
|
|
while (len--)
|
|
*curout++ = *curin++;
|
|
}
|
|
*curout++ = '\'';
|
|
if ((pp = POSDATALEN(out, ptr)) != 0)
|
|
{
|
|
WordEntryPos *wptr;
|
|
|
|
*curout++ = ':';
|
|
wptr = POSDATAPTR(out, ptr);
|
|
while (pp)
|
|
{
|
|
sprintf(curout, "%d", WEP_GETPOS(*wptr));
|
|
curout = strchr(curout, '\0');
|
|
switch (WEP_GETWEIGHT(*wptr))
|
|
{
|
|
case 3:
|
|
*curout++ = 'A';
|
|
break;
|
|
case 2:
|
|
*curout++ = 'B';
|
|
break;
|
|
case 1:
|
|
*curout++ = 'C';
|
|
break;
|
|
case 0:
|
|
default:
|
|
break;
|
|
}
|
|
if (pp > 1)
|
|
*curout++ = ',';
|
|
pp--;
|
|
wptr++;
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
*curout = '\0';
|
|
outbuf[lenbuf - 1] = '\0';
|
|
PG_FREE_IF_COPY(out, 0);
|
|
PG_RETURN_POINTER(outbuf);
|
|
}
|
|
|
|
static int
|
|
compareWORD(const void *a, const void *b)
|
|
{
|
|
if (((TSWORD *) a)->len == ((TSWORD *) b)->len)
|
|
{
|
|
int res = strncmp(
|
|
((TSWORD *) a)->word,
|
|
((TSWORD *) b)->word,
|
|
((TSWORD *) b)->len);
|
|
|
|
if (res == 0)
|
|
return (((TSWORD *) a)->pos.pos > ((TSWORD *) b)->pos.pos) ? 1 : -1;
|
|
return res;
|
|
}
|
|
return (((TSWORD *) a)->len > ((TSWORD *) b)->len) ? 1 : -1;
|
|
}
|
|
|
|
static int
|
|
uniqueWORD(TSWORD * a, int4 l)
|
|
{
|
|
TSWORD *ptr,
|
|
*res;
|
|
int tmppos;
|
|
|
|
if (l == 1)
|
|
{
|
|
tmppos = LIMITPOS(a->pos.pos);
|
|
a->alen = 2;
|
|
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
|
a->pos.apos[0] = 1;
|
|
a->pos.apos[1] = tmppos;
|
|
return l;
|
|
}
|
|
|
|
res = a;
|
|
ptr = a + 1;
|
|
|
|
qsort((void *) a, l, sizeof(TSWORD), compareWORD);
|
|
tmppos = LIMITPOS(a->pos.pos);
|
|
a->alen = 2;
|
|
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
|
a->pos.apos[0] = 1;
|
|
a->pos.apos[1] = tmppos;
|
|
|
|
while (ptr - a < l)
|
|
{
|
|
if (!(ptr->len == res->len &&
|
|
strncmp(ptr->word, res->word, res->len) == 0))
|
|
{
|
|
res++;
|
|
res->len = ptr->len;
|
|
res->word = ptr->word;
|
|
tmppos = LIMITPOS(ptr->pos.pos);
|
|
res->alen = 2;
|
|
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
|
|
res->pos.apos[0] = 1;
|
|
res->pos.apos[1] = tmppos;
|
|
}
|
|
else
|
|
{
|
|
pfree(ptr->word);
|
|
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
|
|
{
|
|
if (res->pos.apos[0] + 1 >= res->alen)
|
|
{
|
|
res->alen *= 2;
|
|
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
|
|
}
|
|
if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
|
|
{
|
|
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
|
|
res->pos.apos[0]++;
|
|
}
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
return res + 1 - a;
|
|
}
|
|
|
|
/*
|
|
* make value of tsvector
|
|
*/
|
|
static tsvector *
|
|
makevalue(PRSTEXT * prs)
|
|
{
|
|
int4 i,
|
|
j,
|
|
lenstr = 0,
|
|
totallen;
|
|
tsvector *in;
|
|
WordEntry *ptr;
|
|
char *str,
|
|
*cur;
|
|
|
|
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
|
for (i = 0; i < prs->curwords; i++)
|
|
{
|
|
lenstr += SHORTALIGN(prs->words[i].len);
|
|
|
|
if (prs->words[i].alen)
|
|
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
|
|
}
|
|
|
|
totallen = CALCDATASIZE(prs->curwords, lenstr);
|
|
in = (tsvector *) palloc0(totallen);
|
|
SET_VARSIZE(in, totallen);
|
|
in->size = prs->curwords;
|
|
|
|
ptr = ARRPTR(in);
|
|
cur = str = STRPTR(in);
|
|
for (i = 0; i < prs->curwords; i++)
|
|
{
|
|
ptr->len = prs->words[i].len;
|
|
if (cur - str > MAXSTRPOS)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
errmsg("value is too big")));
|
|
ptr->pos = cur - str;
|
|
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
|
|
pfree(prs->words[i].word);
|
|
cur += SHORTALIGN(prs->words[i].len);
|
|
if (prs->words[i].alen)
|
|
{
|
|
WordEntryPos *wptr;
|
|
|
|
ptr->haspos = 1;
|
|
*(uint16 *) cur = prs->words[i].pos.apos[0];
|
|
wptr = POSDATAPTR(in, ptr);
|
|
for (j = 0; j < *(uint16 *) cur; j++)
|
|
{
|
|
WEP_SETWEIGHT(wptr[j], 0);
|
|
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
|
|
}
|
|
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
|
|
pfree(prs->words[i].pos.apos);
|
|
}
|
|
else
|
|
ptr->haspos = 0;
|
|
ptr++;
|
|
}
|
|
pfree(prs->words);
|
|
return in;
|
|
}
|
|
|
|
|
|
Datum
|
|
to_tsvector(PG_FUNCTION_ARGS)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(1);
|
|
PRSTEXT prs;
|
|
tsvector *out;
|
|
TSCfgInfo *cfg;
|
|
|
|
SET_FUNCOID();
|
|
cfg = findcfg(PG_GETARG_INT32(0));
|
|
|
|
prs.lenwords = 32;
|
|
prs.curwords = 0;
|
|
prs.pos = 0;
|
|
prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
|
|
|
|
parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
|
PG_FREE_IF_COPY(in, 1);
|
|
|
|
if (prs.curwords)
|
|
out = makevalue(&prs);
|
|
else
|
|
{
|
|
pfree(prs.words);
|
|
out = palloc(CALCDATASIZE(0, 0));
|
|
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
out->size = 0;
|
|
}
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
Datum
|
|
to_tsvector_name(PG_FUNCTION_ARGS)
|
|
{
|
|
text *cfg = PG_GETARG_TEXT_P(0);
|
|
Datum res;
|
|
|
|
SET_FUNCOID();
|
|
res = DirectFunctionCall3(
|
|
to_tsvector,
|
|
Int32GetDatum(name2id_cfg(cfg)),
|
|
PG_GETARG_DATUM(1),
|
|
(Datum) 0
|
|
);
|
|
|
|
PG_FREE_IF_COPY(cfg, 0);
|
|
PG_RETURN_DATUM(res);
|
|
}
|
|
|
|
Datum
|
|
to_tsvector_current(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum res;
|
|
|
|
SET_FUNCOID();
|
|
res = DirectFunctionCall3(
|
|
to_tsvector,
|
|
Int32GetDatum(get_currcfg()),
|
|
PG_GETARG_DATUM(0),
|
|
(Datum) 0
|
|
);
|
|
|
|
PG_RETURN_DATUM(res);
|
|
}
|
|
|
|
static Oid
|
|
findFunc(char *fname)
|
|
{
|
|
FuncCandidateList clist,
|
|
ptr;
|
|
Oid funcid = InvalidOid;
|
|
List *names = list_make1(makeString(fname));
|
|
|
|
ptr = clist = FuncnameGetCandidates(names, 1);
|
|
list_free(names);
|
|
|
|
if (!ptr)
|
|
return funcid;
|
|
|
|
while (ptr)
|
|
{
|
|
if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
|
|
funcid = ptr->oid;
|
|
clist = ptr->next;
|
|
pfree(ptr);
|
|
ptr = clist;
|
|
}
|
|
|
|
return funcid;
|
|
}
|
|
|
|
/*
|
|
* Trigger
|
|
*/
|
|
Datum
|
|
tsearch2(PG_FUNCTION_ARGS)
|
|
{
|
|
TriggerData *trigdata;
|
|
Trigger *trigger;
|
|
Relation rel;
|
|
HeapTuple rettuple = NULL;
|
|
int numidxattr,
|
|
i;
|
|
PRSTEXT prs;
|
|
Datum datum = (Datum) 0;
|
|
Oid funcoid = InvalidOid;
|
|
TSCfgInfo *cfg;
|
|
|
|
SET_FUNCOID();
|
|
cfg = findcfg(get_currcfg());
|
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo))
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: Not fired by trigger manager");
|
|
|
|
trigdata = (TriggerData *) fcinfo->context;
|
|
if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: Cannot process STATEMENT events");
|
|
if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: Must be fired BEFORE event");
|
|
|
|
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
|
|
rettuple = trigdata->tg_trigtuple;
|
|
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
|
|
rettuple = trigdata->tg_newtuple;
|
|
else
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: Unknown event");
|
|
|
|
trigger = trigdata->tg_trigger;
|
|
rel = trigdata->tg_relation;
|
|
|
|
if (trigger->tgnargs < 2)
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
|
|
|
|
numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
|
|
if (numidxattr == SPI_ERROR_NOATTRIBUTE)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
errmsg("tsvector column \"%s\" does not exist",
|
|
trigger->tgargs[0])));
|
|
|
|
prs.lenwords = 32;
|
|
prs.curwords = 0;
|
|
prs.pos = 0;
|
|
prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
|
|
|
|
/* find all words in indexable column */
|
|
for (i = 1; i < trigger->tgnargs; i++)
|
|
{
|
|
int numattr;
|
|
Oid oidtype;
|
|
Datum txt_toasted;
|
|
bool isnull;
|
|
text *txt;
|
|
|
|
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
|
|
if (numattr == SPI_ERROR_NOATTRIBUTE)
|
|
{
|
|
funcoid = findFunc(trigger->tgargs[i]);
|
|
if (funcoid == InvalidOid)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
errmsg("could not find function or field \"%s\"",
|
|
trigger->tgargs[i])));
|
|
|
|
continue;
|
|
}
|
|
oidtype = SPI_gettypeid(rel->rd_att, numattr);
|
|
/* We assume char() and varchar() are binary-equivalent to text */
|
|
if (!(oidtype == TEXTOID ||
|
|
oidtype == VARCHAROID ||
|
|
oidtype == BPCHAROID))
|
|
{
|
|
elog(WARNING, "TSearch: '%s' is not of character type",
|
|
trigger->tgargs[i]);
|
|
continue;
|
|
}
|
|
txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
|
|
if (isnull)
|
|
continue;
|
|
|
|
if (funcoid != InvalidOid)
|
|
{
|
|
text *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
|
|
funcoid,
|
|
PointerGetDatum(txt_toasted)
|
|
));
|
|
|
|
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
|
|
if (txt == txttmp)
|
|
txt_toasted = PointerGetDatum(txt);
|
|
}
|
|
else
|
|
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
|
|
|
|
parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
|
|
if (txt != (text *) DatumGetPointer(txt_toasted))
|
|
pfree(txt);
|
|
}
|
|
|
|
/* make tsvector value */
|
|
if (prs.curwords)
|
|
{
|
|
datum = PointerGetDatum(makevalue(&prs));
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
|
|
&datum, NULL);
|
|
pfree(DatumGetPointer(datum));
|
|
}
|
|
else
|
|
{
|
|
tsvector *out = palloc(CALCDATASIZE(0, 0));
|
|
|
|
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
out->size = 0;
|
|
datum = PointerGetDatum(out);
|
|
pfree(prs.words);
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
|
|
&datum, NULL);
|
|
}
|
|
|
|
if (rettuple == NULL)
|
|
/* internal error */
|
|
elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
|
|
|
|
return PointerGetDatum(rettuple);
|
|
}
|
|
|
|
static int
|
|
silly_cmp_tsvector(const tsvector * a, const tsvector * b)
|
|
{
|
|
if (VARSIZE(a) < VARSIZE(b))
|
|
return -1;
|
|
else if (VARSIZE(a) > VARSIZE(b))
|
|
return 1;
|
|
else if (a->size < b->size)
|
|
return -1;
|
|
else if (a->size > b->size)
|
|
return 1;
|
|
else
|
|
{
|
|
WordEntry *aptr = ARRPTR(a);
|
|
WordEntry *bptr = ARRPTR(b);
|
|
int i = 0;
|
|
int res;
|
|
|
|
|
|
for (i = 0; i < a->size; i++)
|
|
{
|
|
if (aptr->haspos != bptr->haspos)
|
|
{
|
|
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
|
}
|
|
else if (aptr->len != bptr->len)
|
|
{
|
|
return (aptr->len > bptr->len) ? -1 : 1;
|
|
}
|
|
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
|
|
{
|
|
return res;
|
|
}
|
|
else if (aptr->haspos)
|
|
{
|
|
WordEntryPos *ap = POSDATAPTR(a, aptr);
|
|
WordEntryPos *bp = POSDATAPTR(b, bptr);
|
|
int j;
|
|
|
|
if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
|
|
return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
|
|
|
|
for (j = 0; j < POSDATALEN(a, aptr); j++)
|
|
{
|
|
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
|
|
{
|
|
return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
|
|
}
|
|
else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
|
|
{
|
|
return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
|
|
}
|
|
ap++, bp++;
|
|
}
|
|
}
|
|
|
|
aptr++;
|
|
bptr++;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
PG_FUNCTION_INFO_V1(tsvector_cmp);
|
|
PG_FUNCTION_INFO_V1(tsvector_lt);
|
|
PG_FUNCTION_INFO_V1(tsvector_le);
|
|
PG_FUNCTION_INFO_V1(tsvector_eq);
|
|
PG_FUNCTION_INFO_V1(tsvector_ne);
|
|
PG_FUNCTION_INFO_V1(tsvector_ge);
|
|
PG_FUNCTION_INFO_V1(tsvector_gt);
|
|
Datum tsvector_cmp(PG_FUNCTION_ARGS);
|
|
Datum tsvector_lt(PG_FUNCTION_ARGS);
|
|
Datum tsvector_le(PG_FUNCTION_ARGS);
|
|
Datum tsvector_eq(PG_FUNCTION_ARGS);
|
|
Datum tsvector_ne(PG_FUNCTION_ARGS);
|
|
Datum tsvector_ge(PG_FUNCTION_ARGS);
|
|
Datum tsvector_gt(PG_FUNCTION_ARGS);
|
|
|
|
#define RUNCMP \
|
|
tsvector *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));\
|
|
tsvector *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));\
|
|
int res = silly_cmp_tsvector(a,b); \
|
|
PG_FREE_IF_COPY(a,0); \
|
|
PG_FREE_IF_COPY(b,1); \
|
|
|
|
Datum
|
|
tsvector_cmp(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_INT32(res);
|
|
}
|
|
|
|
Datum
|
|
tsvector_lt(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res < 0) ? true : false);
|
|
}
|
|
|
|
Datum
|
|
tsvector_le(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res <= 0) ? true : false);
|
|
}
|
|
|
|
Datum
|
|
tsvector_eq(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res == 0) ? true : false);
|
|
}
|
|
|
|
Datum
|
|
tsvector_ge(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res >= 0) ? true : false);
|
|
}
|
|
|
|
Datum
|
|
tsvector_gt(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res > 0) ? true : false);
|
|
}
|
|
|
|
Datum
|
|
tsvector_ne(PG_FUNCTION_ARGS)
|
|
{
|
|
RUNCMP
|
|
PG_RETURN_BOOL((res != 0) ? true : false);
|
|
}
|