mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-09 08:10:09 +08:00
92288a1cf9
o Change all current CVS messages of NOTICE to WARNING. We were going to do this just before 7.3 beta but it has to be done now, as you will see below. o Change current INFO messages that should be controlled by client_min_messages to NOTICE. o Force remaining INFO messages, like from EXPLAIN, VACUUM VERBOSE, etc. to always go to the client. o Remove INFO from the client_min_messages options and add NOTICE. Seems we do need three non-ERROR elog levels to handle the various behaviors we need for these messages. Regression passed.
594 lines
12 KiB
C
594 lines
12 KiB
C
/*
|
|
* In/Out definitions for txtidx type
|
|
* Internal structure:
|
|
 * string of values, plus an array giving each lexeme's position in that string and its length
|
|
* Teodor Sigaev <teodor@stack.net>
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "utils/elog.h"
|
|
#include "utils/palloc.h"
|
|
#include "utils/builtins.h"
|
|
#include "storage/bufpage.h"
|
|
#include "executor/spi.h"
|
|
#include "commands/trigger.h"
|
|
|
|
#include "utils/pg_locale.h"
|
|
|
|
#include <ctype.h> /* tolower */
|
|
#include "txtidx.h"
|
|
#include "query.h"
|
|
|
|
#include "deflex.h"
|
|
#include "parser.h"
|
|
|
|
#include "morph.h"
|
|
|
|
PG_FUNCTION_INFO_V1(txtidx_in);
|
|
Datum txtidx_in(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(txtidx_out);
|
|
Datum txtidx_out(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(txt2txtidx);
|
|
Datum txt2txtidx(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(tsearch);
|
|
Datum tsearch(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(txtidxsize);
|
|
Datum txtidxsize(PG_FUNCTION_ARGS);
|
|
|
|
/*
|
|
* in/out text index type
|
|
*/
|
|
static char *BufferStr;
|
|
static int
|
|
compareentry(const void *a, const void *b)
|
|
{
|
|
if (((WordEntry *) a)->len == ((WordEntry *) b)->len)
|
|
{
|
|
return strncmp(
|
|
&BufferStr[((WordEntry *) a)->pos],
|
|
&BufferStr[((WordEntry *) b)->pos],
|
|
((WordEntry *) b)->len);
|
|
}
|
|
return (((WordEntry *) a)->len > ((WordEntry *) b)->len) ? 1 : -1;
|
|
}
|
|
|
|
static int
|
|
uniqueentry(WordEntry * a, int4 l, char *buf, int4 *outbuflen)
|
|
{
|
|
WordEntry *ptr,
|
|
*res;
|
|
|
|
res = a;
|
|
*outbuflen = res->len;
|
|
if (l == 1)
|
|
return l;
|
|
|
|
ptr = a + 1;
|
|
BufferStr = buf;
|
|
qsort((void *) a, l, sizeof(int4), compareentry);
|
|
*outbuflen = res->len;
|
|
|
|
while (ptr - a < l)
|
|
{
|
|
if (!(ptr->len == res->len &&
|
|
strncmp(&buf[ptr->pos], &buf[res->pos], res->len) == 0))
|
|
{
|
|
res++;
|
|
res->len = ptr->len;
|
|
res->pos = ptr->pos;
|
|
*outbuflen += res->len;
|
|
|
|
}
|
|
ptr++;
|
|
}
|
|
return res + 1 - a;
|
|
}
|
|
|
|
/* Parser states used by gettoken_txtidx() */
#define WAITWORD		1		/* skipping blanks before a lexeme */
#define WAITENDWORD		2		/* inside an unquoted lexeme */
#define WAITNEXTCHAR	3		/* previous character was a backslash */
#define WAITENDCMPLX	4		/* inside a single-quoted lexeme */

/*
 * Double the lexeme buffer of the variable "state" (which must be in scope
 * at the point of use) when the write cursor has reached its end, keeping
 * the cursor at the same offset inside the reallocated buffer.
 */
#define RESIZEPRSBUF \
	do { \
		int4 used_ = state->curpos - state->word; \
		if (used_ == state->len) \
		{ \
			state->len *= 2; \
			state->word = (char *) repalloc((void *) state->word, state->len); \
			state->curpos = state->word + used_; \
		} \
	} while (0)
|
|
|
|
int4
|
|
gettoken_txtidx(TI_IN_STATE * state)
|
|
{
|
|
int4 oldstate = 0;
|
|
|
|
state->curpos = state->word;
|
|
state->state = WAITWORD;
|
|
|
|
while (1)
|
|
{
|
|
if (state->state == WAITWORD)
|
|
{
|
|
if (*(state->prsbuf) == '\0')
|
|
return 0;
|
|
else if (*(state->prsbuf) == '\'')
|
|
state->state = WAITENDCMPLX;
|
|
else if (*(state->prsbuf) == '\\')
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDWORD;
|
|
}
|
|
else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
|
|
elog(ERROR, "Syntax error");
|
|
else if (*(state->prsbuf) != ' ')
|
|
{
|
|
*(state->curpos) = *(state->prsbuf);
|
|
state->curpos++;
|
|
state->state = WAITENDWORD;
|
|
}
|
|
}
|
|
else if (state->state == WAITNEXTCHAR)
|
|
{
|
|
if (*(state->prsbuf) == '\0')
|
|
elog(ERROR, "There is no escaped character");
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
*(state->curpos) = *(state->prsbuf);
|
|
state->curpos++;
|
|
state->state = oldstate;
|
|
}
|
|
}
|
|
else if (state->state == WAITENDWORD)
|
|
{
|
|
if (*(state->prsbuf) == '\\')
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDWORD;
|
|
}
|
|
else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
|
|
(state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
|
|
{
|
|
RESIZEPRSBUF;
|
|
if (state->curpos == state->word)
|
|
elog(ERROR, "Syntax error");
|
|
*(state->curpos) = '\0';
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
*(state->curpos) = *(state->prsbuf);
|
|
state->curpos++;
|
|
}
|
|
}
|
|
else if (state->state == WAITENDCMPLX)
|
|
{
|
|
if (*(state->prsbuf) == '\'')
|
|
{
|
|
RESIZEPRSBUF;
|
|
*(state->curpos) = '\0';
|
|
if (state->curpos == state->word)
|
|
elog(ERROR, "Syntax error");
|
|
state->prsbuf++;
|
|
return 1;
|
|
}
|
|
else if (*(state->prsbuf) == '\\')
|
|
{
|
|
state->state = WAITNEXTCHAR;
|
|
oldstate = WAITENDCMPLX;
|
|
}
|
|
else if (*(state->prsbuf) == '\0')
|
|
elog(ERROR, "Syntax error");
|
|
else
|
|
{
|
|
RESIZEPRSBUF;
|
|
*(state->curpos) = *(state->prsbuf);
|
|
state->curpos++;
|
|
}
|
|
}
|
|
else
|
|
elog(ERROR, "Inner bug :(");
|
|
state->prsbuf++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
Datum
|
|
txtidx_in(PG_FUNCTION_ARGS)
|
|
{
|
|
char *buf = (char *) PG_GETARG_POINTER(0);
|
|
TI_IN_STATE state;
|
|
WordEntry *arr;
|
|
int4 len = 0,
|
|
totallen = 64;
|
|
txtidx *in;
|
|
char *tmpbuf,
|
|
*cur;
|
|
int4 i,
|
|
buflen = 256;
|
|
|
|
state.prsbuf = buf;
|
|
state.len = 32;
|
|
state.word = (char *) palloc(state.len);
|
|
state.oprisdelim = false;
|
|
|
|
arr = (WordEntry *) palloc(sizeof(WordEntry) * totallen);
|
|
cur = tmpbuf = (char *) palloc(buflen);
|
|
while (gettoken_txtidx(&state))
|
|
{
|
|
if (len == totallen)
|
|
{
|
|
totallen *= 2;
|
|
arr = (WordEntry *) repalloc((void *) arr, sizeof(int4) * totallen);
|
|
}
|
|
while (cur - tmpbuf + state.curpos - state.word >= buflen)
|
|
{
|
|
int4 dist = cur - tmpbuf;
|
|
|
|
buflen *= 2;
|
|
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
|
|
cur = tmpbuf + dist;
|
|
}
|
|
if (state.curpos - state.word > 0xffff)
|
|
elog(ERROR, "Word is too long");
|
|
arr[len].len = state.curpos - state.word;
|
|
if (cur - tmpbuf > 0xffff)
|
|
elog(ERROR, "Too long value");
|
|
arr[len].pos = cur - tmpbuf;
|
|
memcpy((void *) cur, (void *) state.word, arr[len].len);
|
|
cur += arr[len].len;
|
|
len++;
|
|
}
|
|
pfree(state.word);
|
|
|
|
if (!len)
|
|
elog(ERROR, "Void value");
|
|
|
|
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
|
totallen = CALCDATASIZE(len, buflen);
|
|
in = (txtidx *) palloc(totallen);
|
|
in->len = totallen;
|
|
in->size = len;
|
|
cur = STRPTR(in);
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
memcpy((void *) cur, (void *) &tmpbuf[arr[i].pos], arr[i].len);
|
|
arr[i].pos = cur - STRPTR(in);
|
|
cur += arr[i].len;
|
|
}
|
|
pfree(tmpbuf);
|
|
memcpy((void *) ARRPTR(in), (void *) arr, sizeof(int4) * len);
|
|
pfree(arr);
|
|
PG_RETURN_POINTER(in);
|
|
}
|
|
|
|
Datum
|
|
txtidxsize(PG_FUNCTION_ARGS)
|
|
{
|
|
txtidx *in = (txtidx *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
|
int4 ret = in->size;
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_INT32(ret);
|
|
}
|
|
|
|
Datum
|
|
txtidx_out(PG_FUNCTION_ARGS)
|
|
{
|
|
txtidx *out = (txtidx *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
|
char *outbuf;
|
|
int4 i,
|
|
j,
|
|
lenbuf = STRSIZE(out) + 1 /* \0 */ + out->size * 2 /* '' */ + out->size - 1 /* space */ ;
|
|
WordEntry *ptr = ARRPTR(out);
|
|
char *curin,
|
|
*curout;
|
|
|
|
curout = outbuf = (char *) palloc(lenbuf);
|
|
for (i = 0; i < out->size; i++)
|
|
{
|
|
curin = STRPTR(out) + ptr->pos;
|
|
if (i != 0)
|
|
*curout++ = ' ';
|
|
*curout++ = '\'';
|
|
j = ptr->len;
|
|
while (j--)
|
|
{
|
|
if (*curin == '\'')
|
|
{
|
|
int4 pos = curout - outbuf;
|
|
|
|
outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
|
|
curout = outbuf + pos;
|
|
*curout++ = '\\';
|
|
}
|
|
*curout++ = *curin++;
|
|
}
|
|
*curout++ = '\'';
|
|
ptr++;
|
|
}
|
|
outbuf[lenbuf - 1] = '\0';
|
|
PG_FREE_IF_COPY(out, 0);
|
|
PG_RETURN_POINTER(outbuf);
|
|
}
|
|
|
|
typedef struct
|
|
{
|
|
uint16 len;
|
|
char *word;
|
|
} WORD;
|
|
|
|
typedef struct
|
|
{
|
|
WORD *words;
|
|
int4 lenwords;
|
|
int4 curwords;
|
|
} PRSTEXT;
|
|
|
|
/*
|
|
* Parse text to lexems
|
|
*/
|
|
static void
|
|
parsetext(PRSTEXT * prs, char *buf, int4 buflen)
|
|
{
|
|
int type,
|
|
lenlemm;
|
|
char *ptr,
|
|
*ptrw;
|
|
char *lemm;
|
|
|
|
start_parse_str(buf, buflen);
|
|
while ((type = tsearch_yylex()) != 0)
|
|
{
|
|
if (prs->curwords == prs->lenwords)
|
|
{
|
|
prs->lenwords *= 2;
|
|
prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
|
|
}
|
|
if (tokenlen > 0xffff)
|
|
{
|
|
end_parse();
|
|
elog(ERROR, "Word is too long");
|
|
}
|
|
|
|
lenlemm = tokenlen;
|
|
lemm = lemmatize(token, &lenlemm, type);
|
|
|
|
if (!lemm)
|
|
continue;
|
|
|
|
if (lemm != token)
|
|
{
|
|
prs->words[prs->curwords].len = lenlemm;
|
|
prs->words[prs->curwords].word = lemm;
|
|
}
|
|
else
|
|
{
|
|
prs->words[prs->curwords].len = lenlemm;
|
|
ptrw = prs->words[prs->curwords].word = (char *) palloc(lenlemm);
|
|
ptr = token;
|
|
while (ptr - token < lenlemm)
|
|
{
|
|
*ptrw = tolower((unsigned char) *ptr);
|
|
ptr++;
|
|
ptrw++;
|
|
}
|
|
}
|
|
prs->curwords++;
|
|
}
|
|
end_parse();
|
|
}
|
|
|
|
static int
|
|
compareWORD(const void *a, const void *b)
|
|
{
|
|
if (((WORD *) a)->len == ((WORD *) b)->len)
|
|
return strncmp(
|
|
((WORD *) a)->word,
|
|
((WORD *) b)->word,
|
|
((WORD *) b)->len);
|
|
return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
|
|
}
|
|
|
|
static int
|
|
uniqueWORD(WORD *a, int4 l)
|
|
{
|
|
WORD *ptr,
|
|
*res;
|
|
|
|
if (l == 1)
|
|
return l;
|
|
|
|
res = a;
|
|
ptr = a + 1;
|
|
|
|
qsort((void *) a, l, sizeof(WORD), compareWORD);
|
|
|
|
while (ptr - a < l)
|
|
{
|
|
if (!(ptr->len == res->len &&
|
|
strncmp(ptr->word, res->word, res->len) == 0))
|
|
{
|
|
res++;
|
|
res->len = ptr->len;
|
|
res->word = ptr->word;
|
|
}
|
|
else
|
|
pfree(ptr->word);
|
|
ptr++;
|
|
}
|
|
|
|
return res + 1 - a;
|
|
}
|
|
|
|
/*
|
|
* make value of txtidx
|
|
*/
|
|
static txtidx *
|
|
makevalue(PRSTEXT * prs)
|
|
{
|
|
int4 i,
|
|
lenstr = 0,
|
|
totallen;
|
|
txtidx *in;
|
|
WordEntry *ptr;
|
|
char *str,
|
|
*cur;
|
|
|
|
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
|
for (i = 0; i < prs->curwords; i++)
|
|
lenstr += prs->words[i].len;
|
|
|
|
totallen = CALCDATASIZE(prs->curwords, lenstr);
|
|
in = (txtidx *) palloc(totallen);
|
|
in->len = totallen;
|
|
in->size = prs->curwords;
|
|
|
|
ptr = ARRPTR(in);
|
|
cur = str = STRPTR(in);
|
|
for (i = 0; i < prs->curwords; i++)
|
|
{
|
|
ptr->len = prs->words[i].len;
|
|
if (cur - str > 0xffff)
|
|
elog(ERROR, "Value is too big");
|
|
ptr->pos = cur - str;
|
|
ptr++;
|
|
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
|
|
pfree(prs->words[i].word);
|
|
cur += prs->words[i].len;
|
|
}
|
|
pfree(prs->words);
|
|
return in;
|
|
}
|
|
|
|
Datum
|
|
txt2txtidx(PG_FUNCTION_ARGS)
|
|
{
|
|
text *in = (text *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
|
PRSTEXT prs;
|
|
txtidx *out = NULL;
|
|
|
|
prs.lenwords = 32;
|
|
prs.curwords = 0;
|
|
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
|
|
|
|
initmorph();
|
|
parsetext(&prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
if (prs.curwords)
|
|
{
|
|
out = makevalue(&prs);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
pfree(prs.words);
|
|
PG_RETURN_NULL();
|
|
}
|
|
|
|
/*
|
|
* Trigger
|
|
*/
|
|
Datum
|
|
tsearch(PG_FUNCTION_ARGS)
|
|
{
|
|
TriggerData *trigdata;
|
|
Trigger *trigger;
|
|
Relation rel;
|
|
HeapTuple rettuple = NULL;
|
|
int numidxattr,
|
|
i;
|
|
PRSTEXT prs;
|
|
Datum datum = (Datum) 0;
|
|
|
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo))
|
|
elog(ERROR, "TSearch: Not fired by trigger manager");
|
|
|
|
trigdata = (TriggerData *) fcinfo->context;
|
|
if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
|
|
elog(ERROR, "TSearch: Can't process STATEMENT events");
|
|
if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
|
|
elog(ERROR, "TSearch: Must be fired BEFORE event");
|
|
|
|
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
|
|
rettuple = trigdata->tg_trigtuple;
|
|
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
|
|
rettuple = trigdata->tg_newtuple;
|
|
else
|
|
elog(ERROR, "TSearch: Unknown event");
|
|
|
|
trigger = trigdata->tg_trigger;
|
|
rel = trigdata->tg_relation;
|
|
|
|
if (trigger->tgnargs < 2)
|
|
elog(ERROR, "TSearch: format tsearch(txtidx_field, text_field1,...)");
|
|
|
|
numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
|
|
if (numidxattr < 0)
|
|
elog(ERROR, "TSearch: Can not find txtidx_field");
|
|
|
|
prs.lenwords = 32;
|
|
prs.curwords = 0;
|
|
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
|
|
|
|
initmorph();
|
|
/* find all words in indexable column */
|
|
for (i = 1; i < trigger->tgnargs; i++)
|
|
{
|
|
int4 numattr;
|
|
text *txt_toasted,
|
|
*txt;
|
|
bool isnull;
|
|
Oid oidtype;
|
|
|
|
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
|
|
oidtype = SPI_gettypeid(rel->rd_att, numattr);
|
|
if (numattr < 0 || (!(oidtype == TEXTOID || oidtype == VARCHAROID)))
|
|
{
|
|
elog(WARNING, "TSearch: can not find field '%s'", trigger->tgargs[i]);
|
|
continue;
|
|
}
|
|
txt_toasted = (text *) DatumGetPointer(SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull));
|
|
if (isnull)
|
|
continue;
|
|
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
|
|
|
|
parsetext(&prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
|
|
if (txt != txt_toasted)
|
|
pfree(txt);
|
|
}
|
|
|
|
/* make txtidx value */
|
|
if (prs.curwords)
|
|
{
|
|
datum = PointerGetDatum(makevalue(&prs));
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
|
|
&datum, NULL);
|
|
pfree(DatumGetPointer(datum));
|
|
}
|
|
else
|
|
{
|
|
char nulls = 'n';
|
|
|
|
pfree(prs.words);
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
|
|
&datum, &nulls);
|
|
}
|
|
|
|
if (rettuple == NULL)
|
|
elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
|
|
|
|
return PointerGetDatum(rettuple);
|
|
}
|