mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-18 18:44:06 +08:00
234a02b2a8
Get rid of VARATT_SIZE and VARATT_DATA, which were simply redundant with VARSIZE and VARDATA, and as a consequence almost no code was using the longer names. Rename the length fields of struct varlena and various derived structures to catch anyplace that was accessing them directly; and clean up various places so caught. In itself this patch doesn't change any behavior at all, but it is necessary infrastructure if we hope to play any games with the representation of varlena headers. Greg Stark and Tom Lane
331 lines
6.7 KiB
C
331 lines
6.7 KiB
C
/*
|
|
* Operations for tsvector type
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "catalog/namespace.h"
|
|
#include "commands/trigger.h"
|
|
#include "executor/spi.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "storage/bufpage.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/pg_locale.h"
|
|
|
|
#include "tsvector.h"
|
|
#include "query.h"
|
|
#include "ts_cfg.h"
|
|
#include "common.h"
|
|
|
|
PG_FUNCTION_INFO_V1(strip);
|
|
Datum strip(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(setweight);
|
|
Datum setweight(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(concat);
|
|
Datum concat(PG_FUNCTION_ARGS);
|
|
|
|
Datum
|
|
strip(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *out;
|
|
int i,
|
|
len = 0;
|
|
WordEntry *arrin = ARRPTR(in),
|
|
*arrout;
|
|
char *cur;
|
|
|
|
for (i = 0; i < in->size; i++)
|
|
len += SHORTALIGN(arrin[i].len);
|
|
|
|
len = CALCDATASIZE(in->size, len);
|
|
out = (tsvector *) palloc0(len);
|
|
SET_VARSIZE(out, len);
|
|
out->size = in->size;
|
|
arrout = ARRPTR(out);
|
|
cur = STRPTR(out);
|
|
for (i = 0; i < in->size; i++)
|
|
{
|
|
memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
|
|
arrout[i].haspos = 0;
|
|
arrout[i].len = arrin[i].len;
|
|
arrout[i].pos = cur - STRPTR(out);
|
|
cur += SHORTALIGN(arrout[i].len);
|
|
}
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
Datum
|
|
setweight(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
char cw = PG_GETARG_CHAR(1);
|
|
tsvector *out;
|
|
int i,
|
|
j;
|
|
WordEntry *entry;
|
|
WordEntryPos *p;
|
|
int w = 0;
|
|
|
|
switch (cw)
|
|
{
|
|
case 'A':
|
|
case 'a':
|
|
w = 3;
|
|
break;
|
|
case 'B':
|
|
case 'b':
|
|
w = 2;
|
|
break;
|
|
case 'C':
|
|
case 'c':
|
|
w = 1;
|
|
break;
|
|
case 'D':
|
|
case 'd':
|
|
w = 0;
|
|
break;
|
|
/* internal error */
|
|
default:
|
|
elog(ERROR, "unrecognized weight");
|
|
}
|
|
|
|
out = (tsvector *) palloc(VARSIZE(in));
|
|
memcpy(out, in, VARSIZE(in));
|
|
entry = ARRPTR(out);
|
|
i = out->size;
|
|
while (i--)
|
|
{
|
|
if ((j = POSDATALEN(out, entry)) != 0)
|
|
{
|
|
p = POSDATAPTR(out, entry);
|
|
while (j--)
|
|
{
|
|
WEP_SETWEIGHT(*p, w);
|
|
p++;
|
|
}
|
|
}
|
|
entry++;
|
|
}
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
static int
|
|
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
|
|
{
|
|
if (a->len == b->len)
|
|
{
|
|
return strncmp(
|
|
ptra + a->pos,
|
|
ptrb + b->pos,
|
|
a->len);
|
|
}
|
|
return (a->len > b->len) ? 1 : -1;
|
|
}
|
|
|
|
static int4
|
|
add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
|
|
{
|
|
uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr);
|
|
int i;
|
|
uint16 slen = POSDATALEN(src, srcptr),
|
|
startlen;
|
|
WordEntryPos *spos = POSDATAPTR(src, srcptr),
|
|
*dpos = POSDATAPTR(dest, destptr);
|
|
|
|
if (!destptr->haspos)
|
|
*clen = 0;
|
|
|
|
startlen = *clen;
|
|
for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++)
|
|
{
|
|
WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
|
|
WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
|
|
(*clen)++;
|
|
}
|
|
|
|
if (*clen != startlen)
|
|
destptr->haspos = 1;
|
|
return *clen - startlen;
|
|
}
|
|
|
|
|
|
Datum
|
|
concat(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
tsvector *out;
|
|
WordEntry *ptr;
|
|
WordEntry *ptr1,
|
|
*ptr2;
|
|
WordEntryPos *p;
|
|
int maxpos = 0,
|
|
i,
|
|
j,
|
|
i1,
|
|
i2;
|
|
char *cur;
|
|
char *data,
|
|
*data1,
|
|
*data2;
|
|
|
|
ptr = ARRPTR(in1);
|
|
i = in1->size;
|
|
while (i--)
|
|
{
|
|
if ((j = POSDATALEN(in1, ptr)) != 0)
|
|
{
|
|
p = POSDATAPTR(in1, ptr);
|
|
while (j--)
|
|
{
|
|
if (WEP_GETPOS(*p) > maxpos)
|
|
maxpos = WEP_GETPOS(*p);
|
|
p++;
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
ptr1 = ARRPTR(in1);
|
|
ptr2 = ARRPTR(in2);
|
|
data1 = STRPTR(in1);
|
|
data2 = STRPTR(in2);
|
|
i1 = in1->size;
|
|
i2 = in2->size;
|
|
out = (tsvector *) palloc0(VARSIZE(in1) + VARSIZE(in2));
|
|
SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
|
|
out->size = in1->size + in2->size;
|
|
data = cur = STRPTR(out);
|
|
ptr = ARRPTR(out);
|
|
while (i1 && i2)
|
|
{
|
|
int cmp = compareEntry(data1, ptr1, data2, ptr2);
|
|
|
|
if (cmp < 0)
|
|
{ /* in1 first */
|
|
ptr->haspos = ptr1->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
ptr->pos = cur - data;
|
|
cur += SHORTALIGN(ptr1->len);
|
|
if (ptr->haspos)
|
|
{
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++;
|
|
ptr1++;
|
|
i1--;
|
|
}
|
|
else if (cmp > 0)
|
|
{ /* in2 first */
|
|
ptr->haspos = ptr2->haspos;
|
|
ptr->len = ptr2->len;
|
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
|
ptr->pos = cur - data;
|
|
cur += SHORTALIGN(ptr2->len);
|
|
if (ptr->haspos)
|
|
{
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
if (addlen == 0)
|
|
ptr->haspos = 0;
|
|
else
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++;
|
|
ptr2++;
|
|
i2--;
|
|
}
|
|
else
|
|
{
|
|
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
ptr->pos = cur - data;
|
|
cur += SHORTALIGN(ptr1->len);
|
|
if (ptr->haspos)
|
|
{
|
|
if (ptr1->haspos)
|
|
{
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
if (ptr2->haspos)
|
|
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
|
|
}
|
|
else if (ptr2->haspos)
|
|
{
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
if (addlen == 0)
|
|
ptr->haspos = 0;
|
|
else
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
}
|
|
ptr++;
|
|
ptr1++;
|
|
ptr2++;
|
|
i1--;
|
|
i2--;
|
|
}
|
|
}
|
|
|
|
while (i1)
|
|
{
|
|
ptr->haspos = ptr1->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
ptr->pos = cur - data;
|
|
cur += SHORTALIGN(ptr1->len);
|
|
if (ptr->haspos)
|
|
{
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++;
|
|
ptr1++;
|
|
i1--;
|
|
}
|
|
|
|
while (i2)
|
|
{
|
|
ptr->haspos = ptr2->haspos;
|
|
ptr->len = ptr2->len;
|
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
|
ptr->pos = cur - data;
|
|
cur += SHORTALIGN(ptr2->len);
|
|
if (ptr->haspos)
|
|
{
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
if (addlen == 0)
|
|
ptr->haspos = 0;
|
|
else
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++;
|
|
ptr2++;
|
|
i2--;
|
|
}
|
|
|
|
out->size = ptr - ARRPTR(out);
|
|
SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data));
|
|
if (data != STRPTR(out))
|
|
memmove(STRPTR(out), data, cur - data);
|
|
|
|
PG_FREE_IF_COPY(in1, 0);
|
|
PG_FREE_IF_COPY(in2, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|