mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
265 lines
6.5 KiB
C
265 lines
6.5 KiB
C
/*
 * Operations for tsvector type
 * Teodor Sigaev <teodor@sigaev.ru>
 */
|
|
#include "postgres.h"
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "utils/elog.h"
|
|
#include "utils/palloc.h"
|
|
#include "utils/builtins.h"
|
|
#include "storage/bufpage.h"
|
|
#include "executor/spi.h"
|
|
#include "commands/trigger.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "catalog/namespace.h"
|
|
|
|
#include "utils/pg_locale.h"
|
|
|
|
#include <ctype.h> /* tolower */
|
|
#include "tsvector.h"
|
|
#include "query.h"
|
|
#include "ts_cfg.h"
|
|
#include "common.h"
|
|
|
|
PG_FUNCTION_INFO_V1(strip);
|
|
Datum strip(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(setweight);
|
|
Datum setweight(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(concat);
|
|
Datum concat(PG_FUNCTION_ARGS);
|
|
|
|
Datum
|
|
strip(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *out;
|
|
int i,len=0;
|
|
WordEntry *arrin=ARRPTR(in), *arrout;
|
|
char *cur;
|
|
|
|
for(i=0;i<in->size;i++)
|
|
len += SHORTALIGN( arrin[i].len );
|
|
|
|
len = CALCDATASIZE(in->size, len);
|
|
out=(tsvector*)palloc(len);
|
|
memset(out,0,len);
|
|
out->len=len;
|
|
out->size=in->size;
|
|
arrout=ARRPTR(out);
|
|
cur=STRPTR(out);
|
|
for(i=0;i<in->size;i++) {
|
|
memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
|
|
arrout[i].haspos = 0;
|
|
arrout[i].len = arrin[i].len;
|
|
arrout[i].pos = cur - STRPTR(out);
|
|
cur += SHORTALIGN( arrout[i].len );
|
|
}
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
Datum
|
|
setweight(PG_FUNCTION_ARGS)
|
|
{
|
|
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
char cw = PG_GETARG_CHAR(1);
|
|
tsvector *out;
|
|
int i,j;
|
|
WordEntry *entry;
|
|
WordEntryPos *p;
|
|
int w=0;
|
|
|
|
switch(tolower(cw)) {
|
|
case 'a': w=3; break;
|
|
case 'b': w=2; break;
|
|
case 'c': w=1; break;
|
|
case 'd': w=0; break;
|
|
default: elog(ERROR,"Unknown weight");
|
|
}
|
|
|
|
out=(tsvector*)palloc(in->len);
|
|
memcpy(out,in,in->len);
|
|
entry=ARRPTR(out);
|
|
i=out->size;
|
|
while(i--) {
|
|
if ( (j=POSDATALEN(out,entry)) != 0 ) {
|
|
p=POSDATAPTR(out,entry);
|
|
while(j--) {
|
|
p->weight=w;
|
|
p++;
|
|
}
|
|
}
|
|
entry++;
|
|
}
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
static int
|
|
compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
|
|
{
|
|
if ( a->len == b->len)
|
|
{
|
|
return strncmp(
|
|
ptra + a->pos,
|
|
ptrb + b->pos,
|
|
a->len);
|
|
}
|
|
return ( a->len > b->len ) ? 1 : -1;
|
|
}
|
|
|
|
static int4
|
|
add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
|
|
uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
|
|
int i;
|
|
uint16 slen = POSDATALEN(src, srcptr), startlen;
|
|
WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
|
|
|
|
if ( ! destptr->haspos )
|
|
*clen=0;
|
|
|
|
startlen = *clen;
|
|
for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ) ;i++) {
|
|
dpos[ *clen ].weight = spos[i].weight;
|
|
dpos[ *clen ].pos = LIMITPOS(spos[i].pos + maxpos);
|
|
(*clen)++;
|
|
}
|
|
|
|
if ( *clen != startlen )
|
|
destptr->haspos=1;
|
|
return *clen - startlen;
|
|
}
|
|
|
|
|
|
Datum
|
|
concat(PG_FUNCTION_ARGS) {
|
|
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
|
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
|
tsvector *out;
|
|
WordEntry *ptr;
|
|
WordEntry *ptr1,*ptr2;
|
|
WordEntryPos *p;
|
|
int maxpos=0,i,j,i1,i2;
|
|
char *cur;
|
|
char *data,*data1,*data2;
|
|
|
|
ptr=ARRPTR(in1);
|
|
i=in1->size;
|
|
while(i--) {
|
|
if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
|
|
p=POSDATAPTR(in1,ptr);
|
|
while(j--) {
|
|
if ( p->pos > maxpos )
|
|
maxpos = p->pos;
|
|
p++;
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
|
|
data1=STRPTR(in1); data2=STRPTR(in2);
|
|
i1=in1->size; i2=in2->size;
|
|
out=(tsvector*)palloc( in1->len + in2->len );
|
|
memset(out,0,in1->len + in2->len);
|
|
out->len = in1->len + in2->len;
|
|
out->size = in1->size + in2->size;
|
|
data=cur=STRPTR(out);
|
|
ptr=ARRPTR(out);
|
|
while( i1 && i2 ) {
|
|
int cmp=compareEntry(data1,ptr1,data2,ptr2);
|
|
if ( cmp < 0 ) { /* in1 first */
|
|
ptr->haspos = ptr1->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
|
ptr->pos = cur - data;
|
|
cur+=SHORTALIGN(ptr1->len);
|
|
if ( ptr->haspos ) {
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++; ptr1++; i1--;
|
|
} else if ( cmp>0 ) { /* in2 first */
|
|
ptr->haspos = ptr2->haspos;
|
|
ptr->len = ptr2->len;
|
|
memcpy( cur, data2 + ptr2->pos, ptr2->len );
|
|
ptr->pos = cur - data;
|
|
cur+=SHORTALIGN(ptr2->len);
|
|
if ( ptr->haspos ) {
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
|
if ( addlen == 0 )
|
|
ptr->haspos=0;
|
|
else
|
|
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++; ptr2++; i2--;
|
|
} else {
|
|
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
|
ptr->pos = cur - data;
|
|
cur+=SHORTALIGN(ptr1->len);
|
|
if ( ptr->haspos ) {
|
|
if ( ptr1->haspos ) {
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
|
if ( ptr2->haspos )
|
|
cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
|
|
} else if ( ptr2->haspos ) {
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
|
if ( addlen == 0 )
|
|
ptr->haspos=0;
|
|
else
|
|
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
}
|
|
ptr++; ptr1++; ptr2++; i1--; i2--;
|
|
}
|
|
}
|
|
|
|
while(i1) {
|
|
ptr->haspos = ptr1->haspos;
|
|
ptr->len = ptr1->len;
|
|
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
|
ptr->pos = cur - data;
|
|
cur+=SHORTALIGN(ptr1->len);
|
|
if ( ptr->haspos ) {
|
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
|
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++; ptr1++; i1--;
|
|
}
|
|
|
|
while(i2) {
|
|
ptr->haspos = ptr2->haspos;
|
|
ptr->len = ptr2->len;
|
|
memcpy( cur, data2 + ptr2->pos, ptr2->len );
|
|
ptr->pos = cur - data;
|
|
cur+=SHORTALIGN(ptr2->len);
|
|
if ( ptr->haspos ) {
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
|
if ( addlen == 0 )
|
|
ptr->haspos=0;
|
|
else
|
|
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
|
}
|
|
ptr++; ptr2++; i2--;
|
|
}
|
|
|
|
out->size=ptr-ARRPTR(out);
|
|
out->len = CALCDATASIZE( out->size, cur-data );
|
|
if ( data != STRPTR(out) )
|
|
memmove( STRPTR(out), data, cur-data );
|
|
|
|
PG_FREE_IF_COPY(in1, 0);
|
|
PG_FREE_IF_COPY(in2, 1);
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|