/*
 * Operations for tsvector type
 * Teodor Sigaev
 */
#include "postgres.h"

#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "executor/spi.h"
#include "commands/trigger.h"
#include "nodes/pg_list.h"
#include "catalog/namespace.h"

#include "utils/pg_locale.h"

#include <ctype.h>				/* tolower */
#include "tsvector.h"
#include "query.h"
#include "ts_cfg.h"
#include "common.h"

PG_FUNCTION_INFO_V1(strip);
Datum		strip(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(setweight);
Datum		setweight(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(concat);
Datum		concat(PG_FUNCTION_ARGS);

/*
 * strip(tsvector) -> tsvector
 *
 * Builds a new tsvector holding only the lexeme bytes of the input:
 * every output entry has haspos cleared and no position data is copied.
 * Lexemes are laid out back to back, each padded to SHORTALIGN.
 */
Datum
strip(PG_FUNCTION_ARGS)
{
	tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	tsvector   *out;
	int			i,
				len = 0;
	WordEntry  *arrin = ARRPTR(in),
			   *arrout;
	char	   *cur;

	/* total space needed for the (aligned) lexeme strings alone */
	for (i = 0; i < in->size; i++)
		len += SHORTALIGN(arrin[i].len);

	len = CALCDATASIZE(in->size, len);
	out = (tsvector *) palloc(len);
	memset(out, 0, len);
	out->len = len;
	out->size = in->size;
	arrout = ARRPTR(out);
	cur = STRPTR(out);

	for (i = 0; i < in->size; i++)
	{
		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
		arrout[i].haspos = 0;
		arrout[i].len = arrin[i].len;
		/* pos is an offset relative to the start of the string area */
		arrout[i].pos = cur - STRPTR(out);
		cur += SHORTALIGN(arrout[i].len);
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}

/*
 * setweight(tsvector, "char") -> tsvector
 *
 * Returns a copy of the input in which the weight field of every stored
 * position is set according to the second argument (case-insensitive):
 * 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0.  Any other character raises
 * an error.  Entries without position data are left as they are.
 */
Datum
setweight(PG_FUNCTION_ARGS)
{
	tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	char		cw = PG_GETARG_CHAR(1);
	tsvector   *out;
	int			i,
				j;
	WordEntry  *entry;
	WordEntryPos *p;
	int			w = 0;

	/*
	 * The <ctype.h> functions are only defined for values representable as
	 * unsigned char (or EOF), so cast before calling tolower(); a plain
	 * char may be negative.
	 */
	switch (tolower((unsigned char) cw))
	{
		case 'a':
			w = 3;
			break;
		case 'b':
			w = 2;
			break;
		case 'c':
			w = 1;
			break;
		case 'd':
			w = 0;
			break;
		default:
			elog(ERROR, "Unknown weight");
	}

	/* work on a byte-for-byte copy so the input is not modified */
	out = (tsvector *) palloc(in->len);
	memcpy(out, in, in->len);

	entry = ARRPTR(out);
	i = out->size;
	while (i--)
	{
		if ((j = POSDATALEN(out, entry)) != 0)
		{
			p = POSDATAPTR(out, entry);
			while (j--)
			{
				p->weight = w;
				p++;
			}
		}
		entry++;
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}

/*
 * Compare two entries; ptra/ptrb are the bases that a->pos / b->pos are
 * offsets into.
 *
 * Entries of different length order by length (shorter first, result is
 * -1 or 1).  Entries of equal length order by strncmp() over that length.
 */
static int
compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
{
	if (a->len == b->len)
		return strncmp(ptra + a->pos, ptrb + b->pos, a->len);

	return (a->len > b->len) ? 1 : -1;
}

/*
 * Append the positions of entry srcptr (in src) to the position list of
 * entry destptr (in dest), adding maxpos to each position.
 *
 * The uint16 at _POSDATAPTR(dest, destptr) is used as the running count of
 * dest positions; it is reset to 0 first when destptr has no positions
 * yet.  destptr->haspos is set once at least one position was appended.
 *
 * Returns the number of positions appended.
 *
 * NOTE(review): in the copy of this file under review the text between
 * "i<" and "destptr->" had been stripped, leaving an uncompilable
 * "for(i=0; ihaspos=1;".  The loop condition, loop body and the following
 * "if" were reconstructed; MAXNUMPOS, MAXENTRYPOS and LIMITPOS are expected
 * to be provided by tsvector.h -- confirm against the upstream source.
 */
static int4
add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos)
{
	uint16	   *clen = (uint16 *) _POSDATAPTR(dest, destptr);
	int			i;
	uint16		slen = POSDATALEN(src, srcptr),
				startlen;
	WordEntryPos *spos = POSDATAPTR(src, srcptr),
			   *dpos = POSDATAPTR(dest, destptr);

	if (!destptr->haspos)
		*clen = 0;

	startlen = *clen;
	for (i = 0;
		 i < slen && *clen < MAXNUMPOS &&
		 (*clen == 0 || dpos[*clen - 1].pos != MAXENTRYPOS - 1);
		 i++)
	{
		dpos[*clen].weight = spos[i].weight;
		dpos[*clen].pos = LIMITPOS(spos[i].pos + maxpos);
		(*clen)++;
	}

	if (*clen != startlen)
		destptr->haspos = 1;
	return *clen - startlen;
}

/*
 * concat(tsvector, tsvector) -> tsvector
 *
 * Merges the entries of both inputs into one new tsvector.  Entries that
 * compare equal (see compareEntry) are emitted once with both position
 * lists combined.  Positions taken from the second argument go through
 * add_pos() with maxpos = the largest position found in the first
 * argument.
 *
 * NOTE(review): the merge walks both entry arrays in step, so it assumes
 * each input is already ordered consistently with compareEntry() --
 * confirm against the code that builds tsvectors.
 */
Datum
concat(PG_FUNCTION_ARGS)
{
	tsvector   *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	tsvector   *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
	tsvector   *out;
	WordEntry  *ptr;
	WordEntry  *ptr1,
			   *ptr2;
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,
				i2;
	char	   *cur;
	char	   *data,
			   *data1,
			   *data2;

	/* find the largest position used anywhere in in1 */
	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (p->pos > maxpos)
					maxpos = p->pos;
				p++;
			}
		}
		ptr++;
	}

	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;

	/*
	 * Allocate for the case where no entries are shared; size and len are
	 * corrected after the merge.
	 */
	out = (tsvector *) palloc(in1->len + in2->len);
	memset(out, 0, in1->len + in2->len);
	out->len = in1->len + in2->len;
	out->size = in1->size + in2->size;
	data = cur = STRPTR(out);
	ptr = ARRPTR(out);

	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{
			/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(cur, data1 + ptr1->pos, ptr1->len);
			ptr->pos = cur - data;
			cur += SHORTALIGN(ptr1->len);
			if (ptr->haspos)
			{
				/* copy the uint16 count together with the positions */
				memcpy(cur, _POSDATAPTR(in1, ptr1),
					   POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}
			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{
			/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(cur, data2 + ptr2->pos, ptr2->len);
			ptr->pos = cur - data;
			cur += SHORTALIGN(ptr2->len);
			if (ptr->haspos)
			{
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				/* nothing could be added: the entry ends up without positions */
				if (addlen == 0)
					ptr->haspos = 0;
				else
					cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
			}
			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			/* same lexeme in both inputs: emit once, combine positions */
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(cur, data1 + ptr1->pos, ptr1->len);
			ptr->pos = cur - data;
			cur += SHORTALIGN(ptr1->len);
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					memcpy(cur, _POSDATAPTR(in1, ptr1),
						   POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);

					/*
					 * The uint16 count was already accounted for above, so
					 * only the appended positions advance cur here.
					 */
					if (ptr2->haspos)
						cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else if (ptr2->haspos)
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
						cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}
			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	/* remaining entries of in1 */
	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(cur, data1 + ptr1->pos, ptr1->len);
		ptr->pos = cur - data;
		cur += SHORTALIGN(ptr1->len);
		if (ptr->haspos)
		{
			memcpy(cur, _POSDATAPTR(in1, ptr1),
				   POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
			cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
		}
		ptr++;
		ptr1++;
		i1--;
	}

	/* remaining entries of in2 */
	while (i2)
	{
		ptr->haspos = ptr2->haspos;
		ptr->len = ptr2->len;
		memcpy(cur, data2 + ptr2->pos, ptr2->len);
		ptr->pos = cur - data;
		cur += SHORTALIGN(ptr2->len);
		if (ptr->haspos)
		{
			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

			if (addlen == 0)
				ptr->haspos = 0;
			else
				cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
		}
		ptr++;
		ptr2++;
		i2--;
	}

	/*
	 * Fix up the header with the real entry count and data length.  The data
	 * was written at the STRPTR() computed for the provisional size; if
	 * STRPTR(out) is different now that size has been updated, move the data
	 * to where it belongs (regions may overlap, hence memmove).
	 */
	out->size = ptr - ARRPTR(out);
	out->len = CALCDATASIZE(out->size, cur - data);
	if (data != STRPTR(out))
		memmove(STRPTR(out), data, cur - data);

	PG_FREE_IF_COPY(in1, 0);
	PG_FREE_IF_COPY(in2, 1);
	PG_RETURN_POINTER(out);
}