mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
510 lines
13 KiB
C
510 lines
13 KiB
C
|
/*
|
||
|
* interface functions to tscfg
|
||
|
* Teodor Sigaev <teodor@sigaev.ru>
|
||
|
*/
|
||
|
#include <errno.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <ctype.h>
|
||
|
#include <locale.h>
|
||
|
|
||
|
#include "postgres.h"
|
||
|
#include "fmgr.h"
|
||
|
#include "utils/array.h"
|
||
|
#include "catalog/pg_type.h"
|
||
|
#include "executor/spi.h"
|
||
|
|
||
|
#include "ts_cfg.h"
|
||
|
#include "dict.h"
|
||
|
#include "wparser.h"
|
||
|
#include "snmap.h"
|
||
|
#include "common.h"
|
||
|
#include "tsvector.h"
|
||
|
|
||
|
/*********top interface**********/
|
||
|
|
||
|
static void *plan_getcfg_bylocale=NULL;
|
||
|
static void *plan_getcfg=NULL;
|
||
|
static void *plan_getmap=NULL;
|
||
|
static void *plan_name2id=NULL;
|
||
|
static Oid current_cfg_id=0;
|
||
|
|
||
|
void
|
||
|
init_cfg(Oid id, TSCfgInfo *cfg) {
|
||
|
Oid arg[2]={ OIDOID, OIDOID };
|
||
|
bool isnull;
|
||
|
Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
|
||
|
int stat,i,j;
|
||
|
text *ptr;
|
||
|
text *prsname=NULL;
|
||
|
MemoryContext oldcontext;
|
||
|
|
||
|
memset(cfg,0,sizeof(TSCfgInfo));
|
||
|
SPI_connect();
|
||
|
if ( !plan_getcfg ) {
|
||
|
plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
|
||
|
if ( !plan_getcfg )
|
||
|
ts_error(ERROR, "SPI_prepare() failed");
|
||
|
}
|
||
|
|
||
|
stat = SPI_execp(plan_getcfg, pars, " ", 1);
|
||
|
if ( stat < 0 )
|
||
|
ts_error (ERROR, "SPI_execp return %d", stat);
|
||
|
if ( SPI_processed > 0 ) {
|
||
|
prsname = (text*) DatumGetPointer(
|
||
|
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
|
||
|
);
|
||
|
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||
|
prsname = ptextdup( prsname );
|
||
|
MemoryContextSwitchTo(oldcontext);
|
||
|
|
||
|
cfg->id=id;
|
||
|
} else
|
||
|
ts_error(ERROR, "No tsearch cfg with id %d", id);
|
||
|
|
||
|
arg[0]=TEXTOID;
|
||
|
if ( !plan_getmap ) {
|
||
|
plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
|
||
|
if ( !plan_getmap )
|
||
|
ts_error(ERROR, "SPI_prepare() failed");
|
||
|
}
|
||
|
|
||
|
pars[0]=PointerGetDatum( prsname );
|
||
|
stat = SPI_execp(plan_getmap, pars, " ", 0);
|
||
|
if ( stat < 0 )
|
||
|
ts_error (ERROR, "SPI_execp return %d", stat);
|
||
|
if ( SPI_processed <= 0 )
|
||
|
ts_error(ERROR, "No parser with id %d", id);
|
||
|
|
||
|
for(i=0;i<SPI_processed;i++) {
|
||
|
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
|
||
|
ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
|
||
|
ArrayType *a;
|
||
|
|
||
|
if ( !cfg->map ) {
|
||
|
cfg->len=lexid+1;
|
||
|
cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
|
||
|
if ( !cfg->map )
|
||
|
ts_error(ERROR,"No memory");
|
||
|
memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
|
||
|
}
|
||
|
|
||
|
if (isnull)
|
||
|
continue;
|
||
|
|
||
|
a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
|
||
|
|
||
|
if ( ARR_NDIM(a) != 1 )
|
||
|
ts_error(ERROR,"Wrong dimension");
|
||
|
if ( ARRNELEMS(a) < 1 )
|
||
|
continue;
|
||
|
|
||
|
cfg->map[lexid].len=ARRNELEMS(a);
|
||
|
cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
|
||
|
memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
|
||
|
ptr=(text*)ARR_DATA_PTR(a);
|
||
|
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||
|
for(j=0;j<cfg->map[lexid].len;j++) {
|
||
|
cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
|
||
|
ptr=NEXTVAL(ptr);
|
||
|
}
|
||
|
MemoryContextSwitchTo(oldcontext);
|
||
|
|
||
|
if ( a != toasted_a )
|
||
|
pfree(a);
|
||
|
}
|
||
|
|
||
|
SPI_finish();
|
||
|
cfg->prs_id = name2id_prs( prsname );
|
||
|
pfree(prsname);
|
||
|
for(i=0;i<cfg->len;i++) {
|
||
|
for(j=0;j<cfg->map[i].len;j++) {
|
||
|
ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
|
||
|
cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
|
||
|
pfree(ptr);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
typedef struct {
|
||
|
TSCfgInfo *last_cfg;
|
||
|
int len;
|
||
|
int reallen;
|
||
|
TSCfgInfo *list;
|
||
|
SNMap name2id_map;
|
||
|
} CFGList;
|
||
|
|
||
|
static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
|
||
|
|
||
|
void
|
||
|
reset_cfg(void) {
|
||
|
freeSNMap( &(CList.name2id_map) );
|
||
|
if ( CList.list ) {
|
||
|
int i,j;
|
||
|
for(i=0;i<CList.len;i++)
|
||
|
if ( CList.list[i].map ) {
|
||
|
for(j=0;j<CList.list[i].len;j++)
|
||
|
if ( CList.list[i].map[j].dict_id )
|
||
|
free(CList.list[i].map[j].dict_id);
|
||
|
free( CList.list[i].map );
|
||
|
}
|
||
|
free(CList.list);
|
||
|
}
|
||
|
memset(&CList,0,sizeof(CFGList));
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
comparecfg(const void *a, const void *b) {
|
||
|
return ((TSCfgInfo*)a)->id - ((TSCfgInfo*)b)->id;
|
||
|
}
|
||
|
|
||
|
TSCfgInfo *
|
||
|
findcfg(Oid id) {
|
||
|
/* last used cfg */
|
||
|
if ( CList.last_cfg && CList.last_cfg->id==id )
|
||
|
return CList.last_cfg;
|
||
|
|
||
|
/* already used cfg */
|
||
|
if ( CList.len != 0 ) {
|
||
|
TSCfgInfo key;
|
||
|
key.id=id;
|
||
|
CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||
|
if ( CList.last_cfg != NULL )
|
||
|
return CList.last_cfg;
|
||
|
}
|
||
|
|
||
|
/* last chance */
|
||
|
if ( CList.len==CList.reallen ) {
|
||
|
TSCfgInfo *tmp;
|
||
|
int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
|
||
|
tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
|
||
|
if ( !tmp )
|
||
|
ts_error(ERROR,"No memory");
|
||
|
CList.reallen=reallen;
|
||
|
CList.list=tmp;
|
||
|
}
|
||
|
CList.last_cfg=&(CList.list[CList.len]);
|
||
|
init_cfg(id, CList.last_cfg);
|
||
|
CList.len++;
|
||
|
qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||
|
return findcfg(id); /* qsort changed order!! */;
|
||
|
}
|
||
|
|
||
|
|
||
|
Oid
|
||
|
name2id_cfg(text *name) {
|
||
|
Oid arg[1]={ TEXTOID };
|
||
|
bool isnull;
|
||
|
Datum pars[1]={ PointerGetDatum(name) };
|
||
|
int stat;
|
||
|
Oid id=findSNMap_t( &(CList.name2id_map), name );
|
||
|
|
||
|
if ( id )
|
||
|
return id;
|
||
|
|
||
|
SPI_connect();
|
||
|
if ( !plan_name2id ) {
|
||
|
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
|
||
|
if ( !plan_name2id )
|
||
|
elog(ERROR, "SPI_prepare() failed");
|
||
|
}
|
||
|
|
||
|
stat = SPI_execp(plan_name2id, pars, " ", 1);
|
||
|
if ( stat < 0 )
|
||
|
elog (ERROR, "SPI_execp return %d", stat);
|
||
|
if ( SPI_processed > 0 ) {
|
||
|
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||
|
if ( isnull )
|
||
|
elog(ERROR, "Null id for tsearch config");
|
||
|
} else
|
||
|
elog(ERROR, "No tsearch config");
|
||
|
SPI_finish();
|
||
|
addSNMap_t( &(CList.name2id_map), name, id );
|
||
|
return id;
|
||
|
}
|
||
|
|
||
|
|
||
|
void
|
||
|
parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
|
||
|
int type, lenlemm, i;
|
||
|
char *lemm=NULL;
|
||
|
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||
|
|
||
|
prsobj->prs=(void*)DatumGetPointer(
|
||
|
FunctionCall2(
|
||
|
&(prsobj->start_info),
|
||
|
PointerGetDatum(buf),
|
||
|
Int32GetDatum(buflen)
|
||
|
)
|
||
|
);
|
||
|
|
||
|
while( ( type=DatumGetInt32(FunctionCall3(
|
||
|
&(prsobj->getlexeme_info),
|
||
|
PointerGetDatum(prsobj->prs),
|
||
|
PointerGetDatum(&lemm),
|
||
|
PointerGetDatum(&lenlemm))) ) != 0 ) {
|
||
|
|
||
|
if ( lenlemm >= MAXSTRLEN )
|
||
|
elog(ERROR, "Word is too long");
|
||
|
|
||
|
|
||
|
if ( type >= cfg->len ) /* skip this type of lexem */
|
||
|
continue;
|
||
|
|
||
|
for(i=0;i<cfg->map[type].len;i++) {
|
||
|
DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
|
||
|
char **norms, **ptr;
|
||
|
|
||
|
norms = ptr = (char**)DatumGetPointer(
|
||
|
FunctionCall3(
|
||
|
&(dict->lexize_info),
|
||
|
PointerGetDatum(dict->dictionary),
|
||
|
PointerGetDatum(lemm),
|
||
|
PointerGetDatum(lenlemm)
|
||
|
)
|
||
|
);
|
||
|
if ( !norms ) /* dictionary doesn't know this lexem */
|
||
|
continue;
|
||
|
|
||
|
prs->pos++; /*set pos*/
|
||
|
|
||
|
while( *ptr ) {
|
||
|
if (prs->curwords == prs->lenwords) {
|
||
|
prs->lenwords *= 2;
|
||
|
prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
|
||
|
}
|
||
|
|
||
|
prs->words[prs->curwords].len = strlen(*ptr);
|
||
|
prs->words[prs->curwords].word = *ptr;
|
||
|
prs->words[prs->curwords].alen = 0;
|
||
|
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
|
||
|
ptr++;
|
||
|
prs->curwords++;
|
||
|
}
|
||
|
pfree(norms);
|
||
|
break; /* lexem already normalized or is stop word*/
|
||
|
}
|
||
|
}
|
||
|
|
||
|
FunctionCall1(
|
||
|
&(prsobj->end_info),
|
||
|
PointerGetDatum(prsobj->prs)
|
||
|
);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
|
||
|
while (prs->curwords >= prs->lenwords) {
|
||
|
prs->lenwords *= 2;
|
||
|
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||
|
}
|
||
|
memset( &(prs->words[prs->curwords]), 0, sizeof(HLWORD) );
|
||
|
prs->words[prs->curwords].type = (uint8)type;
|
||
|
prs->words[prs->curwords].len = buflen;
|
||
|
prs->words[prs->curwords].word = palloc(buflen);
|
||
|
memcpy(prs->words[prs->curwords].word, buf, buflen);
|
||
|
prs->curwords++;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
|
||
|
int i;
|
||
|
ITEM *item=GETQUERY(query);
|
||
|
HLWORD *word=&( prs->words[prs->curwords-1] );
|
||
|
|
||
|
while (prs->curwords + query->size >= prs->lenwords) {
|
||
|
prs->lenwords *= 2;
|
||
|
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||
|
}
|
||
|
|
||
|
for(i=0; i<query->size; i++) {
|
||
|
if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
|
||
|
if ( word->item ) {
|
||
|
memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
|
||
|
prs->words[prs->curwords].item=item;
|
||
|
prs->words[prs->curwords].repeated=1;
|
||
|
prs->curwords++;
|
||
|
} else
|
||
|
word->item=item;
|
||
|
}
|
||
|
item++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
|
||
|
int type, lenlemm, i;
|
||
|
char *lemm=NULL;
|
||
|
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||
|
|
||
|
prsobj->prs=(void*)DatumGetPointer(
|
||
|
FunctionCall2(
|
||
|
&(prsobj->start_info),
|
||
|
PointerGetDatum(buf),
|
||
|
Int32GetDatum(buflen)
|
||
|
)
|
||
|
);
|
||
|
|
||
|
while( ( type=DatumGetInt32(FunctionCall3(
|
||
|
&(prsobj->getlexeme_info),
|
||
|
PointerGetDatum(prsobj->prs),
|
||
|
PointerGetDatum(&lemm),
|
||
|
PointerGetDatum(&lenlemm))) ) != 0 ) {
|
||
|
|
||
|
if ( lenlemm >= MAXSTRLEN )
|
||
|
elog(ERROR, "Word is too long");
|
||
|
|
||
|
hladdword(prs,lemm,lenlemm,type);
|
||
|
|
||
|
if ( type >= cfg->len )
|
||
|
continue;
|
||
|
|
||
|
for(i=0;i<cfg->map[type].len;i++) {
|
||
|
DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
|
||
|
char **norms, **ptr;
|
||
|
|
||
|
norms = ptr = (char**)DatumGetPointer(
|
||
|
FunctionCall3(
|
||
|
&(dict->lexize_info),
|
||
|
PointerGetDatum(dict->dictionary),
|
||
|
PointerGetDatum(lemm),
|
||
|
PointerGetDatum(lenlemm)
|
||
|
)
|
||
|
);
|
||
|
if ( !norms ) /* dictionary doesn't know this lexem */
|
||
|
continue;
|
||
|
|
||
|
while( *ptr ) {
|
||
|
hlfinditem(prs,query,*ptr,strlen(*ptr));
|
||
|
pfree(*ptr);
|
||
|
ptr++;
|
||
|
}
|
||
|
pfree(norms);
|
||
|
break; /* lexem already normalized or is stop word*/
|
||
|
}
|
||
|
}
|
||
|
|
||
|
FunctionCall1(
|
||
|
&(prsobj->end_info),
|
||
|
PointerGetDatum(prsobj->prs)
|
||
|
);
|
||
|
}
|
||
|
|
||
|
text*
|
||
|
genhl(HLPRSTEXT * prs) {
|
||
|
text *out;
|
||
|
int len=128;
|
||
|
char *ptr;
|
||
|
HLWORD *wrd=prs->words;
|
||
|
|
||
|
out = (text*)palloc( len );
|
||
|
ptr=((char*)out) + VARHDRSZ;
|
||
|
|
||
|
while( wrd - prs->words < prs->curwords ) {
|
||
|
while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
|
||
|
int dist = ptr - ((char*)out);
|
||
|
len*= 2;
|
||
|
out = (text *) repalloc(out, len);
|
||
|
ptr=((char*)out) + dist;
|
||
|
}
|
||
|
|
||
|
if ( wrd->in && !wrd->skip && !wrd->repeated ) {
|
||
|
if ( wrd->replace ) {
|
||
|
*ptr=' ';
|
||
|
ptr++;
|
||
|
} else {
|
||
|
if (wrd->selected) {
|
||
|
memcpy(ptr,prs->startsel,prs->startsellen);
|
||
|
ptr+=prs->startsellen;
|
||
|
}
|
||
|
memcpy(ptr,wrd->word,wrd->len);
|
||
|
ptr+=wrd->len;
|
||
|
if (wrd->selected) {
|
||
|
memcpy(ptr,prs->stopsel,prs->stopsellen);
|
||
|
ptr+=prs->stopsellen;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ( !wrd->repeated )
|
||
|
pfree(wrd->word);
|
||
|
|
||
|
wrd++;
|
||
|
}
|
||
|
|
||
|
VARATT_SIZEP(out)=ptr - ((char*)out);
|
||
|
return out;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
get_currcfg(void) {
|
||
|
Oid arg[1]={ TEXTOID };
|
||
|
const char *curlocale;
|
||
|
Datum pars[1];
|
||
|
bool isnull;
|
||
|
int stat;
|
||
|
|
||
|
if ( current_cfg_id > 0 )
|
||
|
return current_cfg_id;
|
||
|
|
||
|
SPI_connect();
|
||
|
if ( !plan_getcfg_bylocale ) {
|
||
|
plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
|
||
|
if ( !plan_getcfg_bylocale )
|
||
|
elog(ERROR, "SPI_prepare() failed");
|
||
|
}
|
||
|
|
||
|
curlocale = setlocale(LC_CTYPE, NULL);
|
||
|
pars[0] = PointerGetDatum( char2text((char*)curlocale) );
|
||
|
stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
|
||
|
|
||
|
if ( stat < 0 )
|
||
|
elog (ERROR, "SPI_execp return %d", stat);
|
||
|
if ( SPI_processed > 0 )
|
||
|
current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||
|
else
|
||
|
elog(ERROR,"Can't find tsearch config by locale");
|
||
|
|
||
|
pfree(DatumGetPointer(pars[0]));
|
||
|
SPI_finish();
|
||
|
return current_cfg_id;
|
||
|
}
|
||
|
|
||
|
PG_FUNCTION_INFO_V1(set_curcfg);
|
||
|
Datum set_curcfg(PG_FUNCTION_ARGS);
|
||
|
Datum
|
||
|
set_curcfg(PG_FUNCTION_ARGS) {
|
||
|
findcfg(PG_GETARG_OID(0));
|
||
|
current_cfg_id=PG_GETARG_OID(0);
|
||
|
PG_RETURN_VOID();
|
||
|
}
|
||
|
|
||
|
PG_FUNCTION_INFO_V1(set_curcfg_byname);
|
||
|
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
|
||
|
Datum
|
||
|
set_curcfg_byname(PG_FUNCTION_ARGS) {
|
||
|
text *name=PG_GETARG_TEXT_P(0);
|
||
|
|
||
|
DirectFunctionCall1(
|
||
|
set_curcfg,
|
||
|
ObjectIdGetDatum( name2id_cfg(name) )
|
||
|
);
|
||
|
PG_FREE_IF_COPY(name, 0);
|
||
|
PG_RETURN_VOID();
|
||
|
}
|
||
|
|
||
|
PG_FUNCTION_INFO_V1(show_curcfg);
|
||
|
Datum show_curcfg(PG_FUNCTION_ARGS);
|
||
|
Datum
|
||
|
show_curcfg(PG_FUNCTION_ARGS) {
|
||
|
PG_RETURN_OID( get_currcfg() );
|
||
|
}
|
||
|
|
||
|
PG_FUNCTION_INFO_V1(reset_tsearch);
|
||
|
Datum reset_tsearch(PG_FUNCTION_ARGS);
|
||
|
Datum
|
||
|
reset_tsearch(PG_FUNCTION_ARGS) {
|
||
|
ts_error(NOTICE,"TSearch cache cleaned");
|
||
|
PG_RETURN_VOID();
|
||
|
}
|