postgresql/contrib/tsearch2/dict_snowball.c

168 lines
3.5 KiB
C
Raw Normal View History

/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
/*
 * example of Snowball dictionary
 * http://snowball.tartarus.org/
 * Teodor Sigaev <teodor@sigaev.ru>
 */
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "snowball/header.h"
#include "snowball/english_stem.h"
#include "snowball/russian_stem.h"
#include "snowball/russian_stem_UTF8.h"
#include "ts_locale.h"
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
typedef struct
{
2003-07-21 18:27:44 +08:00
struct SN_env *z;
StopList stoplist;
2003-08-04 08:43:34 +08:00
int (*stem) (struct SN_env * z);
} DictSnowball;
2003-07-21 18:27:44 +08:00
/* Prototypes of the SQL-callable entry points defined in this file. */
Datum		snb_en_init(PG_FUNCTION_ARGS);
Datum		snb_ru_init_koi8(PG_FUNCTION_ARGS);
Datum		snb_ru_init_utf8(PG_FUNCTION_ARGS);
Datum		snb_lexize(PG_FUNCTION_ARGS);

/* Version-1 calling convention declarations, one per entry point. */
PG_FUNCTION_INFO_V1(snb_en_init);
PG_FUNCTION_INFO_V1(snb_ru_init_koi8);
PG_FUNCTION_INFO_V1(snb_ru_init_utf8);
PG_FUNCTION_INFO_V1(snb_lexize);
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
Datum
snb_en_init(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-08-04 08:43:34 +08:00
memset(d, 0, sizeof(DictSnowball));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
2003-07-21 18:27:44 +08:00
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
d->z = english_ISO_8859_1_create_env();
2003-08-04 08:43:34 +08:00
if (!d->z)
{
2003-07-21 18:27:44 +08:00
freestoplist(&(d->stoplist));
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-07-21 18:27:44 +08:00
}
d->stem = english_ISO_8859_1_stem;
2003-07-21 18:27:44 +08:00
PG_RETURN_POINTER(d);
}
2003-08-04 08:43:34 +08:00
Datum
snb_ru_init_koi8(PG_FUNCTION_ARGS)
2003-08-04 08:43:34 +08:00
{
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-08-04 08:43:34 +08:00
memset(d, 0, sizeof(DictSnowball));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
2003-07-21 18:27:44 +08:00
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
d->z = russian_KOI8_R_create_env();
2003-08-04 08:43:34 +08:00
if (!d->z)
{
2003-07-21 18:27:44 +08:00
freestoplist(&(d->stoplist));
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-07-21 18:27:44 +08:00
}
d->stem = russian_KOI8_R_stem;
2003-07-21 18:27:44 +08:00
PG_RETURN_POINTER(d);
}
/*
 * snb_ru_init_utf8
 *
 * Build the dictionary state for the Russian (UTF-8) Snowball stemmer.
 *
 * Argument 0, when it is not NULL, is a text value that is handed to
 * readstoplist() to fill the dictionary's stop list; the list is then
 * sorted with sortstoplist().
 *
 * Returns a pointer to a malloc'ed, fully initialised DictSnowball.
 * Raises ERROR (ERRCODE_OUT_OF_MEMORY) when either the DictSnowball itself
 * or the stemmer environment cannot be allocated.
 */
Datum
snb_ru_init_utf8(PG_FUNCTION_ARGS)
{
	/* calloc gives us the zero-filled struct the rest of the code expects */
	DictSnowball *d = (DictSnowball *) calloc(1, sizeof(DictSnowball));

	if (!d)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	d->stoplist.wordop = lowerstr;
	if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
	{
		text	   *in = PG_GETARG_TEXT_P(0);

		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = russian_UTF_8_create_env();
	if (!d->z)
	{
		/*
		 * ereport(ERROR) does not return to us, so everything we own must
		 * be released first: the stop list contents and the malloc'ed
		 * dictionary struct itself (previously leaked on this path).
		 */
		freestoplist(&(d->stoplist));
		free(d);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	}
	d->stem = russian_UTF_8_stem;

	PG_RETURN_POINTER(d);
}
2003-07-21 18:27:44 +08:00
Datum
2003-08-04 08:43:34 +08:00
snb_lexize(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
2005-10-15 10:49:52 +08:00
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
2003-08-04 08:43:34 +08:00
memset(res, 0, sizeof(TSLexeme) * 2);
2003-08-04 08:43:34 +08:00
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
2003-07-21 18:27:44 +08:00
pfree(txt);
2003-08-04 08:43:34 +08:00
}
else
{
SN_set_current(d->z, strlen(txt), (symbol *) txt);
2003-08-04 08:43:34 +08:00
(d->stem) (d->z);
if (d->z->p && d->z->l)
{
txt = repalloc(txt, d->z->l + 1);
memcpy(txt, d->z->p, d->z->l);
txt[d->z->l] = '\0';
}
res->lexeme = txt;
2003-07-21 18:27:44 +08:00
}
PG_RETURN_POINTER(res);
}