mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
419fe7cd1b
Fix string length calculation for recoding, and fix strlower() to avoid a wrong assumption about the length of the recoded string (the old assumption was that the recoded string is no longer than the source, which may not be true for multibyte encodings). Thanks to Thomas H. <me@alternize.com> and Magnus Hagander <mha@sollentuna.net>
170 lines
3.5 KiB
C
170 lines
3.5 KiB
C
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.13 2006/11/20 14:03:30 teodor Exp $ */
|
|
|
|
/*
|
|
* example of Snowball dictionary
|
|
* http://snowball.tartarus.org/
|
|
* Teodor Sigaev <teodor@sigaev.ru>
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "dict.h"
|
|
#include "common.h"
|
|
#include "snowball/english_stem.h"
|
|
#include "snowball/header.h"
|
|
#include "snowball/russian_stem.h"
|
|
#include "snowball/russian_stem_UTF8.h"
|
|
#include "ts_locale.h"
|
|
|
|
typedef struct
|
|
{
|
|
struct SN_env *z;
|
|
StopList stoplist;
|
|
int (*stem) (struct SN_env * z);
|
|
} DictSnowball;
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(snb_en_init);
|
|
Datum snb_en_init(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(snb_ru_init_koi8);
|
|
Datum snb_ru_init_koi8(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(snb_ru_init_utf8);
|
|
Datum snb_ru_init_utf8(PG_FUNCTION_ARGS);
|
|
|
|
PG_FUNCTION_INFO_V1(snb_lexize);
|
|
Datum snb_lexize(PG_FUNCTION_ARGS);
|
|
|
|
Datum
|
|
snb_en_init(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
|
|
|
if (!d)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
memset(d, 0, sizeof(DictSnowball));
|
|
d->stoplist.wordop = lowerstr;
|
|
|
|
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
|
|
readstoplist(in, &(d->stoplist));
|
|
sortstoplist(&(d->stoplist));
|
|
PG_FREE_IF_COPY(in, 0);
|
|
}
|
|
|
|
d->z = english_ISO_8859_1_create_env();
|
|
if (!d->z)
|
|
{
|
|
freestoplist(&(d->stoplist));
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
}
|
|
d->stem = english_ISO_8859_1_stem;
|
|
|
|
PG_RETURN_POINTER(d);
|
|
}
|
|
|
|
Datum
|
|
snb_ru_init_koi8(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
|
|
|
if (!d)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
memset(d, 0, sizeof(DictSnowball));
|
|
d->stoplist.wordop = lowerstr;
|
|
|
|
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
|
|
readstoplist(in, &(d->stoplist));
|
|
sortstoplist(&(d->stoplist));
|
|
PG_FREE_IF_COPY(in, 0);
|
|
}
|
|
|
|
d->z = russian_KOI8_R_create_env();
|
|
if (!d->z)
|
|
{
|
|
freestoplist(&(d->stoplist));
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
}
|
|
d->stem = russian_KOI8_R_stem;
|
|
|
|
PG_RETURN_POINTER(d);
|
|
}
|
|
|
|
Datum
|
|
snb_ru_init_utf8(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
|
|
|
if (!d)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
memset(d, 0, sizeof(DictSnowball));
|
|
d->stoplist.wordop = lowerstr;
|
|
|
|
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
|
|
readstoplist(in, &(d->stoplist));
|
|
sortstoplist(&(d->stoplist));
|
|
PG_FREE_IF_COPY(in, 0);
|
|
}
|
|
|
|
d->z = russian_UTF_8_create_env();
|
|
if (!d->z)
|
|
{
|
|
freestoplist(&(d->stoplist));
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
errmsg("out of memory")));
|
|
}
|
|
d->stem = russian_UTF_8_stem;
|
|
|
|
PG_RETURN_POINTER(d);
|
|
}
|
|
|
|
Datum
|
|
snb_lexize(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
|
|
char *in = (char *) PG_GETARG_POINTER(1);
|
|
char *utxt = pnstrdup(in, PG_GETARG_INT32(2));
|
|
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
|
|
char *txt = lowerstr(utxt);
|
|
|
|
pfree(utxt);
|
|
memset(res, 0, sizeof(TSLexeme) * 2);
|
|
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
|
{
|
|
pfree(txt);
|
|
}
|
|
else
|
|
{
|
|
SN_set_current(d->z, strlen(txt), (symbol *) txt);
|
|
(d->stem) (d->z);
|
|
if (d->z->p && d->z->l)
|
|
{
|
|
txt = repalloc(txt, d->z->l + 1);
|
|
memcpy(txt, d->z->p, d->z->l);
|
|
txt[d->z->l] = '\0';
|
|
}
|
|
res->lexeme = txt;
|
|
}
|
|
|
|
PG_RETURN_POINTER(res);
|
|
}
|