postgresql/contrib/dict_xsyn/dict_xsyn.c

237 lines
4.3 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
 *
 * dict_xsyn.c
 *	  Extended synonym dictionary
 *
 * Copyright (c) 2007-2008, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.4 2008/01/01 20:31:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include <ctype.h>
#include "commands/defrem.h"
#include "fmgr.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
PG_MODULE_MAGIC;
/*
 * One dictionary entry: a lookup word plus the rule text it was read from.
 */
typedef struct
{
	char	   *key;			/* Word */
	char	   *value;			/* Unparsed list of synonyms, including the
								 * word itself */
} Syn;
typedef struct
{
2007-11-16 05:14:46 +08:00
int len;
Syn *syn;
2007-11-16 05:14:46 +08:00
bool keeporig;
} DictSyn;
PG_FUNCTION_INFO_V1(dxsyn_init);
2007-11-16 05:14:46 +08:00
Datum dxsyn_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dxsyn_lexize);
2007-11-16 05:14:46 +08:00
Datum dxsyn_lexize(PG_FUNCTION_ARGS);
/*
 * find_word
 *		Locate the next whitespace-delimited word in a string.
 *
 * Leading whitespace starting at 'in' is skipped.  On success the return
 * value points at the first byte of the word and *end points just past its
 * last byte (at the following whitespace or at the terminating NUL).
 *
 * Returns NULL, with *end set to NULL, when nothing but whitespace remains
 * or when the next word starts with '#' (presumably the comment marker of
 * rules files; everything after it is ignored by the callers).
 *
 * The input string is not modified.
 */
static char *
find_word(char *in, char **end)
{
	char	   *start;

	*end = NULL;

	/* Skip leading whitespace, advancing one whole character at a time */
	while (*in && t_isspace(in))
		in += pg_mblen(in);

	/* Nothing left to return, or the next word begins with '#' */
	if (!*in || *in == '#')
		return NULL;

	/* Scan to the end of the word */
	start = in;
	while (*in && !t_isspace(in))
		in += pg_mblen(in);

	*end = in;
	return start;
}
static int
compare_syn(const void *a, const void *b)
{
return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
}
/*
 * read_dictionary
 *		Load a synonym rules file into d->syn.
 *
 * 'filename' is resolved through get_tsearch_config_filename() with the
 * "rules" extension.  Each line is lower-cased; its first word becomes the
 * entry's key and the whole lower-cased line is kept as the entry's value.
 * Lines that are empty, or for which find_word() finds no word, are skipped.
 *
 * On return d->len is the number of entries stored and d->syn is sorted by
 * key so that it can be searched with bsearch().
 *
 * Raises ERROR if the file cannot be opened.
 */
static void
read_dictionary(DictSyn *d, char *filename)
{
	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
	FILE	   *fin;
	char	   *line;
	int			cur = 0;

	if ((fin = AllocateFile(real_filename, "r")) == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open synonym file \"%s\": %m",
						real_filename)));

	while ((line = t_readline(fin)) != NULL)
	{
		char	   *value;
		char	   *key;
		char	   *end = NULL;

		if (*line == '\0')
		{
			/* Release the line buffer on this path too, not only below */
			pfree(line);
			continue;
		}

		value = lowerstr(line);
		pfree(line);

		key = find_word(value, &end);
		if (!key)
		{
			pfree(value);
			continue;
		}

		/* While loading, d->len is the allocated capacity; grow by doubling */
		if (cur == d->len)
		{
			d->len = (d->len > 0) ? 2 * d->len : 16;
			if (d->syn)
				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
			else
				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
		}

		/* The key is a private copy; the entry takes ownership of 'value' */
		d->syn[cur].key = pnstrdup(key, end - key);
		d->syn[cur].value = value;

		cur++;
	}

	FreeFile(fin);

	/* From here on d->len is the number of entries actually stored */
	d->len = cur;
	if (cur > 1)
		qsort(d->syn, d->len, sizeof(Syn), compare_syn);

	pfree(real_filename);
}
Datum
dxsyn_init(PG_FUNCTION_ARGS)
{
2007-11-16 05:14:46 +08:00
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSyn *d;
ListCell *l;
d = (DictSyn *) palloc0(sizeof(DictSyn));
d->len = 0;
d->syn = NULL;
d->keeporig = true;
foreach(l, dictoptions)
{
2007-11-16 05:14:46 +08:00
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
{
d->keeporig = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
{
read_dictionary(d, defGetString(defel));
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized xsyn parameter: \"%s\"",
defel->defname)));
}
}
PG_RETURN_POINTER(d);
}
Datum
dxsyn_lexize(PG_FUNCTION_ARGS)
{
2007-11-16 05:14:46 +08:00
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int length = PG_GETARG_INT32(2);
Syn word;
Syn *found;
TSLexeme *res = NULL;
if (!length || d->len == 0)
PG_RETURN_POINTER(NULL);
/* Create search pattern */
{
2007-11-16 05:14:46 +08:00
char *temp = pnstrdup(in, length);
word.key = lowerstr(temp);
pfree(temp);
word.value = NULL;
}
/* Look for matching syn */
2007-11-16 05:14:46 +08:00
found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
pfree(word.key);
if (!found)
PG_RETURN_POINTER(NULL);
/* Parse string of synonyms and return array of words */
{
2007-11-16 05:14:46 +08:00
char *value = pstrdup(found->value);
int value_length = strlen(value);
char *pos = value;
int nsyns = 0;
bool is_first = true;
res = palloc(0);
2007-11-16 05:14:46 +08:00
while (pos < value + value_length)
{
2007-11-16 05:14:46 +08:00
char *end;
char *syn = find_word(pos, &end);
if (!syn)
break;
*end = '\0';
2007-11-16 05:14:46 +08:00
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
res[nsyns].lexeme = NULL;
/* first word is added to result only if KEEPORIG flag is set */
2007-11-16 05:14:46 +08:00
if (d->keeporig || !is_first)
{
res[nsyns].lexeme = pstrdup(syn);
res[nsyns + 1].lexeme = NULL;
nsyns++;
}
is_first = false;
pos = end + 1;
}
pfree(value);
}
PG_RETURN_POINTER(res);
}