2007-10-16 05:36:50 +08:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* dict_xsyn.c
|
|
|
|
* Extended synonym dictionary
|
|
|
|
*
|
2009-01-02 01:24:05 +08:00
|
|
|
* Copyright (c) 2007-2009, PostgreSQL Global Development Group
|
2007-10-16 05:36:50 +08:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2009-01-02 01:24:05 +08:00
|
|
|
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.6 2009/01/01 17:23:32 momjian Exp $
|
2007-10-16 05:36:50 +08:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
#include "commands/defrem.h"
|
|
|
|
#include "fmgr.h"
|
|
|
|
#include "tsearch/ts_locale.h"
|
|
|
|
#include "tsearch/ts_utils.h"
|
|
|
|
|
|
|
|
/*
 * PostgreSQL loadable-module magic block.
 * NOTE(review): expected to appear exactly once per module; confirm against
 * the fmgr documentation.
 */
PG_MODULE_MAGIC;
|
|
|
|
|
|
|
|
/*
 * One dictionary entry: a headword together with the raw synonym text that
 * belongs to it.
 */
typedef struct
{
	char	   *key;			/* the word that is looked up */
	char	   *value;			/* unparsed synonym list; the word itself is
								 * part of this text */
} Syn;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
int len;
|
|
|
|
Syn *syn;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
bool keeporig;
|
2007-11-16 06:25:18 +08:00
|
|
|
} DictSyn;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(dxsyn_init);
|
2007-11-16 05:14:46 +08:00
|
|
|
Datum dxsyn_init(PG_FUNCTION_ARGS);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
PG_FUNCTION_INFO_V1(dxsyn_lexize);
|
2007-11-16 05:14:46 +08:00
|
|
|
Datum dxsyn_lexize(PG_FUNCTION_ARGS);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
/*
 * find_word
 *		Locate the next whitespace-delimited word in "in".
 *
 * Returns a pointer to the first character of the word and stores in *end a
 * pointer to the character just after it.  Returns NULL, with *end set to
 * NULL, when only whitespace remains or when the next non-space character
 * is '#'.
 *
 * The input string is not modified.
 */
static char *
find_word(char *in, char **end)
{
	char	   *cursor = in;
	char	   *word;

	*end = NULL;

	/* step over leading whitespace, advancing by pg_mblen() each time */
	for (; *cursor != '\0' && t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	/* end of string, or a '#' where a word would start */
	if (*cursor == '\0' || *cursor == '#')
		return NULL;

	word = cursor;

	/* scan forward to the first whitespace character or the terminator */
	for (; *cursor != '\0' && !t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	*end = cursor;

	return word;
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
compare_syn(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2007-11-16 06:25:18 +08:00
|
|
|
read_dictionary(DictSyn *d, char *filename)
|
2007-10-16 05:36:50 +08:00
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
char *real_filename = get_tsearch_config_filename(filename, "rules");
|
2008-06-19 04:55:42 +08:00
|
|
|
tsearch_readline_state trst;
|
2007-11-16 05:14:46 +08:00
|
|
|
char *line;
|
|
|
|
int cur = 0;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
2008-06-19 04:55:42 +08:00
|
|
|
if (!tsearch_readline_begin(&trst, real_filename))
|
2007-10-16 05:36:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open synonym file \"%s\": %m",
|
|
|
|
real_filename)));
|
|
|
|
|
2008-06-19 04:55:42 +08:00
|
|
|
while ((line = tsearch_readline(&trst)) != NULL)
|
2007-10-16 05:36:50 +08:00
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
char *value;
|
|
|
|
char *key;
|
|
|
|
char *end = NULL;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
if (*line == '\0')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
value = lowerstr(line);
|
|
|
|
pfree(line);
|
|
|
|
|
|
|
|
key = find_word(value, &end);
|
|
|
|
if (!key)
|
|
|
|
{
|
|
|
|
pfree(value);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cur == d->len)
|
|
|
|
{
|
|
|
|
d->len = (d->len > 0) ? 2 * d->len : 16;
|
|
|
|
if (d->syn)
|
|
|
|
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
|
|
|
|
else
|
|
|
|
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
|
|
|
|
}
|
|
|
|
|
|
|
|
d->syn[cur].key = pnstrdup(key, end - key);
|
|
|
|
d->syn[cur].value = value;
|
|
|
|
|
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
|
2008-06-19 04:55:42 +08:00
|
|
|
tsearch_readline_end(&trst);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
d->len = cur;
|
|
|
|
if (cur > 1)
|
|
|
|
qsort(d->syn, d->len, sizeof(Syn), compare_syn);
|
|
|
|
|
|
|
|
pfree(real_filename);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dxsyn_init(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
|
|
|
DictSyn *d;
|
|
|
|
ListCell *l;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
d = (DictSyn *) palloc0(sizeof(DictSyn));
|
|
|
|
d->len = 0;
|
|
|
|
d->syn = NULL;
|
|
|
|
d->keeporig = true;
|
|
|
|
|
|
|
|
foreach(l, dictoptions)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
DefElem *defel = (DefElem *) lfirst(l);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
|
|
|
|
{
|
|
|
|
d->keeporig = defGetBoolean(defel);
|
|
|
|
}
|
|
|
|
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
|
|
|
|
{
|
|
|
|
read_dictionary(d, defGetString(defel));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("unrecognized xsyn parameter: \"%s\"",
|
|
|
|
defel->defname)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(d);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
dxsyn_lexize(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
|
|
|
char *in = (char *) PG_GETARG_POINTER(1);
|
|
|
|
int length = PG_GETARG_INT32(2);
|
|
|
|
Syn word;
|
|
|
|
Syn *found;
|
|
|
|
TSLexeme *res = NULL;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
if (!length || d->len == 0)
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
|
|
|
/* Create search pattern */
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
char *temp = pnstrdup(in, length);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
word.key = lowerstr(temp);
|
|
|
|
pfree(temp);
|
|
|
|
word.value = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Look for matching syn */
|
2007-11-16 05:14:46 +08:00
|
|
|
found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
|
2007-10-16 05:36:50 +08:00
|
|
|
pfree(word.key);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
|
|
|
/* Parse string of synonyms and return array of words */
|
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
char *value = pstrdup(found->value);
|
|
|
|
int value_length = strlen(value);
|
|
|
|
char *pos = value;
|
|
|
|
int nsyns = 0;
|
|
|
|
bool is_first = true;
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
res = palloc(0);
|
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
while (pos < value + value_length)
|
2007-10-16 05:36:50 +08:00
|
|
|
{
|
2007-11-16 05:14:46 +08:00
|
|
|
char *end;
|
|
|
|
char *syn = find_word(pos, &end);
|
2007-10-16 05:36:50 +08:00
|
|
|
|
|
|
|
if (!syn)
|
|
|
|
break;
|
|
|
|
*end = '\0';
|
|
|
|
|
2007-11-16 05:14:46 +08:00
|
|
|
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
|
2007-10-16 05:36:50 +08:00
|
|
|
res[nsyns].lexeme = NULL;
|
|
|
|
|
|
|
|
/* first word is added to result only if KEEPORIG flag is set */
|
2007-11-16 05:14:46 +08:00
|
|
|
if (d->keeporig || !is_first)
|
2007-10-16 05:36:50 +08:00
|
|
|
{
|
|
|
|
res[nsyns].lexeme = pstrdup(syn);
|
|
|
|
res[nsyns + 1].lexeme = NULL;
|
|
|
|
|
|
|
|
nsyns++;
|
|
|
|
}
|
|
|
|
|
|
|
|
is_first = false;
|
|
|
|
|
|
|
|
pos = end + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
pfree(value);
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(res);
|
|
|
|
}
|