postgresql/contrib/tsearch2/dict_syn.c

182 lines
3.2 KiB
C
Raw Normal View History

2003-08-04 08:43:34 +08:00
/*
2003-07-21 18:27:44 +08:00
 * Synonym dictionary interface
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include <ctype.h>
2003-07-21 18:27:44 +08:00
#include "dict.h"
#include "common.h"
/* Size in bytes of the line buffer used when reading the synonym file. */
#define SYNBUFLEN 4096
2003-08-04 08:43:34 +08:00
/*
 * One synonym mapping as loaded by syn_init: an input word and the word
 * that replaces it.  Both strings are strdup'ed copies of text that was
 * passed through lowerstr().
 */
typedef struct
{
/* word to match; this is the key compared by compareSyn */
char *in;
/* replacement word; syn_lexize copies it into the returned lexeme */
char *out;
} Syn;
/*
 * The synonym dictionary built by syn_init and consulted by syn_lexize.
 */
typedef struct
{
/*
 * Number of entries in syn once syn_init has finished; while the file is
 * still being read it holds the allocated capacity of the array instead.
 */
int len;
/* array of mappings, sorted with compareSyn so that bsearch can be used */
Syn *syn;
} DictSyn;
2003-07-21 18:27:44 +08:00
PG_FUNCTION_INFO_V1(syn_init);
2003-08-04 08:43:34 +08:00
Datum syn_init(PG_FUNCTION_ARGS);
2003-07-21 18:27:44 +08:00
PG_FUNCTION_INFO_V1(syn_lexize);
2003-08-04 08:43:34 +08:00
Datum syn_lexize(PG_FUNCTION_ARGS);
2003-07-21 18:27:44 +08:00
/*
 * findwrd
 *		Locate the next whitespace-delimited word in a NUL-terminated string.
 *
 * in:	 string to scan (not modified here).
 * end:	 output; set to the first character after the word (either a
 *		 whitespace character or the terminating NUL), or to NULL when no
 *		 word is found.
 *
 * Returns a pointer to the first character of the word, or NULL if the
 * string is empty or contains only whitespace.
 */
static char *
findwrd(char *in, char **end)
{
	char	   *start;

	*end = NULL;

	/* skip leading whitespace */
	while (*in && isspace((unsigned char) *in))
		in++;

	/*
	 * Reached the terminator without seeing a word.  The test has to look at
	 * the character, not the pointer: the pointer itself can never become
	 * NULL by being advanced.
	 */
	if (*in == '\0')
		return NULL;

	start = in;

	/* advance to the end of the word */
	while (*in && !isspace((unsigned char) *in))
		in++;

	*end = in;
	return start;
}
static int
2003-08-04 08:43:34 +08:00
compareSyn(const void *a, const void *b)
{
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
2003-07-21 18:27:44 +08:00
}
2003-08-04 08:43:34 +08:00
Datum
syn_init(PG_FUNCTION_ARGS)
{
text *in;
DictSyn *d;
int cur = 0;
FILE *fin;
char *filename;
char buf[SYNBUFLEN];
char *starti,
*starto,
*end = NULL;
int slen;
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config")));
2003-07-21 18:27:44 +08:00
in = PG_GETARG_TEXT_P(0);
2003-08-04 08:43:34 +08:00
if (VARSIZE(in) - VARHDRSZ == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("VOID config")));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
filename = text2char(in);
2003-07-21 18:27:44 +08:00
PG_FREE_IF_COPY(in, 0);
2003-08-04 08:43:34 +08:00
if ((fin = fopen(filename, "r")) == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
2003-08-04 08:43:34 +08:00
filename)));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
d = (DictSyn *) malloc(sizeof(DictSyn));
if (!d)
{
2003-07-21 18:27:44 +08:00
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-07-21 18:27:44 +08:00
}
2003-08-04 08:43:34 +08:00
memset(d, 0, sizeof(DictSyn));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
while (fgets(buf, SYNBUFLEN, fin))
{
slen = strlen(buf) - 1;
2003-07-21 18:27:44 +08:00
buf[slen] = '\0';
2003-08-04 08:43:34 +08:00
if (*buf == '\0')
continue;
if (cur == d->len)
{
d->len = (d->len) ? 2 * d->len : 16;
d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len);
if (!d->syn)
{
2003-07-21 18:27:44 +08:00
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-07-21 18:27:44 +08:00
}
}
2003-08-04 08:43:34 +08:00
starti = findwrd(buf, &end);
if (!starti)
2003-07-21 18:27:44 +08:00
continue;
2003-08-04 08:43:34 +08:00
*end = '\0';
if (end >= buf + slen)
2003-07-21 18:27:44 +08:00
continue;
2003-08-04 08:43:34 +08:00
starto = findwrd(end + 1, &end);
if (!starto)
2003-07-21 18:27:44 +08:00
continue;
2003-08-04 08:43:34 +08:00
*end = '\0';
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
d->syn[cur].in = strdup(lowerstr(starti));
d->syn[cur].out = strdup(lowerstr(starto));
if (!(d->syn[cur].in && d->syn[cur].out))
{
2003-07-21 18:27:44 +08:00
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
2003-07-21 18:27:44 +08:00
}
2003-08-04 08:43:34 +08:00
cur++;
2003-07-21 18:27:44 +08:00
}
2003-08-04 08:43:34 +08:00
fclose(fin);
d->len = cur;
if (cur > 1)
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
2003-07-21 18:27:44 +08:00
pfree(filename);
2003-08-04 08:43:34 +08:00
PG_RETURN_POINTER(d);
2003-07-21 18:27:44 +08:00
}
Datum
2003-08-04 08:43:34 +08:00
syn_lexize(PG_FUNCTION_ARGS)
{
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
Syn key,
*found;
2005-10-15 10:49:52 +08:00
TSLexeme *res = NULL;
2003-08-04 08:43:34 +08:00
if (!PG_GETARG_INT32(2))
2003-07-21 18:27:44 +08:00
PG_RETURN_POINTER(NULL);
2003-08-04 08:43:34 +08:00
key.out = NULL;
key.in = lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
2003-07-21 18:27:44 +08:00
pfree(key.in);
2003-08-04 08:43:34 +08:00
if (!found)
2003-07-21 18:27:44 +08:00
PG_RETURN_POINTER(NULL);
res = palloc(sizeof(TSLexeme) * 2);
2005-10-15 10:49:52 +08:00
memset(res, 0, sizeof(TSLexeme) * 2);
res[0].lexeme = pstrdup(found->out);
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
PG_RETURN_POINTER(res);
2003-07-21 18:27:44 +08:00
}