2003-08-04 08:43:34 +08:00
|
|
|
/*
 * stopword library
 * Teodor Sigaev <teodor@sigaev.ru>
 */
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "common.h"
|
|
|
|
#include "dict.h"
|
|
|
|
|
|
|
|
/* size of the line buffer readstoplist() uses when reading a stopword file */
#define STOPBUFLEN 4096
|
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
/*
 * lowerstr
 *		Convert a NUL-terminated string to lower case, in place.
 *
 * Every byte is handed to tolower() as an unsigned char, so bytes with the
 * high bit set are never passed to <ctype.h> as negative values.
 *
 * str: the string to rewrite; a NULL pointer is accepted and left alone.
 *
 * Returns str itself (no copy is made).
 */
char *
lowerstr(char *str)
{
	char	   *ptr;

	/* tolerate a missing string rather than dereferencing NULL */
	if (str == NULL)
		return NULL;

	for (ptr = str; *ptr != '\0'; ptr++)
		*ptr = (char) tolower((unsigned char) *ptr);

	return str;
}
|
|
|
|
|
|
|
|
void
|
2003-08-04 08:43:34 +08:00
|
|
|
freestoplist(StopList * s)
|
|
|
|
{
|
|
|
|
char **ptr = s->stop;
|
|
|
|
|
|
|
|
if (ptr)
|
|
|
|
while (*ptr && s->len > 0)
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
free(*ptr);
|
2003-08-04 08:43:34 +08:00
|
|
|
ptr++;
|
|
|
|
s->len--;
|
|
|
|
free(s->stop);
|
|
|
|
}
|
|
|
|
memset(s, 0, sizeof(StopList));
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2003-08-04 08:43:34 +08:00
|
|
|
readstoplist(text *in, StopList * s)
|
|
|
|
{
|
|
|
|
char **stop = NULL;
|
|
|
|
|
|
|
|
s->len = 0;
|
|
|
|
if (in && VARSIZE(in) - VARHDRSZ > 0)
|
|
|
|
{
|
|
|
|
char *filename = text2char(in);
|
|
|
|
FILE *hin = NULL;
|
|
|
|
char buf[STOPBUFLEN];
|
|
|
|
int reallen = 0;
|
|
|
|
|
|
|
|
if ((hin = fopen(filename, "r")) == NULL)
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
|
|
errmsg("could not open file \"%s\": %m",
|
2003-08-04 08:43:34 +08:00
|
|
|
filename)));
|
2003-07-25 01:52:50 +08:00
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
while (fgets(buf, STOPBUFLEN, hin))
|
|
|
|
{
|
|
|
|
buf[strlen(buf) - 1] = '\0';
|
|
|
|
if (*buf == '\0')
|
|
|
|
continue;
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
if (s->len >= reallen)
|
|
|
|
{
|
|
|
|
char **tmp;
|
|
|
|
|
|
|
|
reallen = (reallen) ? reallen * 2 : 16;
|
|
|
|
tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen);
|
|
|
|
if (!tmp)
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
freestoplist(s);
|
2003-08-04 08:43:34 +08:00
|
|
|
fclose(hin);
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
|
|
errmsg("out of memory")));
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
stop = tmp;
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
|
|
|
|
stop[s->len] = strdup(buf);
|
|
|
|
if (!stop[s->len])
|
|
|
|
{
|
2003-07-21 18:27:44 +08:00
|
|
|
freestoplist(s);
|
2003-08-04 08:43:34 +08:00
|
|
|
fclose(hin);
|
2003-07-25 01:52:50 +08:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
|
|
|
errmsg("out of memory")));
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
if (s->wordop)
|
|
|
|
stop[s->len] = (s->wordop) (stop[s->len]);
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
(s->len)++;
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
fclose(hin);
|
2003-08-04 08:43:34 +08:00
|
|
|
pfree(filename);
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
2003-08-04 08:43:34 +08:00
|
|
|
s->stop = stop;
|
|
|
|
}
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
/*
 * comparestr
 *		qsort()/bsearch() comparator for arrays of C-string pointers.
 *
 * a, b: each points at an array element, i.e. at a "char *".  The pointers
 *		 are read through const-qualified types so the const of the
 *		 comparator arguments is not cast away.
 *
 * Returns the strcmp() result for the two strings.
 */
static int
comparestr(const void *a, const void *b)
{
	const char *const *lhs = (const char *const *) a;
	const char *const *rhs = (const char *const *) b;

	return strcmp(*lhs, *rhs);
}
|
|
|
|
|
|
|
|
void
|
2003-08-04 08:43:34 +08:00
|
|
|
sortstoplist(StopList * s)
|
|
|
|
{
|
|
|
|
if (s->stop && s->len > 0)
|
|
|
|
qsort(s->stop, s->len, sizeof(char *), comparestr);
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2003-08-04 08:43:34 +08:00
|
|
|
searchstoplist(StopList * s, char *key)
|
|
|
|
{
|
|
|
|
if (s->wordop)
|
|
|
|
key = (*(s->wordop)) (key);
|
|
|
|
return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
|
2003-07-21 18:27:44 +08:00
|
|
|
}
|