/*
 * postgresql/contrib/tsearch2/ispell/regis.c
 * (216 lines, 3.3 KiB, C)
 */

#include "regis.h"
#include "ts_locale.h"
#include "common.h"
/*
 * RS_isRegis
 *		Report whether str is a pattern that RS_compile() accepts.
 *
 * The accepted grammar is a sequence of items, where each item is either
 * a single alphabetic character or a bracketed class: '[', an optional
 * '^', alphabetic characters, and a closing ']'.  A class without '^' must
 * contain at least one alphabetic character.
 *
 * The check walks the string with the same state transitions RS_compile()
 * uses, so a string such as "]a", "a^" or "[ab" is rejected here rather
 * than being reported as regis and then failing in RS_compile().
 *
 * A NULL or empty str yields true.
 */
bool
RS_isRegis(const char *str)
{
	enum
	{
		SCAN_ITEM,				/* expecting a letter or '[' */
		SCAN_CLASS_OPEN,		/* just after '[': expecting '^' or a letter */
		SCAN_CLASS_BODY			/* inside a class: expecting a letter or ']' */
	}			state = SCAN_ITEM;

	while (str && *str)
	{
		switch (state)
		{
			case SCAN_ITEM:
				if (t_iseq(str, '['))
					state = SCAN_CLASS_OPEN;
				else if (!t_isalpha(str))
					return false;
				break;

			case SCAN_CLASS_OPEN:
				/* both '^' and a first letter move us into the class body */
				if (t_iseq(str, '^') || t_isalpha(str))
					state = SCAN_CLASS_BODY;
				else
					return false;
				break;

			case SCAN_CLASS_BODY:
				if (t_iseq(str, ']'))
					state = SCAN_ITEM;
				else if (!t_isalpha(str))
					return false;
				break;
		}

		/* advance by one (possibly multibyte) character */
		str += pg_mblen(str);
	}

	/* ending inside a bracketed class means the pattern is incomplete */
	return state == SCAN_ITEM;
}
2004-08-29 13:07:03 +08:00
/*
 * Parser states used by RS_compile() while scanning a pattern.
 */
enum
{
	RS_IN_ONEOF = 1,			/* just after '[': expects '^' or a letter */
	RS_IN_ONEOF_IN = 2,			/* inside "[...": expects a letter or ']' */
	RS_IN_NONEOF = 3,			/* inside "[^...": expects a letter or ']' */
	RS_IN_WAIT = 4				/* between items: expects a letter or '[' */
};
2004-08-29 13:07:03 +08:00
/*
 * newRegisNode
 *		Allocate a zero-filled RegisNode of RNHDRSZ + len + 1 bytes.
 *
 * When prev is not NULL the new node is stored in prev->next.  If the
 * allocation fails, "No memory" is reported through ts_error(ERROR, ...).
 * Returns the new node.
 */
static RegisNode *
newRegisNode(RegisNode * prev, int len)
{
	RegisNode  *node = calloc(1, RNHDRSZ + len + 1);

	if (node == NULL)
		ts_error(ERROR, "No memory");

	if (prev != NULL)
		prev->next = node;

	return node;
}
void
RS_compile(Regis * r, bool issuffix, char *str)
2004-08-29 13:07:03 +08:00
{
int len = strlen(str);
2004-08-29 13:07:03 +08:00
int state = RS_IN_WAIT;
2006-10-04 08:30:14 +08:00
char *c = (char *) str;
2004-08-29 13:07:03 +08:00
RegisNode *ptr = NULL;
memset(r, 0, sizeof(Regis));
r->issuffix = (issuffix) ? 1 : 0;
2006-10-04 08:30:14 +08:00
while (*c)
2004-08-29 13:07:03 +08:00
{
if (state == RS_IN_WAIT)
{
if (t_isalpha(c))
2004-08-29 13:07:03 +08:00
{
if (ptr)
ptr = newRegisNode(ptr, len);
else
2004-08-29 13:07:03 +08:00
ptr = r->node = newRegisNode(NULL, len);
COPYCHAR(ptr->data, c);
ptr->type = RSF_ONEOF;
ptr->len = pg_mblen(c);
2004-08-29 13:07:03 +08:00
}
2006-10-04 08:30:14 +08:00
else if (t_iseq(c, '['))
2004-08-29 13:07:03 +08:00
{
if (ptr)
ptr = newRegisNode(ptr, len);
else
2004-08-29 13:07:03 +08:00
ptr = r->node = newRegisNode(NULL, len);
ptr->type = RSF_ONEOF;
2004-08-29 13:07:03 +08:00
state = RS_IN_ONEOF;
}
else
2006-10-04 08:30:14 +08:00
ts_error(ERROR, "Error in regis: %s", str);
2004-08-29 13:07:03 +08:00
}
else if (state == RS_IN_ONEOF)
{
2006-10-04 08:30:14 +08:00
if (t_iseq(c, '^'))
2004-08-29 13:07:03 +08:00
{
ptr->type = RSF_NONEOF;
2004-08-29 13:07:03 +08:00
state = RS_IN_NONEOF;
}
else if (t_isalpha(c))
2004-08-29 13:07:03 +08:00
{
COPYCHAR(ptr->data, c);
ptr->len = pg_mblen(c);
2004-08-29 13:07:03 +08:00
state = RS_IN_ONEOF_IN;
}
else
ts_error(ERROR, "Error in regis: %s", str);
2004-08-29 13:07:03 +08:00
}
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
{
if (t_isalpha(c))
2004-08-29 13:07:03 +08:00
{
2006-10-04 08:30:14 +08:00
COPYCHAR(ptr->data + ptr->len, c);
ptr->len += pg_mblen(c);
2004-08-29 13:07:03 +08:00
}
2006-10-04 08:30:14 +08:00
else if (t_iseq(c, ']'))
2004-08-29 13:07:03 +08:00
state = RS_IN_WAIT;
else
ts_error(ERROR, "Error in regis: %s", str);
2004-08-29 13:07:03 +08:00
}
else
ts_error(ERROR, "Internal error in RS_compile: %d", state);
c += pg_mblen(c);
}
ptr = r->node;
2004-08-29 13:07:03 +08:00
while (ptr)
{
r->nchar++;
2004-08-29 13:07:03 +08:00
ptr = ptr->next;
}
}
2004-08-29 13:07:03 +08:00
/*
 * RS_free
 *		Release every node in r's list with free() and set r->node to NULL.
 *
 * The other fields of *r are left as they are; *r itself is not freed.
 */
void
RS_free(Regis * r)
{
	RegisNode  *cur;
	RegisNode  *following;

	for (cur = r->node; cur != NULL; cur = following)
	{
		/* fetch the link before the node is released */
		following = cur->next;
		free(cur);
	}

	r->node = NULL;
}
/*
 * mb_strchr(str, c)
 *		Report whether the character that c points at occurs in str.
 *
 * With TS_USE_WIDE defined, str is walked one character at a time using
 * pg_mblen(), and a character matches when it has the same byte length as
 * the one at c and all of its bytes are equal.  Otherwise only the single
 * byte *c is looked up with strchr().
 */
#ifdef TS_USE_WIDE
static bool
mb_strchr(char *str, char *c)
{
	int			clen = pg_mblen(c);
	char	   *ptr = str;

	while (*ptr)
	{
		int			plen = pg_mblen(ptr);

		if (plen == clen && memcmp(ptr, c, clen) == 0)
			return true;
		ptr += plen;
	}

	return false;
}
#else
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
#endif
/*
 * RS_execute
 *		Test the string str against the compiled pattern *r.
 *
 * The string length is measured in characters (stepping with pg_mblen());
 * a string with fewer characters than r->nchar never matches.  When
 * r->issuffix is set, matching starts r->nchar characters before the end
 * of str; otherwise it starts at the beginning.  Each node is then checked
 * against one character: an RSF_ONEOF node requires the character to be
 * in the node's data, an RSF_NONEOF node requires it not to be.
 *
 * Returns true if every node accepts its character, false otherwise.  Any
 * other node type is reported through ts_error(ERROR, ...).
 */
bool
RS_execute(Regis * r, char *str)
{
	RegisNode  *node;
	char	   *pos;
	int			nchars = 0;
	int			skip;

	/* count characters, not bytes */
	for (pos = str; *pos; pos += pg_mblen(pos))
		nchars++;

	if (nchars < r->nchar)
		return false;

	pos = str;
	if (r->issuffix)
	{
		/* step over the leading characters the pattern does not cover */
		for (skip = nchars - r->nchar; skip > 0; skip--)
			pos += pg_mblen(pos);
	}

	for (node = r->node; node != NULL; node = node->next)
	{
		switch (node->type)
		{
			case RSF_ONEOF:
				if (!mb_strchr((char *) node->data, pos))
					return false;
				break;

			case RSF_NONEOF:
				if (mb_strchr((char *) node->data, pos))
					return false;
				break;

			default:
				ts_error(ERROR, "RS_execute: Unknown type node: %d\n", node->type);
		}

		/* each node consumes exactly one character of the input */
		pos += pg_mblen(pos);
	}

	return true;
}