/*
 * Mirror of https://git.postgresql.org/git/postgresql.git
 * (synced 2024-12-27 08:39:28 +08:00; 521 lines, 12 KiB, C).
 */
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "spell.h"
|
|
|
|
/*
 * Longest word (in bytes) that NormalizeWord() accepts; also used to size
 * the scratch buffers in CheckSuffix() and CheckPrefix().
 */
#define MAXNORMLEN 56

/*
 * Case-insensitive test of whether string x starts with string y
 * (yields 0 on a match, like strncasecmp).  Note that y is evaluated twice,
 * so it must not have side effects.
 */
#define STRNCASECMP(x,y) (strncasecmp((x), (y), strlen(y)))
|
|
|
|
static int cmpspell(const void *s1,const void *s2){
|
|
return(strcmp(((const SPELL*)s1)->word,((const SPELL*)s2)->word));
|
|
}
|
|
|
|
/*
 * Lower-case a NUL-terminated string in place.  Each byte is handled as an
 * unsigned char so that it is a valid argument for tolower().
 */
static void
strlower(char *str)
{
	unsigned char *cursor;

	for (cursor = (unsigned char *) str; *cursor != '\0'; cursor++)
		*cursor = tolower(*cursor);
}
|
|
|
|
/*
 * Backward string comparison for suffix tree operations.
 *
 * Compares s1 and s2 character by character starting at their last
 * characters and moving towards the front.  Returns -1, 0 or 1.  If one
 * string is exhausted first (it is a tail of the other), the shorter
 * string sorts first.
 */
static int
strbcmp(const char *s1, const char *s2)
{
	int			i1 = strlen(s1) - 1;
	int			i2 = strlen(s2) - 1;

	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* all compared characters matched: decide by what is left over */
	if (i1 == i2)
		return 0;
	return (i1 < i2) ? -1 : 1;
}
|
|
/*
 * Bounded backward string comparison: like strbcmp(), but looks at no more
 * than 'count' trailing characters.  Returns 0 as soon as 'count'
 * characters have matched; otherwise -1 or 1 as for strbcmp().
 */
static int
strbncmp(const char *s1, const char *s2, size_t count)
{
	int			i1 = strlen(s1) - 1;
	int			i2 = strlen(s2) - 1;
	int			remaining = count;

	for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* the whole requested tail matched, or both strings ran out together */
	if (remaining == 0 || i1 == i2)
		return 0;
	return (i1 < i2) ? -1 : 1;
}
|
|
|
|
static int
|
|
cmpaffix(const void *s1,const void *s2){
|
|
if (((const AFFIX*)s1)->type < ((const AFFIX*)s2)->type) return -1;
|
|
if (((const AFFIX*)s1)->type > ((const AFFIX*)s2)->type) return 1;
|
|
if (((const AFFIX*)s1)->type == 'p')
|
|
return(strcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl));
|
|
else
|
|
return(strbcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl));
|
|
}
|
|
|
|
int
|
|
AddSpell(IspellDict * Conf,const char * word,const char *flag){
|
|
if(Conf->nspell>=Conf->mspell){
|
|
if(Conf->mspell){
|
|
Conf->mspell+=1024*20;
|
|
Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
|
|
}else{
|
|
Conf->mspell=1024*20;
|
|
Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
|
|
}
|
|
if ( Conf->Spell == NULL )
|
|
elog(ERROR,"No memory for AddSpell");
|
|
}
|
|
Conf->Spell[Conf->nspell].word=strdup(word);
|
|
if ( !Conf->Spell[Conf->nspell].word )
|
|
elog(ERROR,"No memory for AddSpell");
|
|
strncpy(Conf->Spell[Conf->nspell].flag,flag,10);
|
|
Conf->nspell++;
|
|
return(0);
|
|
}
|
|
|
|
|
|
int
|
|
ImportDictionary(IspellDict * Conf,const char *filename){
|
|
unsigned char str[BUFSIZ];
|
|
FILE *dict;
|
|
|
|
if(!(dict=fopen(filename,"r")))return(1);
|
|
while(fgets(str,sizeof(str),dict)){
|
|
unsigned char *s;
|
|
const unsigned char *flag;
|
|
|
|
flag = NULL;
|
|
if((s=strchr(str,'/'))){
|
|
*s=0;
|
|
s++;flag=s;
|
|
while(*s){
|
|
if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
|
|
s++;
|
|
else {
|
|
*s=0;
|
|
break;
|
|
}
|
|
}
|
|
}else{
|
|
flag="";
|
|
}
|
|
strlower(str);
|
|
/* Dont load words if first letter is not required */
|
|
/* It allows to optimize loading at search time */
|
|
s=str;
|
|
while(*s){
|
|
if(*s=='\r')*s=0;
|
|
if(*s=='\n')*s=0;
|
|
s++;
|
|
}
|
|
AddSpell(Conf,str,flag);
|
|
}
|
|
fclose(dict);
|
|
return(0);
|
|
}
|
|
|
|
|
|
static SPELL *
|
|
FindWord(IspellDict * Conf, const char *word, int affixflag) {
|
|
int l,c,r,resc,resl,resr, i;
|
|
|
|
i = (int)(*word) & 255;
|
|
l = Conf->SpellTree.Left[i];
|
|
r = Conf->SpellTree.Right[i];
|
|
if (l == -1) return (NULL);
|
|
while(l<=r){
|
|
c = (l + r) >> 1;
|
|
resc = strcmp(Conf->Spell[c].word, word);
|
|
if( (resc == 0) &&
|
|
((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
|
|
return(&Conf->Spell[c]);
|
|
}
|
|
resl = strcmp(Conf->Spell[l].word, word);
|
|
if( (resl == 0) &&
|
|
((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
|
|
return(&Conf->Spell[l]);
|
|
}
|
|
resr = strcmp(Conf->Spell[r].word, word);
|
|
if( (resr == 0) &&
|
|
((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
|
|
return(&Conf->Spell[r]);
|
|
}
|
|
if(resc < 0){
|
|
l = c + 1;
|
|
r--;
|
|
} else if(resc > 0){
|
|
r = c - 1;
|
|
l++;
|
|
} else {
|
|
l++;
|
|
r--;
|
|
}
|
|
}
|
|
return(NULL);
|
|
}
|
|
|
|
int
|
|
AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
|
|
if(Conf->naffixes>=Conf->maffixes){
|
|
if(Conf->maffixes){
|
|
Conf->maffixes+=16;
|
|
Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
|
|
}else{
|
|
Conf->maffixes=16;
|
|
Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
|
|
}
|
|
if ( Conf->Affix == NULL )
|
|
elog(ERROR,"No memory for AddAffix");
|
|
}
|
|
if (type=='s') {
|
|
sprintf(Conf->Affix[Conf->naffixes].mask,"%s$",mask);
|
|
} else {
|
|
sprintf(Conf->Affix[Conf->naffixes].mask,"^%s",mask);
|
|
}
|
|
Conf->Affix[Conf->naffixes].compile = 1;
|
|
Conf->Affix[Conf->naffixes].flag=flag;
|
|
Conf->Affix[Conf->naffixes].type=type;
|
|
|
|
strcpy(Conf->Affix[Conf->naffixes].find,find);
|
|
strcpy(Conf->Affix[Conf->naffixes].repl,repl);
|
|
Conf->Affix[Conf->naffixes].replen=strlen(repl);
|
|
Conf->naffixes++;
|
|
return(0);
|
|
}
|
|
|
|
/*
 * Copy src into dist, leaving out every space, tab and '-' character.
 * dist is NUL-terminated and returned.
 */
static char *
remove_spaces(char *dist, char *src)
{
	char	   *out = dist;
	const char *in;

	for (in = src; *in != '\0'; in++)
	{
		if (*in == ' ' || *in == '-' || *in == '\t')
			continue;
		*out++ = *in;
	}
	*out = '\0';

	return (dist);
}
|
|
|
|
|
|
int
|
|
ImportAffixes(IspellDict * Conf,const char *filename){
|
|
unsigned char str[BUFSIZ];
|
|
unsigned char flag=0;
|
|
unsigned char mask[BUFSIZ]="";
|
|
unsigned char find[BUFSIZ]="";
|
|
unsigned char repl[BUFSIZ]="";
|
|
unsigned char *s;
|
|
int i;
|
|
int suffixes=0;
|
|
int prefixes=0;
|
|
FILE *affix;
|
|
|
|
if(!(affix=fopen(filename,"r")))
|
|
return(1);
|
|
|
|
while(fgets(str,sizeof(str),affix)){
|
|
if(!STRNCASECMP(str,"suffixes")){
|
|
suffixes=1;
|
|
prefixes=0;
|
|
continue;
|
|
}
|
|
if(!STRNCASECMP(str,"prefixes")){
|
|
suffixes=0;
|
|
prefixes=1;
|
|
continue;
|
|
}
|
|
if(!STRNCASECMP(str,"flag ")){
|
|
s=str+5;
|
|
while(strchr("* ",*s))
|
|
s++;
|
|
flag=*s;
|
|
continue;
|
|
}
|
|
if((!suffixes)&&(!prefixes))continue;
|
|
if((s=strchr(str,'#')))*s=0;
|
|
if(!*str)continue;
|
|
strlower(str);
|
|
strcpy(mask,"");
|
|
strcpy(find,"");
|
|
strcpy(repl,"");
|
|
i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
|
|
remove_spaces(str,repl);strcpy(repl,str);
|
|
remove_spaces(str,find);strcpy(find,str);
|
|
remove_spaces(str,mask);strcpy(mask,str);
|
|
switch(i){
|
|
case 3:
|
|
break;
|
|
case 2:
|
|
if(*find != '\0'){
|
|
strcpy(repl,find);
|
|
strcpy(find,"");
|
|
}
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
|
|
|
|
}
|
|
fclose(affix);
|
|
|
|
return(0);
|
|
}
|
|
|
|
void
|
|
SortDictionary(IspellDict * Conf){
|
|
int CurLet = -1, Let;size_t i;
|
|
|
|
qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
|
|
|
|
for(i = 0; i < 256 ; i++ )
|
|
Conf->SpellTree.Left[i] = -1;
|
|
|
|
for(i = 0; i < Conf->nspell; i++) {
|
|
Let = (int)(*(Conf->Spell[i].word)) & 255;
|
|
if (CurLet != Let) {
|
|
Conf->SpellTree.Left[Let] = i;
|
|
CurLet = Let;
|
|
}
|
|
Conf->SpellTree.Right[Let] = i;
|
|
}
|
|
}
|
|
|
|
void
|
|
SortAffixes(IspellDict * Conf) {
|
|
int CurLetP = -1, CurLetS = -1, Let;
|
|
AFFIX *Affix; size_t i;
|
|
|
|
if (Conf->naffixes > 1)
|
|
qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
|
|
for(i = 0; i < 256; i++) {
|
|
Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
|
|
Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
|
|
}
|
|
|
|
for(i = 0; i < Conf->naffixes; i++) {
|
|
Affix = &(((AFFIX*)Conf->Affix)[i]);
|
|
if(Affix->type == 'p') {
|
|
Let = (int)(*(Affix->repl)) & 255;
|
|
if (CurLetP != Let) {
|
|
Conf->PrefixTree.Left[Let] = i;
|
|
CurLetP = Let;
|
|
}
|
|
Conf->PrefixTree.Right[Let] = i;
|
|
} else {
|
|
Let = (Affix->replen) ? (int)(Affix->repl[Affix->replen-1]) & 255 : 0;
|
|
if (CurLetS != Let) {
|
|
Conf->SuffixTree.Left[Let] = i;
|
|
CurLetS = Let;
|
|
}
|
|
Conf->SuffixTree.Right[Let] = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
static char *
|
|
CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
|
|
regmatch_t subs[2]; /* workaround for apache&linux */
|
|
char newword[2*MAXNORMLEN] = "";
|
|
int err;
|
|
|
|
*res = strbncmp(word, Affix->repl, Affix->replen);
|
|
if (*res < 0) {
|
|
return NULL;
|
|
}
|
|
if (*res > 0) {
|
|
return NULL;
|
|
}
|
|
strcpy(newword, word);
|
|
strcpy(newword+len-Affix->replen, Affix->find);
|
|
|
|
if (Affix->compile) {
|
|
err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
|
|
if(err){
|
|
/*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
|
|
regfree(&(Affix->reg));
|
|
return(NULL);
|
|
}
|
|
Affix->compile = 0;
|
|
}
|
|
if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
|
|
if(FindWord(Conf, newword, Affix->flag))
|
|
return pstrdup(newword);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
#define NS 1
|
|
#define MAX_NORM 512
|
|
static int
|
|
CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
|
|
char **forms, char ***cur ) {
|
|
regmatch_t subs[NS*2];
|
|
char newword[2*MAXNORMLEN] = "";
|
|
int err, ls, res, lres;
|
|
size_t newlen;
|
|
AFFIX *CAffix = Conf->Affix;
|
|
|
|
res = strncmp(word, Affix->repl, Affix->replen);
|
|
if (res != 0) {
|
|
return res;
|
|
}
|
|
strcpy(newword, Affix->find);
|
|
strcat(newword, word+Affix->replen);
|
|
|
|
if (Affix->compile) {
|
|
err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
|
|
if(err){
|
|
/*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
|
|
regfree(&(Affix->reg));
|
|
return (0);
|
|
}
|
|
Affix->compile = 0;
|
|
}
|
|
if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
|
|
SPELL * curspell;
|
|
|
|
if((curspell=FindWord(Conf, newword, Affix->flag))){
|
|
if ((*cur - forms) < (MAX_NORM-1)) {
|
|
**cur = pstrdup(newword);
|
|
(*cur)++; **cur = NULL;
|
|
}
|
|
}
|
|
newlen = strlen(newword);
|
|
ls = Conf->SuffixTree.Left[pi];
|
|
if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
|
|
**cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
|
|
if (**cur) {
|
|
(*cur)++; **cur = NULL;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
char **
|
|
NormalizeWord(IspellDict * Conf,char *word){
|
|
/*regmatch_t subs[NS];*/
|
|
size_t len;
|
|
char ** forms;
|
|
char **cur;
|
|
AFFIX * Affix;
|
|
int ri, pi, ipi, lp, rp, cp, ls, rs;
|
|
int lres, rres, cres = 0;
|
|
SPELL *spell;
|
|
|
|
len=strlen(word);
|
|
if (len > MAXNORMLEN)
|
|
return(NULL);
|
|
|
|
strlower(word);
|
|
|
|
forms=(char **) palloc(MAX_NORM*sizeof(char **));
|
|
cur=forms;*cur=NULL;
|
|
|
|
ri = (int)(*word) & 255;
|
|
pi = (int)(word[strlen(word)-1]) & 255;
|
|
Affix=(AFFIX*)Conf->Affix;
|
|
|
|
/* Check that the word itself is normal form */
|
|
if((spell = FindWord(Conf, word, 0))){
|
|
*cur=pstrdup(word);
|
|
cur++;*cur=NULL;
|
|
}
|
|
|
|
/* Find all other NORMAL forms of the 'word' */
|
|
|
|
for (ipi = 0; ipi <= pi; ipi += pi) {
|
|
|
|
/* check prefix */
|
|
lp = Conf->PrefixTree.Left[ri];
|
|
rp = Conf->PrefixTree.Right[ri];
|
|
while (lp >= 0 && lp <= rp) {
|
|
cp = (lp + rp) >> 1;
|
|
cres = 0;
|
|
if ((cur - forms) < (MAX_NORM-1)) {
|
|
cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
|
|
}
|
|
if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
|
|
lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
|
|
}
|
|
if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
|
|
rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
|
|
}
|
|
if (cres < 0) {
|
|
rp = cp - 1;
|
|
lp++;
|
|
} else if (cres > 0) {
|
|
lp = cp + 1;
|
|
rp--;
|
|
} else {
|
|
lp++;
|
|
rp--;
|
|
}
|
|
}
|
|
|
|
/* check suffix */
|
|
ls = Conf->SuffixTree.Left[ipi];
|
|
rs = Conf->SuffixTree.Right[ipi];
|
|
while (ls >= 0 && ls <= rs) {
|
|
if ( ((cur - forms) < (MAX_NORM-1)) ) {
|
|
*cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
|
|
if (*cur) {
|
|
cur++; *cur = NULL;
|
|
}
|
|
}
|
|
if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
|
|
*cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
|
|
if (*cur) {
|
|
cur++; *cur = NULL;
|
|
}
|
|
}
|
|
ls++;
|
|
rs--;
|
|
} /* end while */
|
|
|
|
} /* for ipi */
|
|
|
|
if(cur==forms){
|
|
pfree(forms);
|
|
return(NULL);
|
|
}
|
|
return(forms);
|
|
}
|
|
|
|
void
|
|
FreeIspell (IspellDict *Conf) {
|
|
int i;
|
|
AFFIX *Affix = (AFFIX *)Conf->Affix;
|
|
|
|
for (i = 0; i < Conf->naffixes; i++) {
|
|
if (Affix[i].compile == 0) {
|
|
regfree(&(Affix[i].reg));
|
|
}
|
|
}
|
|
for (i = 0; i < Conf->naffixes; i++) {
|
|
free( Conf->Spell[i].word );
|
|
}
|
|
free(Conf->Affix);
|
|
free(Conf->Spell);
|
|
memset( (void*)Conf, 0, sizeof(IspellDict) );
|
|
return;
|
|
}
|