2006-10-04 08:30:14 +08:00
|
|
|
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */
|
2006-03-11 12:38:42 +08:00
|
|
|
|
2003-07-21 18:27:44 +08:00
|
|
|
#ifndef __DICT_H__
|
|
|
|
#define __DICT_H__
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "fmgr.h"
|
2006-05-31 22:05:31 +08:00
|
|
|
#include "ts_cfg.h"
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
/*
 * StopList - container for a set of stop words.
 *
 * NOTE(review): the field descriptions are inferred from the field names
 * and from the stop-list prototypes declared in this header; confirm them
 * against the implementation.
 */
typedef struct
{
	int			len;			/* presumably the number of entries in stop */
	char	  **stop;			/* array of C strings, one per stop word */
	char	   *(*wordop) (char *);		/* per-word transform hook; when it is
										 * applied is not visible here -- TODO
										 * confirm */
} StopList;
|
|
|
|
|
|
|
|
void sortstoplist(StopList * s);
|
|
|
|
void freestoplist(StopList * s);
|
|
|
|
void readstoplist(text *in, StopList * s);
|
|
|
|
bool searchstoplist(StopList * s, char *key);
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
Oid dict_id;
|
|
|
|
FmgrInfo lexize_info;
|
|
|
|
void *dictionary;
|
|
|
|
} DictInfo;
|
|
|
|
|
|
|
|
void init_dict(Oid id, DictInfo * dict);
|
|
|
|
DictInfo *finddict(Oid id);
|
|
|
|
Oid name2id_dict(text *name);
|
|
|
|
void reset_dict(void);
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2006-10-04 08:30:14 +08:00
|
|
|
/*
 * DictSubState - state exchanged with a dictionary across successive calls.
 *
 * The in/out direction noted on each field is from the dictionary's point
 * of view.
 */
typedef struct
{
	bool		isend;			/* in: tells the lexize function that the end
								 * of the text has been reached */
	bool		getnext;		/* out: dictionary wants the next lexeme */
	void	   *private;		/* internal dictionary state kept between
								 * calls made with getnext == true */
} DictSubState;
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
/* simple parser of cfg string */
|
2003-08-04 08:43:34 +08:00
|
|
|
/*
 * Map - one key/value pair.
 *
 * NOTE(review): presumably the element type produced by parse_cfgdict();
 * ownership of the two strings is decided by whoever fills the struct.
 */
typedef struct
{
	char	   *key;			/* key string */
	char	   *value;			/* value string */
} Map;
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2003-08-04 08:43:34 +08:00
|
|
|
/*
 * Parse the configuration string 'in' and return the result through *m.
 *
 * NOTE(review): presumably *m receives an array of key/value Map entries;
 * how the array is allocated and terminated is defined by the
 * implementation, not by this header -- confirm before relying on it.
 */
void parse_cfgdict(text *in, Map ** m);
|
2003-07-21 18:27:44 +08:00
|
|
|
|
2005-01-25 23:24:38 +08:00
|
|
|
/* return struct for any lexize function */
|
2005-10-15 10:49:52 +08:00
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* number of variant of split word , for example Word 'fotballklubber'
|
|
|
|
* (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
|
2006-10-04 08:30:14 +08:00
|
|
|
* ball, klubb ). So, dictionary should return: nvariant lexeme 1
|
|
|
|
* fotball 1 klubb 2 fot 2 ball 2 klubb
|
2005-10-15 10:49:52 +08:00
|
|
|
*/
|
|
|
|
uint16 nvariant;
|
2005-01-25 23:24:38 +08:00
|
|
|
|
2005-10-15 10:49:52 +08:00
|
|
|
uint16 flags;
|
2005-01-25 23:24:38 +08:00
|
|
|
|
|
|
|
/* C-string */
|
2005-10-15 10:49:52 +08:00
|
|
|
char *lexeme;
|
|
|
|
} TSLexeme;
|
2005-01-25 23:24:38 +08:00
|
|
|
|
2006-05-31 22:05:31 +08:00
|
|
|
/*
 * NOTE(review): presumably a flag bit for TSLexeme.flags; its exact meaning
 * is not visible in this header -- confirm against the lexize code.
 */
#define TSL_ADDPOS 0x01
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Lexize subsystem
 */
|
|
|
|
|
2006-10-04 08:30:14 +08:00
|
|
|
/*
 * ParsedLex - one parsed token, linkable into a singly-linked list.
 *
 * NOTE(review): the field descriptions are inferred from names and from the
 * LexizeAddLemm() signature; confirm against the implementation.
 */
typedef struct ParsedLex
{
	int			type;			/* token type */
	char	   *lemm;			/* token text; length is given by lenlemm, so
								 * it may not be NUL-terminated -- TODO
								 * confirm */
	int			lenlemm;		/* length of lemm */
	bool		resfollow;		/* meaning not visible in this header -- TODO
								 * document from the lexize code */
	struct ParsedLex *next;		/* next list element */
} ParsedLex;
|
|
|
|
|
|
|
|
typedef struct ListParsedLex
|
|
|
|
{
|
|
|
|
ParsedLex *head;
|
|
|
|
ParsedLex *tail;
|
|
|
|
} ListParsedLex;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
TSCfgInfo *cfg;
|
|
|
|
Oid curDictId;
|
|
|
|
int posDict;
|
|
|
|
DictSubState dictState;
|
|
|
|
ParsedLex *curSub;
|
|
|
|
ListParsedLex towork; /* current list to work */
|
|
|
|
ListParsedLex waste; /* list of lexemes that already lexized */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fields to store last variant to lexize (basically, thesaurus or similar
|
|
|
|
* to, which wants several lexemes
|
|
|
|
*/
|
|
|
|
|
|
|
|
ParsedLex *lastRes;
|
|
|
|
TSLexeme *tmpRes;
|
|
|
|
} LexizeData;
|
|
|
|
|
|
|
|
|
|
|
|
void LexizeInit(LexizeData * ld, TSCfgInfo * cfg);
|
|
|
|
void LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm);
|
|
|
|
TSLexeme *LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem);
|
2006-05-31 22:05:31 +08:00
|
|
|
|
2003-07-21 18:27:44 +08:00
|
|
|
#endif
|