postgresql/contrib/tsearch2/dict.h

115 lines
2.3 KiB
C
Raw Normal View History

2006-10-04 08:30:14 +08:00
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */
2003-07-21 18:27:44 +08:00
#ifndef __DICT_H__
#define __DICT_H__
#include "postgres.h"
#include "fmgr.h"
#include "ts_cfg.h"
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
/*
 * StopList - container for a list of stop words.
 *
 * NOTE(review): the per-field notes are inferred from the member names and
 * the stop-list prototypes in this header; confirm against the
 * implementation.
 */
typedef struct
{
	int			len;			/* presumably the number of entries in stop[] */
	char	  **stop;			/* array of C-string pointers */

	/*
	 * Callback taking a char * and returning a char *.  When (and whether)
	 * it is applied to a word is not visible from this header.
	 */
	char	   *(*wordop) (char *);
} StopList;
/*
 * Stop-list operations.  The definitions live outside this header.
 *
 * NOTE(review): judging by the names only, these sort, release, load (from a
 * text value) and probe a StopList respectively -- confirm against the
 * definitions before relying on that.
 */
extern void sortstoplist(StopList *s);
extern void freestoplist(StopList *s);
extern void readstoplist(text *in, StopList *s);
extern bool searchstoplist(StopList *s, char *key);
/*
 * DictInfo - per-dictionary record, keyed by dictionary OID.
 *
 * NOTE(review): member descriptions are inferred from names and types only;
 * confirm against the code that fills this structure in.
 */
typedef struct
{
	Oid			dict_id;		/* OID of the dictionary */
	FmgrInfo	lexize_info;	/* fmgr call info, presumably for the
								 * dictionary's lexize function */
	void	   *dictionary;		/* opaque pointer, presumably the
								 * dictionary's own data */
} DictInfo;
/*
 * Dictionary lookup interface.  The definitions live outside this header.
 *
 * NOTE(review): judging by the signatures, init_dict fills *dict for the
 * dictionary with the given OID, finddict returns the DictInfo for an OID,
 * name2id_dict maps a dictionary name to its OID, and reset_dict takes no
 * arguments and returns nothing -- confirm details against the definitions.
 */
extern void init_dict(Oid id, DictInfo *dict);
extern DictInfo *finddict(Oid id);
extern Oid	name2id_dict(text *name);
extern void reset_dict(void);
2003-07-21 18:27:44 +08:00
2006-10-04 08:30:14 +08:00
/*
 * DictSubState - state exchanged with a dictionary across successive calls.
 */
typedef struct
{
	/* in: tells the lexize function that the end of the text is reached */
	bool		isend;

	/* out: the dictionary wants the next lexeme */
	bool		getnext;

	/* internal dictionary state kept between calls with getnext == true */
	void	   *private;
} DictSubState;
2003-07-21 18:27:44 +08:00
/* simple parser of cfg string */
2003-08-04 08:43:34 +08:00
/*
 * Map - one key/value pair of C-string pointers, as handed back through
 * parse_cfgdict()'s Map ** argument.
 */
typedef struct
{
	char	   *key;
	char	   *value;
} Map;
2003-07-21 18:27:44 +08:00
2003-08-04 08:43:34 +08:00
/*
 * Parse the configuration text 'in'; the result is returned through *m.
 *
 * NOTE(review): how the resulting Map array is allocated and terminated is
 * not visible from this header -- check the definition.
 */
extern void parse_cfgdict(text *in, Map **m);
2003-07-21 18:27:44 +08:00
/* return struct for any lexize function */
2005-10-15 10:49:52 +08:00
/*
 * TSLexeme - one lexeme returned by a lexize function.
 */
typedef struct
{
	/*----------
	 * Number of the split variant this lexeme belongs to.  For example, the
	 * Norwegian word 'fotballklubber' can be split in two ways:
	 * (fotball, klubb) and (fot, ball, klubb).  So the dictionary should
	 * return:
	 *
	 *	nvariant	lexeme
	 *		1		fotball
	 *		1		klubb
	 *		2		fot
	 *		2		ball
	 *		2		klubb
	 *----------
	 */
	uint16		nvariant;

	/* NOTE(review): presumably a mask of TSL_* bits -- confirm */
	uint16		flags;

	/* C-string */
	char	   *lexeme;
} TSLexeme;
/*
 * NOTE(review): presumably a bit for TSLexeme.flags; its exact meaning is
 * defined by the code that sets and tests it -- confirm there.
 */
#define TSL_ADDPOS		0x01
/*
* Lexize subsystem
*/
2006-10-04 08:30:14 +08:00
/*
 * ParsedLex - one parsed lexeme, chained into a singly linked list through
 * 'next'.
 *
 * NOTE(review): type/lemm/lenlemm mirror the arguments of LexizeAddLemm();
 * whether lemm is NUL-terminated and what resfollow signals are not visible
 * from this header -- confirm against the implementation.
 */
typedef struct ParsedLex
{
	int			type;			/* lexeme type */
	char	   *lemm;			/* lexeme text */
	int			lenlemm;		/* length of lemm */
	bool		resfollow;
	struct ParsedLex *next;		/* next list element */
} ParsedLex;
/*
 * ListParsedLex - a list of ParsedLex entries, tracked by pointers to its
 * first and last elements.
 */
typedef struct ListParsedLex
{
	ParsedLex  *head;			/* first element */
	ParsedLex  *tail;			/* last element */
} ListParsedLex;
/*
 * LexizeData - working state of the lexize subsystem; passed to
 * LexizeInit(), LexizeAddLemm() and LexizeExec().
 *
 * NOTE(review): members without a comment are not described anywhere in
 * this header; see the implementation for their exact role.
 */
typedef struct
{
	TSCfgInfo  *cfg;
	Oid			curDictId;
	int			posDict;
	DictSubState dictState;
	ParsedLex  *curSub;

	ListParsedLex towork;		/* list currently being worked on */
	ListParsedLex waste;		/* lexemes that have already been lexized */

	/*
	 * Fields that store the last variant to lexize (basically for a
	 * thesaurus or similar dictionary, which wants several lexemes).
	 */
	ParsedLex  *lastRes;
	TSLexeme   *tmpRes;
} LexizeData;
/*
 * Lexize subsystem entry points.  The definitions live outside this header.
 *
 * NOTE(review): judging by the signatures, LexizeInit sets up *ld for the
 * given configuration, LexizeAddLemm hands one parsed lexeme (type, text,
 * length) to *ld, and LexizeExec returns a TSLexeme pointer while reporting
 * the corresponding ParsedLex through *correspondLexem -- confirm ownership
 * and NULL-return conventions against the definitions.
 */
extern void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
extern void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
extern TSLexeme *LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
2003-07-21 18:27:44 +08:00
#endif