2006-03-11 12:38:42 +08:00
|
|
|
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
|
|
|
|
|
2003-07-21 18:27:44 +08:00
|
|
|
#ifndef __PARSER_H__
|
|
|
|
#define __PARSER_H__
|
|
|
|
|
2005-11-21 20:27:57 +08:00
|
|
|
#include <ctype.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include "ts_locale.h"
|
|
|
|
|
2005-11-23 02:17:34 +08:00
|
|
|
/*
 * TParserState - the set of states the parser can be in.
 *
 * Only TPS_Base has an explicit value (0); every other enumerator is
 * numbered implicitly by its position, so inserting or reordering entries
 * changes the numeric value of everything that follows.  TPS_Null must stay
 * last: it is a fake value that marks the end of the list.
 *
 * NOTE(review): presumably these values index a per-state action table
 * (see TParserStateAction) - confirm against the parser implementation
 * before changing the order.
 */
typedef enum
{
	TPS_Base = 0,
	TPS_InUWord,
	TPS_InLatWord,
	TPS_InCyrWord,
	TPS_InUnsignedInt,
	TPS_InSignedIntFirst,
	TPS_InSignedInt,
	TPS_InSpace,
	TPS_InUDecimalFirst,
	TPS_InUDecimal,
	TPS_InDecimalFirst,
	TPS_InDecimal,
	TPS_InVerVersion,
	TPS_InSVerVersion,
	TPS_InVersionFirst,
	TPS_InVersion,
	TPS_InMantissaFirst,
	TPS_InMantissaSign,
	TPS_InMantissa,
	TPS_InHTMLEntityFirst,
	TPS_InHTMLEntity,
	TPS_InHTMLEntityNumFirst,
	TPS_InHTMLEntityNum,
	TPS_InHTMLEntityEnd,
	TPS_InTagFirst,
	TPS_InXMLBegin,
	TPS_InTagCloseFirst,
	TPS_InTagName,
	TPS_InTagBeginEnd,
	TPS_InTag,
	TPS_InTagEscapeK,
	TPS_InTagEscapeKK,
	TPS_InTagBackSleshed,		/* (sic) spelling is part of the interface */
	TPS_InTagEnd,
	TPS_InCommentFirst,
	TPS_InCommentLast,
	TPS_InComment,
	TPS_InCloseCommentFirst,
	TPS_InCloseCommentLast,
	TPS_InCommentEnd,
	TPS_InHostFirstDomain,
	TPS_InHostDomainSecond,
	TPS_InHostDomain,
	TPS_InPortFirst,
	TPS_InPort,
	TPS_InHostFirstAN,
	TPS_InHost,
	TPS_InEmail,
	TPS_InFileFirst,
	TPS_InFileTwiddle,
	TPS_InPathFirst,
	TPS_InPathFirstFirst,
	TPS_InPathSecond,
	TPS_InFile,
	TPS_InFileNext,
	TPS_InURIFirst,
	TPS_InURIStart,
	TPS_InURI,
	TPS_InFURL,
	TPS_InProtocolFirst,
	TPS_InProtocolSecond,
	TPS_InProtocolEnd,
	TPS_InHyphenLatWordFirst,
	TPS_InHyphenLatWord,
	TPS_InHyphenCyrWordFirst,
	TPS_InHyphenCyrWord,
	TPS_InHyphenUWordFirst,
	TPS_InHyphenUWord,
	TPS_InHyphenValueFirst,
	TPS_InHyphenValue,
	TPS_InHyphenValueExact,
	TPS_InParseHyphen,
	TPS_InParseHyphenHyphen,
	TPS_InHyphenCyrWordPart,
	TPS_InHyphenLatWordPart,
	TPS_InHyphenUWordPart,
	TPS_InHyphenUnsignedInt,
	TPS_InHDecimalPartFirst,
	TPS_InHDecimalPart,
	TPS_InHVersionPartFirst,
	TPS_InHVersionPart,
	TPS_Null					/* last state (fake value) */
} TParserState;
|
2005-11-21 20:27:57 +08:00
|
|
|
|
|
|
|
/* forward declaration, so the callback typedefs below can take a TParser */
struct TParser;

/*
 * Character-class test callback: any of the p_is* functions except p_iseq.
 * Takes the parser and returns an int result.
 */
typedef int (*TParserCharTest) (struct TParser *);

/*
 * Special handler for special cases: takes the parser, returns nothing.
 */
typedef void (*TParserSpecial) (struct TParser *);
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
TParserCharTest isclass;
|
|
|
|
char c;
|
|
|
|
uint16 flags;
|
|
|
|
TParserState tostate;
|
|
|
|
int type;
|
|
|
|
TParserSpecial special;
|
|
|
|
} TParserStateActionItem;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
TParserState state;
|
|
|
|
TParserStateActionItem *action;
|
|
|
|
} TParserStateAction;
|
|
|
|
|
|
|
|
typedef struct TParserPosition
|
|
|
|
{
|
|
|
|
int posbyte; /* position of parser in bytes */
|
|
|
|
int poschar; /* osition of parser in characters */
|
|
|
|
int charlen; /* length of current char */
|
|
|
|
int lenbytelexeme;
|
|
|
|
int lencharlexeme;
|
|
|
|
TParserState state;
|
|
|
|
struct TParserPosition *prev;
|
|
|
|
int flags;
|
|
|
|
TParserStateActionItem *pushedAtAction;
|
|
|
|
} TParserPosition;
|
|
|
|
|
|
|
|
typedef struct TParser
|
|
|
|
{
|
2005-11-21 20:27:57 +08:00
|
|
|
/* string and position information */
|
2005-11-23 02:17:34 +08:00
|
|
|
char *str; /* multibyte string */
|
|
|
|
int lenstr; /* length of mbstring */
|
2005-12-12 19:10:12 +08:00
|
|
|
#ifdef TS_USE_WIDE
|
2005-11-23 02:17:34 +08:00
|
|
|
wchar_t *wstr; /* wide character string */
|
|
|
|
int lenwstr; /* length of wsting */
|
2005-12-12 19:10:12 +08:00
|
|
|
#endif
|
2005-11-21 20:27:57 +08:00
|
|
|
|
|
|
|
/* State of parse */
|
2005-11-23 02:17:34 +08:00
|
|
|
int charmaxlen;
|
2005-11-21 20:27:57 +08:00
|
|
|
bool usewide;
|
2005-11-23 02:17:34 +08:00
|
|
|
TParserPosition *state;
|
2005-11-21 20:27:57 +08:00
|
|
|
bool ignore;
|
|
|
|
bool wanthost;
|
|
|
|
|
|
|
|
/* silly char */
|
2005-11-23 02:17:34 +08:00
|
|
|
char c;
|
2005-11-21 20:27:57 +08:00
|
|
|
|
|
|
|
/* out */
|
2005-11-23 02:17:34 +08:00
|
|
|
char *lexeme;
|
|
|
|
int lenbytelexeme;
|
|
|
|
int lencharlexeme;
|
|
|
|
int type;
|
|
|
|
|
|
|
|
} TParser;
|
2005-11-21 20:27:57 +08:00
|
|
|
|
|
|
|
|
2005-11-23 02:17:34 +08:00
|
|
|
TParser *TParserInit(char *, int);
|
|
|
|
bool TParserGet(TParser *);
|
|
|
|
void TParserClose(TParser *);
|
2003-07-21 18:27:44 +08:00
|
|
|
|
|
|
|
#endif
|