postgresql/contrib/tsearch2/wordparser/deflex.c
Teodor Sigaev c52795d18a Text parser rewritten:
- supports multibyte encodings
        - more strict rules for lexemes
        - flex isn't used
Add:
        - tsquery plainto_tsquery(text)
          Function makes tsquery from plain text.
        - &&, ||, !! operation for tsquery for combining
          tsquery from its parts:  'foo & bar' || 'asd' => 'foo & bar | asd'
2005-11-21 12:27:57 +00:00

56 lines
777 B
C

#include "deflex.h"
/*
 * lex_descr
 *		Human-readable description for each lexeme type, one string per
 *		slot (24 slots, numbered 0..23 in the comments below).
 *
 * Slot 0 holds an empty string.  Slot i here lines up with slot i of
 * tok_alias[] (e.g. slot 1 is "Latin word" here and "lword" there), so the
 * two tables must be kept in the same order and at the same length.
 *
 * NOTE(review): presumably indexed by the lexeme type codes declared in
 * deflex.h -- confirm against that header before reordering or inserting.
 */
const char *lex_descr[] = {
	"",										/*  0 */
	"Latin word",							/*  1 */
	"Non-latin word",						/*  2 */
	"Word",									/*  3 */
	"Email",								/*  4 */
	"URL",									/*  5 */
	"Host",									/*  6 */
	"Scientific notation",					/*  7 */
	"VERSION",								/*  8 */
	"Part of hyphenated word",				/*  9 */
	"Non-latin part of hyphenated word",	/* 10 */
	"Latin part of hyphenated word",		/* 11 */
	"Space symbols",						/* 12 */
	"HTML Tag",								/* 13 */
	"Protocol head",						/* 14 */
	"Hyphenated word",						/* 15 */
	"Latin hyphenated word",				/* 16 */
	"Non-latin hyphenated word",			/* 17 */
	"URI",									/* 18 */
	"File or path name",					/* 19 */
	"Decimal notation",						/* 20 */
	"Signed integer",						/* 21 */
	"Unsigned integer",						/* 22 */
	"HTML Entity"							/* 23 */
};
/*
 * tok_alias
 *		Short lowercase alias for each lexeme type, one string per slot
 *		(24 slots, numbered 0..23 in the comments below).
 *
 * Slot 0 holds an empty string.  Slot i here lines up with slot i of
 * lex_descr[] (e.g. slot 4 is "email" here and "Email" there), so the two
 * tables must be kept in the same order and at the same length.
 *
 * NOTE(review): presumably indexed by the lexeme type codes declared in
 * deflex.h -- confirm against that header before reordering or inserting.
 */
const char *tok_alias[] = {
	"",					/*  0 */
	"lword",			/*  1 */
	"nlword",			/*  2 */
	"word",				/*  3 */
	"email",			/*  4 */
	"url",				/*  5 */
	"host",				/*  6 */
	"sfloat",			/*  7 */
	"version",			/*  8 */
	"part_hword",		/*  9 */
	"nlpart_hword",		/* 10 */
	"lpart_hword",		/* 11 */
	"blank",			/* 12 */
	"tag",				/* 13 */
	"protocol",			/* 14 */
	"hword",			/* 15 */
	"lhword",			/* 16 */
	"nlhword",			/* 17 */
	"uri",				/* 18 */
	"file",				/* 19 */
	"float",			/* 20 */
	"int",				/* 21 */
	"uint",				/* 22 */
	"entity"			/* 23 */
};