mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-15 08:20:16 +08:00
Some languages have symbols with zero display width and/or vowels/signs which
are not alphabetic characters, although they are not word-breakers either. So, treat them as part of a word. Per off-list discussion with Dibyendra Hyoju <dibyendra@gmail.com> and Bal Krishna Bal <balkrishna7bal@gmail.com> about the Nepali language and the Devanagari alphabet.
This commit is contained in:
parent
c394bd331a
commit
e43bb5beb7
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.22 2009/03/10 17:32:14 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.23 2009/03/11 16:03:40 teodor Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -659,6 +659,291 @@ p_isURLPath(TParser *prs)
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns true if current character has zero display length or
|
||||
* it's a special sign in several languages. Such characters
|
||||
* aren't a word-breaker although they aren't an isalpha.
|
||||
* In beginning of word they aren't a part of it.
|
||||
*/
|
||||
static int
|
||||
p_isspecial(TParser *prs)
|
||||
{
|
||||
/*
|
||||
* pg_dsplen could return -1 which means error or control character
|
||||
*/
|
||||
if ( pg_dsplen(prs->str + prs->state->posbyte) == 0 )
|
||||
return 1;
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
/*
|
||||
* Unicode Characters in the 'Mark, Spacing Combining' Category
|
||||
* That characters are not alpha although they are not breakers
|
||||
* of word too.
|
||||
* Check that only in utf encoding, because other encodings
|
||||
* aren't supported by postgres or even exists.
|
||||
*/
|
||||
if ( GetDatabaseEncoding() == PG_UTF8 && prs->usewide )
|
||||
{
|
||||
static pg_wchar strange_letter[] = {
|
||||
/*
|
||||
* use binary search, so elements
|
||||
* should be ordered
|
||||
*/
|
||||
0x0903, /* DEVANAGARI SIGN VISARGA */
|
||||
0x093E, /* DEVANAGARI VOWEL SIGN AA */
|
||||
0x093F, /* DEVANAGARI VOWEL SIGN I */
|
||||
0x0940, /* DEVANAGARI VOWEL SIGN II */
|
||||
0x0949, /* DEVANAGARI VOWEL SIGN CANDRA O */
|
||||
0x094A, /* DEVANAGARI VOWEL SIGN SHORT O */
|
||||
0x094B, /* DEVANAGARI VOWEL SIGN O */
|
||||
0x094C, /* DEVANAGARI VOWEL SIGN AU */
|
||||
0x0982, /* BENGALI SIGN ANUSVARA */
|
||||
0x0983, /* BENGALI SIGN VISARGA */
|
||||
0x09BE, /* BENGALI VOWEL SIGN AA */
|
||||
0x09BF, /* BENGALI VOWEL SIGN I */
|
||||
0x09C0, /* BENGALI VOWEL SIGN II */
|
||||
0x09C7, /* BENGALI VOWEL SIGN E */
|
||||
0x09C8, /* BENGALI VOWEL SIGN AI */
|
||||
0x09CB, /* BENGALI VOWEL SIGN O */
|
||||
0x09CC, /* BENGALI VOWEL SIGN AU */
|
||||
0x09D7, /* BENGALI AU LENGTH MARK */
|
||||
0x0A03, /* GURMUKHI SIGN VISARGA */
|
||||
0x0A3E, /* GURMUKHI VOWEL SIGN AA */
|
||||
0x0A3F, /* GURMUKHI VOWEL SIGN I */
|
||||
0x0A40, /* GURMUKHI VOWEL SIGN II */
|
||||
0x0A83, /* GUJARATI SIGN VISARGA */
|
||||
0x0ABE, /* GUJARATI VOWEL SIGN AA */
|
||||
0x0ABF, /* GUJARATI VOWEL SIGN I */
|
||||
0x0AC0, /* GUJARATI VOWEL SIGN II */
|
||||
0x0AC9, /* GUJARATI VOWEL SIGN CANDRA O */
|
||||
0x0ACB, /* GUJARATI VOWEL SIGN O */
|
||||
0x0ACC, /* GUJARATI VOWEL SIGN AU */
|
||||
0x0B02, /* ORIYA SIGN ANUSVARA */
|
||||
0x0B03, /* ORIYA SIGN VISARGA */
|
||||
0x0B3E, /* ORIYA VOWEL SIGN AA */
|
||||
0x0B40, /* ORIYA VOWEL SIGN II */
|
||||
0x0B47, /* ORIYA VOWEL SIGN E */
|
||||
0x0B48, /* ORIYA VOWEL SIGN AI */
|
||||
0x0B4B, /* ORIYA VOWEL SIGN O */
|
||||
0x0B4C, /* ORIYA VOWEL SIGN AU */
|
||||
0x0B57, /* ORIYA AU LENGTH MARK */
|
||||
0x0BBE, /* TAMIL VOWEL SIGN AA */
|
||||
0x0BBF, /* TAMIL VOWEL SIGN I */
|
||||
0x0BC1, /* TAMIL VOWEL SIGN U */
|
||||
0x0BC2, /* TAMIL VOWEL SIGN UU */
|
||||
0x0BC6, /* TAMIL VOWEL SIGN E */
|
||||
0x0BC7, /* TAMIL VOWEL SIGN EE */
|
||||
0x0BC8, /* TAMIL VOWEL SIGN AI */
|
||||
0x0BCA, /* TAMIL VOWEL SIGN O */
|
||||
0x0BCB, /* TAMIL VOWEL SIGN OO */
|
||||
0x0BCC, /* TAMIL VOWEL SIGN AU */
|
||||
0x0BD7, /* TAMIL AU LENGTH MARK */
|
||||
0x0C01, /* TELUGU SIGN CANDRABINDU */
|
||||
0x0C02, /* TELUGU SIGN ANUSVARA */
|
||||
0x0C03, /* TELUGU SIGN VISARGA */
|
||||
0x0C41, /* TELUGU VOWEL SIGN U */
|
||||
0x0C42, /* TELUGU VOWEL SIGN UU */
|
||||
0x0C43, /* TELUGU VOWEL SIGN VOCALIC R */
|
||||
0x0C44, /* TELUGU VOWEL SIGN VOCALIC RR */
|
||||
0x0C82, /* KANNADA SIGN ANUSVARA */
|
||||
0x0C83, /* KANNADA SIGN VISARGA */
|
||||
0x0CBE, /* KANNADA VOWEL SIGN AA */
|
||||
0x0CC0, /* KANNADA VOWEL SIGN II */
|
||||
0x0CC1, /* KANNADA VOWEL SIGN U */
|
||||
0x0CC2, /* KANNADA VOWEL SIGN UU */
|
||||
0x0CC3, /* KANNADA VOWEL SIGN VOCALIC R */
|
||||
0x0CC4, /* KANNADA VOWEL SIGN VOCALIC RR */
|
||||
0x0CC7, /* KANNADA VOWEL SIGN EE */
|
||||
0x0CC8, /* KANNADA VOWEL SIGN AI */
|
||||
0x0CCA, /* KANNADA VOWEL SIGN O */
|
||||
0x0CCB, /* KANNADA VOWEL SIGN OO */
|
||||
0x0CD5, /* KANNADA LENGTH MARK */
|
||||
0x0CD6, /* KANNADA AI LENGTH MARK */
|
||||
0x0D02, /* MALAYALAM SIGN ANUSVARA */
|
||||
0x0D03, /* MALAYALAM SIGN VISARGA */
|
||||
0x0D3E, /* MALAYALAM VOWEL SIGN AA */
|
||||
0x0D3F, /* MALAYALAM VOWEL SIGN I */
|
||||
0x0D40, /* MALAYALAM VOWEL SIGN II */
|
||||
0x0D46, /* MALAYALAM VOWEL SIGN E */
|
||||
0x0D47, /* MALAYALAM VOWEL SIGN EE */
|
||||
0x0D48, /* MALAYALAM VOWEL SIGN AI */
|
||||
0x0D4A, /* MALAYALAM VOWEL SIGN O */
|
||||
0x0D4B, /* MALAYALAM VOWEL SIGN OO */
|
||||
0x0D4C, /* MALAYALAM VOWEL SIGN AU */
|
||||
0x0D57, /* MALAYALAM AU LENGTH MARK */
|
||||
0x0D82, /* SINHALA SIGN ANUSVARAYA */
|
||||
0x0D83, /* SINHALA SIGN VISARGAYA */
|
||||
0x0DCF, /* SINHALA VOWEL SIGN AELA-PILLA */
|
||||
0x0DD0, /* SINHALA VOWEL SIGN KETTI AEDA-PILLA */
|
||||
0x0DD1, /* SINHALA VOWEL SIGN DIGA AEDA-PILLA */
|
||||
0x0DD8, /* SINHALA VOWEL SIGN GAETTA-PILLA */
|
||||
0x0DD9, /* SINHALA VOWEL SIGN KOMBUVA */
|
||||
0x0DDA, /* SINHALA VOWEL SIGN DIGA KOMBUVA */
|
||||
0x0DDB, /* SINHALA VOWEL SIGN KOMBU DEKA */
|
||||
0x0DDC, /* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA */
|
||||
0x0DDD, /* SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA */
|
||||
0x0DDE, /* SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA */
|
||||
0x0DDF, /* SINHALA VOWEL SIGN GAYANUKITTA */
|
||||
0x0DF2, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA */
|
||||
0x0DF3, /* SINHALA VOWEL SIGN DIGA GAYANUKITTA */
|
||||
0x0F3E, /* TIBETAN SIGN YAR TSHES */
|
||||
0x0F3F, /* TIBETAN SIGN MAR TSHES */
|
||||
0x0F7F, /* TIBETAN SIGN RNAM BCAD */
|
||||
0x102B, /* MYANMAR VOWEL SIGN TALL AA */
|
||||
0x102C, /* MYANMAR VOWEL SIGN AA */
|
||||
0x1031, /* MYANMAR VOWEL SIGN E */
|
||||
0x1038, /* MYANMAR SIGN VISARGA */
|
||||
0x103B, /* MYANMAR CONSONANT SIGN MEDIAL YA */
|
||||
0x103C, /* MYANMAR CONSONANT SIGN MEDIAL RA */
|
||||
0x1056, /* MYANMAR VOWEL SIGN VOCALIC R */
|
||||
0x1057, /* MYANMAR VOWEL SIGN VOCALIC RR */
|
||||
0x1062, /* MYANMAR VOWEL SIGN SGAW KAREN EU */
|
||||
0x1063, /* MYANMAR TONE MARK SGAW KAREN HATHI */
|
||||
0x1064, /* MYANMAR TONE MARK SGAW KAREN KE PHO */
|
||||
0x1067, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU */
|
||||
0x1068, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN UE */
|
||||
0x1069, /* MYANMAR SIGN WESTERN PWO KAREN TONE-1 */
|
||||
0x106A, /* MYANMAR SIGN WESTERN PWO KAREN TONE-2 */
|
||||
0x106B, /* MYANMAR SIGN WESTERN PWO KAREN TONE-3 */
|
||||
0x106C, /* MYANMAR SIGN WESTERN PWO KAREN TONE-4 */
|
||||
0x106D, /* MYANMAR SIGN WESTERN PWO KAREN TONE-5 */
|
||||
0x1083, /* MYANMAR VOWEL SIGN SHAN AA */
|
||||
0x1084, /* MYANMAR VOWEL SIGN SHAN E */
|
||||
0x1087, /* MYANMAR SIGN SHAN TONE-2 */
|
||||
0x1088, /* MYANMAR SIGN SHAN TONE-3 */
|
||||
0x1089, /* MYANMAR SIGN SHAN TONE-5 */
|
||||
0x108A, /* MYANMAR SIGN SHAN TONE-6 */
|
||||
0x108B, /* MYANMAR SIGN SHAN COUNCIL TONE-2 */
|
||||
0x108C, /* MYANMAR SIGN SHAN COUNCIL TONE-3 */
|
||||
0x108F, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */
|
||||
0x17B6, /* KHMER VOWEL SIGN AA */
|
||||
0x17BE, /* KHMER VOWEL SIGN OE */
|
||||
0x17BF, /* KHMER VOWEL SIGN YA */
|
||||
0x17C0, /* KHMER VOWEL SIGN IE */
|
||||
0x17C1, /* KHMER VOWEL SIGN E */
|
||||
0x17C2, /* KHMER VOWEL SIGN AE */
|
||||
0x17C3, /* KHMER VOWEL SIGN AI */
|
||||
0x17C4, /* KHMER VOWEL SIGN OO */
|
||||
0x17C5, /* KHMER VOWEL SIGN AU */
|
||||
0x17C7, /* KHMER SIGN REAHMUK */
|
||||
0x17C8, /* KHMER SIGN YUUKALEAPINTU */
|
||||
0x1923, /* LIMBU VOWEL SIGN EE */
|
||||
0x1924, /* LIMBU VOWEL SIGN AI */
|
||||
0x1925, /* LIMBU VOWEL SIGN OO */
|
||||
0x1926, /* LIMBU VOWEL SIGN AU */
|
||||
0x1929, /* LIMBU SUBJOINED LETTER YA */
|
||||
0x192A, /* LIMBU SUBJOINED LETTER RA */
|
||||
0x192B, /* LIMBU SUBJOINED LETTER WA */
|
||||
0x1930, /* LIMBU SMALL LETTER KA */
|
||||
0x1931, /* LIMBU SMALL LETTER NGA */
|
||||
0x1933, /* LIMBU SMALL LETTER TA */
|
||||
0x1934, /* LIMBU SMALL LETTER NA */
|
||||
0x1935, /* LIMBU SMALL LETTER PA */
|
||||
0x1936, /* LIMBU SMALL LETTER MA */
|
||||
0x1937, /* LIMBU SMALL LETTER RA */
|
||||
0x1938, /* LIMBU SMALL LETTER LA */
|
||||
0x19B0, /* NEW TAI LUE VOWEL SIGN VOWEL SHORTENER */
|
||||
0x19B1, /* NEW TAI LUE VOWEL SIGN AA */
|
||||
0x19B2, /* NEW TAI LUE VOWEL SIGN II */
|
||||
0x19B3, /* NEW TAI LUE VOWEL SIGN U */
|
||||
0x19B4, /* NEW TAI LUE VOWEL SIGN UU */
|
||||
0x19B5, /* NEW TAI LUE VOWEL SIGN E */
|
||||
0x19B6, /* NEW TAI LUE VOWEL SIGN AE */
|
||||
0x19B7, /* NEW TAI LUE VOWEL SIGN O */
|
||||
0x19B8, /* NEW TAI LUE VOWEL SIGN OA */
|
||||
0x19B9, /* NEW TAI LUE VOWEL SIGN UE */
|
||||
0x19BA, /* NEW TAI LUE VOWEL SIGN AY */
|
||||
0x19BB, /* NEW TAI LUE VOWEL SIGN AAY */
|
||||
0x19BC, /* NEW TAI LUE VOWEL SIGN UY */
|
||||
0x19BD, /* NEW TAI LUE VOWEL SIGN OY */
|
||||
0x19BE, /* NEW TAI LUE VOWEL SIGN OAY */
|
||||
0x19BF, /* NEW TAI LUE VOWEL SIGN UEY */
|
||||
0x19C0, /* NEW TAI LUE VOWEL SIGN IY */
|
||||
0x19C8, /* NEW TAI LUE TONE MARK-1 */
|
||||
0x19C9, /* NEW TAI LUE TONE MARK-2 */
|
||||
0x1A19, /* BUGINESE VOWEL SIGN E */
|
||||
0x1A1A, /* BUGINESE VOWEL SIGN O */
|
||||
0x1A1B, /* BUGINESE VOWEL SIGN AE */
|
||||
0x1B04, /* BALINESE SIGN BISAH */
|
||||
0x1B35, /* BALINESE VOWEL SIGN TEDUNG */
|
||||
0x1B3B, /* BALINESE VOWEL SIGN RA REPA TEDUNG */
|
||||
0x1B3D, /* BALINESE VOWEL SIGN LA LENGA TEDUNG */
|
||||
0x1B3E, /* BALINESE VOWEL SIGN TALING */
|
||||
0x1B3F, /* BALINESE VOWEL SIGN TALING REPA */
|
||||
0x1B40, /* BALINESE VOWEL SIGN TALING TEDUNG */
|
||||
0x1B41, /* BALINESE VOWEL SIGN TALING REPA TEDUNG */
|
||||
0x1B43, /* BALINESE VOWEL SIGN PEPET TEDUNG */
|
||||
0x1B44, /* BALINESE ADEG ADEG */
|
||||
0x1B82, /* SUNDANESE SIGN PANGWISAD */
|
||||
0x1BA1, /* SUNDANESE CONSONANT SIGN PAMINGKAL */
|
||||
0x1BA6, /* SUNDANESE VOWEL SIGN PANAELAENG */
|
||||
0x1BA7, /* SUNDANESE VOWEL SIGN PANOLONG */
|
||||
0x1BAA, /* SUNDANESE SIGN PAMAAEH */
|
||||
0x1C24, /* LEPCHA SUBJOINED LETTER YA */
|
||||
0x1C25, /* LEPCHA SUBJOINED LETTER RA */
|
||||
0x1C26, /* LEPCHA VOWEL SIGN AA */
|
||||
0x1C27, /* LEPCHA VOWEL SIGN I */
|
||||
0x1C28, /* LEPCHA VOWEL SIGN O */
|
||||
0x1C29, /* LEPCHA VOWEL SIGN OO */
|
||||
0x1C2A, /* LEPCHA VOWEL SIGN U */
|
||||
0x1C2B, /* LEPCHA VOWEL SIGN UU */
|
||||
0x1C34, /* LEPCHA CONSONANT SIGN NYIN-DO */
|
||||
0x1C35, /* LEPCHA CONSONANT SIGN KANG */
|
||||
0xA823, /* SYLOTI NAGRI VOWEL SIGN A */
|
||||
0xA824, /* SYLOTI NAGRI VOWEL SIGN I */
|
||||
0xA827, /* SYLOTI NAGRI VOWEL SIGN OO */
|
||||
0xA880, /* SAURASHTRA SIGN ANUSVARA */
|
||||
0xA881, /* SAURASHTRA SIGN VISARGA */
|
||||
0xA8B4, /* SAURASHTRA CONSONANT SIGN HAARU */
|
||||
0xA8B5, /* SAURASHTRA VOWEL SIGN AA */
|
||||
0xA8B6, /* SAURASHTRA VOWEL SIGN I */
|
||||
0xA8B7, /* SAURASHTRA VOWEL SIGN II */
|
||||
0xA8B8, /* SAURASHTRA VOWEL SIGN U */
|
||||
0xA8B9, /* SAURASHTRA VOWEL SIGN UU */
|
||||
0xA8BA, /* SAURASHTRA VOWEL SIGN VOCALIC R */
|
||||
0xA8BB, /* SAURASHTRA VOWEL SIGN VOCALIC RR */
|
||||
0xA8BC, /* SAURASHTRA VOWEL SIGN VOCALIC L */
|
||||
0xA8BD, /* SAURASHTRA VOWEL SIGN VOCALIC LL */
|
||||
0xA8BE, /* SAURASHTRA VOWEL SIGN E */
|
||||
0xA8BF, /* SAURASHTRA VOWEL SIGN EE */
|
||||
0xA8C0, /* SAURASHTRA VOWEL SIGN AI */
|
||||
0xA8C1, /* SAURASHTRA VOWEL SIGN O */
|
||||
0xA8C2, /* SAURASHTRA VOWEL SIGN OO */
|
||||
0xA8C3, /* SAURASHTRA VOWEL SIGN AU */
|
||||
0xA952, /* REJANG CONSONANT SIGN H */
|
||||
0xA953, /* REJANG VIRAMA */
|
||||
0xAA2F, /* CHAM VOWEL SIGN O */
|
||||
0xAA30, /* CHAM VOWEL SIGN AI */
|
||||
0xAA33, /* CHAM CONSONANT SIGN YA */
|
||||
0xAA34, /* CHAM CONSONANT SIGN RA */
|
||||
0xAA4D /* CHAM CONSONANT SIGN FINAL H */
|
||||
};
|
||||
pg_wchar *StopLow = strange_letter,
|
||||
*StopHigh = strange_letter + lengthof(strange_letter),
|
||||
*StopMiddle;
|
||||
pg_wchar c;
|
||||
|
||||
if ( prs->pgwstr )
|
||||
c = *(prs->pgwstr + prs->state->poschar);
|
||||
else
|
||||
c = (pg_wchar) *(prs->wstr + prs->state->poschar);
|
||||
|
||||
while( StopLow < StopHigh )
|
||||
{
|
||||
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
|
||||
if ( *StopMiddle == c )
|
||||
return 1;
|
||||
else if ( *StopMiddle < c )
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Table of state/action of parser
|
||||
*/
|
||||
@ -683,6 +968,7 @@ static const TParserStateActionItem actionTPS_Base[] = {
|
||||
static const TParserStateActionItem actionTPS_InNumWord[] = {
|
||||
{p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
|
||||
{p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
|
||||
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
|
||||
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
|
||||
@ -703,12 +989,14 @@ static const TParserStateActionItem actionTPS_InAsciiWord[] = {
|
||||
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
|
||||
};
|
||||
|
||||
static const TParserStateActionItem actionTPS_InWord[] = {
|
||||
{p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_Null, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
|
||||
@ -723,6 +1011,7 @@ static const TParserStateActionItem actionTPS_InUnsignedInt[] = {
|
||||
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
|
||||
{p_isasclet, 0, A_PUSH, TPS_InHost, 0, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
|
||||
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
|
||||
};
|
||||
@ -1196,6 +1485,7 @@ static const TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {
|
||||
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen},
|
||||
{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{p_iseqC, '-', A_PUSH, TPS_InHyphenAsciiWordFirst, 0, NULL},
|
||||
{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen}
|
||||
@ -1211,6 +1501,7 @@ static const TParserStateActionItem actionTPS_InHyphenWordFirst[] = {
|
||||
static const TParserStateActionItem actionTPS_InHyphenWord[] = {
|
||||
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
|
||||
{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen}
|
||||
@ -1226,6 +1517,7 @@ static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[] = {
|
||||
static const TParserStateActionItem actionTPS_InHyphenNumWord[] = {
|
||||
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},
|
||||
{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},
|
||||
{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
|
||||
};
|
||||
@ -1234,6 +1526,7 @@ static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[] = {
|
||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
|
||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||
};
|
||||
|
||||
@ -1249,12 +1542,14 @@ static const TParserStateActionItem actionTPS_InParseHyphen[] = {
|
||||
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
|
||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||
{p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
|
||||
{p_isspecial, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
|
||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||
};
|
||||
|
||||
static const TParserStateActionItem actionTPS_InHyphenWordPart[] = {
|
||||
{p_isEOF, 0, A_BINGO, TPS_Base, PARTHWORD, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
|
||||
};
|
||||
@ -1263,6 +1558,7 @@ static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
|
||||
{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIPARTHWORD, NULL},
|
||||
{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
|
||||
{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_InParseHyphen, ASCIIPARTHWORD, NULL}
|
||||
};
|
||||
@ -1270,6 +1566,7 @@ static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
|
||||
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[] = {
|
||||
{p_isEOF, 0, A_BINGO, TPS_Base, NUMPARTHWORD, NULL},
|
||||
{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{NULL, 0, A_BINGO, TPS_InParseHyphen, NUMPARTHWORD, NULL}
|
||||
};
|
||||
|
||||
@ -1277,6 +1574,7 @@ static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
|
||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
|
||||
{p_isalpha, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{p_isspecial, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
|
||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user