mirror of
git://sourceware.org/git/glibc.git
synced 2024-12-21 04:31:04 +08:00
6b6557e8b3
2003-12-23 Paolo Bonzini <bonzini@gnu.org> * posix/regex_internal.c (re_dfa_add_node): Initialize opt_subexp. * posix/regex_internal.h (re_token_type_t): Put OP_DUP_PLUS among the tokens, rather than among the epsilon-transiting nodes. (re_token_t): Add the opt_subexp flag. * posix/regcomp.c (optimize_utf8, calc_first, calc_next, calc_epsdest): Don't consider OP_DUP_PLUS. (mark_opt_subexp, mark_opt_subexp_iter): New functions. (parse_dup_op): Mostly rewritten, lowering OP_DUP_PLUS to OP_DUP_ASTERISK and marking optional subexpressions as such using mark_opt_subexp. * posix/regexec.c (set_regs): Initialize PREV_INDEX_MATCH and pass it to update_regs. (update_regs): Use the PREV_INDEX_MATCH parameter, together with the opt_subexp flag, in order to discard a final empty match of a repeated subexpression. * posix/BOOST.tests: Adjust test vectors. * posix/PCRE.tests: Likewise. * posix/rxspencer/tests: Likewise. 2003-12-17 Paolo Bonzini <bonzini@gnu.org> 2003-12-16 Paolo Bonzini <bonzini@gnu.org> 2003-12-17 Paolo Bonzini <bonzini@gnu.org> 2003-12-16 Jakub Jelinek <jakub@redhat.com> 2003-04-06 Kaz Kojima <kkojima@rr.iij4u.or.jp> 2003-02-20 Paolo Bonzini <bonzini@gnu.org> 2003-01-12 Franz Sirl <Franz.Sirl-kernel@lauterbach.com> 2003-01-09 Richard Henderson <rth@redhat.com> 2003-01-09 Richard Henderson <rth@redhat.com> 2003-01-03 Paul Eggert <eggert@twinsun.com>
830 lines
19 KiB
Plaintext
830 lines
19 KiB
Plaintext
;
|
|
;
|
|
; this file contains a script of tests to run through regress.exe
|
|
;
|
|
; comments start with a semicolon and proceed to the end of the line
|
|
;
|
|
; changes to regular expression compile flags start with a "-" as the first
|
|
; non-whitespace character and consist of a list of the printable names
|
|
; of the flags, for example "match_default"
|
|
;
|
|
; Other lines contain a test to perform using the current flag status
|
|
; the first token contains the expression to compile, the second the string
|
|
; to match it against. If the second string is "!" then the expression should
|
|
; not compile, that is the first string is an invalid regular expression.
|
|
; This is then followed by a list of integers that specify what should match,
|
|
; each pair represents the starting and ending positions of a subexpression
|
|
; starting with the zeroth subexpression (the whole match).
|
|
; A value of -1 indicates that the subexpression should not take part in the
|
|
; match at all, if the first value is -1 then no part of the expression should
|
|
; match the string.
|
|
;
|
|
; Tests taken from BOOST testsuite and adapted to glibc regex.
|
|
;
|
|
; Boost Software License - Version 1.0 - August 17th, 2003
|
|
;
|
|
; Permission is hereby granted, free of charge, to any person or organization
|
|
; obtaining a copy of the software and accompanying documentation covered by
|
|
; this license (the "Software") to use, reproduce, display, distribute,
|
|
; execute, and transmit the Software, and to prepare derivative works of the
|
|
; Software, and to permit third-parties to whom the Software is furnished to
|
|
; do so, all subject to the following:
|
|
;
|
|
; The copyright notices in the Software and this entire statement, including
|
|
; the above license grant, this restriction and the following disclaimer,
|
|
; must be included in all copies of the Software, in whole or in part, and
|
|
; all derivative works of the Software, unless such copies or derivative
|
|
; works are solely in the form of machine-executable object code generated by
|
|
; a source language processor.
|
|
;
|
|
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
; DEALINGS IN THE SOFTWARE.
|
|
;
|
|
|
|
- match_default normal REG_EXTENDED
|
|
|
|
;
|
|
; try some really simple literals:
|
|
a a 0 1
|
|
Z Z 0 1
|
|
Z aaa -1 -1
|
|
Z xxxxZZxxx 4 5
|
|
|
|
; and some simple brackets:
|
|
(a) zzzaazz 3 4 3 4
|
|
() zzz 0 0 0 0
|
|
() "" 0 0 0 0
|
|
( !
|
|
) ) 0 1
|
|
(aa !
|
|
aa) baa)b 1 4
|
|
a b -1 -1
|
|
\(\) () 0 2
|
|
\(a\) (a) 0 3
|
|
\() () 0 2
|
|
(\) !
|
|
p(a)rameter ABCparameterXYZ 3 12 4 5
|
|
[pq](a)rameter ABCparameterXYZ 3 12 4 5
|
|
|
|
; now try escaped brackets:
|
|
- match_default bk_parens REG_BASIC
|
|
\(a\) zzzaazz 3 4 3 4
|
|
\(\) zzz 0 0 0 0
|
|
\(\) "" 0 0 0 0
|
|
\( !
|
|
\) !
|
|
\(aa !
|
|
aa\) !
|
|
() () 0 2
|
|
(a) (a) 0 3
|
|
(\) !
|
|
\() !
|
|
|
|
; now move on to "." wildcards
|
|
- match_default normal REG_EXTENDED REG_STARTEND
|
|
. a 0 1
|
|
. \n 0 1
|
|
. \r 0 1
|
|
. \0 0 1
|
|
|
|
;
|
|
; now move on to the repetition ops,
|
|
; starting with operator *
|
|
- match_default normal REG_EXTENDED
|
|
a* b 0 0
|
|
ab* a 0 1
|
|
ab* ab 0 2
|
|
ab* sssabbbbbbsss 3 10
|
|
ab*c* a 0 1
|
|
ab*c* abbb 0 4
|
|
ab*c* accc 0 4
|
|
ab*c* abbcc 0 5
|
|
*a !
|
|
\<* !
|
|
\>* !
|
|
\n* \n\n 0 2
|
|
\** ** 0 2
|
|
\* * 0 1
|
|
|
|
; now try operator +
|
|
ab+ a -1 -1
|
|
ab+ ab 0 2
|
|
ab+ sssabbbbbbsss 3 10
|
|
ab+c+ a -1 -1
|
|
ab+c+ abbb -1 -1
|
|
ab+c+ accc -1 -1
|
|
ab+c+ abbcc 0 5
|
|
+a !
|
|
\<+ !
|
|
\>+ !
|
|
\n+ \n\n 0 2
|
|
\+ + 0 1
|
|
\+ ++ 0 1
|
|
\++ ++ 0 2
|
|
|
|
; now try operator ?
|
|
- match_default normal REG_EXTENDED
|
|
a? b 0 0
|
|
ab? a 0 1
|
|
ab? ab 0 2
|
|
ab? sssabbbbbbsss 3 5
|
|
ab?c? a 0 1
|
|
ab?c? abbb 0 2
|
|
ab?c? accc 0 2
|
|
ab?c? abcc 0 3
|
|
?a !
|
|
\<? !
|
|
\>? !
|
|
\n? \n\n 0 1
|
|
\? ? 0 1
|
|
\? ?? 0 1
|
|
\?? ?? 0 1
|
|
|
|
; now try operator {}
|
|
- match_default normal REG_EXTENDED
|
|
a{2} a -1 -1
|
|
a{2} aa 0 2
|
|
a{2} aaa 0 2
|
|
a{2,} a -1 -1
|
|
a{2,} aa 0 2
|
|
a{2,} aaaaa 0 5
|
|
a{2,4} a -1 -1
|
|
a{2,4} aa 0 2
|
|
a{2,4} aaa 0 3
|
|
a{2,4} aaaa 0 4
|
|
a{2,4} aaaaa 0 4
|
|
a{} !
|
|
a{2 !
|
|
a} a} 0 2
|
|
\{\} {} 0 2
|
|
|
|
- match_default normal REG_BASIC
|
|
a\{2\} a -1 -1
|
|
a\{2\} aa 0 2
|
|
a\{2\} aaa 0 2
|
|
a\{2,\} a -1 -1
|
|
a\{2,\} aa 0 2
|
|
a\{2,\} aaaaa 0 5
|
|
a\{2,4\} a -1 -1
|
|
a\{2,4\} aa 0 2
|
|
a\{2,4\} aaa 0 3
|
|
a\{2,4\} aaaa 0 4
|
|
a\{2,4\} aaaaa 0 4
|
|
{} {} 0 2
|
|
|
|
; now test the alternation operator |
|
|
- match_default normal REG_EXTENDED
|
|
a|b a 0 1
|
|
a|b b 0 1
|
|
a(b|c) ab 0 2 1 2
|
|
a(b|c) ac 0 2 1 2
|
|
a(b|c) ad -1 -1 -1 -1
|
|
a\| a| 0 2
|
|
|
|
; now test the set operator []
|
|
- match_default normal REG_EXTENDED
|
|
; try some literals first
|
|
[abc] a 0 1
|
|
[abc] b 0 1
|
|
[abc] c 0 1
|
|
[abc] d -1 -1
|
|
[^bcd] a 0 1
|
|
[^bcd] b -1 -1
|
|
[^bcd] d -1 -1
|
|
[^bcd] e 0 1
|
|
a[b]c abc 0 3
|
|
a[ab]c abc 0 3
|
|
a[^ab]c adc 0 3
|
|
a[]b]c a]c 0 3
|
|
a[[b]c a[c 0 3
|
|
a[-b]c a-c 0 3
|
|
a[^]b]c adc 0 3
|
|
a[^-b]c adc 0 3
|
|
a[b-]c a-c 0 3
|
|
a[b !
|
|
a[] !
|
|
|
|
; then some ranges
|
|
[b-e] a -1 -1
|
|
[b-e] b 0 1
|
|
[b-e] e 0 1
|
|
[b-e] f -1 -1
|
|
[^b-e] a 0 1
|
|
[^b-e] b -1 -1
|
|
[^b-e] e -1 -1
|
|
[^b-e] f 0 1
|
|
a[1-3]c a2c 0 3
|
|
a[3-1]c !
|
|
a[1-3-5]c !
|
|
a[1- !
|
|
|
|
; and some classes
|
|
a[[:alpha:]]c abc 0 3
|
|
a[[:unknown:]]c !
|
|
a[[: !
|
|
a[[:alpha !
|
|
a[[:alpha:] !
|
|
a[[:alpha,:] !
|
|
a[[:]:]]b !
|
|
a[[:-:]]b !
|
|
a[[:alph:]] !
|
|
a[[:alphabet:]] !
|
|
[[:alnum:]]+ -%@a0X_- 3 6
|
|
[[:alpha:]]+ -%@aX_0- 3 5
|
|
[[:blank:]]+ "a \tb" 1 4
|
|
[[:cntrl:]]+ a\n\tb 1 3
|
|
[[:digit:]]+ a019b 1 4
|
|
[[:graph:]]+ " a%b " 1 4
|
|
[[:lower:]]+ AabC 1 3
|
|
; This test fails with STLPort, disable for now as this is a corner case anyway...
|
|
;[[:print:]]+ "\na b\n" 1 4
|
|
[[:punct:]]+ " %-&\t" 1 4
|
|
[[:space:]]+ "a \n\t\rb" 1 5
|
|
[[:upper:]]+ aBCd 1 3
|
|
[[:xdigit:]]+ p0f3Cx 1 5
|
|
|
|
; now test flag settings:
|
|
- escape_in_lists REG_NO_POSIX_TEST
|
|
[\n] \n 0 1
|
|
- REG_NO_POSIX_TEST
|
|
|
|
; line anchors
|
|
- match_default normal REG_EXTENDED
|
|
^ab ab 0 2
|
|
^ab xxabxx -1 -1
|
|
ab$ ab 0 2
|
|
ab$ abxx -1 -1
|
|
- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
|
|
^ab ab -1 -1
|
|
^ab xxabxx -1 -1
|
|
ab$ ab -1 -1
|
|
ab$ abxx -1 -1
|
|
|
|
; back references
|
|
- match_default normal REG_PERL
|
|
a(b)\2c !
|
|
a(b\1)c !
|
|
a(b*)c\1d abbcbbd 0 7 1 3
|
|
a(b*)c\1d abbcbd -1 -1
|
|
a(b*)c\1d abbcbbbd -1 -1
|
|
^(.)\1 abc -1 -1
|
|
a([bc])\1d abcdabbd 4 8 5 6
|
|
; strictly speaking this is at best ambiguous, at worst wrong, this is what most
|
|
; re implementations will match though.
|
|
a(([bc])\2)*d abbccd 0 6 3 5 3 4
|
|
|
|
a(([bc])\2)*d abbcbd -1 -1
|
|
a((b)*\2)*d abbbd 0 5 1 4 2 3
|
|
; perl only:
|
|
(ab*)[ab]*\1 ababaaa 0 7 0 1
|
|
(a)\1bcd aabcd 0 5 0 1
|
|
(a)\1bc*d aabcd 0 5 0 1
|
|
(a)\1bc*d aabd 0 4 0 1
|
|
(a)\1bc*d aabcccd 0 7 0 1
|
|
(a)\1bc*[ce]d aabcccd 0 7 0 1
|
|
^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
|
|
|
|
; posix only:
|
|
- match_default extended REG_EXTENDED
|
|
(ab*)[ab]*\1 ababaaa 0 7 0 1
|
|
|
|
;
|
|
; word operators:
|
|
\w a 0 1
|
|
\w z 0 1
|
|
\w A 0 1
|
|
\w Z 0 1
|
|
\w _ 0 1
|
|
\w } -1 -1
|
|
\w ` -1 -1
|
|
\w [ -1 -1
|
|
\w @ -1 -1
|
|
; non-word:
|
|
\W a -1 -1
|
|
\W z -1 -1
|
|
\W A -1 -1
|
|
\W Z -1 -1
|
|
\W _ -1 -1
|
|
\W } 0 1
|
|
\W ` 0 1
|
|
\W [ 0 1
|
|
\W @ 0 1
|
|
; word start:
|
|
\<abcd " abcd" 2 6
|
|
\<ab cab -1 -1
|
|
\<ab "\nab" 1 3
|
|
\<tag ::tag 2 5
|
|
; word end:
|
|
abc\> abc 0 3
|
|
abc\> abcd -1 -1
|
|
abc\> abc\n 0 3
|
|
abc\> abc:: 0 3
|
|
; word boundary:
|
|
\babcd " abcd" 2 6
|
|
\bab cab -1 -1
|
|
\bab "\nab" 1 3
|
|
\btag ::tag 2 5
|
|
abc\b abc 0 3
|
|
abc\b abcd -1 -1
|
|
abc\b abc\n 0 3
|
|
abc\b abc:: 0 3
|
|
; within word:
|
|
\B ab 1 1
|
|
a\Bb ab 0 2
|
|
a\B ab 0 1
|
|
a\B a -1 -1
|
|
a\B "a " -1 -1
|
|
|
|
;
|
|
; buffer operators:
|
|
\`abc abc 0 3
|
|
\`abc \nabc -1 -1
|
|
\`abc " abc" -1 -1
|
|
abc\' abc 0 3
|
|
abc\' abc\n -1 -1
|
|
abc\' "abc " -1 -1
|
|
|
|
;
|
|
; now follow various complex expressions designed to try and bust the matcher:
|
|
a(((b)))c abc 0 3 1 2 1 2 1 2
|
|
a(b|(c))d abd 0 3 1 2 -1 -1
|
|
a(b|(c))d acd 0 3 1 2 1 2
|
|
a(b*|c)d abbd 0 4 1 3
|
|
; just gotta have one DFA-buster, of course
|
|
a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
|
|
; and an inline expansion in case somebody gets tricky
|
|
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
|
|
; and in case somebody just slips in an NFA...
|
|
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
|
|
; one really big one
|
|
1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
|
|
; fish for problems as brackets go past 8
|
|
[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
|
|
[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
|
|
[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
|
|
[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
|
|
; and as parentheses go past 9:
|
|
(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
|
|
(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
|
|
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
|
|
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
|
|
(a)d|(b)c abc 1 3 -1 -1 1 2
|
|
_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
|
|
|
|
; subtleties of matching
|
|
;a(b)?c\1d acd 0 3 -1 -1
|
|
; POSIX is about the following test:
|
|
a(b)?c\1d acd -1 -1 -1 -1
|
|
a(b?c)+d accd 0 4 2 3
|
|
(wee|week)(knights|night) weeknights 0 10 0 3 3 10
|
|
.* abc 0 3
|
|
a(b|(c))d abd 0 3 1 2 -1 -1
|
|
a(b|(c))d acd 0 3 1 2 1 2
|
|
a(b*|c|e)d abbd 0 4 1 3
|
|
a(b*|c|e)d acd 0 3 1 2
|
|
a(b*|c|e)d ad 0 2 1 1
|
|
a(b?)c abc 0 3 1 2
|
|
a(b?)c ac 0 2 1 1
|
|
a(b+)c abc 0 3 1 2
|
|
a(b+)c abbbc 0 5 1 4
|
|
a(b*)c ac 0 2 1 1
|
|
(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
|
|
a([bc]?)c abc 0 3 1 2
|
|
a([bc]?)c ac 0 2 1 1
|
|
a([bc]+)c abc 0 3 1 2
|
|
a([bc]+)c abcc 0 4 1 3
|
|
a([bc]+)bc abcbc 0 5 1 3
|
|
a(bb+|b)b abb 0 3 1 2
|
|
a(bbb+|bb+|b)b abb 0 3 1 2
|
|
a(bbb+|bb+|b)b abbb 0 4 1 3
|
|
a(bbb+|bb+|b)bb abbb 0 4 1 2
|
|
(.*).* abcdef 0 6 0 6
|
|
(a*)* bc 0 0 0 0
|
|
xyx*xz xyxxxxyxxxz 5 11
|
|
|
|
; do we get the right subexpression when it is used more than once?
|
|
a(b|c)*d ad 0 2 -1 -1
|
|
a(b|c)*d abcd 0 4 2 3
|
|
a(b|c)+d abd 0 3 1 2
|
|
a(b|c)+d abcd 0 4 2 3
|
|
a(b|c?)+d ad 0 2 1 1
|
|
a(b|c){0,0}d ad 0 2 -1 -1
|
|
a(b|c){0,1}d ad 0 2 -1 -1
|
|
a(b|c){0,1}d abd 0 3 1 2
|
|
a(b|c){0,2}d ad 0 2 -1 -1
|
|
a(b|c){0,2}d abcd 0 4 2 3
|
|
a(b|c){0,}d ad 0 2 -1 -1
|
|
a(b|c){0,}d abcd 0 4 2 3
|
|
a(b|c){1,1}d abd 0 3 1 2
|
|
a(b|c){1,2}d abd 0 3 1 2
|
|
a(b|c){1,2}d abcd 0 4 2 3
|
|
a(b|c){1,}d abd 0 3 1 2
|
|
a(b|c){1,}d abcd 0 4 2 3
|
|
a(b|c){2,2}d acbd 0 4 2 3
|
|
a(b|c){2,2}d abcd 0 4 2 3
|
|
a(b|c){2,4}d abcd 0 4 2 3
|
|
a(b|c){2,4}d abcbd 0 5 3 4
|
|
a(b|c){2,4}d abcbcd 0 6 4 5
|
|
a(b|c){2,}d abcd 0 4 2 3
|
|
a(b|c){2,}d abcbd 0 5 3 4
|
|
; perl only: these conflict with the POSIX test below
|
|
;a(b|c?)+d abcd 0 4 3 3
|
|
;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
|
|
;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
|
|
|
|
; posix only:
|
|
- match_default extended REG_EXTENDED REG_STARTEND
|
|
|
|
a(b|c?)+d abcd 0 4 2 3
|
|
a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
|
a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
|
|
a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
|
a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
|
|
a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
|
a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
|
|
a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
|
a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
|
|
|
|
- match_default normal REG_PERL
|
|
; try to match C++ syntax elements:
|
|
; line comment:
|
|
//[^\n]* "++i //here is a line comment\n" 4 28
|
|
; block comment:
|
|
/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
|
|
/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
|
|
/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
|
|
/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
|
|
/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
|
|
/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
|
|
; preprocessor directives:
|
|
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
|
|
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
|
|
; perl only:
|
|
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
|
|
; literals:
|
|
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
|
|
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
|
|
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
|
|
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
|
|
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
|
|
; strings:
|
|
'([^\\']|\\.)*' '\\x3A' 0 6 4 5
|
|
'([^\\']|\\.)*' '\\'' 0 4 1 3
|
|
'([^\\']|\\.)*' '\\n' 0 4 1 3
|
|
|
|
; finally try some case insensitive matches:
|
|
- match_default normal REG_EXTENDED REG_ICASE
|
|
; upper and lower have no meaning here so they fail, however these
|
|
; may compile with other libraries...
|
|
;[[:lower:]] !
|
|
;[[:upper:]] !
|
|
0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
|
|
|
|
; known and suspected bugs:
|
|
- match_default normal REG_EXTENDED
|
|
\( ( 0 1
|
|
\) ) 0 1
|
|
\$ $ 0 1
|
|
\^ ^ 0 1
|
|
\. . 0 1
|
|
\* * 0 1
|
|
\+ + 0 1
|
|
\? ? 0 1
|
|
\[ [ 0 1
|
|
\] ] 0 1
|
|
\| | 0 1
|
|
\\ \\ 0 1
|
|
# # 0 1
|
|
\# # 0 1
|
|
a- a- 0 2
|
|
\- - 0 1
|
|
\{ { 0 1
|
|
\} } 0 1
|
|
0 0 0 1
|
|
1 1 0 1
|
|
9 9 0 1
|
|
b b 0 1
|
|
B B 0 1
|
|
< < 0 1
|
|
> > 0 1
|
|
w w 0 1
|
|
W W 0 1
|
|
` ` 0 1
|
|
' ' 0 1
|
|
\n \n 0 1
|
|
, , 0 1
|
|
a a 0 1
|
|
f f 0 1
|
|
n n 0 1
|
|
r r 0 1
|
|
t t 0 1
|
|
v v 0 1
|
|
c c 0 1
|
|
x x 0 1
|
|
: : 0 1
|
|
(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
|
|
|
|
- match_default normal REG_EXTENDED REG_ICASE
|
|
a A 0 1
|
|
A a 0 1
|
|
[abc]+ abcABC 0 6
|
|
[ABC]+ abcABC 0 6
|
|
[a-z]+ abcABC 0 6
|
|
[A-Z]+ abzANZ 0 6
|
|
[a-Z]+ abzABZ 0 6
|
|
[A-z]+ abzABZ 0 6
|
|
[[:lower:]]+ abyzABYZ 0 8
|
|
[[:upper:]]+ abzABZ 0 6
|
|
[[:alpha:]]+ abyzABYZ 0 8
|
|
[[:alnum:]]+ 09abyzABYZ 0 10
|
|
|
|
; word start:
|
|
\<abcd " abcd" 2 6
|
|
\<ab cab -1 -1
|
|
\<ab "\nab" 1 3
|
|
\<tag ::tag 2 5
|
|
; word end:
|
|
abc\> abc 0 3
|
|
abc\> abcd -1 -1
|
|
abc\> abc\n 0 3
|
|
abc\> abc:: 0 3
|
|
|
|
; collating elements and rewritten set code:
|
|
- match_default normal REG_EXTENDED REG_STARTEND
|
|
;[[.zero.]] 0 0 1
|
|
;[[.one.]] 1 0 1
|
|
;[[.two.]] 2 0 1
|
|
;[[.three.]] 3 0 1
|
|
[[.a.]] baa 1 2
|
|
;[[.right-curly-bracket.]] } 0 1
|
|
;[[.NUL.]] \0 0 1
|
|
[[:<:]z] !
|
|
[a[:>:]] !
|
|
[[=a=]] a 0 1
|
|
;[[=right-curly-bracket=]] } 0 1
|
|
- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
|
|
[[.A.]] A 0 1
|
|
[[.A.]] a 0 1
|
|
[[.A.]-b]+ AaBb 0 4
|
|
[A-[.b.]]+ AaBb 0 4
|
|
[[.a.]-B]+ AaBb 0 4
|
|
[a-[.B.]]+ AaBb 0 4
|
|
- match_default normal REG_EXTENDED REG_STARTEND
|
|
[[.a.]-c]+ abcd 0 3
|
|
[a-[.c.]]+ abcd 0 3
|
|
[[:alpha:]-a] !
|
|
[a-[:alpha:]] !
|
|
|
|
; try multi-character ligatures:
|
|
;[[.ae.]] ae 0 2
|
|
;[[.ae.]] aE -1 -1
|
|
;[[.AE.]] AE 0 2
|
|
;[[.Ae.]] Ae 0 2
|
|
;[[.ae.]-b] a -1 -1
|
|
;[[.ae.]-b] b 0 1
|
|
;[[.ae.]-b] ae 0 2
|
|
;[a-[.ae.]] a 0 1
|
|
;[a-[.ae.]] b -1 -1
|
|
;[a-[.ae.]] ae 0 2
|
|
- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
|
|
;[[.ae.]] AE 0 2
|
|
;[[.ae.]] Ae 0 2
|
|
;[[.AE.]] Ae 0 2
|
|
;[[.Ae.]] aE 0 2
|
|
;[[.AE.]-B] a -1 -1
|
|
;[[.Ae.]-b] b 0 1
|
|
;[[.Ae.]-b] B 0 1
|
|
;[[.ae.]-b] AE 0 2
|
|
|
|
- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
|
|
\s+ "ab ab" 2 5
|
|
\S+ " abc " 2 5
|
|
|
|
- match_default normal REG_EXTENDED REG_STARTEND
|
|
\`abc abc 0 3
|
|
\`abc aabc -1 -1
|
|
abc\' abc 0 3
|
|
abc\' abcd -1 -1
|
|
abc\' abc\n\n -1 -1
|
|
abc\' abc 0 3
|
|
|
|
; extended repeat checking to exercise new algorithms:
|
|
ab.*xy abxy_ 0 4
|
|
ab.*xy ab_xy_ 0 5
|
|
ab.*xy abxy 0 4
|
|
ab.*xy ab_xy 0 5
|
|
ab.* ab 0 2
|
|
ab.* ab__ 0 4
|
|
|
|
ab.{2,5}xy ab__xy_ 0 6
|
|
ab.{2,5}xy ab____xy_ 0 8
|
|
ab.{2,5}xy ab_____xy_ 0 9
|
|
ab.{2,5}xy ab__xy 0 6
|
|
ab.{2,5}xy ab_____xy 0 9
|
|
ab.{2,5} ab__ 0 4
|
|
ab.{2,5} ab_______ 0 7
|
|
ab.{2,5}xy ab______xy -1 -1
|
|
ab.{2,5}xy ab_xy -1 -1
|
|
|
|
ab.*?xy abxy_ 0 4
|
|
ab.*?xy ab_xy_ 0 5
|
|
ab.*?xy abxy 0 4
|
|
ab.*?xy ab_xy 0 5
|
|
ab.*? ab 0 2
|
|
ab.*? ab__ 0 4
|
|
|
|
ab.{2,5}?xy ab__xy_ 0 6
|
|
ab.{2,5}?xy ab____xy_ 0 8
|
|
ab.{2,5}?xy ab_____xy_ 0 9
|
|
ab.{2,5}?xy ab__xy 0 6
|
|
ab.{2,5}?xy ab_____xy 0 9
|
|
ab.{2,5}? ab__ 0 4
|
|
ab.{2,5}? ab_______ 0 7
|
|
ab.{2,5}?xy ab______xy -1 -1
|
|
ab.{2,5}xy ab_xy -1 -1
|
|
|
|
; again but with slower algorithm variant:
|
|
- match_default REG_EXTENDED
|
|
; now again for single character repeats:
|
|
|
|
ab_*xy abxy_ 0 4
|
|
ab_*xy ab_xy_ 0 5
|
|
ab_*xy abxy 0 4
|
|
ab_*xy ab_xy 0 5
|
|
ab_* ab 0 2
|
|
ab_* ab__ 0 4
|
|
|
|
ab_{2,5}xy ab__xy_ 0 6
|
|
ab_{2,5}xy ab____xy_ 0 8
|
|
ab_{2,5}xy ab_____xy_ 0 9
|
|
ab_{2,5}xy ab__xy 0 6
|
|
ab_{2,5}xy ab_____xy 0 9
|
|
ab_{2,5} ab__ 0 4
|
|
ab_{2,5} ab_______ 0 7
|
|
ab_{2,5}xy ab______xy -1 -1
|
|
ab_{2,5}xy ab_xy -1 -1
|
|
|
|
ab_*?xy abxy_ 0 4
|
|
ab_*?xy ab_xy_ 0 5
|
|
ab_*?xy abxy 0 4
|
|
ab_*?xy ab_xy 0 5
|
|
ab_*? ab 0 2
|
|
ab_*? ab__ 0 4
|
|
|
|
ab_{2,5}?xy ab__xy_ 0 6
|
|
ab_{2,5}?xy ab____xy_ 0 8
|
|
ab_{2,5}?xy ab_____xy_ 0 9
|
|
ab_{2,5}?xy ab__xy 0 6
|
|
ab_{2,5}?xy ab_____xy 0 9
|
|
ab_{2,5}? ab__ 0 4
|
|
ab_{2,5}? ab_______ 0 7
|
|
ab_{2,5}?xy ab______xy -1 -1
|
|
ab_{2,5}xy ab_xy -1 -1
|
|
|
|
; and again for sets:
|
|
ab[_,;]*xy abxy_ 0 4
|
|
ab[_,;]*xy ab_xy_ 0 5
|
|
ab[_,;]*xy abxy 0 4
|
|
ab[_,;]*xy ab_xy 0 5
|
|
ab[_,;]* ab 0 2
|
|
ab[_,;]* ab__ 0 4
|
|
|
|
ab[_,;]{2,5}xy ab__xy_ 0 6
|
|
ab[_,;]{2,5}xy ab____xy_ 0 8
|
|
ab[_,;]{2,5}xy ab_____xy_ 0 9
|
|
ab[_,;]{2,5}xy ab__xy 0 6
|
|
ab[_,;]{2,5}xy ab_____xy 0 9
|
|
ab[_,;]{2,5} ab__ 0 4
|
|
ab[_,;]{2,5} ab_______ 0 7
|
|
ab[_,;]{2,5}xy ab______xy -1 -1
|
|
ab[_,;]{2,5}xy ab_xy -1 -1
|
|
|
|
ab[_,;]*?xy abxy_ 0 4
|
|
ab[_,;]*?xy ab_xy_ 0 5
|
|
ab[_,;]*?xy abxy 0 4
|
|
ab[_,;]*?xy ab_xy 0 5
|
|
ab[_,;]*? ab 0 2
|
|
ab[_,;]*? ab__ 0 4
|
|
|
|
ab[_,;]{2,5}?xy ab__xy_ 0 6
|
|
ab[_,;]{2,5}?xy ab____xy_ 0 8
|
|
ab[_,;]{2,5}?xy ab_____xy_ 0 9
|
|
ab[_,;]{2,5}?xy ab__xy 0 6
|
|
ab[_,;]{2,5}?xy ab_____xy 0 9
|
|
ab[_,;]{2,5}? ab__ 0 4
|
|
ab[_,;]{2,5}? ab_______ 0 7
|
|
ab[_,;]{2,5}?xy ab______xy -1 -1
|
|
ab[_,;]{2,5}xy ab_xy -1 -1
|
|
|
|
; and again for tricky sets with digraphs:
|
|
;ab[_[.ae.]]*xy abxy_ 0 4
|
|
;ab[_[.ae.]]*xy ab_xy_ 0 5
|
|
;ab[_[.ae.]]*xy abxy 0 4
|
|
;ab[_[.ae.]]*xy ab_xy 0 5
|
|
;ab[_[.ae.]]* ab 0 2
|
|
;ab[_[.ae.]]* ab__ 0 4
|
|
|
|
;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
|
|
;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
|
|
;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
|
|
;ab[_[.ae.]]{2,5}xy ab__xy 0 6
|
|
;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
|
|
;ab[_[.ae.]]{2,5} ab__ 0 4
|
|
;ab[_[.ae.]]{2,5} ab_______ 0 7
|
|
;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
|
|
;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
|
|
|
|
;ab[_[.ae.]]*?xy abxy_ 0 4
|
|
;ab[_[.ae.]]*?xy ab_xy_ 0 5
|
|
;ab[_[.ae.]]*?xy abxy 0 4
|
|
;ab[_[.ae.]]*?xy ab_xy 0 5
|
|
;ab[_[.ae.]]*? ab 0 2
|
|
;ab[_[.ae.]]*? ab__ 0 2
|
|
|
|
;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
|
|
;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
|
|
;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
|
|
;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
|
|
;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
|
|
;ab[_[.ae.]]{2,5}? ab__ 0 4
|
|
;ab[_[.ae.]]{2,5}? ab_______ 0 4
|
|
;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
|
|
;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
|
|
|
|
; new bugs detected in spring 2003:
|
|
- normal match_continuous REG_NO_POSIX_TEST
|
|
b abc 1 2
|
|
|
|
() abc 0 0 0 0
|
|
^() abc 0 0 0 0
|
|
^()+ abc 0 0 0 0
|
|
^(){1} abc 0 0 0 0
|
|
^(){2} abc 0 0 0 0
|
|
^((){2}) abc 0 0 0 0 0 0
|
|
() "" 0 0 0 0
|
|
()\1 "" 0 0 0 0
|
|
()\1 a 0 0 0 0
|
|
a()\1b ab 0 2 1 1
|
|
a()b\1 ab 0 2 1 1
|
|
|
|
; subtleties of matching with no sub-expressions marked
|
|
- normal match_nosubs REG_NO_POSIX_TEST
|
|
a(b?c)+d accd 0 4
|
|
(wee|week)(knights|night) weeknights 0 10
|
|
.* abc 0 3
|
|
a(b|(c))d abd 0 3
|
|
a(b|(c))d acd 0 3
|
|
a(b*|c|e)d abbd 0 4
|
|
a(b*|c|e)d acd 0 3
|
|
a(b*|c|e)d ad 0 2
|
|
a(b?)c abc 0 3
|
|
a(b?)c ac 0 2
|
|
a(b+)c abc 0 3
|
|
a(b+)c abbbc 0 5
|
|
a(b*)c ac 0 2
|
|
(a|ab)(bc([de]+)f|cde) abcdef 0 6
|
|
a([bc]?)c abc 0 3
|
|
a([bc]?)c ac 0 2
|
|
a([bc]+)c abc 0 3
|
|
a([bc]+)c abcc 0 4
|
|
a([bc]+)bc abcbc 0 5
|
|
a(bb+|b)b abb 0 3
|
|
a(bbb+|bb+|b)b abb 0 3
|
|
a(bbb+|bb+|b)b abbb 0 4
|
|
a(bbb+|bb+|b)bb abbb 0 4
|
|
(.*).* abcdef 0 6
|
|
(a*)* bc 0 0
|
|
|
|
- normal nosubs REG_NO_POSIX_TEST
|
|
a(b?c)+d accd 0 4
|
|
(wee|week)(knights|night) weeknights 0 10
|
|
.* abc 0 3
|
|
a(b|(c))d abd 0 3
|
|
a(b|(c))d acd 0 3
|
|
a(b*|c|e)d abbd 0 4
|
|
a(b*|c|e)d acd 0 3
|
|
a(b*|c|e)d ad 0 2
|
|
a(b?)c abc 0 3
|
|
a(b?)c ac 0 2
|
|
a(b+)c abc 0 3
|
|
a(b+)c abbbc 0 5
|
|
a(b*)c ac 0 2
|
|
(a|ab)(bc([de]+)f|cde) abcdef 0 6
|
|
a([bc]?)c abc 0 3
|
|
a([bc]?)c ac 0 2
|
|
a([bc]+)c abc 0 3
|
|
a([bc]+)c abcc 0 4
|
|
a([bc]+)bc abcbc 0 5
|
|
a(bb+|b)b abb 0 3
|
|
a(bbb+|bb+|b)b abb 0 3
|
|
a(bbb+|bb+|b)b abbb 0 4
|
|
a(bbb+|bb+|b)bb abbb 0 4
|
|
(.*).* abcdef 0 6
|
|
(a*)* bc 0 0
|
|
|