2002-09-12 06:04:32 +08:00
|
|
|
/* Regular expression tests.
|
2024-01-02 02:12:26 +08:00
|
|
|
Copyright (C) 2002-2024 Free Software Foundation, Inc.
|
2002-09-05 00:21:24 +08:00
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
2012-02-10 07:18:22 +08:00
|
|
|
License along with the GNU C Library; if not, see
|
Prefer https to http for gnu.org and fsf.org URLs
Also, change sources.redhat.com to sourceware.org.
This patch was automatically generated by running the following shell
script, which uses GNU sed, and which avoids modifying files imported
from upstream:
sed -ri '
s,(http|ftp)(://(.*\.)?(gnu|fsf|sourceware)\.org($|[^.]|\.[^a-z])),https\2,g
s,(http|ftp)(://(.*\.)?)sources\.redhat\.com($|[^.]|\.[^a-z]),https\2sourceware.org\4,g
' \
$(find $(git ls-files) -prune -type f \
! -name '*.po' \
! -name 'ChangeLog*' \
! -path COPYING ! -path COPYING.LIB \
! -path manual/fdl-1.3.texi ! -path manual/lgpl-2.1.texi \
! -path manual/texinfo.tex ! -path scripts/config.guess \
! -path scripts/config.sub ! -path scripts/install-sh \
! -path scripts/mkinstalldirs ! -path scripts/move-if-change \
! -path INSTALL ! -path locale/programs/charmap-kw.h \
! -path po/libc.pot ! -path sysdeps/gnu/errlist.c \
! '(' -name configure \
-execdir test -f configure.ac -o -f configure.in ';' ')' \
! '(' -name preconfigure \
-execdir test -f preconfigure.ac ';' ')' \
-print)
and then by running 'make dist-prepare' to regenerate files built
from the altered files, and then executing the following to cleanup:
chmod a+x sysdeps/unix/sysv/linux/riscv/configure
# Omit irrelevant whitespace and comment-only changes,
# perhaps from a slightly-different Autoconf version.
git checkout -f \
sysdeps/csky/configure \
sysdeps/hppa/configure \
sysdeps/riscv/configure \
sysdeps/unix/sysv/linux/csky/configure
# Omit changes that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/powerpc/powerpc64/ppc-mcount.S: trailing lines
git checkout -f \
sysdeps/powerpc/powerpc64/ppc-mcount.S \
sysdeps/unix/sysv/linux/s390/s390-64/syscall.S
# Omit change that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: last line does not end in newline
git checkout -f sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
2019-09-07 13:40:42 +08:00
|
|
|
<https://www.gnu.org/licenses/>. */
|
2002-09-05 00:21:24 +08:00
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <mcheck.h>
|
|
|
|
#include <regex.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2002-09-29 04:51:17 +08:00
|
|
|
/* Tests supposed to match. */
|
2002-09-12 06:04:32 +08:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
const char *pattern;
|
|
|
|
const char *string;
|
2002-09-29 04:51:17 +08:00
|
|
|
int flags, nmatch;
|
2002-10-18 01:15:06 +08:00
|
|
|
regmatch_t rm[5];
|
2002-09-12 06:04:32 +08:00
|
|
|
} tests[] = {
|
|
|
|
/* Test for newline handling in regex. */
|
2002-09-29 04:51:17 +08:00
|
|
|
{ "[^~]*~", "\nx~y", 0, 2, { { 0, 3 }, { -1, -1 } } },
|
2002-09-12 06:04:32 +08:00
|
|
|
/* Other tests. */
|
2003-10-07 07:44:15 +08:00
|
|
|
{ "a(.*)b", "a b", REG_EXTENDED, 2, { { 0, 3 }, { 1, 2 } } },
|
2002-09-29 04:51:17 +08:00
|
|
|
{ ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
|
2002-09-12 06:04:32 +08:00
|
|
|
{ { 0, 21 }, { 15, 16 }, { 16, 18 } } },
|
2002-09-29 04:51:17 +08:00
|
|
|
{ ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
|
2002-10-18 01:15:06 +08:00
|
|
|
{ { 0, 21 }, { 8, 9 }, { 9, 10 } } },
|
|
|
|
{ "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)",
|
|
|
|
"a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0", 0,
|
2003-10-03 06:41:11 +08:00
|
|
|
5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } },
|
|
|
|
/* Test for BRE expression anchoring. POSIX says just that this may match;
|
|
|
|
in glibc regex it always matched, so avoid changing it. */
|
|
|
|
{ "\\(^\\|foo\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
|
|
|
|
{ "\\(foo\\|^\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
|
|
|
|
/* In ERE this must be treated as an anchor. */
|
|
|
|
{ "(^|foo)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
|
|
|
|
{ "(foo|^)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
|
|
|
|
/* Here ^ cannot be treated as an anchor according to POSIX. */
|
|
|
|
{ "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
|
|
|
|
{ "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
|
2003-10-07 07:44:15 +08:00
|
|
|
/* More tests on backreferences. */
|
2003-11-26 11:24:15 +08:00
|
|
|
{ "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
|
|
|
|
{ "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } },
|
2003-10-07 07:44:15 +08:00
|
|
|
{ "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
|
|
|
|
{ "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
|
|
|
|
{ "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
|
2003-11-26 11:24:15 +08:00
|
|
|
{ "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
|
|
|
|
{ "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
|
|
|
|
{ "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
|
|
|
|
{ "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
|
|
|
|
{ "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
|
2003-10-07 07:44:15 +08:00
|
|
|
{ "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
|
2003-11-26 11:24:15 +08:00
|
|
|
{ "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
|
|
|
|
{ "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
|
|
|
|
{ "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
|
2003-10-07 07:44:15 +08:00
|
|
|
{ "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
|
2003-12-31 04:01:17 +08:00
|
|
|
{ "^([^,]*),\\1,\\1$", "a,a,a", REG_EXTENDED, 2, { { 0, 5 }, { 0, 1 } } },
|
|
|
|
{ "^([^,]*),\\1,\\1$", "ab,ab,ab", REG_EXTENDED, 2, { { 0, 8 }, { 0, 2 } } },
|
|
|
|
{ "^([^,]*),\\1,\\1,\\1$", "abc,abc,abc,abc", REG_EXTENDED, 2,
|
|
|
|
{ { 0, 15 }, { 0, 3 } } },
|
2003-11-27 13:32:46 +08:00
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$",
|
|
|
|
"level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
|
Update.
2003-11-28 Ulrich Drepper <drepper@redhat.com>
* sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes
to compensate other setup.
2003-11-27 Andreas Jaeger <aj@suse.de>
* sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for new atan2 test.
* math/libm-test.inc (atan2_test): Add test that run infinitly.
Reported by "Willus" <etc231etc231@willus.com>.
2003-11-27 Michael Matz <matz@suse.de>
* sysdeps/ieee754/dbl-64/mpsqrt.c (fastiroot): Fix 64-bit problem
with wrong types.
2003-11-28 Jakub Jelinek <jakub@redhat.com>
* posix/regexec.c (acquire_init_state_context): Make inline.
Add always_inline attribute.
(check_matching): Add BE macro. Move if (cur_state->has_backref)
into if (dfa->nbackref).
(sift_states_backward): Fix comment.
(transit_state): Add BE macro. Move if (next_state->has_backref)
into if (dfa->nbackref && next_state). Don't check for next_state
!= NULL twice.
* posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx
for ANCHOR.
(parse_expression): Only call init_word_char if word context will be
needed.
* posix/bug-regex11.c (tests): Add new tests.
* posix/tst-regex.c: Include getopt.h.
(timing): New variable.
(main): Set timing to 1 if --timing argument is present.
Add 2 new tests.
(run_test, run_test_backwards): Handle timing.
2003-11-27 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.h (re_string_t): Remove mbs_case field.
Add offsets, valid_raw_len, raw_len, raw_stop, mbs_allocated and
offsets_needed fields. Change icase, is_utf8 and map_notascii
type from int bitfield to unsigned char.
(MBS_ALLOCATED, MBS_CASE_ALLOCATED): Remove.
(build_wcs_upper_buffer): Change prototype to return int.
(re_string_peek_byte_case, re_string_fetch_byte_case): Remove
defines, add prototypes.
* posix/regex_internal.c (re_string_allocate): Don't initialize
stop here. Don't initialize mbs_case. Set valid_raw_len.
Use mbs_allocated instead of MBS_* macros.
(re_string_construct): Don't initialize stop and valid_len here.
Don't initialize mbs_case. Use mbs_allocated instead of MBS_*
macros. Reallocate buffers if build_wcs_upper_buffer converted
too few bytes. Set valid_len to bufs_len only for single byte
no translation and set in that case valid_raw_len as well.
(re_string_realloc_buffers): Reallocate offsets if not NULL.
Use mbs_allocated instead of MBS_ALLOCATED. Don't reallocate
mbs_case.
(re_string_construct_common): Initialize raw_len, mbs_allocated,
stop and raw_stop.
(build_wcs_buffer): Apply pstr->trans before mbrtowc instead of
after it. Set valid_raw_len. Don't set mbs_case.
(build_wcs_upper_buffer): Return REG_NOERROR or REG_ESPACE.
Only use the fast path if !pstr->offsets_needed. Apply pstr->trans
before mbrtowc instead of after it. If upper case character
uses different number of bytes than lower case, goto to the
slow path. Don't call towupper unnecessarily twice. Set
valid_raw_len as well. Handle in the slow path the case if
lower and upper case use different number of characters.
Don't set mbs_case.
(re_string_skip_chars): Use valid_raw_len instead of valid_len.
(build_upper_buffer): Don't set mbs_case. Add BE macro. Set
valid_raw_len.
(re_string_translate_buffer): Set mbs instead of mbs_case. Set
valid_raw_len.
(re_string_reconstruct): Use raw_len/raw_stop to initialize
len/stop. Clear valid_raw_len and offsets_needed when clearing
valid_len. Use mbs_allocated instead of MBS_* macros.
Check original offset against valid_raw_len instead of valid_len.
Remove mbs_case handling. Adjust valid_raw_len together with
valid_len. If is_utf8 and looking for tip context, apply
pstr->trans first. If buffers start with partial multi-byte
character, initialize mbs array as well if mbs_allocated.
Check return value of build_wcs_upper_buffer.
(re_string_peek_byte_case): New function.
(re_string_fetch_byte_case): New function.
(re_string_destruct): Use mbs_allocated instead of MBS_ALLOCATED.
Don't free mbs_case. Free offsets.
* posix/regcomp.c (init_dfa): Only check if charset name is UTF-8
if mb_cur_max == 6.
* posix/regexec.c (re_search_internal): Initialize input.raw_stop
as well. Use valid_raw_len instead of valid_len when looking
through fastmap. Adjust registers through input.offsets.
(extend_buffers): Allow build_wcs_upper_buffer to fail.
* posix/bug-regex18.c (tests): Enable #ifdefed out tests. Add new
tests.
2003-11-29 14:13:09 +08:00
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
|
|
|
|
"level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
|
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
|
|
|
|
"abcdedcba", REG_EXTENDED, 1, { { 0, 9 } } },
|
2003-11-27 13:32:46 +08:00
|
|
|
#if 0
|
|
|
|
/* XXX Not used since they fail so far. */
|
Update.
2003-11-28 Ulrich Drepper <drepper@redhat.com>
* sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes
to compensate other setup.
2003-11-27 Andreas Jaeger <aj@suse.de>
* sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for new atan2 test.
* math/libm-test.inc (atan2_test): Add test that run infinitly.
Reported by "Willus" <etc231etc231@willus.com>.
2003-11-27 Michael Matz <matz@suse.de>
* sysdeps/ieee754/dbl-64/mpsqrt.c (fastiroot): Fix 64-bit problem
with wrong types.
2003-11-28 Jakub Jelinek <jakub@redhat.com>
* posix/regexec.c (acquire_init_state_context): Make inline.
Add always_inline attribute.
(check_matching): Add BE macro. Move if (cur_state->has_backref)
into if (dfa->nbackref).
(sift_states_backward): Fix comment.
(transit_state): Add BE macro. Move if (next_state->has_backref)
into if (dfa->nbackref && next_state). Don't check for next_state
!= NULL twice.
* posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx
for ANCHOR.
(parse_expression): Only call init_word_char if word context will be
needed.
* posix/bug-regex11.c (tests): Add new tests.
* posix/tst-regex.c: Include getopt.h.
(timing): New variable.
(main): Set timing to 1 if --timing argument is present.
Add 2 new tests.
(run_test, run_test_backwards): Handle timing.
2003-11-27 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.h (re_string_t): Remove mbs_case field.
Add offsets, valid_raw_len, raw_len, raw_stop, mbs_allocated and
offsets_needed fields. Change icase, is_utf8 and map_notascii
type from int bitfield to unsigned char.
(MBS_ALLOCATED, MBS_CASE_ALLOCATED): Remove.
(build_wcs_upper_buffer): Change prototype to return int.
(re_string_peek_byte_case, re_string_fetch_byte_case): Remove
defines, add prototypes.
* posix/regex_internal.c (re_string_allocate): Don't initialize
stop here. Don't initialize mbs_case. Set valid_raw_len.
Use mbs_allocated instead of MBS_* macros.
(re_string_construct): Don't initialize stop and valid_len here.
Don't initialize mbs_case. Use mbs_allocated instead of MBS_*
macros. Reallocate buffers if build_wcs_upper_buffer converted
too few bytes. Set valid_len to bufs_len only for single byte
no translation and set in that case valid_raw_len as well.
(re_string_realloc_buffers): Reallocate offsets if not NULL.
Use mbs_allocated instead of MBS_ALLOCATED. Don't reallocate
mbs_case.
(re_string_construct_common): Initialize raw_len, mbs_allocated,
stop and raw_stop.
(build_wcs_buffer): Apply pstr->trans before mbrtowc instead of
after it. Set valid_raw_len. Don't set mbs_case.
(build_wcs_upper_buffer): Return REG_NOERROR or REG_ESPACE.
Only use the fast path if !pstr->offsets_needed. Apply pstr->trans
before mbrtowc instead of after it. If upper case character
uses different number of bytes than lower case, goto to the
slow path. Don't call towupper unnecessarily twice. Set
valid_raw_len as well. Handle in the slow path the case if
lower and upper case use different number of characters.
Don't set mbs_case.
(re_string_skip_chars): Use valid_raw_len instead of valid_len.
(build_upper_buffer): Don't set mbs_case. Add BE macro. Set
valid_raw_len.
(re_string_translate_buffer): Set mbs instead of mbs_case. Set
valid_raw_len.
(re_string_reconstruct): Use raw_len/raw_stop to initialize
len/stop. Clear valid_raw_len and offsets_needed when clearing
valid_len. Use mbs_allocated instead of MBS_* macros.
Check original offset against valid_raw_len instead of valid_len.
Remove mbs_case handling. Adjust valid_raw_len together with
valid_len. If is_utf8 and looking for tip context, apply
pstr->trans first. If buffers start with partial multi-byte
character, initialize mbs array as well if mbs_allocated.
Check return value of build_wcs_upper_buffer.
(re_string_peek_byte_case): New function.
(re_string_fetch_byte_case): New function.
(re_string_destruct): Use mbs_allocated instead of MBS_ALLOCATED.
Don't free mbs_case. Free offsets.
* posix/regcomp.c (init_dfa): Only check if charset name is UTF-8
if mb_cur_max == 6.
* posix/regexec.c (re_search_internal): Initialize input.raw_stop
as well. Use valid_raw_len instead of valid_len when looking
through fastmap. Adjust registers through input.offsets.
(extend_buffers): Allow build_wcs_upper_buffer to fail.
* posix/bug-regex18.c (tests): Enable #ifdefed out tests. Add new
tests.
2003-11-29 14:13:09 +08:00
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
|
|
|
|
"ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
|
2003-11-22 06:38:10 +08:00
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
|
|
|
|
"level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
|
Update.
2003-11-28 Ulrich Drepper <drepper@redhat.com>
* sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes
to compensate other setup.
2003-11-27 Andreas Jaeger <aj@suse.de>
* sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for new atan2 test.
* math/libm-test.inc (atan2_test): Add test that run infinitly.
Reported by "Willus" <etc231etc231@willus.com>.
2003-11-27 Michael Matz <matz@suse.de>
* sysdeps/ieee754/dbl-64/mpsqrt.c (fastiroot): Fix 64-bit problem
with wrong types.
2003-11-28 Jakub Jelinek <jakub@redhat.com>
* posix/regexec.c (acquire_init_state_context): Make inline.
Add always_inline attribute.
(check_matching): Add BE macro. Move if (cur_state->has_backref)
into if (dfa->nbackref).
(sift_states_backward): Fix comment.
(transit_state): Add BE macro. Move if (next_state->has_backref)
into if (dfa->nbackref && next_state). Don't check for next_state
!= NULL twice.
* posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx
for ANCHOR.
(parse_expression): Only call init_word_char if word context will be
needed.
* posix/bug-regex11.c (tests): Add new tests.
* posix/tst-regex.c: Include getopt.h.
(timing): New variable.
(main): Set timing to 1 if --timing argument is present.
Add 2 new tests.
(run_test, run_test_backwards): Handle timing.
2003-11-27 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.h (re_string_t): Remove mbs_case field.
Add offsets, valid_raw_len, raw_len, raw_stop, mbs_allocated and
offsets_needed fields. Change icase, is_utf8 and map_notascii
type from int bitfield to unsigned char.
(MBS_ALLOCATED, MBS_CASE_ALLOCATED): Remove.
(build_wcs_upper_buffer): Change prototype to return int.
(re_string_peek_byte_case, re_string_fetch_byte_case): Remove
defines, add prototypes.
* posix/regex_internal.c (re_string_allocate): Don't initialize
stop here. Don't initialize mbs_case. Set valid_raw_len.
Use mbs_allocated instead of MBS_* macros.
(re_string_construct): Don't initialize stop and valid_len here.
Don't initialize mbs_case. Use mbs_allocated instead of MBS_*
macros. Reallocate buffers if build_wcs_upper_buffer converted
too few bytes. Set valid_len to bufs_len only for single byte
no translation and set in that case valid_raw_len as well.
(re_string_realloc_buffers): Reallocate offsets if not NULL.
Use mbs_allocated instead of MBS_ALLOCATED. Don't reallocate
mbs_case.
(re_string_construct_common): Initialize raw_len, mbs_allocated,
stop and raw_stop.
(build_wcs_buffer): Apply pstr->trans before mbrtowc instead of
after it. Set valid_raw_len. Don't set mbs_case.
(build_wcs_upper_buffer): Return REG_NOERROR or REG_ESPACE.
Only use the fast path if !pstr->offsets_needed. Apply pstr->trans
before mbrtowc instead of after it. If upper case character
uses different number of bytes than lower case, goto to the
slow path. Don't call towupper unnecessarily twice. Set
valid_raw_len as well. Handle in the slow path the case if
lower and upper case use different number of characters.
Don't set mbs_case.
(re_string_skip_chars): Use valid_raw_len instead of valid_len.
(build_upper_buffer): Don't set mbs_case. Add BE macro. Set
valid_raw_len.
(re_string_translate_buffer): Set mbs instead of mbs_case. Set
valid_raw_len.
(re_string_reconstruct): Use raw_len/raw_stop to initialize
len/stop. Clear valid_raw_len and offsets_needed when clearing
valid_len. Use mbs_allocated instead of MBS_* macros.
Check original offset against valid_raw_len instead of valid_len.
Remove mbs_case handling. Adjust valid_raw_len together with
valid_len. If is_utf8 and looking for tip context, apply
pstr->trans first. If buffers start with partial multi-byte
character, initialize mbs array as well if mbs_allocated.
Check return value of build_wcs_upper_buffer.
(re_string_peek_byte_case): New function.
(re_string_fetch_byte_case): New function.
(re_string_destruct): Use mbs_allocated instead of MBS_ALLOCATED.
Don't free mbs_case. Free offsets.
* posix/regcomp.c (init_dfa): Only check if charset name is UTF-8
if mb_cur_max == 6.
* posix/regexec.c (re_search_internal): Initialize input.raw_stop
as well. Use valid_raw_len instead of valid_len when looking
through fastmap. Adjust registers through input.offsets.
(extend_buffers): Allow build_wcs_upper_buffer to fail.
* posix/bug-regex18.c (tests): Enable #ifdefed out tests. Add new
tests.
2003-11-29 14:13:09 +08:00
|
|
|
{ "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
|
|
|
|
"ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
|
2003-10-07 07:44:15 +08:00
|
|
|
#endif
|
2002-09-12 06:04:32 +08:00
|
|
|
};
|
|
|
|
|
2002-09-05 00:21:24 +08:00
|
|
|
int
|
|
|
|
main (void)
|
|
|
|
{
|
|
|
|
regex_t re;
|
2002-10-18 01:15:06 +08:00
|
|
|
regmatch_t rm[5];
|
2002-09-29 04:51:17 +08:00
|
|
|
size_t i;
|
|
|
|
int n, ret = 0;
|
2002-09-05 00:21:24 +08:00
|
|
|
|
|
|
|
mtrace ();
|
|
|
|
|
2002-09-12 06:04:32 +08:00
|
|
|
for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
|
2002-09-05 00:21:24 +08:00
|
|
|
{
|
2002-09-29 04:51:17 +08:00
|
|
|
n = regcomp (&re, tests[i].pattern, tests[i].flags);
|
2002-09-12 06:04:32 +08:00
|
|
|
if (n != 0)
|
|
|
|
{
|
|
|
|
char buf[500];
|
|
|
|
regerror (n, &re, buf, sizeof (buf));
|
2003-10-07 07:44:15 +08:00
|
|
|
printf ("%s: regcomp %zd failed: %s\n", tests[i].pattern, i, buf);
|
2002-09-12 06:04:32 +08:00
|
|
|
ret = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2002-09-05 00:21:24 +08:00
|
|
|
|
2002-09-12 06:04:32 +08:00
|
|
|
if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0))
|
|
|
|
{
|
2003-10-07 07:44:15 +08:00
|
|
|
printf ("%s: regexec %zd failed\n", tests[i].pattern, i);
|
2002-09-12 06:04:32 +08:00
|
|
|
ret = 1;
|
|
|
|
regfree (&re);
|
|
|
|
continue;
|
|
|
|
}
|
2002-09-05 00:21:24 +08:00
|
|
|
|
2002-09-12 06:04:32 +08:00
|
|
|
for (n = 0; n < tests[i].nmatch; ++n)
|
|
|
|
if (rm[n].rm_so != tests[i].rm[n].rm_so
|
|
|
|
|| rm[n].rm_eo != tests[i].rm[n].rm_eo)
|
|
|
|
{
|
|
|
|
if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
|
|
|
|
break;
|
2003-10-07 07:44:15 +08:00
|
|
|
printf ("%s: regexec %zd match failure rm[%d] %d..%d\n",
|
|
|
|
tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo);
|
2002-09-12 06:04:32 +08:00
|
|
|
ret = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
regfree (&re);
|
|
|
|
}
|
2002-09-05 00:21:24 +08:00
|
|
|
|
2002-09-29 04:51:17 +08:00
|
|
|
return ret;
|
2002-09-05 00:21:24 +08:00
|
|
|
}
|