2003-11-14 04:52:55 +08:00
|
|
|
/* Regular expression tests.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
|
2003-11-14 04:52:55 +08:00
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <mcheck.h>
|
|
|
|
#include <regex.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <locale.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
|
|
|
|
/* Expand the test-file escape letters in STR in place: 'N' becomes a
   newline, 'T' a tab, 'S' a space and 'Z' a NUL byte.  Scanning resumes
   one byte past every replaced character, so the bytes that follow a
   'Z' are still examined up to the next NUL.  */
static void
replace_special_chars (char *str)
{
  char *cursor = str;

  while ((cursor = strpbrk (cursor, "NTSZ")) != NULL)
    {
      if (*cursor == 'N')
        *cursor = '\n';
      else if (*cursor == 'T')
        *cursor = '\t';
      else if (*cursor == 'S')
        *cursor = ' ';
      else if (*cursor == 'Z')
        *cursor = '\0';

      ++cursor;
    }
}
|
|
|
|
|
|
|
|
/* Rewrite STR in place so that the word-boundary bracket forms
   [[:<:]] and [[:>:]] become the two-character escapes \< and \>.
   Any other text starting with "[[:" is left untouched.  */
static void
glibc_re_syntax (char *str)
{
  /* One past the terminating NUL, so that moving the tail down also
     moves the terminator.  */
  char *limit = str + strlen (str) + 1;
  char *scan = str;

  while ((scan = strstr (scan, "[[:")) != NULL)
    {
      int is_word_edge = ((scan[3] == '<' || scan[3] == '>')
                          && strncmp (scan + 4, ":]]", 3) == 0);

      if (!is_word_edge)
        {
          /* Skip past this "[[:" and keep searching.  */
          scan += 3;
          continue;
        }

      /* Seven bytes "[[:X:]]" shrink to the two bytes "\X".  */
      scan[0] = '\\';
      scan[1] = scan[3];
      memmove (scan + 2, scan + 7, limit - scan - 7);
      limit -= 5;
      scan += 2;
    }
}
|
|
|
|
|
|
|
|
/* Store at DST the two-byte UTF-8 sequence that stands in for the
   ASCII letter C and return the position just past it.  The mapping
   is a/A -> a/A with acute, b/B -> c/C with caron, c/C -> d/D with
   caron and d/D -> e/E with acute.  For any other C nothing is
   written and DST is returned unchanged.  */
static char *
mb_replace (char *dst, const char c)
{
  static const struct
  {
    char ascii;
    char utf8[2];
  } map[] =
    {
      { 'a', { '\xc3', '\xa1' } },
      { 'A', { '\xc3', '\x81' } },
      { 'b', { '\xc4', '\x8d' } },
      { 'B', { '\xc4', '\x8c' } },
      { 'c', { '\xc4', '\x8f' } },
      { 'C', { '\xc4', '\x8e' } },
      { 'd', { '\xc3', '\xa9' } },
      { 'D', { '\xc3', '\x89' } }
    };

  for (size_t k = 0; k < sizeof (map) / sizeof (map[0]); ++k)
    if (map[k].ascii == c)
      {
        *dst++ = map[k].utf8[0];
        *dst++ = map[k].utf8[1];
        break;
      }

  return dst;
}
|
|
|
|
|
|
|
|
/* Return a newly malloc'ed copy of STR in which every character that
   also occurs in LETTERS has been passed through mb_replace; all other
   characters are copied verbatim.  Returns NULL when STR is NULL or
   when the allocation fails.  */
static char *
mb_frob_string (const char *str, const char *letters)
{
  if (str == NULL)
    return NULL;

  /* Each input byte produces at most two output bytes.  */
  char *result = malloc (2 * strlen (str) + 1);
  if (result == NULL)
    return NULL;

  char *out = result;
  const char *in = str;

  while (*in != '\0')
    {
      char ch = *in++;

      if (strchr (letters, ch) != NULL)
        out = mb_replace (out, ch);
      else
        *out++ = ch;
    }

  *out = '\0';
  return result;
}
|
|
|
|
|
|
|
|
/* Pattern-aware variant of mb_frob_string.  Characters listed in
   LETTERS are passed through mb_replace, except that nothing is
   replaced between "[:" and ":]", "[." and ".]" or "[=" and "=]",
   and a character immediately following an unescaped backslash is
   copied verbatim.  Returns a newly malloc'ed string, or NULL when
   STR is NULL or the allocation fails.  */
static char *
mb_frob_pattern (const char *str, const char *letters)
{
  if (str == NULL)
    return NULL;

  /* Each input byte produces at most two output bytes.  */
  char *result = malloc (2 * strlen (str) + 1);
  if (result == NULL)
    return NULL;

  char *out = result;
  int inside_class = 0;
  int after_backslash = 0;

  for (const char *in = str; *in != '\0'; ++in)
    {
      if (*in == '\\')
        {
          /* A backslash toggles the state, so "\\\\" escapes itself.  */
          after_backslash = !after_backslash;
          *out++ = *in;
          continue;
        }

      if (after_backslash)
        {
          after_backslash = 0;
          *out++ = *in;
          continue;
        }

      if (!inside_class && strchr (letters, *in) != NULL)
        {
          out = mb_replace (out, *in);
          continue;
        }

      /* Track entry into and exit from [: :], [. .] and [= =].  */
      if (!inside_class)
        {
          if (*in == '[' && strchr (":.=", in[1]) != NULL)
            inside_class = 1;
        }
      else if (*in == ']' && strchr (":.=", in[-1]) != NULL)
        inside_class = 0;

      *out++ = *in;
    }

  *out = '\0';
  return result;
}
|
|
|
|
|
|
|
|
/* Compare the match recorded in RM[IDX] for subject STRING against
   the expectation MATCH and return 0 on agreement, 1 otherwise.  On
   disagreement a diagnostic prefixed with FAIL is printed.

   MATCH is interpreted as follows:
     "-"     the slot must be unset (both offsets -1);
     "@TEXT" the slot must be an empty match and STRING at that offset
             must start with TEXT (with nothing after '@', one byte is
             compared against the empty string's terminator);
     other   the matched substring must equal MATCH exactly.  */
static int
check_match (regmatch_t *rm, int idx, const char *string,
             const char *match, const char *fail)
{
  const regmatch_t *slot = &rm[idx];

  if (strcmp (match, "-") == 0)
    {
      if (slot->rm_so == -1 && slot->rm_eo == -1)
        return 0;

      printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
      return 1;
    }

  if (slot->rm_so == -1 || slot->rm_eo == -1)
    {
      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
      return 1;
    }

  const char *found = string + slot->rm_so;

  if (match[0] == '@')
    {
      if (slot->rm_so != slot->rm_eo)
        {
          printf ("%s rm[%d] not empty\n", fail, idx);
          return 1;
        }

      const char *context = match + 1;
      size_t context_len = strlen (context);
      if (context_len == 0)
        context_len = 1;

      if (strncmp (found, context, context_len) != 0)
        {
          printf ("%s rm[%d] not matching %s\n", fail, idx, match);
          return 1;
        }

      return 0;
    }

  regoff_t span = slot->rm_eo - slot->rm_so;

  if (span != strlen (match) || strncmp (found, match, span) != 0)
    {
      printf ("%s rm[%d] not matching %s\n", fail, idx, match);
      return 1;
    }

  return 0;
}
|
|
|
|
|
|
|
|
/* Run one test case and return 0 on success, 1 on failure; every
   diagnostic is prefixed with FAIL.

   PATTERN is compiled with CFLAGS.  When EFLAGS is -1 the case
   expects regcomp to fail and STRING holds the expected error name
   without its REG_ prefix; an expected "EMPTY" is accepted even
   though compilation succeeds.  Otherwise STRING is matched with
   EFLAGS: a NULL EXPECT means no match is expected, else EXPECT
   describes the overall match and MATCHES, if not NULL, is a
   comma-separated list describing subexpressions 1..9 (see
   check_match for the notation).  MATCHES is split in place while it
   is being examined and restored afterwards.  */
static int
test (const char *pattern, int cflags, const char *string, int eflags,
      char *expect, char *matches, const char *fail)
{
  enum { NMATCH = 10 };
  regex_t re;
  regmatch_t rm[NMATCH];

  int rc = regcomp (&re, pattern, cflags);
  if (rc != 0)
    {
      if (eflags != -1)
        {
          /* Compilation was supposed to succeed.  */
          char buf[500];

          regerror (rc, &re, buf, sizeof (buf));
          printf ("%s regcomp failed: %s\n", fail, buf);
          return 1;
        }

      static struct { reg_errcode_t code; const char *name; } codes []
#define C(x) { REG_##x, #x }
        = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
            C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
            C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
            C(ESPACE), C(BADRPT) };

      for (size_t k = 0; k < sizeof (codes) / sizeof (codes[0]); ++k)
        {
          if (rc != codes[k].code)
            continue;

          if (strcmp (string, codes[k].name) == 0)
            return 0;

          printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
                  fail, codes[k].name, string);
          return 1;
        }

      printf ("%s regcomp return value REG_%d\n", fail, rc);
      return 1;
    }

  if (eflags == -1)
    {
      regfree (&re);

      /* The test case file assumes something only guaranteed by the
         rxspencer regex implementation, namely that regcomp returns
         REG_EMPTY for empty expressions.  That does not hold here,
         so this particular expectation is ignored.  */
      if (strcmp (string, "EMPTY") == 0)
        return 0;

      printf ("%s regcomp unexpectedly succeeded\n", fail);
      return 1;
    }

  /* The compiled pattern is not needed once regexec has run.  */
  int exec_rc = regexec (&re, string, NMATCH, rm, eflags);
  regfree (&re);

  if (exec_rc != 0)
    {
      if (expect == NULL)
        return 0;

      printf ("%s regexec failed\n", fail);
      return 1;
    }

  if (expect == NULL)
    {
      printf ("%s regexec unexpectedly succeeded\n", fail);
      return 1;
    }

  /* With REG_NOSUB no offsets are checked.  */
  if (cflags & REG_NOSUB)
    return 0;

  int ret = check_match (rm, 0, string, expect, fail);
  if (matches == NULL)
    return ret;

  for (int n = 1; ret == 0 && n < NMATCH; ++n)
    {
      char *comma = NULL;

      if (matches != NULL)
        {
          /* Temporarily cut the list at the next comma.  */
          comma = strchr (matches, ',');
          if (comma != NULL)
            *comma = '\0';
        }

      /* Once the list is exhausted the remaining slots must be unset.  */
      ret = check_match (rm, n, string, matches != NULL ? matches : "-",
                         fail);

      if (comma != NULL)
        {
          *comma = ',';
          matches = comma + 1;
        }
      else
        matches = NULL;
    }

  return ret;
}
|
|
|
|
|
|
|
|
/* Run one test case after converting the characters listed in
   LETTERS to their two-byte UTF-8 stand-ins.  PATTERN goes through
   mb_frob_pattern; STRING, EXPECT and MATCHES go through
   mb_frob_string, except that STRING is passed on unchanged when
   EFLAGS is -1.  The converted values are handed to test () together
   with CFLAGS, EFLAGS and FAIL.

   Returns the result of test (), or 1 if one of the conversions
   failed, in which case FAIL followed by the errno message is printed
   on a line of its own.  All temporary copies are freed before
   returning.  */
static int
mb_test (const char *pattern, int cflags, const char *string, int eflags,
         char *expect, const char *matches, const char *letters,
         const char *fail)
{
  char *pattern_mb = mb_frob_pattern (pattern, letters);
  const char *string_mb
    = eflags == -1 ? string : mb_frob_string (string, letters);
  char *expect_mb = mb_frob_string (expect, letters);
  char *matches_mb = mb_frob_string (matches, letters);
  int ret;

  /* EXPECT and MATCHES may legitimately be NULL; only a NULL result
     for a non-NULL input indicates a failed conversion.  */
  if (pattern_mb == NULL || string_mb == NULL
      || (expect != NULL && expect_mb == NULL)
      || (matches != NULL && matches_mb == NULL))
    {
      /* Terminate the line so the next test's output is not appended
         to this diagnostic.  */
      printf ("%s %m\n", fail);
      ret = 1;
    }
  else
    ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
                matches_mb, fail);

  free (matches_mb);
  free (expect_mb);
  /* STRING_MB aliases the caller's STRING when EFLAGS is -1.  */
  if (string_mb != string)
    free ((char *) string_mb);
  free (pattern_mb);
  return ret;
}
|
|
|
|
|
|
|
|
/* Run mb_test for every non-empty combination of the letter pairs
   a/A, b/B, c/C and d/D, skipping any combination that includes a pair
   of which neither letter occurs in PATTERN or STRING.  The results
   are OR-ed together and returned.  Patterns containing "[:xdigit:]"
   or "[[=b=]]" are not tested at all and yield 0.  */
static int
mb_tests (const char *pattern, int cflags, const char *string, int eflags,
          char *expect, const char *matches)
{
  static const char pairs[4][2]
    = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
  char letters[9], fail[20];
  int ret = 0;

  /* The replacement characters are not hex digits, whereas a-d and
     A-D are, so xdigit tests cannot be expected to keep working.  */
  if (strstr (pattern, "[:xdigit:]"))
    return 0;

  /* XXX: regex ATM handles only single byte equivalence classes.  */
  if (strstr (pattern, "[[=b=]]"))
    return 0;

  for (int mask = 1; mask < 16; ++mask)
    {
      char *p = letters;
      int usable = 1;

      for (int bit = 0; bit < 4; ++bit)
        {
          if ((mask & (1 << bit)) == 0)
            continue;

          char lower = pairs[bit][0];
          char upper = pairs[bit][1];

          if (!strchr (pattern, lower) && !strchr (string, lower)
              && !strchr (pattern, upper) && !strchr (string, upper))
            {
              /* This pair appears nowhere, so skip the combination.  */
              usable = 0;
              break;
            }

          *p++ = lower;
          *p++ = upper;
        }

      if (!usable)
        continue;

      *p = '\0';
      sprintf (fail, "UTF-8 %s FAIL", letters);
      ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
                      letters, fail);
    }

  return ret;
}
|
|
|
|
|
|
|
|
/* Test driver.  Usage: PROGRAM [--utf8] TESTFILE.

   Each line of TESTFILE that is neither empty nor starting with '#'
   is echoed and split at tabs into pattern, flags, string and the
   optional expected match and subexpression list.  The case is run in
   the C locale and, when --utf8 was given and the C run passed, again
   in cs_CZ.UTF-8, both directly and through mb_tests.  Returns 0 when
   every case passed and 1 otherwise.  */
int
main (int argc, char **argv)
{
  static int test_utf8 = 0;
  static const struct option options[] =
    {
      {"utf8", no_argument, &test_utf8, 1},
      {NULL, 0, NULL, 0 }
    };
  int ret = 0;
  char *line = NULL;
  size_t line_len = 0;
  ssize_t len;
  FILE *f;

  mtrace ();

  /* Consume all options; --utf8 sets test_utf8 through the table.  */
  while (getopt_long (argc, argv, "", options, NULL) >= 0)
    continue;

  if (optind + 1 != argc)
    {
      fprintf (stderr, "Missing test filename\n");
      return 1;
    }

  f = fopen (argv[optind], "r");
  if (f == NULL)
    {
      fprintf (stderr, "Couldn't open %s\n", argv[optind]);
      return 1;
    }

  while ((len = getline (&line, &line_len, f)) > 0)
    {
      char *pattern, *flagstr, *string, *expect, *matches;
      int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;
      int unsupported = 0;
      int failed;

      if (line[len - 1] == '\n')
        line[len - 1] = '\0';

      /* Skip comments and empty lines.  */
      if (*line == '#' || *line == '\0')
        continue;

      puts (line);
      fflush (stdout);

      pattern = strtok (line, "\t");
      if (pattern == NULL)
        continue;

      /* A field consisting of "" denotes the empty string.  */
      if (strcmp (pattern, "\"\"") == 0)
        pattern += 2;

      flagstr = strtok (NULL, "\t");
      if (flagstr == NULL)
        continue;

      string = strtok (NULL, "\t");
      if (string == NULL)
        continue;

      if (strcmp (string, "\"\"") == 0)
        string += 2;

      for (const char *flag = flagstr; *flag != '\0'; ++flag)
        switch (*flag)
          {
          case '-':
            break;
          case 'b':
            cflags &= ~REG_EXTENDED;
            break;
          case '&':
            try_bre_ere = 1;
            break;
          case 'C':
            eflags = -1;
            break;
          case 'i':
            cflags |= REG_ICASE;
            break;
          case 's':
            cflags |= REG_NOSUB;
            break;
          case 'n':
            cflags |= REG_NEWLINE;
            break;
          case '^':
            eflags |= REG_NOTBOL;
            break;
          case '$':
            eflags |= REG_NOTEOL;
            break;
          case 'm':
          case 'p':
          case '#':
            /* Not supported.  */
            unsupported = 1;
            break;
          }

      if (unsupported)
        continue;

      replace_special_chars (pattern);
      glibc_re_syntax (pattern);
      if (eflags != -1)
        replace_special_chars (string);

      expect = strtok (NULL, "\t");
      matches = NULL;
      if (expect != NULL)
        {
          replace_special_chars (expect);
          matches = strtok (NULL, "\t");
          if (matches != NULL)
            replace_special_chars (matches);
        }

      if (setlocale (LC_ALL, "C") == NULL)
        {
          puts ("setlocale C failed");
          ret = 1;
        }

      /* C locale run; with '&' the pattern is also tried as a BRE.  */
      failed = test (pattern, cflags, string, eflags, expect, matches,
                     "FAIL");
      if (!failed && try_bre_ere)
        failed = test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                       expect, matches, "FAIL");
      if (failed)
        {
          ret = 1;
          continue;
        }

      if (!test_utf8)
        continue;

      if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
        {
          puts ("setlocale cs_CZ.UTF-8 failed");
          ret = 1;
          continue;
        }

      /* UTF-8 locale run with the unmodified test data.  */
      failed = test (pattern, cflags, string, eflags, expect, matches,
                     "UTF-8 FAIL");
      if (!failed && try_bre_ere)
        failed = test (pattern, cflags & ~REG_EXTENDED, string,
                       eflags, expect, matches, "UTF-8 FAIL");

      /* Only if that passed, run the multibyte-substituted variants.  */
      if (!failed)
        {
          failed = mb_tests (pattern, cflags, string, eflags, expect,
                             matches);
          if (!failed && try_bre_ere)
            failed = mb_tests (pattern, cflags & ~REG_EXTENDED, string,
                               eflags, expect, matches);
        }

      if (failed)
        ret = 1;
    }

  free (line);
  fclose (f);
  return ret;
}
|