mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
bb3f4825c4
2003-11-28 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes to compensate other setup. 2003-11-27 Andreas Jaeger <aj@suse.de> * sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for new atan2 test. * math/libm-test.inc (atan2_test): Add test that run infinitly. Reported by "Willus" <etc231etc231@willus.com>. 2003-11-27 Michael Matz <matz@suse.de> * sysdeps/ieee754/dbl-64/mpsqrt.c (fastiroot): Fix 64-bit problem with wrong types. 2003-11-28 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (acquire_init_state_context): Make inline. Add always_inline attribute. (check_matching): Add BE macro. Move if (cur_state->has_backref) into if (dfa->nbackref). (sift_states_backward): Fix comment. (transit_state): Add BE macro. Move if (next_state->has_backref) into if (dfa->nbackref && next_state). Don't check for next_state != NULL twice. * posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx for ANCHOR. (parse_expression): Only call init_word_char if word context will be needed. * posix/bug-regex11.c (tests): Add new tests. * posix/tst-regex.c: Include getopt.h. (timing): New variable. (main): Set timing to 1 if --timing argument is present. Add 2 new tests. (run_test, run_test_backwards): Handle timing. 2003-11-27 Jakub Jelinek <jakub@redhat.com> * posix/regex_internal.h (re_string_t): Remove mbs_case field. Add offsets, valid_raw_len, raw_len, raw_stop, mbs_allocated and offsets_needed fields. Change icase, is_utf8 and map_notascii type from int bitfield to unsigned char. (MBS_ALLOCATED, MBS_CASE_ALLOCATED): Remove. (build_wcs_upper_buffer): Change prototype to return int. (re_string_peek_byte_case, re_string_fetch_byte_case): Remove defines, add prototypes. * posix/regex_internal.c (re_string_allocate): Don't initialize stop here. Don't initialize mbs_case. Set valid_raw_len. Use mbs_allocated instead of MBS_* macros. (re_string_construct): Don't initialize stop and valid_len here. Don't initialize mbs_case. 
Use mbs_allocated instead of MBS_* macros. Reallocate buffers if build_wcs_upper_buffer converted too few bytes. Set valid_len to bufs_len only for single byte no translation and set in that case valid_raw_len as well. (re_string_realloc_buffers): Reallocate offsets if not NULL. Use mbs_allocated instead of MBS_ALLOCATED. Don't reallocate mbs_case. (re_string_construct_common): Initialize raw_len, mbs_allocated, stop and raw_stop. (build_wcs_buffer): Apply pstr->trans before mbrtowc instead of after it. Set valid_raw_len. Don't set mbs_case. (build_wcs_upper_buffer): Return REG_NOERROR or REG_ESPACE. Only use the fast path if !pstr->offsets_needed. Apply pstr->trans before mbrtowc instead of after it. If upper case character uses different number of bytes than lower case, goto to the slow path. Don't call towupper unnecessarily twice. Set valid_raw_len as well. Handle in the slow path the case if lower and upper case use different number of characters. Don't set mbs_case. (re_string_skip_chars): Use valid_raw_len instead of valid_len. (build_upper_buffer): Don't set mbs_case. Add BE macro. Set valid_raw_len. (re_string_translate_buffer): Set mbs instead of mbs_case. Set valid_raw_len. (re_string_reconstruct): Use raw_len/raw_stop to initialize len/stop. Clear valid_raw_len and offsets_needed when clearing valid_len. Use mbs_allocated instead of MBS_* macros. Check original offset against valid_raw_len instead of valid_len. Remove mbs_case handling. Adjust valid_raw_len together with valid_len. If is_utf8 and looking for tip context, apply pstr->trans first. If buffers start with partial multi-byte character, initialize mbs array as well if mbs_allocated. Check return value of build_wcs_upper_buffer. (re_string_peek_byte_case): New function. (re_string_fetch_byte_case): New function. (re_string_destruct): Use mbs_allocated instead of MBS_ALLOCATED. Don't free mbs_case. Free offsets. 
* posix/regcomp.c (init_dfa): Only check if charset name is UTF-8 if mb_cur_max == 6. * posix/regexec.c (re_search_internal): Initialize input.raw_stop as well. Use valid_raw_len instead of valid_len when looking through fastmap. Adjust registers through input.offsets. (extend_buffers): Allow build_wcs_upper_buffer to fail. * posix/bug-regex18.c (tests): Enable #ifdefed out tests. Add new tests.
504 lines
12 KiB
C
504 lines
12 KiB
C
/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
|
||
|
||
#include <spawn.h>
|
||
#include "spawn_int.h"
|
||
|
||
#include <assert.h>
|
||
#include <errno.h>
|
||
#include <error.h>
|
||
#include <fcntl.h>
|
||
#include <getopt.h>
|
||
#include <iconv.h>
|
||
#include <locale.h>
|
||
#include <mcheck.h>
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <time.h>
|
||
#include <unistd.h>
|
||
#include <sys/stat.h>
|
||
#include <sys/types.h>
|
||
#include <regex.h>
|
||
|
||
|
||
/* Set to 1 by getopt_long when --timing is given on the command line.  */
static int timing;

#ifdef _POSIX_CPUTIME
/* Nonzero while the CPU-time clock CL can be used for measurements.  */
static int use_clock;
/* Clock id obtained from clock_getcpuclockid in main.  */
static clockid_t cl;
#endif

/* Conversion descriptor opened in main with
   iconv_open ("UTF-8", "ISO-8859-1").  */
static iconv_t cd;

/* Content of the input file as read from disk, and its length.  */
static char *mem;
static size_t memlen;

/* The same content after conversion through CD, and its length.  */
static char *umem;
static size_t umemlen;
|
||
|
||
/* Forward declarations of the helpers defined after main.  Each one
   returns nonzero when a match count differs from what was expected.  */

/* Run EXPR in all locale/direction/case combinations.  */
static int test_expr (const char *expr,
                      int expected,
                      int expectedicase);

/* Forward search using regcomp/regexec.  */
static int run_test (const char *expr,
                     const char *mem,
                     size_t memlen,
                     int icase,
                     int expected);

/* Backward search using re_compile_pattern/re_search.  */
static int run_test_backwards (const char *expr,
                               const char *mem,
                               size_t memlen,
                               int icase,
                               int expected);
|
||
|
||
|
||
int
|
||
main (int argc, char *argv[])
|
||
{
|
||
const char *file;
|
||
int fd;
|
||
struct stat st;
|
||
int result;
|
||
char *inmem;
|
||
char *outmem;
|
||
size_t inlen;
|
||
size_t outlen;
|
||
static const struct option options[] =
|
||
{
|
||
{"timing",no_argument, &timing, 1 },
|
||
{NULL, 0, NULL, 0 }
|
||
};
|
||
|
||
mtrace ();
|
||
|
||
while (getopt_long (argc, argv, "", options, NULL) >= 0);
|
||
|
||
/* Make the content of the file available in memory. */
|
||
file = "../ChangeLog.8";
|
||
fd = open (file, O_RDONLY);
|
||
if (fd == -1)
|
||
error (EXIT_FAILURE, errno, "cannot open %s", basename (file));
|
||
|
||
if (fstat (fd, &st) != 0)
|
||
error (EXIT_FAILURE, errno, "cannot stat %s", basename (file));
|
||
memlen = st.st_size;
|
||
|
||
mem = (char *) malloc (memlen + 1);
|
||
if (mem == NULL)
|
||
error (EXIT_FAILURE, errno, "while allocating buffer");
|
||
|
||
if ((size_t) read (fd, mem, memlen) != memlen)
|
||
error (EXIT_FAILURE, 0, "cannot read entire file");
|
||
mem[memlen] = '\0';
|
||
|
||
close (fd);
|
||
|
||
/* We have to convert a few things from Latin-1 to UTF-8. */
|
||
cd = iconv_open ("UTF-8", "ISO-8859-1");
|
||
if (cd == (iconv_t) -1)
|
||
error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
|
||
|
||
/* For the second test we have to convert the file content to UTF-8.
|
||
Since the text is mostly ASCII it should be enough to allocate
|
||
twice as much memory for the UTF-8 text than for the Latin-1
|
||
text. */
|
||
umem = (char *) calloc (2, memlen);
|
||
if (umem == NULL)
|
||
error (EXIT_FAILURE, errno, "while allocating buffer");
|
||
|
||
inmem = mem;
|
||
inlen = memlen;
|
||
outmem = umem;
|
||
outlen = 2 * memlen - 1;
|
||
iconv (cd, &inmem, &inlen, &outmem, &outlen);
|
||
umemlen = outmem - umem;
|
||
if (inlen != 0)
|
||
error (EXIT_FAILURE, errno, "cannot convert buffer");
|
||
|
||
#ifdef _POSIX_CPUTIME
|
||
/* See whether we can use the CPU clock. */
|
||
use_clock = clock_getcpuclockid (0, &cl) == 0;
|
||
#endif
|
||
|
||
#ifdef DEBUG
|
||
re_set_syntax (RE_DEBUG);
|
||
#endif
|
||
|
||
/* Run the actual tests. All tests are run in a single-byte and a
|
||
multi-byte locale. */
|
||
result = test_expr ("[<5B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]", 2, 2);
|
||
result |= test_expr ("G.ran", 2, 3);
|
||
result |= test_expr ("G.\\{1\\}ran", 2, 3);
|
||
result |= test_expr ("G.*ran", 3, 44);
|
||
result |= test_expr ("[<5B><><EFBFBD><EFBFBD>]", 0, 0);
|
||
result |= test_expr ("Uddeborg", 2, 2);
|
||
result |= test_expr (".Uddeborg", 2, 2);
|
||
|
||
/* Free the resources. */
|
||
free (umem);
|
||
iconv_close (cd);
|
||
free (mem);
|
||
|
||
return result;
|
||
}
|
||
|
||
|
||
static int
|
||
test_expr (const char *expr, int expected, int expectedicase)
|
||
{
|
||
int result;
|
||
char *inmem;
|
||
char *outmem;
|
||
size_t inlen;
|
||
size_t outlen;
|
||
char *uexpr;
|
||
|
||
/* First test: search with an ISO-8859-1 locale. */
|
||
if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
|
||
error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
|
||
|
||
printf ("\nTest \"%s\" with 8-bit locale\n", expr);
|
||
result = run_test (expr, mem, memlen, 0, expected);
|
||
printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
|
||
result |= run_test (expr, mem, memlen, 1, expectedicase);
|
||
printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
|
||
result |= run_test_backwards (expr, mem, memlen, 0, expected);
|
||
printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
|
||
expr);
|
||
result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
|
||
|
||
/* Second test: search with an UTF-8 locale. */
|
||
if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
|
||
error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
|
||
|
||
inmem = (char *) expr;
|
||
inlen = strlen (expr);
|
||
outlen = inlen * MB_CUR_MAX;
|
||
outmem = uexpr = alloca (outlen + 1);
|
||
memset (outmem, '\0', outlen + 1);
|
||
iconv (cd, &inmem, &inlen, &outmem, &outlen);
|
||
if (inlen != 0)
|
||
error (EXIT_FAILURE, errno, "cannot convert expression");
|
||
|
||
/* Run the tests. */
|
||
printf ("\nTest \"%s\" with multi-byte locale\n", expr);
|
||
result |= run_test (uexpr, umem, umemlen, 0, expected);
|
||
printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
|
||
result |= run_test (uexpr, umem, umemlen, 1, expectedicase);
|
||
printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
|
||
result |= run_test_backwards (uexpr, umem, umemlen, 0, expected);
|
||
printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
|
||
expr);
|
||
result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase);
|
||
|
||
return result;
|
||
}
|
||
|
||
|
||
/* Count the lines of MEM that match EXPR, searching forwards with the
   POSIX regcomp/regexec interface.

   EXPR is compiled with REG_NEWLINE, plus REG_ICASE if ICASE is
   nonzero.  MEM must hold MEMLEN bytes followed by a NUL (asserted).
   After each match the whole containing line is printed and the search
   resumes behind that line, so every line is counted at most once.

   When _POSIX_CPUTIME is defined and the CPU clock is usable, the
   elapsed time is printed as well: without --timing the time of the
   run above, with --timing the minimum over ten additional runs that
   only compile and search.

   Returns nonzero if the number of matches differs from EXPECTED.
   Errors from regcomp or regexec in the counting run are fatal.  */
static int
run_test (const char *expr, const char *mem, size_t memlen, int icase,
          int expected)
{
#ifdef _POSIX_CPUTIME
  struct timespec start;
  struct timespec finish;
#endif
  regex_t re;
  int err;
  size_t offset;
  int cnt;

#ifdef _POSIX_CPUTIME
  if (use_clock && !timing)
    use_clock = clock_gettime (cl, &start) == 0;
#endif

  err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
  if (err != REG_NOERROR)
    {
      char buf[200];
      regerror (err, &re, buf, sizeof buf);
      error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf);
    }

  cnt = 0;
  offset = 0;
  assert (mem[memlen] == '\0');
  while (offset < memlen)
    {
      regmatch_t ma[1];
      const char *sp;
      const char *ep;

      err = regexec (&re, mem + offset, 1, ma, 0);
      if (err == REG_NOMATCH)
        break;

      if (err != REG_NOERROR)
        {
          char buf[200];
          regerror (err, &re, buf, sizeof buf);
          error (EXIT_FAILURE, 0, "cannot use expression: %s", buf);
        }

      assert (ma[0].rm_so >= 0);

      /* Widen the match position to the line containing it: SP moves
         back to the start of the line, EP forward to its end.  */
      sp = mem + offset + ma[0].rm_so;
      while (sp > mem && sp[-1] != '\n')
        --sp;

      ep = mem + offset + ma[0].rm_so;
      while (*ep != '\0' && *ep != '\n')
        ++ep;

      printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);

      /* Continue behind the line terminator.  */
      offset = ep + 1 - mem;
    }

  regfree (&re);

#ifdef _POSIX_CPUTIME
  if (use_clock && !timing)
    {
      use_clock = clock_gettime (cl, &finish) == 0;
      if (use_clock)
        {
          /* finish -= start, borrowing one second if necessary.  */
          if (finish.tv_nsec < start.tv_nsec)
            {
              finish.tv_nsec -= start.tv_nsec - 1000000000;
              finish.tv_sec -= 1 + start.tv_sec;
            }
          else
            {
              finish.tv_nsec -= start.tv_nsec;
              finish.tv_sec -= start.tv_sec;
            }

          /* time_t need not be long, so convert explicitly to match
             the conversion specifications.  */
          printf ("elapsed time: %ld.%09ld sec\n",
                  (long) finish.tv_sec, (long) finish.tv_nsec);
        }
    }

  if (use_clock && timing)
    {
      /* Start from one day so that any real measurement is smaller.  */
      struct timespec mintime = { .tv_sec = 24 * 60 * 60 };

      for (int i = 0; i < 10; ++i)
        {
          offset = 0;
          use_clock = clock_gettime (cl, &start) == 0;

          if (!use_clock)
            continue;

          err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
          if (err != REG_NOERROR)
            continue;

          while (offset < memlen)
            {
              regmatch_t ma[1];

              err = regexec (&re, mem + offset, 1, ma, 0);
              if (err != REG_NOERROR)
                break;

              /* Always make progress: an empty match would otherwise
                 leave OFFSET unchanged and loop forever.  */
              offset += ma[0].rm_eo > 0 ? (size_t) ma[0].rm_eo : 1;
            }

          regfree (&re);

          use_clock = clock_gettime (cl, &finish) == 0;
          if (use_clock)
            {
              if (finish.tv_nsec < start.tv_nsec)
                {
                  finish.tv_nsec -= start.tv_nsec - 1000000000;
                  finish.tv_sec -= 1 + start.tv_sec;
                }
              else
                {
                  finish.tv_nsec -= start.tv_nsec;
                  finish.tv_sec -= start.tv_sec;
                }
              if (finish.tv_sec < mintime.tv_sec
                  || (finish.tv_sec == mintime.tv_sec
                      && finish.tv_nsec < mintime.tv_nsec))
                mintime = finish;
            }
        }
      printf ("elapsed time: %ld.%09ld sec\n",
              (long) mintime.tv_sec, (long) mintime.tv_nsec);
    }
#endif

  /* Return an error if the number of matches found is not what we
     expect.  */
  return cnt != expected;
}
|
||
|
||
|
||
static int
|
||
run_test_backwards (const char *expr, const char *mem, size_t memlen,
|
||
int icase, int expected)
|
||
{
|
||
#ifdef _POSIX_CPUTIME
|
||
struct timespec start;
|
||
struct timespec finish;
|
||
#endif
|
||
struct re_pattern_buffer re;
|
||
const char *err;
|
||
size_t offset;
|
||
int cnt;
|
||
|
||
#ifdef _POSIX_CPUTIME
|
||
if (use_clock && !timing)
|
||
use_clock = clock_gettime (cl, &start) == 0;
|
||
#endif
|
||
|
||
re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
|
||
| RE_HAT_LISTS_NOT_NEWLINE
|
||
| (icase ? RE_ICASE : 0));
|
||
|
||
memset (&re, 0, sizeof (re));
|
||
re.fastmap = malloc (256);
|
||
if (re.fastmap == NULL)
|
||
error (EXIT_FAILURE, errno, "cannot allocate fastmap");
|
||
|
||
err = re_compile_pattern (expr, strlen (expr), &re);
|
||
if (err != NULL)
|
||
error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);
|
||
|
||
if (re_compile_fastmap (&re))
|
||
error (EXIT_FAILURE, 0, "couldn't compile fastmap");
|
||
|
||
cnt = 0;
|
||
offset = memlen;
|
||
assert (mem[memlen] == '\0');
|
||
while (offset <= memlen)
|
||
{
|
||
int start;
|
||
const char *sp;
|
||
const char *ep;
|
||
|
||
start = re_search (&re, mem, memlen, offset, -offset, NULL);
|
||
if (start == -1)
|
||
break;
|
||
|
||
if (start == -2)
|
||
error (EXIT_FAILURE, 0, "internal error in re_search");
|
||
|
||
sp = mem + start;
|
||
while (sp > mem && sp[-1] != '\n')
|
||
--sp;
|
||
|
||
ep = mem + start;
|
||
while (*ep != '\0' && *ep != '\n')
|
||
++ep;
|
||
|
||
printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
|
||
|
||
offset = sp - 1 - mem;
|
||
}
|
||
|
||
regfree (&re);
|
||
|
||
#ifdef _POSIX_CPUTIME
|
||
if (use_clock && !timing)
|
||
{
|
||
use_clock = clock_gettime (cl, &finish) == 0;
|
||
if (use_clock)
|
||
{
|
||
if (finish.tv_nsec < start.tv_nsec)
|
||
{
|
||
finish.tv_nsec -= start.tv_nsec - 1000000000;
|
||
finish.tv_sec -= 1 + start.tv_sec;
|
||
}
|
||
else
|
||
{
|
||
finish.tv_nsec -= start.tv_nsec;
|
||
finish.tv_sec -= start.tv_sec;
|
||
}
|
||
|
||
printf ("elapsed time: %ld.%09ld sec\n",
|
||
finish.tv_sec, finish.tv_nsec);
|
||
}
|
||
}
|
||
|
||
if (use_clock && timing)
|
||
{
|
||
struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
|
||
|
||
for (int i = 0; i < 10; ++i)
|
||
{
|
||
offset = memlen;
|
||
use_clock = clock_gettime (cl, &start) == 0;
|
||
|
||
if (!use_clock)
|
||
continue;
|
||
|
||
memset (&re, 0, sizeof (re));
|
||
re.fastmap = malloc (256);
|
||
if (re.fastmap == NULL)
|
||
continue;
|
||
|
||
err = re_compile_pattern (expr, strlen (expr), &re);
|
||
if (err != NULL)
|
||
continue;
|
||
|
||
if (re_compile_fastmap (&re))
|
||
{
|
||
regfree (&re);
|
||
continue;
|
||
}
|
||
|
||
while (offset <= memlen)
|
||
{
|
||
int start;
|
||
const char *sp;
|
||
|
||
start = re_search (&re, mem, memlen, offset, -offset, NULL);
|
||
if (start < -1)
|
||
break;
|
||
|
||
sp = mem + start;
|
||
while (sp > mem && sp[-1] != '\n')
|
||
--sp;
|
||
|
||
offset = sp - 1 - mem;
|
||
}
|
||
|
||
regfree (&re);
|
||
|
||
use_clock = clock_gettime (cl, &finish) == 0;
|
||
if (use_clock)
|
||
{
|
||
if (finish.tv_nsec < start.tv_nsec)
|
||
{
|
||
finish.tv_nsec -= start.tv_nsec - 1000000000;
|
||
finish.tv_sec -= 1 + start.tv_sec;
|
||
}
|
||
else
|
||
{
|
||
finish.tv_nsec -= start.tv_nsec;
|
||
finish.tv_sec -= start.tv_sec;
|
||
}
|
||
if (finish.tv_sec < mintime.tv_sec
|
||
|| (finish.tv_sec == mintime.tv_sec
|
||
&& finish.tv_nsec < mintime.tv_nsec))
|
||
mintime = finish;
|
||
}
|
||
}
|
||
printf ("elapsed time: %ld.%09ld sec\n",
|
||
mintime.tv_sec, mintime.tv_nsec);
|
||
}
|
||
#endif
|
||
|
||
/* Return an error if the number of matches found is not match we
|
||
expect. */
|
||
return cnt != expected;
|
||
}
|