mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
cf11e74b0d
It uses the same two-way algorithm used on strstr, strcasestr, and memmem. Different than strstr, neither the "shift table" optimization nor the self-adapting filtering check is used because it would result in a too-large shift table (and it also simplifies the implementation bit). Checked on x86_64-linux-gnu and aarch64-linux-gnu. Reviewed-by: DJ Delorie <dj@redhat.com>
482 lines
12 KiB
C
482 lines
12 KiB
C
/* Test and measure strstr functions.
|
|
Copyright (C) 2010-2024 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#define TEST_MAIN
|
|
#ifndef WIDE
|
|
# define TEST_NAME "strstr"
|
|
# define TEST_FUNC strstr
|
|
#else
|
|
# define TEST_NAME "wcsstr"
|
|
# define TEST_FUNC wcsstr
|
|
#endif
|
|
|
|
#ifndef WIDE
|
|
# define CHAR char
|
|
# define STRLEN strlen
|
|
# define STRCPY strcpy
|
|
# define MEMCPY memcpy
|
|
# define MEMSET memset
|
|
# define MEMPCPY mempcpy
|
|
# define L(s) s
|
|
#else
|
|
# include <wchar.h>
|
|
# define CHAR wchar_t
|
|
# define STRLEN wcslen
|
|
# define STRCPY wcscpy
|
|
# define MEMCPY wmemcpy
|
|
# define MEMSET wmemset
|
|
# define MEMPCPY wmempcpy
|
|
# define L(s) L ## s
|
|
/* The test requires up to 8191 characters, so allocate at least 32Kb
|
|
(considering 4kb page size). */
|
|
# define BUF1PAGES 4
|
|
#endif
|
|
|
|
#include "test-string.h"
|
|
|
|
#ifndef WIDE
|
|
# define STRSTR c_strstr
|
|
# define libc_hidden_builtin_def(arg) /* nothing */
|
|
# define __strnlen strnlen
|
|
# include "strstr.c"
|
|
# define C_IMPL STRSTR
|
|
#else
|
|
# undef weak_alias
|
|
# define weak_alias(a, b)
|
|
# define WCSSTR c_wcsstr
|
|
# define __wmemcmp wmemcmp
|
|
# define __wcsnlen wcsnlen
|
|
# define __wcslen wcslen
|
|
# include "wcsstr.c"
|
|
# define C_IMPL WCSSTR
|
|
#endif
|
|
|
|
/* Naive implementation to verify results. */
|
|
static CHAR *
|
|
simple_strstr (const CHAR *s1, const CHAR *s2)
|
|
{
|
|
ssize_t s1len = STRLEN (s1);
|
|
ssize_t s2len = STRLEN (s2);
|
|
|
|
if (s2len > s1len)
|
|
return NULL;
|
|
|
|
for (ssize_t i = 0; i <= s1len - s2len; ++i)
|
|
{
|
|
size_t j;
|
|
for (j = 0; j < s2len; ++j)
|
|
if (s1[i + j] != s2[j])
|
|
break;
|
|
if (j == s2len)
|
|
return (CHAR *) s1 + i;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
typedef CHAR *(*proto_t) (const CHAR *, const CHAR *);
|
|
|
|
IMPL (C_IMPL, 1)
|
|
IMPL (TEST_FUNC, 1)
|
|
|
|
static int
|
|
check_result (impl_t *impl, const CHAR *s1, const CHAR *s2,
|
|
CHAR *exp_result)
|
|
{
|
|
CHAR *result = CALL (impl, s1, s2);
|
|
if (result != exp_result)
|
|
{
|
|
error (0, 0, "Wrong result in function %s %p %p", impl->name,
|
|
result, exp_result);
|
|
ret = 1;
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
do_one_test (impl_t *impl, const CHAR *s1, const CHAR *s2, CHAR *exp_result)
|
|
{
|
|
if (check_result (impl, s1, s2, exp_result) < 0)
|
|
return;
|
|
}
|
|
|
|
|
|
static void
|
|
do_test (size_t align1, size_t align2, size_t len1, size_t len2,
|
|
int fail)
|
|
{
|
|
align1 = align1 * sizeof (CHAR);
|
|
align2 = align2 * sizeof (CHAR);
|
|
|
|
CHAR *s1 = (CHAR *) (buf1 + align1);
|
|
CHAR *s2 = (CHAR *) (buf2 + align2);
|
|
|
|
static const CHAR d[] = L("1234567890abcdef");
|
|
const size_t dl = STRLEN (d);
|
|
CHAR *ss2 = s2;
|
|
for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
|
|
{
|
|
size_t t = l > dl ? dl : l;
|
|
ss2 = MEMPCPY (ss2, d, t);
|
|
}
|
|
s2[len2] = '\0';
|
|
|
|
if (fail)
|
|
{
|
|
CHAR *ss1 = s1;
|
|
for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0)
|
|
{
|
|
size_t t = l > dl ? dl : l;
|
|
MEMCPY (ss1, d, t);
|
|
++ss1[len2 > 7 ? 7 : len2 - 1];
|
|
ss1 += t;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
MEMSET (s1, '0', len1);
|
|
MEMCPY (s1 + len1 - len2, s2, len2);
|
|
}
|
|
s1[len1] = '\0';
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
|
|
}
|
|
|
|
static void
|
|
check1 (void)
|
|
{
|
|
const CHAR s1[] =
|
|
L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD_C3_A7_20_EF_BF_BD");
|
|
const CHAR s2[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
|
|
CHAR *exp_result;
|
|
|
|
exp_result = simple_strstr (s1, s2);
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, s1, s2, exp_result);
|
|
}
|
|
|
|
static void
|
|
check2 (void)
|
|
{
|
|
const CHAR s1_stack[] = L(", enable_static, \0, enable_shared, ");
|
|
const size_t s1_char_count = 18;
|
|
const size_t s1_byte_len = 18 * sizeof (CHAR);
|
|
const CHAR *s2_stack = &(s1_stack[s1_char_count]);
|
|
const size_t s2_byte_len = 18 * sizeof (CHAR);
|
|
CHAR *exp_result;
|
|
const size_t page_size_real = getpagesize ();
|
|
|
|
/* Haystack at end of page. The following page is protected. */
|
|
CHAR *s1_page_end = (void *) buf1 + page_size - s1_byte_len;
|
|
STRCPY (s1_page_end, s1_stack);
|
|
|
|
/* Haystack which crosses a page boundary.
|
|
Note: page_size is at least 2 * getpagesize. See test_init. */
|
|
CHAR *s1_page_cross = (void *) buf1 + page_size_real - 8;
|
|
STRCPY (s1_page_cross, s1_stack);
|
|
|
|
/* Needle at end of page. The following page is protected. */
|
|
CHAR *s2_page_end = (void *) buf2 + page_size - s2_byte_len;
|
|
STRCPY (s2_page_end, s2_stack);
|
|
|
|
/* Needle which crosses a page boundary.
|
|
Note: page_size is at least 2 * getpagesize. See test_init. */
|
|
CHAR *s2_page_cross = (void *) buf2 + page_size_real - 8;
|
|
STRCPY (s2_page_cross, s2_stack);
|
|
|
|
exp_result = simple_strstr (s1_stack, s2_stack);
|
|
FOR_EACH_IMPL (impl, 0)
|
|
{
|
|
check_result (impl, s1_stack, s2_stack, exp_result);
|
|
check_result (impl, s1_stack, s2_page_end, exp_result);
|
|
check_result (impl, s1_stack, s2_page_cross, exp_result);
|
|
|
|
check_result (impl, s1_page_end, s2_stack, exp_result);
|
|
check_result (impl, s1_page_end, s2_page_end, exp_result);
|
|
check_result (impl, s1_page_end, s2_page_cross, exp_result);
|
|
|
|
check_result (impl, s1_page_cross, s2_stack, exp_result);
|
|
check_result (impl, s1_page_cross, s2_page_end, exp_result);
|
|
check_result (impl, s1_page_cross, s2_page_cross, exp_result);
|
|
}
|
|
}
|
|
|
|
static void
|
|
check3 (void)
|
|
{
|
|
/* Check that a long periodic needle does not cause false positives. */
|
|
{
|
|
const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
|
|
"_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
|
|
"_C3_A7_20_EF_BF_BD");
|
|
const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, input, need, NULL);
|
|
}
|
|
|
|
{
|
|
const CHAR input[] = L("F_BD_CE_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD"
|
|
"_C3_88_20_EF_BF_BD_EF_BF_BD_EF_BF_BD"
|
|
"_C3_A7_20_EF_BF_BD_DA_B5_C2_A6_20"
|
|
"_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
|
|
const CHAR need[] = L("_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD_EF_BF_BD");
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, input, need, (CHAR *) input + 115);
|
|
}
|
|
}
|
|
|
|
|
|
#define N 1024
|
|
|
|
static void
|
|
pr23637 (void)
|
|
{
|
|
CHAR *h = (CHAR*) buf1;
|
|
CHAR *n = (CHAR*) buf2;
|
|
|
|
for (int i = 0; i < N; i++)
|
|
{
|
|
n[i] = 'x';
|
|
h[i] = ' ';
|
|
h[i + N] = 'x';
|
|
}
|
|
|
|
n[N] = '\0';
|
|
h[N * 2] = '\0';
|
|
|
|
/* Ensure we don't match at the first 'x'. */
|
|
h[0] = 'x';
|
|
|
|
CHAR *exp_result = simple_strstr (h, n);
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, h, n, exp_result);
|
|
}
|
|
|
|
static void
|
|
pr23865 (void)
|
|
{
|
|
/* Check that a very long haystack is handled quickly if the needle is
|
|
short and occurs near the beginning. */
|
|
{
|
|
size_t m = 1000000;
|
|
const CHAR *needle =
|
|
L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
|
|
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
|
|
CHAR *haystack = xmalloc ((m + 1) * sizeof (CHAR));
|
|
MEMSET (haystack, L('A'), m);
|
|
haystack[0] = L('B');
|
|
haystack[m] = L('\0');
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, haystack + 1);
|
|
|
|
free (haystack);
|
|
}
|
|
|
|
/* Check that a very long needle is discarded quickly if the haystack is
|
|
short. */
|
|
{
|
|
size_t m = 1000000;
|
|
const CHAR *haystack =
|
|
L("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
|
|
"ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB");
|
|
CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
|
|
MEMSET (needle, L'A', m);
|
|
needle[m] = L('\0');
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, NULL);
|
|
|
|
free (needle);
|
|
}
|
|
|
|
/* Check that the asymptotic worst-case complexity is not quadratic. */
|
|
{
|
|
size_t m = 1000000;
|
|
CHAR *haystack = xmalloc ((2 * m + 2) * sizeof (CHAR));
|
|
CHAR *needle = xmalloc ((m + 2) * sizeof (CHAR));
|
|
|
|
MEMSET (haystack, L('A'), 2 * m);
|
|
haystack[2 * m] = L('B');
|
|
haystack[2 * m + 1] = L('\0');
|
|
|
|
MEMSET (needle, L('A'), m);
|
|
needle[m] = L('B');
|
|
needle[m + 1] = L('\0');
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, haystack + m);
|
|
|
|
free (needle);
|
|
free (haystack);
|
|
}
|
|
|
|
{
|
|
/* Ensure that with a barely periodic "short" needle, STRSTR's
|
|
search does not mistakenly skip just past the match point. */
|
|
const CHAR *haystack =
|
|
L("\n"
|
|
"with_build_libsubdir\n"
|
|
"with_local_prefix\n"
|
|
"with_gxx_include_dir\n"
|
|
"with_cpp_install_dir\n"
|
|
"enable_generated_files_in_srcdir\n"
|
|
"with_gnu_ld\n"
|
|
"with_ld\n"
|
|
"with_demangler_in_ld\n"
|
|
"with_gnu_as\n"
|
|
"with_as\n"
|
|
"enable_largefile\n"
|
|
"enable_werror_always\n"
|
|
"enable_checking\n"
|
|
"enable_coverage\n"
|
|
"enable_gather_detailed_mem_stats\n"
|
|
"enable_build_with_cxx\n"
|
|
"with_stabs\n"
|
|
"enable_multilib\n"
|
|
"enable___cxa_atexit\n"
|
|
"enable_decimal_float\n"
|
|
"enable_fixed_point\n"
|
|
"enable_threads\n"
|
|
"enable_tls\n"
|
|
"enable_objc_gc\n"
|
|
"with_dwarf2\n"
|
|
"enable_shared\n"
|
|
"with_build_sysroot\n"
|
|
"with_sysroot\n"
|
|
"with_specs\n"
|
|
"with_pkgversion\n"
|
|
"with_bugurl\n"
|
|
"enable_languages\n"
|
|
"with_multilib_list\n");
|
|
const CHAR *needle =
|
|
L("\n"
|
|
"with_gnu_ld\n");
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, (CHAR*) haystack + 114);
|
|
}
|
|
|
|
{
|
|
/* Same bug, shorter trigger. */
|
|
const CHAR *haystack = L("..wi.d.");
|
|
const CHAR *needle = L(".d.");
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, (CHAR*) haystack + 4);
|
|
}
|
|
|
|
/* Test case from Yves Bastide.
|
|
<https://www.openwall.com/lists/musl/2014/04/18/2> */
|
|
{
|
|
const CHAR *input = L("playing play play play always");
|
|
const CHAR *needle = L("play play play");
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, input, needle, (CHAR*) input + 8);
|
|
}
|
|
|
|
/* Test long needles. */
|
|
{
|
|
size_t m = 1024;
|
|
CHAR *haystack = xmalloc ((2 * m + 1) * sizeof (CHAR));
|
|
CHAR *needle = xmalloc ((m + 1) * sizeof (CHAR));
|
|
haystack[0] = L('x');
|
|
MEMSET (haystack + 1, L(' '), m - 1);
|
|
MEMSET (haystack + m, L('x'), m);
|
|
haystack[2 * m] = L('\0');
|
|
MEMSET (needle, L('x'), m);
|
|
needle[m] = L('\0');
|
|
|
|
FOR_EACH_IMPL (impl, 0)
|
|
check_result (impl, haystack, needle, haystack + m);
|
|
|
|
free (needle);
|
|
free (haystack);
|
|
}
|
|
}
|
|
|
|
static int
|
|
test_main (void)
|
|
{
|
|
test_init ();
|
|
|
|
check1 ();
|
|
check2 ();
|
|
check3 ();
|
|
pr23637 ();
|
|
pr23865 ();
|
|
|
|
printf ("%23s", "");
|
|
FOR_EACH_IMPL (impl, 0)
|
|
printf ("\t%s", impl->name);
|
|
putchar ('\n');
|
|
|
|
for (size_t klen = 2; klen < 32; ++klen)
|
|
for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
|
|
{
|
|
do_test (0, 0, hlen, klen, 0);
|
|
do_test (0, 0, hlen, klen, 1);
|
|
do_test (0, 3, hlen, klen, 0);
|
|
do_test (0, 3, hlen, klen, 1);
|
|
do_test (0, 9, hlen, klen, 0);
|
|
do_test (0, 9, hlen, klen, 1);
|
|
do_test (0, 15, hlen, klen, 0);
|
|
do_test (0, 15, hlen, klen, 1);
|
|
|
|
do_test (3, 0, hlen, klen, 0);
|
|
do_test (3, 0, hlen, klen, 1);
|
|
do_test (3, 3, hlen, klen, 0);
|
|
do_test (3, 3, hlen, klen, 1);
|
|
do_test (3, 9, hlen, klen, 0);
|
|
do_test (3, 9, hlen, klen, 1);
|
|
do_test (3, 15, hlen, klen, 0);
|
|
do_test (3, 15, hlen, klen, 1);
|
|
|
|
do_test (9, 0, hlen, klen, 0);
|
|
do_test (9, 0, hlen, klen, 1);
|
|
do_test (9, 3, hlen, klen, 0);
|
|
do_test (9, 3, hlen, klen, 1);
|
|
do_test (9, 9, hlen, klen, 0);
|
|
do_test (9, 9, hlen, klen, 1);
|
|
do_test (9, 15, hlen, klen, 0);
|
|
do_test (9, 15, hlen, klen, 1);
|
|
|
|
do_test (15, 0, hlen, klen, 0);
|
|
do_test (15, 0, hlen, klen, 1);
|
|
do_test (15, 3, hlen, klen, 0);
|
|
do_test (15, 3, hlen, klen, 1);
|
|
do_test (15, 9, hlen, klen, 0);
|
|
do_test (15, 9, hlen, klen, 1);
|
|
do_test (15, 15, hlen, klen, 0);
|
|
do_test (15, 15, hlen, klen, 1);
|
|
|
|
do_test (15, 15, hlen + klen * 4, klen * 4, 0);
|
|
do_test (15, 15, hlen + klen * 4, klen * 4, 1);
|
|
}
|
|
|
|
do_test (0, 0, page_size - 1, 16, 0);
|
|
do_test (0, 0, page_size - 1, 16, 1);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#include <support/test-driver.c>
|