Improve strstr performance

Improve strstr performance.  Strstr tends to be slow because it uses
many calls to memchr and a slow byte loop to scan for the next match.
Performance is significantly improved by using strnlen on larger blocks
and using strchr to search for the next matching character.  strcasestr
can also use strnlen to scan ahead, and memmem can use memchr to check
for the next match.

On the GLIBC bench tests the performance gains on Cortex-A72 are:
strstr: +25%
strcasestr: +4.3%
memmem: +18%

On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%.

    Reviewd-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

(cherry picked from commit 3ae725dfb6)
This commit is contained in:
Wilco Dijkstra 2018-07-16 17:50:09 +01:00
parent d6613ad24f
commit 55a280689e
9 changed files with 53 additions and 34 deletions

View File

@ -1,4 +1,17 @@
2019-01-09 Wilco Dijkstra <wdijkstr@arm.com>
2019-09-13 Wilco Dijkstra <wdijkstr@arm.com>
* benchtests/bench-strcasestr.c: Rename __strnlen to strnlen.
* benchtests/bench-strstr.c: Likewise.
* string/memmem.c (FASTSEARCH): Define.
* string/str-two-way.h (two_way_short_needle): Minor cleanups.
Add support for FASTSEARCH.
* string/strcasestr.c (AVAILABLE): Use read-ahead __strnlen.
* string/strstr.c (AVAILABLE): Use read-ahead __strnlen.
(FASTSEARCH): Define.
* string/test-strcasestr.c: Rename __strnlen to strnlen.
* string/test-strstr.c: Likewise.
2019-09-06 Wilco Dijkstra <wdijkstr@arm.com>
* manual/tunables.texi (glibc.cpu.name): Add ares tunable.
* sysdeps/aarch64/multiarch/memcpy.c (__libc_memcpy): Use

View File

@ -24,6 +24,7 @@
#define STRCASESTR simple_strcasestr
#define NO_ALIAS
#define __strncasecmp strncasecmp
#define __strnlen strnlen
#include "../string/strcasestr.c"

View File

@ -22,6 +22,9 @@
#define STRSTR simple_strstr
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(X)
#define __strnlen strnlen
#include "../string/strstr.c"

View File

@ -31,6 +31,7 @@
#define RETURN_TYPE void *
#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
#include "str-two-way.h"
#undef memmem

View File

@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
{
const unsigned char *phaystack = &haystack[suffix];
const unsigned char *phaystack;
/* The comparison always starts from needle[suffix], so cache it
and use an optimized first-character loop. */
unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
#if CHECK_EOL
/* We start matching from the SUFFIX'th element, so make sure we
don't hit '\0' before that. */
if (haystack_len < suffix + 1
&& !AVAILABLE (haystack, haystack_len, 0, suffix + 1))
return NULL;
#endif
/* The two halves of needle are distinct; no extra memory is
required, and any mismatch results in a maximal shift. */
period = MAX (suffix, needle_len - suffix) + 1;
j = 0;
while (1
#if !CHECK_EOL
&& AVAILABLE (haystack, haystack_len, j, needle_len)
#endif
)
while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
unsigned char haystack_char;
const unsigned char *pneedle;
/* TODO: The first-character loop can be sped up by adapting
longword-at-a-time implementation of memchr/strchr. */
if (needle_suffix
phaystack = &haystack[suffix + j];
#ifdef FASTSEARCH
if (*phaystack++ != needle_suffix)
{
phaystack = FASTSEARCH (phaystack, needle_suffix,
haystack_len - needle_len - j);
if (phaystack == NULL)
goto ret0;
j = phaystack - &haystack[suffix];
phaystack++;
}
#else
while (needle_suffix
!= (haystack_char = CANON_ELEMENT (*phaystack++)))
{
RET0_IF_0 (haystack_char);
#if !CHECK_EOL
# if !CHECK_EOL
++j;
#endif
continue;
if (!AVAILABLE (haystack, haystack_len, j, needle_len))
goto ret0;
# endif
}
#if CHECK_EOL
# if CHECK_EOL
/* Calculate J if it wasn't kept up-to-date in the first-character
loop. */
j = phaystack - &haystack[suffix] - 1;
# endif
#endif
/* Scan for matches in right half. */
i = suffix + 1;
pneedle = &needle[i];
@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
++i;
}
#if CHECK_EOL
/* Update minimal length of haystack. */
if (phaystack > haystack + haystack_len)
haystack_len = phaystack - haystack;
#endif
if (needle_len <= i)
{
/* Scan for matches in left half. */
@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
j += i - suffix + 1;
#if CHECK_EOL
if (!AVAILABLE (haystack, haystack_len, j, needle_len))
break;
#endif
phaystack = &haystack[suffix + j];
}
}
ret0: __attribute__ ((unused))

View File

@ -37,8 +37,8 @@
/* Two-Way algorithm. */
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
(!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
&& ((h_l) = (j) + (n_l)))
(((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
(j) + (n_l) <= (h_l)))
#define CHECK_EOL (1)
#define RET0_IF_0(a) if (!a) goto ret0
#define CANON_ELEMENT(c) TOLOWER (c)

View File

@ -33,10 +33,11 @@
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
(!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
&& ((h_l) = (j) + (n_l)))
(((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
(j) + (n_l) <= (h_l)))
#define CHECK_EOL (1)
#define RET0_IF_0(a) if (!a) goto ret0
#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
#include "str-two-way.h"
#undef strstr

View File

@ -25,6 +25,7 @@
#define STRCASESTR simple_strcasestr
#define NO_ALIAS
#define __strncasecmp strncasecmp
#define __strnlen strnlen
#include "strcasestr.c"

View File

@ -24,6 +24,7 @@
#define STRSTR simple_strstr
#define libc_hidden_builtin_def(arg) /* nothing */
#define __strnlen strnlen
#include "strstr.c"