mirror of
git://sourceware.org/git/glibc.git
synced 2025-02-23 13:09:58 +08:00
Speed up SSE4.2 strcasestr by avoiding indirect function call.
This commit is contained in:
parent
9b059f9774
commit
cc9f2e47a0
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
|||||||
|
2010-07-16 Ulrich Drepper <drepper@redhat.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/multiarch/strstr.c: Rewrite to avoid indirect function
|
||||||
|
call in strcasestr.
|
||||||
|
* sysdeps/x86_64/multiarch/strcasestr.c: Declare
|
||||||
|
__strcasestr_sse42_nonascii.
|
||||||
|
* sysdeps/x86_64/multiarch/Makefile: Add rules to build
|
||||||
|
strcasestr-nonascii.c.
|
||||||
|
* sysdeps/x86_64/multiarch/strcasestr-nonascii.c: New file.
|
||||||
|
|
||||||
2010-06-15 Luis Machado <luisgpm@br.ibm.com>
|
2010-06-15 Luis Machado <luisgpm@br.ibm.com>
|
||||||
|
|
||||||
* sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S: New file.
|
* sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S: New file.
|
||||||
|
@ -7,7 +7,7 @@ ifeq ($(subdir),string)
|
|||||||
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
||||||
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
|
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
|
||||||
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
||||||
memmove-ssse3-back
|
memmove-ssse3-back strcasestr-nonascii
|
||||||
ifeq (yes,$(config-cflags-sse4))
|
ifeq (yes,$(config-cflags-sse4))
|
||||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||||
CFLAGS-strcspn-c.c += -msse4
|
CFLAGS-strcspn-c.c += -msse4
|
||||||
@ -15,5 +15,6 @@ CFLAGS-strpbrk-c.c += -msse4
|
|||||||
CFLAGS-strspn-c.c += -msse4
|
CFLAGS-strspn-c.c += -msse4
|
||||||
CFLAGS-strstr.c += -msse4
|
CFLAGS-strstr.c += -msse4
|
||||||
CFLAGS-strcasestr.c += -msse4
|
CFLAGS-strcasestr.c += -msse4
|
||||||
|
CFLAGS-strcasestr-nonascii.c += -msse4
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
50
sysdeps/x86_64/multiarch/strcasestr-nonascii.c
Normal file
50
sysdeps/x86_64/multiarch/strcasestr-nonascii.c
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/* strcasestr with SSE4.2 intrinsics, variant for non-POSIX/C locales
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
# include <ctype.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Similar to __m128i_strloadu. Convert to lower case for non-POSIX/C
|
||||||
|
locale. */
|
||||||
|
static inline __m128i
|
||||||
|
__m128i_strloadu_tolower (const unsigned char * p)
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
char b[16];
|
||||||
|
__m128i x;
|
||||||
|
} u;
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; ++i)
|
||||||
|
if (p[i] == 0)
|
||||||
|
{
|
||||||
|
u.b[i] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
u.b[i] = tolower (p[i]);
|
||||||
|
|
||||||
|
return u.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define STRCASESTR_NONASCII
|
||||||
|
#define USE_AS_STRCASESTR
|
||||||
|
#define STRSTR_SSE42 attribute_hidden __strcasestr_sse42_nonascii
|
||||||
|
#include "strstr.c"
|
@ -1,3 +1,7 @@
|
|||||||
|
extern char *__strcasestr_sse42_nonascii (const unsigned char *s1,
|
||||||
|
const unsigned char *s2)
|
||||||
|
attribute_hidden;
|
||||||
|
|
||||||
#define USE_AS_STRCASESTR
|
#define USE_AS_STRCASESTR
|
||||||
#define STRSTR_SSE42 __strcasestr_sse42
|
#define STRSTR_SSE42 __strcasestr_sse42
|
||||||
#include "strstr.c"
|
#include "strstr.c"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* strstr with SSE4.2 intrinsics
|
/* strstr with SSE4.2 intrinsics
|
||||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
Copyright (C) 2009, 2010 Free Software Foundation, Inc.
|
||||||
Contributed by Intel Corporation.
|
Contributed by Intel Corporation.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
@ -67,10 +67,10 @@
|
|||||||
|
|
||||||
case ECX CFlag ZFlag SFlag
|
case ECX CFlag ZFlag SFlag
|
||||||
3 X 1 0 0/1
|
3 X 1 0 0/1
|
||||||
4a 0 1 0 0
|
4a 0 1 0 0
|
||||||
4b 0 1 0 1
|
4b 0 1 0 1
|
||||||
4c 0 < X 1 0 0/1
|
4c 0 < X 1 0 0/1
|
||||||
5 16 0 1 0
|
5 16 0 1 0
|
||||||
|
|
||||||
3. An initial ordered-comparison fragment match, we fix up to do
|
3. An initial ordered-comparison fragment match, we fix up to do
|
||||||
subsequent string comparison
|
subsequent string comparison
|
||||||
@ -147,8 +147,7 @@ __m128i_shift_right (__m128i value, int offset)
|
|||||||
If EOS occurs within less than 16B before 4KB boundary, we don't
|
If EOS occurs within less than 16B before 4KB boundary, we don't
|
||||||
cross to next page. */
|
cross to next page. */
|
||||||
|
|
||||||
static __m128i
|
static inline __m128i
|
||||||
__attribute__ ((section (".text.sse4.2")))
|
|
||||||
__m128i_strloadu (const unsigned char * p)
|
__m128i_strloadu (const unsigned char * p)
|
||||||
{
|
{
|
||||||
int offset = ((size_t) p & (16 - 1));
|
int offset = ((size_t) p & (16 - 1));
|
||||||
@ -164,14 +163,12 @@ __m128i_strloadu (const unsigned char * p)
|
|||||||
return _mm_loadu_si128 ((__m128i *) p);
|
return _mm_loadu_si128 ((__m128i *) p);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_AS_STRCASESTR
|
#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
|
||||||
|
|
||||||
/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
|
/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
|
||||||
locale. */
|
locale. */
|
||||||
|
static inline __m128i
|
||||||
static __m128i
|
__m128i_strloadu_tolower (const unsigned char * p)
|
||||||
__attribute__ ((section (".text.sse4.2")))
|
|
||||||
__m128i_strloadu_tolower_posix (const unsigned char * p)
|
|
||||||
{
|
{
|
||||||
__m128i frag = __m128i_strloadu (p);
|
__m128i frag = __m128i_strloadu (p);
|
||||||
|
|
||||||
@ -184,39 +181,13 @@ __m128i_strloadu_tolower_posix (const unsigned char * p)
|
|||||||
return _mm_blendv_epi8 (frag, mask2, mask1);
|
return _mm_blendv_epi8 (frag, mask2, mask1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C
|
|
||||||
locale. */
|
|
||||||
|
|
||||||
static __m128i
|
|
||||||
__attribute__ ((section (".text.sse4.2")))
|
|
||||||
__m128i_strloadu_tolower (const unsigned char * p)
|
|
||||||
{
|
|
||||||
union
|
|
||||||
{
|
|
||||||
char b[16];
|
|
||||||
__m128i x;
|
|
||||||
} u;
|
|
||||||
|
|
||||||
for (int i = 0; i < 16; i++)
|
|
||||||
if (p[i] == 0)
|
|
||||||
{
|
|
||||||
u.b[i] = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
u.b[i] = tolower (p[i]);
|
|
||||||
|
|
||||||
return u.x;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
|
/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
|
||||||
algorithm) overlap for a fully populated 16B vector.
|
algorithm) overlap for a fully populated 16B vector.
|
||||||
Input parameter: 1st 16Byte loaded from the reference string of a
|
Input parameter: 1st 16Byte loaded from the reference string of a
|
||||||
strstr function.
|
strstr function.
|
||||||
We don't use KMP algorithm if reference string is less than 16B.
|
We don't use KMP algorithm if reference string is less than 16B. */
|
||||||
*/
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
__inline__ __attribute__ ((__always_inline__,))
|
__inline__ __attribute__ ((__always_inline__,))
|
||||||
KMP16Bovrlap (__m128i s2)
|
KMP16Bovrlap (__m128i s2)
|
||||||
@ -236,7 +207,7 @@ KMP16Bovrlap (__m128i s2)
|
|||||||
return 1;
|
return 1;
|
||||||
else if (!k1)
|
else if (!k1)
|
||||||
{
|
{
|
||||||
/* There are al least two ditinct char in s2. If byte 0 and 1 are
|
/* There are at least two distinct chars in s2. If byte 0 and 1 are
|
||||||
idential and the distinct value lies farther down, we can deduce
|
identical and the distinct value lies farther down, we can deduce
|
||||||
the next byte offset to restart full compare is least no earlier
|
the next byte offset to restart full compare is no earlier
|
||||||
than byte 3. */
|
than byte 3. */
|
||||||
@ -256,23 +227,24 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
|
|||||||
#define p1 s1
|
#define p1 s1
|
||||||
const unsigned char *p2 = s2;
|
const unsigned char *p2 = s2;
|
||||||
|
|
||||||
if (p2[0] == '\0')
|
#ifndef STRCASESTR_NONASCII
|
||||||
|
if (__builtin_expect (p2[0] == '\0', 0))
|
||||||
return (char *) p1;
|
return (char *) p1;
|
||||||
|
|
||||||
if (p1[0] == '\0')
|
if (__builtin_expect (p1[0] == '\0', 0))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* Check if p1 length is 1 byte long. */
|
/* Check if p1 length is 1 byte long. */
|
||||||
if (p1[1] == '\0')
|
if (__builtin_expect (p1[1] == '\0', 0))
|
||||||
return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
|
return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef USE_AS_STRCASESTR
|
#ifdef USE_AS_STRCASESTR
|
||||||
__m128i (*strloadu) (const unsigned char *);
|
if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE)
|
||||||
|
!= 0, 0))
|
||||||
|
return __strcasestr_sse42_nonascii (s1, s2);
|
||||||
|
|
||||||
if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) == 0)
|
# define strloadu __m128i_strloadu_tolower
|
||||||
strloadu = __m128i_strloadu_tolower_posix;
|
|
||||||
else
|
|
||||||
strloadu = __m128i_strloadu_tolower;
|
|
||||||
#else
|
#else
|
||||||
# define strloadu __m128i_strloadu
|
# define strloadu __m128i_strloadu
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user