Clean up x86-64 strcasestr

Actually describe in the C code what is going on.
This commit is contained in:
Ulrich Drepper 2011-10-28 18:18:04 -04:00
parent a5b81e1fb7
commit fd52bc6dc4
2 changed files with 17 additions and 13 deletions

View File

@ -1,5 +1,10 @@
2011-10-28 Ulrich Drepper <drepper@gmail.com>
* sysdeps/x86_64/multiarch/strstr.c (__m128i_strloadu_tolower): Take
the three constants needed as parameters. Drop the others.
(strcasestr_sse42): Load uclow, uchigh, and lcqword and pass to
__m128i_strloadu_tolower.
* sysdeps/x86_64/fpu/multiarch/Makefile: Don't build brandred-avx.c,
doasin-avx.c, dosincos-avx.c, e_asin-avx.c, mpatan-avx.c,
mpatan2-avx.c, mpsqrt-avx.c, mptan-avx.c, sincos32-avx.c.

View File

@ -1,5 +1,5 @@
/* strstr with SSE4.2 intrinsics
Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@ -106,24 +106,22 @@ __m128i_strloadu (const unsigned char * p)
#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
locale. */
locale and others which have single-byte letters only in the ASCII
range. */
static inline __m128i
__m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc,
__m128i u2ldelta)
__m128i_strloadu_tolower (const unsigned char *p, __m128i uclow,
__m128i uchigh, __m128i lcqword)
{
__m128i frag = __m128i_strloadu (p);
#define UCLOW 0x4040404040404040ULL
#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
#define LCQWORD 0x2020202020202020ULL
/* Compare if 'Z' + 1 > bytes. Inverted way to get a mask for byte <= 'Z'. */
__m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);
__m128i r2 = _mm_cmpgt_epi8 (uchigh, frag);
/* Compare if bytes are > 'A' - 1. */
__m128i r1 = _mm_cmpgt_epi8 (frag, _mm_set1_epi64x (UCLOW));
__m128i r1 = _mm_cmpgt_epi8 (frag, uclow);
/* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */
__m128i mask = _mm_and_si128 (r2, r1);
/* Set the lowercase bit (0x20) in the bytes where the above mask == ff. */
return _mm_or_si128 (frag, _mm_and_si128 (mask, _mm_set1_epi64x (LCQWORD)));
return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword));
}
#endif
@ -190,9 +188,10 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
!= 0, 0))
return __strcasestr_sse42_nonascii (s1, s2);
const __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41);
const __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0);
# define strloadu(p) __m128i_strloadu_tolower (p, rangeuc, u2ldelta)
const __m128i uclow = _mm_set1_epi8 (0x40);
const __m128i uchigh = _mm_set1_epi8 (0x5b);
const __m128i lcqword = _mm_set1_epi8 (0x20);
# define strloadu(p) __m128i_strloadu_tolower (p, uclow, uchigh, lcqword)
# else
# define strloadu __m128i_strloadu_tolower
# endif