mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-31 14:01:18 +08:00
Use strspn/strcspn/strpbrk ifunc in internal calls.
To make strtok faster and to improve performance in general, we need to make one additional change. The comment /* It doesn't make sense to send libc-internal strcspn calls through a PLT. The speedup we get from using SSE4.2 instruction is likely eaten away by the indirect call in the PLT. */ does not make sense at all, because nobody bothered to check it. The gap between these implementations is quite big: when the haystack is empty, the sse2 version is around 40 cycles slower because it needs to populate a lookup table, and the difference only increases with size. That is much bigger than the PLT slowdown, which is a few cycles. Even the benchtests show a gap (which may also be reversed by branch misprediction), and my internal benchmark showed one as well.
Benchtest results (columns: simple_strspn, stupid_strspn, __strspn_sse42, __strspn_sse2):
Length 0, alignment 0, acc len 6: 18.6562 35.2344 17.0469 61.6719
Length 6, alignment 0, acc len 6: 59.5469 72.5781 16.4219 73.625
This patch also handles strpbrk, which is implemented by including the x86_64/multiarch/strcspn.S file. * sysdeps/x86_64/multiarch/strspn.S: Remove plt indirection. * sysdeps/x86_64/multiarch/strcspn.S: Likewise.
This commit is contained in:
parent
7327b333e5
commit
0f4840be25
@ -1,3 +1,8 @@
|
||||
2015-05-12 Ondřej Bílka <neleai@seznam.cz>
|
||||
|
||||
* sysdeps/x86_64/multiarch/strcspn.S: Remove plt indirection.
|
||||
* sysdeps/x86_64/multiarch/strspn.S: Likewise.
|
||||
|
||||
2015-05-12 Roland McGrath <roland@hack.frob.com>
|
||||
|
||||
* posix/uname-values.h: New file.
|
||||
|
@ -65,14 +65,7 @@ END(STRCSPN)
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; .size STRCSPN_SSE2, .-STRCSPN_SSE2
|
||||
# undef libc_hidden_builtin_def
|
||||
/* It doesn't make sense to send libc-internal strcspn calls through a PLT.
|
||||
The speedup we get from using SSE4.2 instruction is likely eaten away
|
||||
by the indirect call in the PLT. */
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_SSE2
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_SSE4_SUPPORT */
|
||||
|
||||
#ifdef USE_AS_STRPBRK
|
||||
|
@ -50,12 +50,6 @@ END(strspn)
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; .size __strspn_sse2, .-__strspn_sse2
|
||||
# undef libc_hidden_builtin_def
|
||||
/* It doesn't make sense to send libc-internal strspn calls through a PLT.
|
||||
The speedup we get from using SSE4.2 instruction is likely eaten away
|
||||
by the indirect call in the PLT. */
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_strspn; __GI_strspn = __strspn_sse2
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_SSE4_SUPPORT */
|
||||
|
Loading…
x
Reference in New Issue
Block a user