mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
x86: Remove strspn-sse2.S and use the generic implementation
The generic implementation is faster: geometric_mean(N=20) of all benchmarks, New / Original: 0.710. All string/memory tests pass. Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
parent
6533585352
commit
9c8a6ad620
@ -1,4 +1,4 @@
|
||||
/* strspn optimized with SSE2.
|
||||
/* strspn.
|
||||
Copyright (C) 2017-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -19,10 +19,10 @@
|
||||
#if IS_IN (libc)
|
||||
|
||||
# include <sysdep.h>
|
||||
# define strspn __strspn_sse2
|
||||
# define STRSPN __strspn_sse2
|
||||
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(strspn)
|
||||
# define libc_hidden_builtin_def(STRSPN)
|
||||
#endif
|
||||
|
||||
#include <sysdeps/x86_64/strspn.S>
|
||||
#include <string/strspn.c>
|
@ -1,112 +0,0 @@
|
||||
/* strspn (str, ss) -- Return the length of the initial segment of STR
|
||||
which contains only characters from SS.
|
||||
For AMD x86-64.
|
||||
Copyright (C) 1994-2022 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
/* strspn (str, ss): return in %rax the length of the initial segment of
   STR that consists only of bytes that occur in SS (the skipset).
   In:    %rdi = STR, %rsi = SS.
   Out:   %rax = number of leading bytes of STR found in SS.
   Stack: a 256-byte table is reserved below the return address and
          released before returning; table[c] == c when c is in SS,
          0 otherwise.
   Registers written: %rax, %rcx, %rdx, %rdi, %r8 and the flags.  */
ENTRY (strspn)
|
||||
|
||||
movq %rdi, %rdx /* Save SRC; needed at the end to compute the length. */
|
||||
|
||||
/* First we create a table with flags for all possible characters.
|
||||
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
||||
supported by the C string functions we have 256 characters.
|
||||
Before inserting marks for the characters of SS (the skipset) we
|
||||
clear the whole table. */
|
||||
movq %rdi, %r8 /* Save value (%r8 is not read again in this function). */
|
||||
subq $256, %rsp /* Make space for 256 bytes. */
|
||||
cfi_adjust_cfa_offset(256)
|
||||
movl $32, %ecx /* 32*8 bytes = 256 bytes. */
|
||||
movq %rsp, %rdi /* stosq destination = start of the table. */
|
||||
xorl %eax, %eax /* We store 0s. */
|
||||
cld /* Clear the direction flag so stosq advances %rdi upwards. */
|
||||
rep
|
||||
stosq
|
||||
|
||||
|
||||
movq %rsi, %rax /* %rax = SS, the skipset pointer. */
|
||||
|
||||
/* For understanding the following code remember that %rcx == 0 now
|
||||
(the rep stosq above counted it down). Although the following instructions only modify %cl we always
|
||||
have a correct zero-extended 64-bit value in %rcx. */
|
||||
|
||||
.p2align 4
|
||||
L(2): movb (%rax), %cl /* get byte from skipset */
|
||||
testb %cl, %cl /* is NUL char? */
|
||||
jz L(1) /* yes => start compare loop */
|
||||
movb %cl, (%rsp,%rcx) /* table[c] = c marks c as a skipset member */
|
||||
|
||||
movb 1(%rax), %cl /* get byte from skipset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L(1) /* yes => start compare loop */
|
||||
movb %cl, (%rsp,%rcx) /* table[c] = c marks c as a skipset member */
|
||||
|
||||
movb 2(%rax), %cl /* get byte from skipset */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jz L(1) /* yes => start compare loop */
|
||||
movb %cl, (%rsp,%rcx) /* table[c] = c marks c as a skipset member */
|
||||
|
||||
movb 3(%rax), %cl /* get byte from skipset */
|
||||
addq $4, %rax /* advance skipset pointer by the four bytes handled */
|
||||
movb %cl, (%rsp,%rcx) /* stored before the NUL test; for NUL this writes 0 to table[0], which stays zero */
|
||||
testb $0xff, %cl /* is NUL char? */
|
||||
jnz L(2) /* no => process next four bytes of skipset */
|
||||
|
||||
L(1): leaq -4(%rdx), %rax /* %rax = SRC - 4; the loop below adds 4 before its first load */
|
||||
|
||||
/* We use a neat trick for the following loop. Normally we would
|
||||
have to test for two termination conditions
|
||||
1. a character not in the skipset was found
|
||||
and
|
||||
2. the end of the string was found
|
||||
But as a sign that the character is in the skipset we store its
|
||||
value in the table, and every other entry is zero. The value of NUL is NUL, so the testb below
|
||||
yields zero, and the loop terminates, in both cases. */
|
||||
|
||||
.p2align 4
|
||||
L(3): addq $4, %rax /* adjust pointer for full loop round */
|
||||
|
||||
movb (%rax), %cl /* get byte from string */
|
||||
testb %cl, (%rsp,%rcx) /* c & table[c]: non-zero only for a non-NUL skipset member */
|
||||
jz L(4) /* no => return; %rax already points at this byte */
|
||||
|
||||
movb 1(%rax), %cl /* get byte from string */
|
||||
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
||||
jz L(5) /* no => return via one incq (+1) */
|
||||
|
||||
movb 2(%rax), %cl /* get byte from string */
|
||||
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
||||
jz L(6) /* no => return via two incq (+2) */
|
||||
|
||||
movb 3(%rax), %cl /* get byte from string */
|
||||
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
||||
jnz L(3) /* yes => start loop again */
|
||||
|
||||
incq %rax /* mismatch at offset 3: fall through all three incq (+3) */
|
||||
L(6): incq %rax /* entry for a mismatch at offset 2 */
|
||||
L(5): incq %rax /* entry for a mismatch at offset 1 */
|
||||
|
||||
L(4): addq $256, %rsp /* release the 256-byte skipset table */
|
||||
cfi_adjust_cfa_offset(-256)
|
||||
subq %rdx, %rax /* we have to return the number of valid
|
||||
characters, so compute distance to first
|
||||
non-valid character */
|
||||
ret
|
||||
END (strspn)
|
||||
libc_hidden_builtin_def (strspn)
|
Loading…
Reference in New Issue
Block a user