mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-27 03:41:23 +08:00
Optimize x86-64 strlen for SSE4.2.
The SSE4.2 implementation is used in the DSO only. The patch also adds some infrastructure to be used in similar code later on.
This commit is contained in:
parent
443caceb35
commit
3ab2d57a4d
@ -1,5 +1,11 @@
|
||||
2009-06-05 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* sysdeps/x86_64/multiarch/strlen.S: New file.
|
||||
* sysdeps/x86_64/multiarch/ifunc-defines.sym: New file.
|
||||
* sysdeps/x86_64/multiarch/Makefile: Add rule to build ifunc-defines.h.
|
||||
* sysdeps/x86_64/multiarch/init-arch.h: Name structure with register
|
||||
content.
|
||||
|
||||
* csu/elf-init.c: Only compile in IFUNC functionality if USE_MULTIARCH
|
||||
is defined.
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
ifeq ($(subdir),csu)
|
||||
aux += init-arch
|
||||
gen-as-const-headers += ifunc-defines.sym
|
||||
endif
|
||||
|
15
sysdeps/x86_64/multiarch/ifunc-defines.sym
Normal file
15
sysdeps/x86_64/multiarch/ifunc-defines.sym
Normal file
@ -0,0 +1,15 @@
|
||||
#include "init-arch.h"
|
||||
#include <stddef.h>
|
||||
|
||||
--
|
||||
|
||||
CPU_FEATURES_SIZE sizeof (struct cpu_features)
|
||||
KIND_OFFSET offsetof (struct cpu_features, kind)
|
||||
CPUID_OFFSET offsetof (struct cpu_features, cpuid)
|
||||
CPUID_SIZE sizeof (struct cpuid_registers)
|
||||
CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax)
|
||||
CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx)
|
||||
CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx)
|
||||
CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx)
|
||||
|
||||
COMMON_CPUID_INDEX_1
|
@ -35,7 +35,7 @@ extern struct cpu_features
|
||||
arch_kind_other
|
||||
} kind;
|
||||
int max_cpuid;
|
||||
struct
|
||||
struct cpuid_registers
|
||||
{
|
||||
unsigned int eax;
|
||||
unsigned int ebx;
|
||||
|
87
sysdeps/x86_64/multiarch/strlen.S
Normal file
87
sysdeps/x86_64/multiarch/strlen.S
Normal file
@ -0,0 +1,87 @@
|
||||
/* strlen(str) -- determine the length of the string STR.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <ifunc-defines.h>
|
||||
|
||||
|
||||
/* Define multiple versions only for the definition in libc and for
|
||||
the DSO. In static binaries we need strlen before the initialization
|
||||
happened. */
|
||||
#if defined SHARED && !defined NOT_IN_libc
|
||||
.text
|
||||
/* IFUNC resolver for strlen.

   ABI:  System V AMD64.
   Out:  %rax = address of the implementation to bind strlen to:
	 __strlen_sse42 when bit 20 of the CPUID leaf-1 ECX word cached in
	 __cpu_features is set (the SSE4.2 feature bit), otherwise
	 __strlen_sse2.
   If the `kind' field of __cpu_features is still zero,
   __init_cpu_features is called first to fill the structure in.  */
ENTRY(strlen)
	.type	strlen, @gnu_indirect_function
	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
	jne	1f
	/* A function is entered with %rsp == 8 (mod 16) because the caller's
	   `call' pushed the return address.  The ABI requires %rsp to be
	   16-byte aligned at every call instruction, so drop the stack by one
	   slot around the call.
	   NOTE(review): cfi_adjust_cfa_offset is assumed to be provided by
	   <sysdep.h> together with ENTRY/END -- confirm.  */
	subq	$8, %rsp
	cfi_adjust_cfa_offset (8)
	call	__init_cpu_features
	addq	$8, %rsp
	cfi_adjust_cfa_offset (-8)
	/* Default to the SSE2 version; override it if SSE4.2 is available.  */
1:	leaq	__strlen_sse2(%rip), %rax
	testl	$(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
	jz	2f
	leaq	__strlen_sse42(%rip), %rax
2:	ret
END(strlen)
|
||||
|
||||
|
||||
/* size_t __strlen_sse42 (const char *s)

   SSE4.2 variant of strlen, selected by the strlen IFUNC resolver.
   ABI:      System V AMD64.
   In:       %rdi = s
   Out:      %rax = number of bytes before the first NUL byte
   Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %xmm1, %xmm2, flags

   The string is examined in 16-byte aligned chunks.  The first chunk
   starts at s rounded down to a 16-byte boundary, so it may contain
   bytes that precede s; those are masked out of the NUL search.  */
.type __strlen_sse42, @function
|
||||
__strlen_sse42:
|
||||
/* xmm2 = 0: comparison pattern for the first chunk.  */
pxor %xmm2, %xmm2
|
||||
/* Keep two copies of s: %rcx becomes the misalignment, %r8 stays as s
   for the final subtraction.  */
movq %rdi, %rcx
|
||||
movq %rdi, %r8
|
||||
/* %rdi = s rounded down to a 16-byte boundary.  */
andq $~15, %rdi
|
||||
/* xmm1 = 0 as well; it is the register operand of pcmpistri in the loop.  */
movdqa %xmm2, %xmm1
|
||||
/* Each byte of xmm2 becomes 0xff where the aligned chunk holds a NUL.  */
pcmpeqb (%rdi), %xmm2
|
||||
/* %esi = all ones; shifted below into a mask that drops bytes before s.  */
orl $0xffffffff, %esi
|
||||
/* %rcx = s - aligned base, i.e. the offset of s inside the chunk (0..15).  */
subq %rdi, %rcx
|
||||
/* Clear the low %cl bits of the mask: one bit per byte preceding s.  */
shll %cl, %esi
|
||||
/* %edx = one bit per byte of the chunk, set where the byte is NUL.  */
pmovmskb %xmm2, %edx
|
||||
/* Ignore NUL bytes located before s.  */
andl %esi, %edx
|
||||
/* A NUL at or after s in the first chunk: finish at label 1.  */
jnz 1f
|
||||
|
||||
/* Main loop.  pcmpistri sets ZF when the 16-byte memory operand contains
   a NUL byte and leaves an index in %ecx; with control byte 0x08 and an
   all-zero xmm1 the code below uses that index as the position of the
   NUL within the chunk.  */
2: pcmpistri $0x08, 16(%rdi), %xmm1
|
||||
/* Advance to the chunk just examined; leaq leaves the flags from
   pcmpistri untouched for the jnz below.  */
leaq 16(%rdi), %rdi
|
||||
/* ZF clear: no NUL in this chunk, keep scanning.  */
jnz 2b
|
||||
|
||||
/* Length = (chunk address + index of the NUL) - s.  */
leaq (%rdi,%rcx), %rax
|
||||
subq %r8, %rax
|
||||
ret
|
||||
|
||||
/* NUL found in the first chunk: %eax = index of the lowest set bit of
   the mask, which is the offset of the NUL from the aligned base.  */
1: bsfl %edx, %eax
|
||||
/* Length = (aligned base + offset of the NUL) - s.  */
leaq (%rdi,%rax), %rax
|
||||
subq %r8, %rax
|
||||
ret
|
||||
.size __strlen_sse42, .-__strlen_sse42
|
||||
|
||||
|
||||
/* Replace ENTRY and END with versions that ignore their NAME argument and
   always emit the symbol __strlen_sse2.  The file included after the
   #endif ("../strlen.S") is presumably wrapped in ENTRY/END, so its code
   ends up as the local function __strlen_sse2 that the resolver refers
   to -- verify against ../strlen.S.  */
# undef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type __strlen_sse2, @function; __strlen_sse2:
|
||||
# undef END
|
||||
# define END(name) \
|
||||
.size __strlen_sse2, .-__strlen_sse2
|
||||
# undef libc_hidden_builtin_def
|
||||
/* It doesn't make sense to send libc-internal strlen calls through a PLT.
|
||||
The speedup we get from using SSE4.2 instructions is likely eaten away
|
||||
by the indirect call in the PLT. */
|
||||
/* Hence define the global symbol __GI_strlen directly as an alias of
   __strlen_sse2, whatever NAME is passed in.  */
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_strlen; __GI_strlen = __strlen_sse2
|
||||
#endif
|
||||
|
||||
#include "../strlen.S"
|
Loading…
Reference in New Issue
Block a user