mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-12 12:07:12 +08:00
5c74e47cd6
In this patch we take advantage of Haswell (HSW) memory bandwidth, reduce branch mispredictions by avoiding branch instructions, and force the destination to be aligned when using AVX and AVX2 instructions. The CPU2006 403.gcc benchmark indicates this patch improves performance by 26% to 59%. * sysdeps/x86_64/multiarch/Makefile: Add memset-avx2. * sysdeps/x86_64/multiarch/memset-avx2.S: New file. * sysdeps/x86_64/multiarch/memset.S: Likewise. * sysdeps/x86_64/multiarch/memset_chk.S: Likewise. * sysdeps/x86_64/multiarch/rtld-memset.S: Likewise.
35 lines
1.1 KiB
Makefile
35 lines
1.1 KiB
Makefile
# Additions that take effect only while $(subdir) is `csu'.
# Each line appends one entry to a list variable that is defined elsewhere:
# init-arch to aux, test-multiarch to tests, and ifunc-defines.sym to
# gen-as-const-headers.
ifeq ($(subdir),csu)
aux += init-arch
tests += test-multiarch
gen-as-const-headers += ifunc-defines.sym
endif

# Additions that take effect only while $(subdir) is `string'.
ifeq ($(subdir),string)

# Extra implementations appended to sysdep_routines.  The value is a single
# logical line: every physical line except the last one ends in a
# backslash, and nothing may be placed between the continued lines.
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
                   strcmp-sse2-unaligned strncmp-ssse3 \
                   memcmp-sse4 memcpy-ssse3 \
                   memcpy-sse2-unaligned mempcpy-ssse3 \
                   memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
                   memmove-ssse3-back strcasecmp_l-ssse3 \
                   strncase_l-ssse3 strcat-ssse3 strncat-ssse3 \
                   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
                   strcpy-sse2-unaligned strncpy-sse2-unaligned \
                   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
                   strcat-sse2-unaligned strncat-sse2-unaligned \
                   strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
                   memset-avx2

# Only when config-cflags-sse4 is `yes': append four more routines and add
# -msse4 to the CFLAGS-<file>.c variable of each corresponding C file.
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif
endif

# Additions that take effect only while $(subdir) is `wcsmbs'.
# Appends the wmemcmp and wcscpy variants listed below to sysdep_routines.
ifeq ($(subdir),wcsmbs)
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
endif