mirror of
git://sourceware.org/git/glibc.git
synced 2025-04-06 14:10:30 +08:00
x86: Add avx2 optimized functions for the wchar_t strcpy family
Implemented: wcscat-avx2 (+ 744 bytes) wcscpy-avx2 (+ 539 bytes) wcpcpy-avx2 (+ 577 bytes) wcsncpy-avx2 (+1108 bytes) wcpncpy-avx2 (+1214 bytes) wcsncat-avx2 (+1085 bytes) Performance Changes: Times are from N = 10 runs of the benchmark suite and are reported as geometric mean of all ratios of New Implementation / Best Old Implementation. Best Old Implementation was determined with the highest ISA implementation. wcscat-avx2 -> 0.975 wcscpy-avx2 -> 0.591 wcpcpy-avx2 -> 0.698 wcsncpy-avx2 -> 0.730 wcpncpy-avx2 -> 0.711 wcsncat-avx2 -> 0.954 Code Size Changes: This change increases the size of libc.so by ~5.5kb. For reference the patch optimizing the normal strcpy family functions decreases libc.so by ~5.2kb. Full check passes on x86-64 and build succeeds for all ISA levels w/ and w/o multiarch.
This commit is contained in:
parent
64b8b6516b
commit
52cf11004e
@ -131,10 +131,13 @@ endif
|
||||
|
||||
ifeq ($(subdir),wcsmbs)
|
||||
sysdep_routines += \
|
||||
wcpcpy-avx2 \
|
||||
wcpcpy-evex \
|
||||
wcpcpy-generic \
|
||||
wcpncpy-avx2 \
|
||||
wcpncpy-evex \
|
||||
wcpncpy-generic \
|
||||
wcscat-avx2 \
|
||||
wcscat-evex \
|
||||
wcscat-generic \
|
||||
wcschr-avx2 \
|
||||
@ -146,6 +149,7 @@ sysdep_routines += \
|
||||
wcscmp-avx2-rtm \
|
||||
wcscmp-evex \
|
||||
wcscmp-sse2 \
|
||||
wcscpy-avx2 \
|
||||
wcscpy-evex \
|
||||
wcscpy-generic \
|
||||
wcscpy-ssse3 \
|
||||
@ -155,11 +159,13 @@ sysdep_routines += \
|
||||
wcslen-evex512 \
|
||||
wcslen-sse2 \
|
||||
wcslen-sse4_1 \
|
||||
wcsncat-avx2 \
|
||||
wcsncat-evex \
|
||||
wcsncat-generic \
|
||||
wcsncmp-avx2 \
|
||||
wcsncmp-avx2-rtm \
|
||||
wcsncmp-evex \
|
||||
wcsncpy-avx2 \
|
||||
wcsncpy-evex \
|
||||
wcsncpy-generic \
|
||||
wcsnlen-avx2 \
|
||||
|
@ -907,6 +907,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscpy_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscpy,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscpy_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcscpy,
|
||||
CPU_FEATURE_USABLE (SSSE3),
|
||||
__wcscpy_ssse3)
|
||||
X86_IFUNC_IMPL_ADD_V1 (array, i, wcscpy,
|
||||
@ -920,7 +924,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncpy_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcpncpy,
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncpy,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncpy_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
|
||||
1,
|
||||
__wcsncpy_generic))
|
||||
|
||||
@ -932,6 +940,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcpcpy_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcpcpy,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcpcpy_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcpcpy,
|
||||
1,
|
||||
__wcpcpy_generic))
|
||||
|
||||
@ -942,7 +954,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (AVX512BW)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcpncpy_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncpy,
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcpncpy,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcpncpy_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
|
||||
1,
|
||||
__wcpncpy_generic))
|
||||
|
||||
@ -954,6 +970,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscat_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcscat,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcscat_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcscat,
|
||||
1,
|
||||
__wcscat_generic))
|
||||
|
||||
@ -965,6 +985,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncat_evex)
|
||||
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncat,
|
||||
(CPU_FEATURE_USABLE (AVX2)
|
||||
&& CPU_FEATURE_USABLE (BMI2)),
|
||||
__wcsncat_avx2)
|
||||
X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncat,
|
||||
1,
|
||||
__wcsncat_generic))
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
|
||||
|
||||
static inline void *
|
||||
@ -42,6 +44,11 @@ IFUNC_SELECTOR (void)
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
|
||||
Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
|
||||
}
|
||||
|
||||
return OPTIMIZE (GENERIC);
|
||||
|
8
sysdeps/x86_64/multiarch/wcpcpy-avx2.S
Normal file
8
sysdeps/x86_64/multiarch/wcpcpy-avx2.S
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef WCPCPY
|
||||
# define WCPCPY __wcpcpy_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_STPCPY
|
||||
#define USE_AS_WCSCPY
|
||||
#define STRCPY WCPCPY
|
||||
#include "strcpy-avx2.S"
|
@ -19,7 +19,7 @@
|
||||
/* We always need to build this implementation as strspn-sse4 needs to
|
||||
be able to fallback to it. */
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCPCPY __wcpcpy_generic
|
||||
# include <wcsmbs/wcpcpy.c>
|
||||
|
8
sysdeps/x86_64/multiarch/wcpncpy-avx2.S
Normal file
8
sysdeps/x86_64/multiarch/wcpncpy-avx2.S
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef WCPNCPY
|
||||
# define WCPNCPY __wcpncpy_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_WCSCPY
|
||||
#define USE_AS_STPCPY
|
||||
#define STRNCPY WCPNCPY
|
||||
#include "strncpy-avx2.S"
|
@ -19,7 +19,7 @@
|
||||
/* We always need to build this implementation as strspn-sse4 needs to
|
||||
be able to fallback to it. */
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCPNCPY __wcpncpy_generic
|
||||
# include <wcsmbs/wcpncpy.c>
|
||||
|
10
sysdeps/x86_64/multiarch/wcscat-avx2.S
Normal file
10
sysdeps/x86_64/multiarch/wcscat-avx2.S
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef WCSCAT
|
||||
# define WCSCAT __wcscat_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_WCSCPY
|
||||
#define USE_AS_STRCAT
|
||||
|
||||
#define STRCPY WCSCAT
|
||||
|
||||
#include "strcpy-avx2.S"
|
@ -19,7 +19,7 @@
|
||||
/* We always need to build this implementation as strspn-sse4 needs to
|
||||
be able to fallback to it. */
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCSCAT __wcscat_generic
|
||||
# include <wcsmbs/wcscat.c>
|
||||
|
7
sysdeps/x86_64/multiarch/wcscpy-avx2.S
Normal file
7
sysdeps/x86_64/multiarch/wcscpy-avx2.S
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef WCSCPY
|
||||
# define WCSCPY __wcscpy_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_WCSCPY
|
||||
#define STRCPY WCSCPY
|
||||
#include "strcpy-avx2.S"
|
@ -18,7 +18,7 @@
|
||||
|
||||
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCSCPY __wcscpy_generic
|
||||
# include <wcsmbs/wcscpy.c>
|
||||
|
@ -28,6 +28,8 @@
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
||||
|
||||
extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
|
||||
@ -44,6 +46,9 @@ IFUNC_SELECTOR (void)
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||||
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||||
return OPTIMIZE (evex);
|
||||
|
||||
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER, !))
|
||||
return OPTIMIZE (avx2);
|
||||
}
|
||||
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
|
||||
|
9
sysdeps/x86_64/multiarch/wcsncat-avx2.S
Normal file
9
sysdeps/x86_64/multiarch/wcsncat-avx2.S
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef WCSNCAT
|
||||
# define WCSNCAT __wcsncat_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_WCSCPY
|
||||
#define USE_AS_STRCAT
|
||||
|
||||
#define STRNCAT WCSNCAT
|
||||
#include "strncat-avx2.S"
|
@ -19,7 +19,7 @@
|
||||
/* We always need to build this implementation as strspn-sse4 needs to
|
||||
be able to fallback to it. */
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCSNCAT __wcsncat_generic
|
||||
# include <wcsmbs/wcsncat.c>
|
||||
|
7
sysdeps/x86_64/multiarch/wcsncpy-avx2.S
Normal file
7
sysdeps/x86_64/multiarch/wcsncpy-avx2.S
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef WCSNCPY
|
||||
# define WCSNCPY __wcsncpy_avx2
|
||||
#endif
|
||||
|
||||
#define USE_AS_WCSCPY
|
||||
#define STRNCPY WCSNCPY
|
||||
#include "strncpy-avx2.S"
|
@ -19,7 +19,7 @@
|
||||
/* We always need to build this implementation as strspn-sse4 needs to
|
||||
be able to fallback to it. */
|
||||
#include <isa-level.h>
|
||||
#if ISA_SHOULD_BUILD (3)
|
||||
#if ISA_SHOULD_BUILD (2)
|
||||
|
||||
# define WCSNCPY __wcsncpy_generic
|
||||
# include <wcsmbs/wcsncpy.c>
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 3
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
|
||||
# include <wcsmbs/wcpcpy.c>
|
||||
|
||||
|
@ -24,11 +24,12 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 4
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
# define WCPCPY __wcpcpy
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcpcpy-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcpcpy-avx2.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 3
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
|
||||
# include <wcsmbs/wcpncpy.c>
|
||||
|
||||
|
@ -24,11 +24,12 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 4
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
# define WCPNCPY __wcpncpy
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcpncpy-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcpncpy-avx2.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 3
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
|
||||
# include <wcsmbs/wcscat.c>
|
||||
|
||||
|
@ -24,11 +24,12 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 4
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
# define WCSCAT __wcscat
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcscat-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcscat-avx2.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
@ -29,6 +29,7 @@
|
||||
# define WCSCPY __wcscpy
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcscpy-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcscpy-avx2.S"
|
||||
# define DEFAULT_IMPL_V2 "multiarch/wcscpy-ssse3.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 3
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
|
||||
# include <wcsmbs/wcsncat.c>
|
||||
|
||||
|
@ -24,11 +24,12 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 4
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
# define WCSNCAT wcsncat
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcsncat-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcsncat-avx2.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 3
|
||||
#if MINIMUM_X86_ISA_LEVEL <= 2
|
||||
|
||||
# include <wcsmbs/wcsncpy.c>
|
||||
|
||||
|
@ -24,11 +24,12 @@
|
||||
|
||||
#include <isa-level.h>
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 4
|
||||
#if MINIMUM_X86_ISA_LEVEL >= 3
|
||||
|
||||
# define WCSNCPY __wcsncpy
|
||||
|
||||
# define DEFAULT_IMPL_V4 "multiarch/wcsncpy-evex.S"
|
||||
# define DEFAULT_IMPL_V3 "multiarch/wcsncpy-avx2.S"
|
||||
/* isa-default-impl.h expects DEFAULT_IMPL_V1 to be defined but it
|
||||
should never be used from here. */
|
||||
# define DEFAULT_IMPL_V1 "ERROR -- Invalid ISA IMPL"
|
||||
|
Loading…
x
Reference in New Issue
Block a user