mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-06 12:00:24 +08:00
104c7b1967
No bug.

This commit adds a new implementation for EVEX memchr that is not safe for RTM because it uses vzeroupper. The benefit is that by using ymm0-ymm15 it can use vpcmpeq and vpternlogd in the 4x loop, which is faster than the RTM-safe version, which cannot use vpcmpeq because there is no EVEX encoding for the instruction. All parts of the implementation aside from the 4x loop are the same for the two versions, and the optimization is only relevant for large sizes.

Tigerlake:

| size | algn | Pos | Cur T | New T | Win | Dif | |
|---|---|---|---|---|---|---|---|
| 512 | 6 | 192 | 9.2 | 9.04 | no-RTM | 0.16 | |
| 512 | 7 | 224 | 9.19 | 8.98 | no-RTM | 0.21 | |
| 2048 | 0 | 256 | 10.74 | 10.54 | no-RTM | 0.2 | |
| 2048 | 0 | 512 | 14.81 | 14.87 | RTM | 0.06 | |
| 2048 | 0 | 1024 | 22.97 | 22.57 | no-RTM | 0.4 | |
| 2048 | 0 | 2048 | 37.49 | 34.51 | no-RTM | 2.98 | <-- |

Icelake:

| size | algn | Pos | Cur T | New T | Win | Dif | |
|---|---|---|---|---|---|---|---|
| 512 | 6 | 192 | 7.6 | 7.3 | no-RTM | 0.3 | |
| 512 | 7 | 224 | 7.63 | 7.27 | no-RTM | 0.36 | |
| 2048 | 0 | 256 | 8.48 | 8.38 | no-RTM | 0.1 | |
| 2048 | 0 | 512 | 11.57 | 11.42 | no-RTM | 0.15 | |
| 2048 | 0 | 1024 | 17.92 | 17.38 | no-RTM | 0.54 | |
| 2048 | 0 | 2048 | 30.37 | 27.34 | no-RTM | 3.03 | <-- |

test-memchr, test-wmemchr, and test-rawmemchr are all passing.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com> |
||
---|---|---|
.. | ||
64 | ||
fpu | ||
multiarch | ||
nptl | ||
x32 | ||
____longjmp_chk.S | ||
__longjmp.S | ||
_mcount.S | ||
abort-instr.h | ||
add_n.S | ||
addmul_1.S | ||
bsd-_setjmp.S | ||
bsd-setjmp.S | ||
bzero.S | ||
configure | ||
configure.ac | ||
crti.S | ||
crtn.S | ||
dl-hwcaps-subdirs.c | ||
dl-irel.h | ||
dl-machine.h | ||
dl-procinfo.c | ||
dl-runtime.h | ||
dl-tls.c | ||
dl-tls.h | ||
dl-tlsdesc.h | ||
dl-tlsdesc.S | ||
dl-trampoline.h | ||
dl-trampoline.S | ||
ffs.c | ||
ffsll.c | ||
htonl.S | ||
ifuncmain8.c | ||
ifuncmod8.c | ||
Implies | ||
isa.h | ||
jmpbuf-offsets.h | ||
jmpbuf-unwind.h | ||
l10nflist.c | ||
link-defines.sym | ||
locale-defines.sym | ||
localplt.data | ||
lshift.S | ||
machine-gmon.h | ||
Makefile | ||
memchr.S | ||
memcmp.S | ||
memcpy_chk.S | ||
memcpy.S | ||
memmove_chk.S | ||
memmove.S | ||
mempcpy_chk.S | ||
mempcpy.S | ||
memrchr.S | ||
memset_chk.S | ||
memset.S | ||
memusage.h | ||
mp_clz_tab.c | ||
mul_1.S | ||
preconfigure | ||
preconfigure.ac | ||
rawmemchr.S | ||
rshift.S | ||
rtld-offsets.sym | ||
setjmp.S | ||
stackguard-macros.h | ||
stackinfo.h | ||
start.S | ||
stpcpy.S | ||
strcasecmp_l-nonascii.c | ||
strcasecmp_l.S | ||
strcasecmp.S | ||
strcat.S | ||
strchr.S | ||
strchrnul.S | ||
strcmp.S | ||
strcpy.S | ||
strcspn.S | ||
strlen.S | ||
strncase_l-nonascii.c | ||
strncase_l.S | ||
strncase.S | ||
strncmp.S | ||
strnlen.S | ||
strpbrk.S | ||
strrchr.S | ||
strspn.S | ||
sub_n.S | ||
submul_1.S | ||
sysdep.h | ||
tls_get_addr.S | ||
tls-macros.h | ||
tlsdesc.c | ||
tlsdesc.sym | ||
tst-audit3.c | ||
tst-audit4-aux.c | ||
tst-audit4.c | ||
tst-audit5.c | ||
tst-audit6.c | ||
tst-audit7.c | ||
tst-audit10-aux.c | ||
tst-audit10.c | ||
tst-audit.h | ||
tst-auditmod3a.c | ||
tst-auditmod3b.c | ||
tst-auditmod4a.c | ||
tst-auditmod4b.c | ||
tst-auditmod5a.c | ||
tst-auditmod5b.c | ||
tst-auditmod6a.c | ||
tst-auditmod6b.c | ||
tst-auditmod6c.c | ||
tst-auditmod7a.c | ||
tst-auditmod7b.c | ||
tst-auditmod10a.c | ||
tst-auditmod10b.c | ||
tst-avx512-aux.c | ||
tst-avx512.c | ||
tst-avx512mod.c | ||
tst-avx-aux.c | ||
tst-avx.c | ||
tst-avxmod.c | ||
tst-glibc-hwcaps.c | ||
tst-mallocalign1.c | ||
tst-platform-1.c | ||
tst-platformmod-1.c | ||
tst-platformmod-2.c | ||
tst-quad1.c | ||
tst-quad1pie.c | ||
tst-quad2.c | ||
tst-quad2pie.c | ||
tst-quadmod1.S | ||
tst-quadmod1pie.S | ||
tst-quadmod2.S | ||
tst-quadmod2pie.S | ||
tst-split-dynreloc.c | ||
tst-split-dynreloc.lds | ||
tst-sse.c | ||
tst-ssemod.c | ||
tst-stack-align.h | ||
tst-x86_64-1.c | ||
tst-x86_64mod-1.c | ||
tst-x86-64-tls-1.c | ||
Versions | ||
wcschr.S | ||
wcscmp.S | ||
wcslen.S | ||
wcsrchr.S | ||
wmemset_chk.S | ||
wmemset.S | ||
wordcopy.c |