mirror of
git://sourceware.org/git/glibc.git
synced 2024-12-21 04:31:04 +08:00
71ae86478e
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7 optimized one is used. For sizes greater than 255 bytes, two strategies are used: 1. If the constant is different from 0, the memory is written with Altivec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024 bytes. Unrolling the dcbz loop also shows a performance improvement, doubling throughput for sizes larger than 8192 bytes. |
||
---|---|---|
.. | ||
970 | ||
a2 | ||
bits | ||
cell | ||
fpu | ||
multiarch | ||
power4 | ||
power5 | ||
power5+ | ||
power6 | ||
power6x | ||
power7 | ||
power8 | ||
__longjmp-common.S | ||
__longjmp.S | ||
addmul_1.S | ||
backtrace.c | ||
bsd-_setjmp.S | ||
bsd-setjmp.S | ||
bzero.S | ||
configure | ||
configure.ac | ||
crti.S | ||
crtn.S | ||
dl-dtprocnum.h | ||
dl-irel.h | ||
dl-machine.c | ||
dl-machine.h | ||
dl-trampoline.S | ||
entry.h | ||
ffsll.c | ||
hp-timing.h | ||
Implies | ||
lshift.S | ||
Makefile | ||
memcpy.S | ||
memset.S | ||
mul_1.S | ||
ppc-mcount.S | ||
register-dump.h | ||
rtld-memset.c | ||
setjmp-common.S | ||
setjmp.S | ||
stackguard-macros.h | ||
start.S | ||
stpcpy.S | ||
strchr.S | ||
strcmp.S | ||
strcpy.S | ||
strlen.S | ||
strncmp.S | ||
submul_1.S | ||
sysdep.h | ||
tls-macros.h | ||
tst-audit.h |