mirror of
git://sourceware.org/git/glibc.git
synced 2024-12-21 04:31:04 +08:00
71ae86478e
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7-optimized one is used. For sizes greater than 255 bytes, two strategies are used: 1. If the constant is different from 0, the memory is written with AltiVec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024. Unrolling the dcbz loop also improves performance, doubling throughput for sizes larger than 8192 bytes. |
||
---|---|---|
.. | ||
bits | ||
fpu | ||
nofpu | ||
nptl | ||
power4 | ||
power5+/fpu | ||
power6 | ||
power7/fpu | ||
powerpc32 | ||
powerpc64 | ||
soft-fp | ||
sys/platform | ||
abort-instr.h | ||
configure | ||
configure.ac | ||
dl-procinfo.c | ||
dl-procinfo.h | ||
dl-tls.h | ||
ffs.c | ||
fpu_control.h | ||
gccframe.h | ||
ifunc-sel.h | ||
Implies | ||
jmpbuf-offsets.h | ||
jmpbuf-unwind.h | ||
ldsodefs.h | ||
locale-defines.sym | ||
longjmp.c | ||
machine-gmon.h | ||
Makefile | ||
math-tests.h | ||
memusage.h | ||
mp_clz_tab.c | ||
novmx-longjmp.c | ||
novmx-sigjmp.c | ||
novmxsetjmp.h | ||
preconfigure | ||
rtld-global-offsets.sym | ||
sched_cpucount.c | ||
sigjmp.c | ||
sotruss-lib.c | ||
stackinfo.h | ||
strcat.c | ||
sysdep.h | ||
test-arith.c | ||
test-arithf.c | ||
test-gettimebase.c | ||
tls-macros.h | ||
tst-stack-align.h | ||
Versions |