mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-06 12:00:24 +08:00
88b57b8ed4
Implement x86-64 memmove with unaligned load/store and rep movsb. Support 16-byte, 32-byte and 64-byte vector register sizes. When size <= 8 times the vector register size, there is no check for address overlap between source and destination. Since the overhead of the overlap check is small when size > 8 times the vector register size, memcpy is an alias of memmove. A single file provides 2 implementations of memmove, one with rep movsb and the other without rep movsb. They share the same code when size is between 2 times the vector register size and REP_MOVSB_THRESHOLD, which is 2KB for 16-byte vector register size and scaled up for larger vector register sizes. Key features: 1. Use overlapping load and store to avoid branches. 2. For size <= 8 times the vector register size, load all sources into registers and store them together. 3. If there is no address overlap between source and destination, copy from both ends with 4 times the vector register size at a time. 4. If address of destination > address of source, backward copy 8 times the vector register size at a time. 5. Otherwise, forward copy 8 times the vector register size at a time. 6. Use rep movsb only for forward copy. Avoid slow backward rep movsb by falling back to backward copy 8 times the vector register size at a time. 7. Skip when address of destination == address of source. [BZ #19776] * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memmove-sse2-unaligned-erms, memmove-avx-unaligned-erms and memmove-avx512-unaligned-erms. 
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Test __memmove_chk_avx512_unaligned_2, __memmove_chk_avx512_unaligned_erms, __memmove_chk_avx_unaligned_2, __memmove_chk_avx_unaligned_erms, __memmove_chk_sse2_unaligned_2, __memmove_chk_sse2_unaligned_erms, __memmove_avx_unaligned_2, __memmove_avx_unaligned_erms, __memmove_avx512_unaligned_2, __memmove_avx512_unaligned_erms, __memmove_erms, __memmove_sse2_unaligned_2, __memmove_sse2_unaligned_erms, __memcpy_chk_avx512_unaligned_2, __memcpy_chk_avx512_unaligned_erms, __memcpy_chk_avx_unaligned_2, __memcpy_chk_avx_unaligned_erms, __memcpy_chk_sse2_unaligned_2, __memcpy_chk_sse2_unaligned_erms, __memcpy_avx_unaligned_2, __memcpy_avx_unaligned_erms, __memcpy_avx512_unaligned_2, __memcpy_avx512_unaligned_erms, __memcpy_sse2_unaligned_2, __memcpy_sse2_unaligned_erms, __memcpy_erms, __mempcpy_chk_avx512_unaligned_2, __mempcpy_chk_avx512_unaligned_erms, __mempcpy_chk_avx_unaligned_2, __mempcpy_chk_avx_unaligned_erms, __mempcpy_chk_sse2_unaligned_2, __mempcpy_chk_sse2_unaligned_erms, __mempcpy_avx512_unaligned_2, __mempcpy_avx512_unaligned_erms, __mempcpy_avx_unaligned_2, __mempcpy_avx_unaligned_erms, __mempcpy_sse2_unaligned_2, __mempcpy_sse2_unaligned_erms and __mempcpy_erms. * sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S: New file. * sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Likewise. |
||
---|---|---|
.. | ||
bcopy.S | ||
ifunc-defines.sym | ||
ifunc-impl-list.c | ||
Makefile | ||
memcmp-sse4.S | ||
memcmp-ssse3.S | ||
memcmp.S | ||
memcpy_chk.S | ||
memcpy-avx-unaligned.S | ||
memcpy-sse2-unaligned.S | ||
memcpy-ssse3-back.S | ||
memcpy-ssse3.S | ||
memcpy.S | ||
memmove_chk.c | ||
memmove-avx512-no-vzeroupper.S | ||
memmove-avx512-unaligned-erms.S | ||
memmove-avx-unaligned-erms.S | ||
memmove-avx-unaligned.S | ||
memmove-sse2-unaligned-erms.S | ||
memmove-ssse3-back.S | ||
memmove-ssse3.S | ||
memmove-vec-unaligned-erms.S | ||
memmove.c | ||
mempcpy_chk.S | ||
mempcpy.S | ||
memset_chk.S | ||
memset-avx2.S | ||
memset-avx512-no-vzeroupper.S | ||
memset.S | ||
sched_cpucount.c | ||
stpcpy-sse2-unaligned.S | ||
stpcpy-ssse3.S | ||
stpcpy.S | ||
stpncpy-c.c | ||
stpncpy-sse2-unaligned.S | ||
stpncpy-ssse3.S | ||
stpncpy.S | ||
strcasecmp_l-ssse3.S | ||
strcasecmp_l.S | ||
strcat-sse2-unaligned.S | ||
strcat-ssse3.S | ||
strcat.S | ||
strchr-sse2-no-bsf.S | ||
strchr.S | ||
strcmp-sse2-unaligned.S | ||
strcmp-sse42.S | ||
strcmp-ssse3.S | ||
strcmp.S | ||
strcpy-sse2-unaligned.S | ||
strcpy-ssse3.S | ||
strcpy.S | ||
strcspn-c.c | ||
strcspn.S | ||
strncase_l-ssse3.S | ||
strncase_l.S | ||
strncat-c.c | ||
strncat-sse2-unaligned.S | ||
strncat-ssse3.S | ||
strncat.S | ||
strncmp-ssse3.S | ||
strncmp.S | ||
strncpy-c.c | ||
strncpy-sse2-unaligned.S | ||
strncpy-ssse3.S | ||
strncpy.S | ||
strpbrk-c.c | ||
strpbrk.S | ||
strspn-c.c | ||
strspn.S | ||
strstr-sse2-unaligned.S | ||
strstr.c | ||
test-multiarch.c | ||
varshift.c | ||
varshift.h | ||
wcscpy-c.c | ||
wcscpy-ssse3.S | ||
wcscpy.S | ||
wmemcmp-c.c | ||
wmemcmp-sse4.S | ||
wmemcmp-ssse3.S | ||
wmemcmp.S |