X86-64: Prepare memmove-vec-unaligned-erms.S
Prepare memmove-vec-unaligned-erms.S to make the SSE2 version the default
memcpy, mempcpy and memmove.

        * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
        (MEMCPY_SYMBOL): New.
        (MEMPCPY_SYMBOL): Likewise.
        (MEMMOVE_CHK_SYMBOL): Likewise.
        Replace MEMMOVE_SYMBOL with MEMMOVE_CHK_SYMBOL on __mempcpy_chk
        symbols.  Replace MEMMOVE_SYMBOL with MEMPCPY_SYMBOL on
        __mempcpy symbols.  Provide alias for __memcpy_chk in libc.a.
        Provide alias for memcpy in libc.a and ld.so.
commit a7d1c51482
parent 4af1bb06c5
Changed files:
        ChangeLog (+11 lines)
        sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2016-04-06  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+	(MEMCPY_SYMBOL): New.
+	(MEMPCPY_SYMBOL): Likewise.
+	(MEMMOVE_CHK_SYMBOL): Likewise.
+	Replace MEMMOVE_SYMBOL with MEMMOVE_CHK_SYMBOL on __mempcpy_chk
+	symbols.  Replace MEMMOVE_SYMBOL with MEMPCPY_SYMBOL on
+	__mempcpy symbols.  Provide alias for __memcpy_chk in libc.a.
+	Provide alias for memcpy in libc.a and ld.so.
+
 2016-04-06  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
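For reference, the __mempcpy symbols named in the ChangeLog behave like memcpy except for the return value: mempcpy returns DEST + N rather than DEST. A minimal C equivalent of that contract, not the optimized code in this file:

#include <string.h>

/* Reference-only sketch of mempcpy semantics: copy N bytes, return the
   byte past the end of the destination.  */
static void *
mempcpy_sketch (void *dest, const void *src, size_t n)
{
  return (char *) memcpy (dest, src, n) + n;
}

That return value is exactly what the assembly below computes with "movq %rdi, %rax; addq %rdx, %rax" before jumping into the shared copy body.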
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -32,18 +32,27 @@
    8 * VEC_SIZE at a time.
    8. Otherwise, forward copy 8 * VEC_SIZE at a time.  */
 
-#if IS_IN (libc)
+#include <sysdep.h>
 
-# include <sysdep.h>
-# include "asm-syntax.h"
+#ifndef MEMCPY_SYMBOL
+# define MEMCPY_SYMBOL(p,s)	MEMMOVE_SYMBOL(p, s)
+#endif
 
-# ifndef VZEROUPPER
-#  if VEC_SIZE > 16
-#   define VZEROUPPER vzeroupper
-#  else
-#   define VZEROUPPER
-#  endif
+#ifndef MEMPCPY_SYMBOL
+# define MEMPCPY_SYMBOL(p,s)	MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMMOVE_CHK_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s)	MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+#  define VZEROUPPER vzeroupper
+# else
+#  define VZEROUPPER
+# endif
 #endif
 
 /* Threshold to use Enhanced REP MOVSB.  Since there is overhead to set
    up REP MOVSB operation, REP MOVSB isn't faster on short data.  The
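Each of the three new macros falls back to MEMMOVE_SYMBOL, so the existing multiarch variants keep their current names, while a wrapper that wants the SSE2 version as the default implementation can pre-define the macros before including this file. The following stand-alone C mock-up only illustrates that #ifndef-fallback pattern; the demo names and the token-pasting scheme are invented here and are not glibc's:

#include <stdio.h>

/* A variant file defines the base naming scheme...  */
#define MEMMOVE_SYMBOL(p, s)	p##_sse2_##s

/* ...and the new macros fall back to it unless a wrapper defined them
   first (for example, to emit plain default names instead).  */
#ifndef MEMCPY_SYMBOL
# define MEMCPY_SYMBOL(p, s)	MEMMOVE_SYMBOL (p, s)
#endif

/* Expands to memcpy_demo_sse2_unaligned_2 here.  */
static void
MEMCPY_SYMBOL (memcpy_demo, unaligned_2) (void)
{
  puts ("memcpy_demo_sse2_unaligned_2");
}

int
main (void)
{
  MEMCPY_SYMBOL (memcpy_demo, unaligned_2) ();
  return 0;
}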
@@ -52,32 +61,36 @@
    on processors with Enhanced REP MOVSB.  Since larger register size
    can move more data with a single load and store, the threshold is
    higher with larger register size.  */
-# ifndef REP_MOVSB_THRESHOLD
-#  define REP_MOVSB_THRESHOLD	(2048 * (VEC_SIZE / 16))
-# endif
+#ifndef REP_MOVSB_THRESHOLD
+# define REP_MOVSB_THRESHOLD	(2048 * (VEC_SIZE / 16))
+#endif
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
 
-# ifndef SECTION
-#  error SECTION is not defined!
-# endif
 	.section SECTION(.text),"ax",@progbits
 
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_2))
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_2))
+#endif
 
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+#if VEC_SIZE == 16 || defined SHARED
+ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_2))
+END (MEMPCPY_SYMBOL (__mempcpy, unaligned_2))
+#endif
 
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2))
-# endif
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2))
+#endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_2))
 	movq	%rdi, %rax
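The _chk entry points are the _FORTIFY_SOURCE variants: %rdx carries the copy length and %rcx the destination object size supplied by the caller, so "cmpq %rdx, %rcx; jb __chk_fail" aborts when the destination is too small and otherwise falls through into the ordinary body. A rough C equivalent, sketch only (__chk_fail itself reports a buffer overflow before terminating):

#include <stdlib.h>
#include <string.h>

/* Sketch of the __mempcpy_chk entry above: LEN arrives in %rdx,
   DESTLEN in %rcx; "jb HIDDEN_JUMPTARGET (__chk_fail)" is the
   DESTLEN < LEN case.  */
static void *
mempcpy_chk_sketch (void *dest, const void *src, size_t len, size_t destlen)
{
  if (destlen < len)
    abort ();			/* __chk_fail */
  return (char *) memcpy (dest, src, len) + len;
}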
@@ -86,24 +99,29 @@ L(start):
 	jb	L(less_vec)
 	cmpq	$(VEC_SIZE * 2), %rdx
 	ja	L(more_2x_vec)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(last_2x_vec):
+#endif
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
 	VMOVU	(%rsi), %VEC(0)
 	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(1)
 	VMOVU	%VEC(0), (%rdi)
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
 	VZEROUPPER
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(nop):
+#endif
 	ret
+#if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
 
-# if VEC_SIZE == 16
+# if VEC_SIZE == 16 && defined SHARED
 /* Only used to measure performance of REP MOVSB.  */
-#  ifdef SHARED
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
-#  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
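The block between the new L(last_2x_vec) and L(nop) labels is the branch-free copy for sizes between VEC_SIZE and 2 * VEC_SIZE: both loads are issued before either store, and the head and tail ranges simply overlap when the size is not exactly 2 * VEC_SIZE. A C sketch of the same idea, with VEC_SIZE fixed at 32 purely for illustration:

#include <string.h>

enum { VEC_SIZE = 32 };		/* stand-in for the build-time VEC_SIZE */

/* Caller guarantees VEC_SIZE <= n <= 2 * VEC_SIZE.  */
static void
copy_vec_to_2x_vec (char *dst, const char *src, size_t n)
{
  char head[VEC_SIZE], tail[VEC_SIZE];
  memcpy (head, src, VEC_SIZE);			/* VMOVU (%rsi), %VEC(0) */
  memcpy (tail, src + n - VEC_SIZE, VEC_SIZE);	/* VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) */
  memcpy (dst, head, VEC_SIZE);			/* VMOVU %VEC(0), (%rdi) */
  memcpy (dst + n - VEC_SIZE, tail, VEC_SIZE);	/* VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) */
}

Loading both vectors before storing either one is also what keeps the same sequence valid when source and destination overlap, which is why memmove can share it.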
@@ -132,11 +150,10 @@ strong_alias (__memmove_erms, __memcpy_erms)
 # endif
 
 # ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 
 ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 	movq	%rdi, %rax
@@ -144,11 +161,10 @@ ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 	jmp	L(start_erms)
 END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 # endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -192,6 +208,7 @@ L(movsb_more_2x_vec):
 	/* Force 32-bit displacement to avoid long nop between
 	   instructions.  */
 	ja.d32	L(movsb)
+#endif
 	.p2align 4
 L(more_2x_vec):
 	/* More than 2 * VEC.  */
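The "ja.d32 L(movsb)" branch above is taken only when the size exceeds REP_MOVSB_THRESHOLD, defined earlier as 2048 * (VEC_SIZE / 16). Spelled out for the usual vector widths, a small sketch of the switch-over sizes that formula gives:

#include <stdio.h>

static unsigned long
rep_movsb_threshold (unsigned vec_size)
{
  return 2048UL * (vec_size / 16);	/* REP_MOVSB_THRESHOLD */
}

int
main (void)
{
  printf ("SSE2    (VEC_SIZE 16): %lu bytes\n", rep_movsb_threshold (16)); /* 2048 */
  printf ("AVX2    (VEC_SIZE 32): %lu bytes\n", rep_movsb_threshold (32)); /* 4096 */
  printf ("AVX-512 (VEC_SIZE 64): %lu bytes\n", rep_movsb_threshold (64)); /* 8192 */
  return 0;
}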
@@ -227,13 +244,19 @@ L(copy_forward):
 	VMOVU	%VEC(2), -(VEC_SIZE * 3)(%rdi,%rdx)
 	VMOVU	%VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
 	cmpq	$(VEC_SIZE * 8), %rdx
-# if VEC_SIZE == 16
+#if VEC_SIZE == 16
+# if defined USE_MULTIARCH && IS_IN (libc)
+	jbe	L(return)
+# else
 	/* Use 32-bit displacement to avoid long nop between
 	   instructions.  */
 	jbe.d32	L(return)
-# else
+# endif
+#else
+	/* Use 8-bit displacement to avoid long nop between
+	   instructions.  */
 	jbe	L(return_disp8)
-# endif
+#endif
 	leaq	(VEC_SIZE * 4)(%rdi), %rcx
 	addq	%rdi, %rdx
 	andq	$-(VEC_SIZE * 4), %rdx
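The loop setup that follows the size check rounds the running end pointer down to a multiple of 4 * VEC_SIZE with "andq $-(VEC_SIZE * 4), %rdx", so the main loop always moves whole four-vector blocks. The same computation in C, assuming VEC_SIZE is one of the power-of-two values the earlier #error check permits:

#include <stdint.h>

/* andq $-(VEC_SIZE * 4), %rdx: -(4 * VEC_SIZE) equals
   ~(4 * VEC_SIZE - 1) in two's complement, so the AND clears the low
   bits and rounds END down to a multiple of 4 * VEC_SIZE.  */
static uintptr_t
round_down_to_4x_vec (uintptr_t end, uintptr_t vec_size)
{
  return end & -(vec_size * 4);
}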
@@ -263,22 +286,25 @@ L(loop):
 	addq	$(VEC_SIZE * 4), %rcx
 	cmpq	%rcx, %rdx
 	jne	L(loop)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(return):
+#endif
 L(return_disp8):
 	VZEROUPPER
 	ret
 L(less_vec):
 	/* Less than 1 VEC.  */
-# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
-#  error Unsupported VEC_SIZE!
-# endif
-# if VEC_SIZE > 32
+#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+# error Unsupported VEC_SIZE!
+#endif
+#if VEC_SIZE > 32
 	cmpb	$32, %dl
 	jae	L(between_32_63)
-# endif
-# if VEC_SIZE > 16
+#endif
+#if VEC_SIZE > 16
 	cmpb	$16, %dl
 	jae	L(between_16_31)
-# endif
+#endif
 	cmpb	$8, %dl
 	jae	L(between_8_15)
 	cmpb	$4, %dl
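The L(less_vec) ladder handles sizes below one vector by bucketing on the low byte of the length (%dl) and, within each bucket, copying a fixed-width head and tail that may overlap, so no bucket needs a further branch on the exact size. A compact C sketch of that structure; the helper name and the 64-byte-vector assumption are mine, for illustration only:

#include <string.h>

/* Copy a possibly-overlapping head and tail of fixed width W.
   Caller guarantees W <= n <= 2 * W and W <= 32.  */
static void
copy_head_tail (char *dst, const char *src, size_t n, size_t w)
{
  unsigned char head[32], tail[32];
  memcpy (head, src, w);
  memcpy (tail, src + n - w, w);
  memcpy (dst, head, w);
  memcpy (dst + n - w, tail, w);
}

/* Shape of L(less_vec) for a 64-byte vector build, i.e. n < VEC_SIZE.  */
static void
copy_less_vec (char *dst, const char *src, size_t n)
{
  if (n >= 32)      copy_head_tail (dst, src, n, 32);	/* L(between_32_63) */
  else if (n >= 16) copy_head_tail (dst, src, n, 16);	/* L(between_16_31) */
  else if (n >= 8)  copy_head_tail (dst, src, n, 8);	/* L(between_8_15)  */
  else if (n >= 4)  copy_head_tail (dst, src, n, 4);	/* 4-7 byte bucket  */
  else if (n >= 2)  copy_head_tail (dst, src, n, 2);	/* L(between_2_3)   */
  else if (n == 1)  dst[0] = src[0];			/* single byte      */
}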
@@ -290,7 +316,7 @@ L(less_vec):
 	movb	%cl, (%rdi)
 1:
 	ret
-# if VEC_SIZE > 32
+#if VEC_SIZE > 32
 L(between_32_63):
 	/* From 32 to 63.  No branch when size == 32.  */
 	vmovdqu	(%rsi), %ymm0
@@ -299,8 +325,8 @@ L(between_32_63):
 	vmovdqu	%ymm1, -32(%rdi,%rdx)
 	VZEROUPPER
 	ret
-# endif
-# if VEC_SIZE > 16
+#endif
+#if VEC_SIZE > 16
 	/* From 16 to 31.  No branch when size == 16.  */
 L(between_16_31):
 	vmovdqu	(%rsi), %xmm0
@@ -308,7 +334,7 @@ L(between_16_31):
 	vmovdqu	%xmm0, (%rdi)
 	vmovdqu	%xmm1, -16(%rdi,%rdx)
 	ret
-# endif
+#endif
 L(between_8_15):
 	/* From 8 to 15.  No branch when size == 8.  */
 	movq	-8(%rsi,%rdx), %rcx
@@ -331,10 +357,10 @@ L(between_2_3):
 	movw	%si, (%rdi)
 	ret
 
-# if VEC_SIZE > 16
+#if VEC_SIZE > 16
 	/* Align to 16 bytes to avoid long nop between instructions.  */
 	.p2align 4
-# endif
+#endif
 L(more_2x_vec_overlap):
 	/* More than 2 * VEC and there is overlap bewteen destination
 	   and source.  */
@@ -454,15 +480,19 @@ L(loop_8x_vec_backward):
 	jmp	L(between_4x_vec_and_8x_vec)
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 
-# ifdef SHARED
+#ifdef SHARED
+# if IS_IN (libc)
+#  ifdef USE_MULTIARCH
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
 strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
-strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_2),
-	      MEMMOVE_SYMBOL (__memcpy, unaligned_2))
-strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_2),
-	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_2))
+#  endif
+strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_2),
+	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned_2))
 # endif
-
 #endif
+#if VEC_SIZE == 16 || defined SHARED
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_2),
+	      MEMCPY_SYMBOL (__memcpy, unaligned_2))
+#endif
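The strong_alias block is what lets one body serve several names: because this __memmove handles overlapping buffers at full speed, __memcpy and the _chk variants can simply be additional symbols for the same code, which is how the commit provides the memcpy alias for libc.a and ld.so. A stand-alone illustration using GCC's alias attribute rather than glibc's strong_alias macro; the demo_ names are invented for the example:

#include <stddef.h>
#include <stdio.h>

/* A byte-wise memmove standing in for the vectorized one.  */
void *
demo_memmove (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  if (d < s)
    while (n--) *d++ = *s++;
  else
    while (n--) d[n] = s[n];
  return dst;
}

/* Equivalent of: strong_alias (demo_memmove, demo_memcpy)  */
void *demo_memcpy (void *, const void *, size_t)
     __attribute__ ((alias ("demo_memmove")));

int
main (void)
{
  char buf[16] = "overlap";
  demo_memcpy (buf + 1, buf, 7);	/* runs the same code as demo_memmove */
  printf ("%s\n", buf + 1);
  return 0;
}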