Mirror of git://sourceware.org/git/glibc.git, synced 2024-12-09 04:11:27 +08:00, at commit a7392db2ff.
No bug.
The optimizations are as follows:
1) Always align entry to 64 bytes. This makes behavior more
predictable and makes other frontend optimizations easier.
2) Make the L(more_8x_vec) cases 4k aliasing aware. This can have
significant benefits in the case that:
0 < (dst - src) < [256, 512]
3) Align before `rep movsb`. For ERMS this is roughly a [0, 30%]
improvement and for FSRM [-10%, 25%].
In addition to these primary changes there is general cleanup
throughout to optimize the aligning routines and control flow logic.
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit a6b7502ec0)
34 lines · 732 B · ArmAsm
/* memmove/memcpy AVX-512 (EVEX-512) configuration preamble.

   This file only parameterizes the generic unaligned/ERMS memmove
   template (included at the bottom) for the AVX-512 variant:
     - 64-byte vectors (zmm registers),
     - symbols named *_avx512_*,
     - code placed in the .text.evex512 section.

   Only built into libc proper, not into other glibc components.  */
#if IS_IN (libc)

/* One vector holds 64 bytes.  */
# define VEC_SIZE	64

/* Use only registers 16-31.  These are EVEX-encoded-only registers,
   so touching them does not dirty the legacy AVX upper state --
   presumably why VZEROUPPER can be defined away below (TODO: confirm
   against the template's transition-penalty assumptions).  */
# define XMM0		xmm16
# define XMM1		xmm17
# define YMM0		ymm16
# define YMM1		ymm17
# define VEC0		zmm16
# define VEC1		zmm17
# define VEC2		zmm18
# define VEC3		zmm19
# define VEC4		zmm20
# define VEC5		zmm21
# define VEC6		zmm22
# define VEC7		zmm23
# define VEC8		zmm24
# define VEC9		zmm25
# define VEC10		zmm26
# define VEC11		zmm27
# define VEC12		zmm28
# define VEC13		zmm29
# define VEC14		zmm30
# define VEC15		zmm31
/* Token-paste accessor so the template can write VEC(i).  */
# define VEC(i)		VEC##i

/* Instruction selection for this variant: non-temporal store, and
   the 64-bit-masking EVEX forms for unaligned/aligned moves.  */
# define VMOVNT		vmovntdq
# define VMOVU		vmovdqu64
# define VMOVA		vmovdqa64

/* No upper-state cleanup needed for this variant; expand to nothing.  */
# define VZEROUPPER

/* Encoded length in bytes of a VMOVU with base+disp operand; the
   template uses this for entry-alignment padding calculations.  */
# define MOV_SIZE	6

/* Place code in a dedicated subsection, e.g. .text.evex512.  */
# define SECTION(p)		p##.evex512

/* Symbol naming, e.g. __memmove_avx512_unaligned_erms.  */
# define MEMMOVE_SYMBOL(p,s)	p##_avx512_##s

/* Generic implementation, specialized by the macros above.  */
# include "memmove-vec-unaligned-erms.S"
#endif