mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-30 12:31:53 +08:00
AArch64: Improve SVE memcpy and memmove
Improve SVE memcpy by copying 2 vectors if the size is small enough. This improves performance of random memcpy by ~9% on Neoverse V1, and 33-64 byte copies are ~16% faster.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
parent
c980549cc6
commit
d2d3f3720c
@ -67,14 +67,15 @@ ENTRY (__memcpy_sve)
|
||||
|
||||
cmp count, 128
|
||||
b.hi L(copy_long)
|
||||
cmp count, 32
|
||||
b.hi L(copy32_128)
|
||||
|
||||
whilelo p0.b, xzr, count
|
||||
cntb vlen
|
||||
tbnz vlen, 4, L(vlen128)
|
||||
ld1b z0.b, p0/z, [src]
|
||||
st1b z0.b, p0, [dstin]
|
||||
cmp count, vlen, lsl 1
|
||||
b.hi L(copy32_128)
|
||||
whilelo p0.b, xzr, count
|
||||
whilelo p1.b, vlen, count
|
||||
ld1b z0.b, p0/z, [src, 0, mul vl]
|
||||
ld1b z1.b, p1/z, [src, 1, mul vl]
|
||||
st1b z0.b, p0, [dstin, 0, mul vl]
|
||||
st1b z1.b, p1, [dstin, 1, mul vl]
|
||||
ret
|
||||
|
||||
/* Medium copies: 33..128 bytes. */
|
||||
@ -102,14 +103,6 @@ L(copy96):
|
||||
stp C_q, D_q, [dstend, -32]
|
||||
ret
|
||||
|
||||
L(vlen128):
|
||||
whilelo p1.b, vlen, count
|
||||
ld1b z0.b, p0/z, [src, 0, mul vl]
|
||||
ld1b z1.b, p1/z, [src, 1, mul vl]
|
||||
st1b z0.b, p0, [dstin, 0, mul vl]
|
||||
st1b z1.b, p1, [dstin, 1, mul vl]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Copy more than 128 bytes. */
|
||||
L(copy_long):
|
||||
@ -158,14 +151,15 @@ ENTRY (__memmove_sve)
|
||||
|
||||
cmp count, 128
|
||||
b.hi L(move_long)
|
||||
cmp count, 32
|
||||
b.hi L(copy32_128)
|
||||
|
||||
whilelo p0.b, xzr, count
|
||||
cntb vlen
|
||||
tbnz vlen, 4, L(vlen128)
|
||||
ld1b z0.b, p0/z, [src]
|
||||
st1b z0.b, p0, [dstin]
|
||||
cmp count, vlen, lsl 1
|
||||
b.hi L(copy32_128)
|
||||
whilelo p0.b, xzr, count
|
||||
whilelo p1.b, vlen, count
|
||||
ld1b z0.b, p0/z, [src, 0, mul vl]
|
||||
ld1b z1.b, p1/z, [src, 1, mul vl]
|
||||
st1b z0.b, p0, [dstin, 0, mul vl]
|
||||
st1b z1.b, p1, [dstin, 1, mul vl]
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
|
Loading…
Reference in New Issue
Block a user