mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-07 13:28:05 +08:00
AArch64: Add memset_zva64
Add a specialized memset for the common ZVA size of 64 to avoid the
overhead of reading the ZVA size. Since the code is identical to
__memset_falkor, remove the latter.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 3d7090f14b)
This commit is contained in:
parent
f9ae26cbbe
commit
1da0176152
@ -101,19 +101,19 @@ L(tail64):
|
|||||||
ret
|
ret
|
||||||
|
|
||||||
L(try_zva):
|
L(try_zva):
|
||||||
#ifdef ZVA_MACRO
|
#ifndef ZVA64_ONLY
|
||||||
zva_macro
|
|
||||||
#else
|
|
||||||
.p2align 3
|
.p2align 3
|
||||||
mrs tmp1, dczid_el0
|
mrs tmp1, dczid_el0
|
||||||
tbnz tmp1w, 4, L(no_zva)
|
tbnz tmp1w, 4, L(no_zva)
|
||||||
and tmp1w, tmp1w, 15
|
and tmp1w, tmp1w, 15
|
||||||
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||||
b.ne L(zva_128)
|
b.ne L(zva_128)
|
||||||
|
nop
|
||||||
|
#endif
|
||||||
/* Write the first and last 64 byte aligned block using stp rather
|
/* Write the first and last 64 byte aligned block using stp rather
|
||||||
than using DC ZVA. This is faster on some cores.
|
than using DC ZVA. This is faster on some cores.
|
||||||
*/
|
*/
|
||||||
|
.p2align 4
|
||||||
L(zva_64):
|
L(zva_64):
|
||||||
str q0, [dst, 16]
|
str q0, [dst, 16]
|
||||||
stp q0, q0, [dst, 32]
|
stp q0, q0, [dst, 32]
|
||||||
@ -123,7 +123,6 @@ L(zva_64):
|
|||||||
sub count, dstend, dst /* Count is now 128 too large. */
|
sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||||
add dst, dst, 128
|
add dst, dst, 128
|
||||||
nop
|
|
||||||
1: dc zva, dst
|
1: dc zva, dst
|
||||||
add dst, dst, 64
|
add dst, dst, 64
|
||||||
subs count, count, 64
|
subs count, count, 64
|
||||||
@ -134,6 +133,7 @@ L(zva_64):
|
|||||||
stp q0, q0, [dstend, -32]
|
stp q0, q0, [dstend, -32]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
#ifndef ZVA64_ONLY
|
||||||
.p2align 3
|
.p2align 3
|
||||||
L(zva_128):
|
L(zva_128):
|
||||||
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||||
|
@ -12,10 +12,10 @@ sysdep_routines += \
|
|||||||
memmove_mops \
|
memmove_mops \
|
||||||
memset_a64fx \
|
memset_a64fx \
|
||||||
memset_emag \
|
memset_emag \
|
||||||
memset_falkor \
|
|
||||||
memset_generic \
|
memset_generic \
|
||||||
memset_kunpeng \
|
memset_kunpeng \
|
||||||
memset_mops \
|
memset_mops \
|
||||||
|
memset_zva64 \
|
||||||
strlen_asimd \
|
strlen_asimd \
|
||||||
strlen_generic \
|
strlen_generic \
|
||||||
# sysdep_routines
|
# sysdep_routines
|
||||||
|
@ -54,9 +54,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
|
IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
|
||||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
|
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
|
||||||
IFUNC_IMPL (i, name, memset,
|
IFUNC_IMPL (i, name, memset,
|
||||||
/* Enable this on non-falkor processors too so that other cores
|
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64)
|
||||||
can do a comparative analysis with __memset_generic. */
|
|
||||||
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
|
|
||||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
|
||||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
|
||||||
#if HAVE_AARCH64_SVE_ASM
|
#if HAVE_AARCH64_SVE_ASM
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
extern __typeof (__redirect_memset) __libc_memset;
|
extern __typeof (__redirect_memset) __libc_memset;
|
||||||
|
|
||||||
extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
|
extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden;
|
||||||
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
|
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
|
||||||
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
|
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
|
||||||
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
|
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
|
||||||
@ -47,18 +47,17 @@ select_memset_ifunc (void)
|
|||||||
{
|
{
|
||||||
if (IS_A64FX (midr) && zva_size == 256)
|
if (IS_A64FX (midr) && zva_size == 256)
|
||||||
return __memset_a64fx;
|
return __memset_a64fx;
|
||||||
return __memset_generic;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_KUNPENG920 (midr))
|
if (IS_KUNPENG920 (midr))
|
||||||
return __memset_kunpeng;
|
return __memset_kunpeng;
|
||||||
|
|
||||||
if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64)
|
|
||||||
return __memset_falkor;
|
|
||||||
|
|
||||||
if (IS_EMAG (midr))
|
if (IS_EMAG (midr))
|
||||||
return __memset_emag;
|
return __memset_emag;
|
||||||
|
|
||||||
|
if (zva_size == 64)
|
||||||
|
return __memset_zva64;
|
||||||
|
|
||||||
return __memset_generic;
|
return __memset_generic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,54 +0,0 @@
|
|||||||
/* Memset for falkor.
|
|
||||||
Copyright (C) 2017-2022 Free Software Foundation, Inc.
|
|
||||||
|
|
||||||
This file is part of the GNU C Library.
|
|
||||||
|
|
||||||
The GNU C Library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
The GNU C Library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with the GNU C Library. If not, see
|
|
||||||
<https://www.gnu.org/licenses/>. */
|
|
||||||
|
|
||||||
#include <sysdep.h>
|
|
||||||
#include <memset-reg.h>
|
|
||||||
|
|
||||||
/* Reading dczid_el0 is expensive on falkor so move it into the ifunc
|
|
||||||
resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to
|
|
||||||
use this function only when ZVA is enabled. */
|
|
||||||
|
|
||||||
#if IS_IN (libc)
|
|
||||||
.macro zva_macro
|
|
||||||
.p2align 4
|
|
||||||
/* Write the first and last 64 byte aligned block using stp rather
|
|
||||||
than using DC ZVA. This is faster on some cores. */
|
|
||||||
str q0, [dst, 16]
|
|
||||||
stp q0, q0, [dst, 32]
|
|
||||||
bic dst, dst, 63
|
|
||||||
stp q0, q0, [dst, 64]
|
|
||||||
stp q0, q0, [dst, 96]
|
|
||||||
sub count, dstend, dst /* Count is now 128 too large. */
|
|
||||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
|
||||||
add dst, dst, 128
|
|
||||||
1: dc zva, dst
|
|
||||||
add dst, dst, 64
|
|
||||||
subs count, count, 64
|
|
||||||
b.hi 1b
|
|
||||||
stp q0, q0, [dst, 0]
|
|
||||||
stp q0, q0, [dst, 32]
|
|
||||||
stp q0, q0, [dstend, -64]
|
|
||||||
stp q0, q0, [dstend, -32]
|
|
||||||
ret
|
|
||||||
.endm
|
|
||||||
|
|
||||||
# define ZVA_MACRO zva_macro
|
|
||||||
# define MEMSET __memset_falkor
|
|
||||||
# include <sysdeps/aarch64/memset.S>
|
|
||||||
#endif
|
|
27
sysdeps/aarch64/multiarch/memset_zva64.S
Normal file
27
sysdeps/aarch64/multiarch/memset_zva64.S
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/* Optimized memset for zva size = 64.
|
||||||
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library. If not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
|
||||||
|
#define ZVA64_ONLY 1
|
||||||
|
#define MEMSET __memset_zva64
|
||||||
|
#undef libc_hidden_builtin_def
|
||||||
|
#define libc_hidden_builtin_def(X)
|
||||||
|
|
||||||
|
#include "../memset.S"
|
Loading…
Reference in New Issue
Block a user