mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-30 12:31:53 +08:00
aarch64: Optimize __libc_mtag_tag_zero_region
This is a target hook for memory tagging; the original was a naive implementation. This version uses the same algorithm as __libc_mtag_tag_region, but with instructions that also zero the memory. It was not benchmarked on a real CPU, but it is expected to be faster than the naive implementation.
This commit is contained in:
parent
23fd760add
commit
1dc17ea8f8
@ -20,30 +20,94 @@
|
||||
|
||||
#ifdef USE_MTAG

/* Assumptions:
 *
 * ARMv8-a, AArch64, MTE, LP64 ABI.
 *
 * Interface contract:
 * Address is 16 byte aligned and size is multiple of 16.
 * Returns the passed pointer.
 * The memory region may remain untagged if tagging is not enabled.
 *
 * void *__libc_mtag_tag_zero_region (void *dstin, size_t count)
 * In:    x0 = dstin (16-byte aligned), x1 = count (multiple of 16)
 * Out:   x0 = dstin (unchanged)
 * Clobb: x1-x4, flags
 *
 * Same structure as __libc_mtag_tag_region, but every store also
 * zeroes the granules it tags (stzg/stz2g/dc gzva instead of
 * stg/st2g/dc gva).
 */

	.arch armv8.5-a
	.arch_extension memtag

/* NB, only supported on variants with 64-bit pointers.  */

/* Register roles.  tmp and zva_val alias x4: tmp is only live on the
   small-size path, zva_val only while probing dczid_el0.  */
#define dstin	x0
#define count	x1
#define dst	x2
#define dstend	x3
#define tmp	x4
#define zva_val	x4

ENTRY (__libc_mtag_tag_zero_region)
	PTR_ARG (0)
	SIZE_ARG (1)

	add	dstend, dstin, count	/* dstend = one past the region.  */

	cmp	count, 96
	b.hi	L(set_long)

	tbnz	count, 6, L(set96)	/* Bit 6 set => 64 <= count <= 96.  */

	/* Set 0, 16, 32, or 48 bytes.  The three stzg stores overlap as
	   needed; tmp points at the middle granule so that first/middle/
	   last cover the whole region without a branch per size.  */
	lsr	tmp, count, 5
	add	tmp, dstin, tmp, lsl 4	/* tmp = dstin + (count / 32) * 16.  */
	cbz	count, L(end)		/* count == 0: store nothing.  */
	stzg	dstin, [dstin]		/* Zero+tag first 16 bytes.  */
	stzg	dstin, [tmp]
	stzg	dstin, [dstend, -16]	/* Zero+tag last 16 bytes.  */
L(end):
	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end (stores may overlap).  */
L(set96):
	stz2g	dstin, [dstin]		/* stz2g zeroes+tags 32 bytes.  */
	stz2g	dstin, [dstin, 32]
	stz2g	dstin, [dstend, -32]
	ret

	.p2align 4
	/* Size is > 96 bytes.  */
L(set_long):
	cmp	count, 160		/* Too small to benefit from DC GZVA.  */
	b.lo	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	/* Only use the DC GZVA path when the block size is 64 bytes;
	   dczid_el0[3:0] holds log2(words), 4 => 2^4 * 4 = 64.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	/* Zero+tag the (possibly unaligned-to-64) head explicitly, then
	   let DC GZVA do aligned 64-byte blocks.  */
	stz2g	dstin, [dstin]
	stz2g	dstin, [dstin, 32]
	bic	dst, dstin, 63		/* Round dst down to 64 bytes.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
L(zva_loop):
	add	dst, dst, 64
	dc	gzva, dst		/* Zero+tag a whole 64-byte block.  */
	subs	count, count, 64
	b.hi	L(zva_loop)
	stz2g	dstin, [dstend, -64]	/* Tail: last 64 bytes, may overlap.  */
	stz2g	dstin, [dstend, -32]
	ret

L(no_zva):
	/* Plain stz2g loop, 64 bytes per iteration.  */
	sub	dst, dstin, 32		/* Dst is biased by -32.  */
	sub	count, count, 64	/* Adjust count for loop.  */
L(no_zva_loop):
	stz2g	dstin, [dst, 32]
	stz2g	dstin, [dst, 64]!	/* Pre-index: dst += 64.  */
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stz2g	dstin, [dstend, -64]	/* Tail, may overlap loop stores.  */
	stz2g	dstin, [dstend, -32]
	ret

END (__libc_mtag_tag_zero_region)
#endif /* USE_MTAG */
|
||||
|
Loading…
Reference in New Issue
Block a user