x86: Update memrchr to use new VEC macros
Replace %VEC(n) -> %VMM(n)

This commit does not change libc.so

Tested build on x86-64
parent 52ab7604db
commit 3088a66ff8
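The rename is mechanical, but the motivation is the new shared vector-register headers: instead of each implementation defining its own VEC(n), the x86-*-vecs.h family maps a generic VMM(n) onto whichever register class the including file selects. For the evex256 case those are the EVEX-only high ymm registers (ymm16-ymm31, which never require vzeroupper). A minimal sketch of the idea; the macro bodies below are illustrative approximations, not the literal glibc header contents:

/* Hypothetical approximation of what including "x86-evex256-vecs.h"
   provides; the real glibc header is structured differently.  */
#define VMM_0	ymm16
#define VMM_1	ymm17
#define VMM_2	ymm18
#define VMM_3	ymm19
#define VMM(n)	VMM_##n		/* so %VMM(0) assembles as %ymm16 */

/* memrchr-evex.S then builds its own alias on top, as in the diff:  */
#define VMMMATCH VMM(0)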
sysdeps/x86_64/multiarch/memrchr-evex.S
@@ -21,7 +21,7 @@
 #if ISA_SHOULD_BUILD (4)
 
 # include <sysdep.h>
-# include "evex256-vecs.h"
+# include "x86-evex256-vecs.h"
 # if VEC_SIZE != 32
 #  error "VEC_SIZE != 32 unimplemented"
 # endif
@@ -31,7 +31,7 @@
 # endif
 
 # define PAGE_SIZE	4096
-# define VECMATCH	VEC(0)
+# define VMMMATCH	VMM(0)
 
 	.section SECTION(.text), "ax", @progbits
 ENTRY_P2ALIGN(MEMRCHR, 6)
@@ -47,7 +47,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6)
 	   correct page cross check and 2) it correctly sets up end ptr to be
 	   subtract by lzcnt aligned.  */
 	leaq	-1(%rdi, %rdx), %rax
-	vpbroadcastb %esi, %VECMATCH
+	vpbroadcastb %esi, %VMMMATCH
 
 	/* Check if we can load 1x VEC without cross a page.  */
 	testl	$(PAGE_SIZE - VEC_SIZE), %eax
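The page-cross check above is compact; in scalar terms it conservatively flags the only case where the VEC_SIZE load ending at the buffer's end could reach back into the previous page. A minimal C model, assuming 4 KiB pages and VEC_SIZE == 32 (the function name is made up for illustration):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define VEC_SIZE  32

/* Mirrors `testl $(PAGE_SIZE - VEC_SIZE), %eax` with %rax == end - 1:
   the test is zero exactly when the last byte read sits in the first
   VEC_SIZE bytes of a page, i.e. the only case where loading
   [end - VEC_SIZE, end) might cross backwards into another page.  */
static bool may_cross_page(uintptr_t end)
{
	return ((end - 1) & (PAGE_SIZE - VEC_SIZE)) == 0;
}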
@@ -55,7 +55,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6)
 
 	/* Don't use rax for pointer here because EVEX has better encoding with
 	   offset % VEC_SIZE == 0.  */
-	vpcmpb	$0, -(VEC_SIZE)(%rdi, %rdx), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE)(%rdi, %rdx), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	/* Fall through for rdx (len) <= VEC_SIZE (expect small sizes).  */
@@ -96,7 +96,7 @@ L(more_1x_vec):
 	movq	%rax, %rdx
 
 	/* Need no matter what.  */
-	vpcmpb	$0, -(VEC_SIZE)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	subq	%rdi, %rdx
@@ -115,7 +115,7 @@ L(last_2x_vec):
 
 	/* Don't use rax for pointer here because EVEX has better encoding with
 	   offset % VEC_SIZE == 0.  */
-	vpcmpb	$0, -(VEC_SIZE * 2)(%rdi, %rdx), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE * 2)(%rdi, %rdx), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 	/* NB: 64-bit lzcnt. This will naturally add 32 to position.  */
 	lzcntq	%rcx, %rcx
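The "NB: 64-bit lzcnt" comment is worth unpacking: %ecx holds only 32 mask bits from kmovd, so a 64-bit lzcnt over %rcx returns 32 plus the 32-bit leading-zero count, folding the second vector's VEC_SIZE displacement into the same subtraction for free. A hedged scalar model (the helper name is made up):

#include <stdint.h>

/* What `lzcntq %rcx, %rcx` computes when %rcx was loaded via a
   32-bit kmovd: the upper 32 bits are zero, so the count is
   32 + clz32(mask) -- the "+ 32" the source comment mentions.  */
static unsigned lzcnt64_of_32bit_mask(uint32_t mask)
{
	return mask ? 32 + (unsigned) __builtin_clz(mask) : 64;
}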
@@ -131,7 +131,7 @@ L(last_2x_vec):
 L(page_cross):
 	movq	%rax, %rsi
 	andq	$-VEC_SIZE, %rsi
-	vpcmpb	$0, (%rsi), %VECMATCH, %k0
+	vpcmpb	$0, (%rsi), %VMMMATCH, %k0
 	kmovd	%k0, %r8d
 	/* Shift out negative alignment (because we are starting from endptr and
 	   working backwards).  */
@@ -165,13 +165,13 @@ L(more_2x_vec):
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x0_dec)
 
-	vpcmpb	$0, -(VEC_SIZE * 2)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE * 2)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x1)
 
 	/* Need no matter what.  */
-	vpcmpb	$0, -(VEC_SIZE * 3)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE * 3)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	subq	$(VEC_SIZE * 4), %rdx
@@ -185,7 +185,7 @@ L(last_vec):
 
 
 	/* Need no matter what.  */
-	vpcmpb	$0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 	lzcntl	%ecx, %ecx
 	subq	$(VEC_SIZE * 3 + 1), %rax
@@ -220,7 +220,7 @@ L(more_4x_vec):
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x2)
 
-	vpcmpb	$0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, -(VEC_SIZE * 4)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	testl	%ecx, %ecx
@@ -243,17 +243,17 @@ L(more_4x_vec):
 L(loop_4x_vec):
 	/* Store 1 were not-equals and 0 where equals in k1 (used to mask later
 	   on).  */
-	vpcmpb	$4, (VEC_SIZE * 3)(%rax), %VECMATCH, %k1
+	vpcmpb	$4, (VEC_SIZE * 3)(%rax), %VMMMATCH, %k1
 
 	/* VEC(2/3) will have zero-byte where we found a CHAR.  */
-	vpxorq	(VEC_SIZE * 2)(%rax), %VECMATCH, %VEC(2)
-	vpxorq	(VEC_SIZE * 1)(%rax), %VECMATCH, %VEC(3)
-	vpcmpb	$0, (VEC_SIZE * 0)(%rax), %VECMATCH, %k4
+	vpxorq	(VEC_SIZE * 2)(%rax), %VMMMATCH, %VMM(2)
+	vpxorq	(VEC_SIZE * 1)(%rax), %VMMMATCH, %VMM(3)
+	vpcmpb	$0, (VEC_SIZE * 0)(%rax), %VMMMATCH, %k4
 
 	/* Combine VEC(2/3) with min and maskz with k1 (k1 has zero bit where
 	   CHAR is found and VEC(2/3) have zero-byte where CHAR is found.  */
-	vpminub	%VEC(2), %VEC(3), %VEC(3){%k1}{z}
-	vptestnmb %VEC(3), %VEC(3), %k2
+	vpminub	%VMM(2), %VMM(3), %VMM(3){%k1}{z}
+	vptestnmb %VMM(3), %VMM(3), %k2
 
 	/* Any 1s and we found CHAR.  */
 	kortestd %k2, %k4
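The loop body being renamed is the densest part of the file: one not-equal compare, two xors, one direct compare, and a zero-masked vpminub fold four vectors' worth of results into two k registers. A rough scalar C model of what one iteration tests, under the simplifying assumption that we only care whether any match exists (all names here are illustrative, not glibc code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define VEC_SIZE 32

/* Scalar model of one L(loop_4x_vec) iteration over the four
   VEC_SIZE-byte vectors at p[0 * VEC_SIZE] .. p[3 * VEC_SIZE].  */
static bool any_match_4x(const uint8_t *p, uint8_t c)
{
	bool found = false;
	for (size_t i = 0; i < VEC_SIZE; i++) {
		/* vpcmpb $4 (not-equal): k1 bit set where byte != c.  */
		bool k1 = p[3 * VEC_SIZE + i] != c;
		/* vpxorq: a zero byte marks a match in VMM(2)/VMM(3).  */
		uint8_t v2 = p[2 * VEC_SIZE + i] ^ c;
		uint8_t v3 = p[1 * VEC_SIZE + i] ^ c;
		/* vpcmpb $0: direct compare of the lowest vector into k4.  */
		bool k4 = p[0 * VEC_SIZE + i] == c;
		/* vpminub with {%k1}{z}: zero-masked min, so a zero byte in
		   the result means a match in any of the three upper
		   vectors (zeroed by the mask, or already zero in v2/v3).  */
		uint8_t m = k1 ? (v2 < v3 ? v2 : v3) : 0;
		/* vptestnmb + kortestd: any zero byte, or any k4 bit.  */
		found |= (m == 0) || k4;
	}
	return found;
}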
@@ -270,7 +270,7 @@ L(loop_4x_vec):
 L(last_4x_vec):
 
 	/* Used no matter what.  */
-	vpcmpb	$0, (VEC_SIZE * -1)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, (VEC_SIZE * -1)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	cmpl	$(VEC_SIZE * 2), %edx
@@ -280,14 +280,14 @@ L(last_4x_vec):
 	jnz	L(ret_vec_x0_dec)
 
 
-	vpcmpb	$0, (VEC_SIZE * -2)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, (VEC_SIZE * -2)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x1)
 
 	/* Used no matter what.  */
-	vpcmpb	$0, (VEC_SIZE * -3)(%rax), %VECMATCH, %k0
+	vpcmpb	$0, (VEC_SIZE * -3)(%rax), %VMMMATCH, %k0
 	kmovd	%k0, %ecx
 
 	cmpl	$(VEC_SIZE * 3), %edx
@@ -309,7 +309,7 @@ L(loop_end):
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x0_end)
 
-	vptestnmb %VEC(2), %VEC(2), %k0
+	vptestnmb %VMM(2), %VMM(2), %k0
 	kmovd	%k0, %ecx
 	testl	%ecx, %ecx
 	jnz	L(ret_vec_x1_end)