Fix up new x86 string functions.

This commit is contained in:
Ulrich Drepper 2010-02-15 13:04:54 -08:00
parent 904057bc17
commit 6bb74d9f86
5 changed files with 161 additions and 38 deletions

View File

@ -1,3 +1,13 @@
2010-02-15 Ulrich Drepper <drepper@redhat.com>
* sysdeps/i386/i686/multiarch/memcmp-sse4.S: Fix unwind info.
* sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise.
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise.
* sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise.
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: Don't fall through to
undefined code.
2010-02-12 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add

View File

@ -105,43 +105,43 @@ L(less8bytes):
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
cmp $2, %ecx
cmp $2, %ecx
jz L(0bytes)
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
cmp $3, %ecx
cmp $3, %ecx
jz L(0bytes)
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
cmp $4, %ecx
cmp $4, %ecx
jz L(0bytes)
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
cmp $5, %ecx
cmp $5, %ecx
jz L(0bytes)
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
cmp $6, %ecx
cmp $6, %ecx
jz L(0bytes)
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
L(nonzero):
POP (%ebx)
POP (%ebx)
mov $1, %eax
ja L(above)
neg %eax
@ -151,11 +151,11 @@ L(above):
ALIGN (4)
L(0bytes):
POP (%ebx)
POP (%ebx)
xor %eax, %eax
ret
CFI_PUSH (%ebx)
ALIGN (4)
L(less1bytes):
jb L(0bytesend)
@ -609,7 +609,7 @@ L(26bytes):
mov -6(%edx), %ebx
cmp %ebx, %ecx
jne L(find_diff)
movzwl -2(%eax), %ecx
movzwl -2(%edx), %ebx
cmp %bl, %cl
@ -873,7 +873,7 @@ L(32bytes):
L(less16bytes):
add %ebx, %eax
add %ebx, %edx
mov (%eax), %ecx
mov (%edx), %ebx
cmp %ebx, %ecx
@ -908,7 +908,7 @@ L(find_diff):
jne L(end)
cmp %bx, %cx
L(end):
POP (%ebx)
POP (%ebx)
mov $1, %eax
ja L(bigger)
neg %eax

View File

@ -43,8 +43,7 @@
#define BLK2 BLK1+4
#define LEN BLK2+4
#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
#define RETURN RETURN_END; CFI_PUSH (%ebx); CFI_PUSH (%edi); \
CFI_PUSH (%esi)
#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
.section .text.ssse3,"ax",@progbits
ENTRY (MEMCMP)
@ -76,12 +75,13 @@ L(1bytesend):
L(zero):
mov $0, %eax
ret
ALIGN (4)
L(48bytesormore):
PUSH (%ebx)
PUSH (%esi)
PUSH (%edi)
cfi_remember_state
movdqu (%eax), %xmm3
movdqu (%edx), %xmm0
movl %eax, %edi
@ -155,7 +155,7 @@ L(shr_0):
add $32, %esi
sub $0xffff, %edx
jnz L(exit)
lea (%ecx, %edi,1), %eax
lea (%ecx, %esi,1), %edx
@ -163,6 +163,8 @@ L(shr_0):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_0_gobble):
lea -48(%ecx), %ecx
@ -207,6 +209,8 @@ L(shr_0_gobble_loop_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_1):
cmp $80, %ecx
@ -235,6 +239,8 @@ L(shr_1):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_1_gobble):
sub $32, %ecx
@ -286,6 +292,8 @@ L(shr_1_gobble_next):
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_2):
cmp $80, %ecx
@ -314,6 +322,8 @@ L(shr_2):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_2_gobble):
sub $32, %ecx
@ -364,6 +374,8 @@ L(shr_2_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_3):
cmp $80, %ecx
@ -392,6 +404,8 @@ L(shr_3):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_3_gobble):
sub $32, %ecx
@ -442,6 +456,8 @@ L(shr_3_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_4):
cmp $80, %ecx
@ -470,6 +486,8 @@ L(shr_4):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_4_gobble):
sub $32, %ecx
@ -520,6 +538,8 @@ L(shr_4_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_5):
cmp $80, %ecx
@ -548,6 +568,8 @@ L(shr_5):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_5_gobble):
sub $32, %ecx
@ -598,6 +620,8 @@ L(shr_5_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_6):
cmp $80, %ecx
@ -626,6 +650,8 @@ L(shr_6):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_6_gobble):
sub $32, %ecx
@ -676,6 +702,8 @@ L(shr_6_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_7):
cmp $80, %ecx
@ -704,6 +732,8 @@ L(shr_7):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_7_gobble):
sub $32, %ecx
@ -754,6 +784,8 @@ L(shr_7_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_8):
cmp $80, %ecx
@ -782,6 +814,8 @@ L(shr_8):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_8_gobble):
sub $32, %ecx
@ -832,6 +866,8 @@ L(shr_8_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_9):
cmp $80, %ecx
@ -860,6 +896,8 @@ L(shr_9):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_9_gobble):
sub $32, %ecx
@ -910,6 +948,8 @@ L(shr_9_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_10):
cmp $80, %ecx
@ -938,6 +978,8 @@ L(shr_10):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_10_gobble):
sub $32, %ecx
@ -988,6 +1030,8 @@ L(shr_10_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_11):
cmp $80, %ecx
@ -1016,6 +1060,8 @@ L(shr_11):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_11_gobble):
sub $32, %ecx
@ -1066,6 +1112,8 @@ L(shr_11_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_12):
cmp $80, %ecx
@ -1094,6 +1142,8 @@ L(shr_12):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_12_gobble):
sub $32, %ecx
@ -1144,6 +1194,8 @@ L(shr_12_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_13):
cmp $80, %ecx
@ -1172,6 +1224,8 @@ L(shr_13):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_13_gobble):
sub $32, %ecx
@ -1222,6 +1276,8 @@ L(shr_13_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_14):
cmp $80, %ecx
@ -1250,6 +1306,8 @@ L(shr_14):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_14_gobble):
sub $32, %ecx
@ -1300,6 +1358,8 @@ L(shr_14_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_15):
cmp $80, %ecx
@ -1328,6 +1388,8 @@ L(shr_15):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(shr_15_gobble):
sub $32, %ecx
@ -1378,6 +1440,8 @@ L(shr_15_gobble_next):
POP (%esi)
jmp L(less48bytes)
cfi_restore_state
cfi_remember_state
ALIGN (4)
L(exit):
pmovmskb %xmm1, %ebx
@ -1497,8 +1561,9 @@ L(Byte31):
movzbl -9(%edi), %eax
movzbl -9(%esi), %edx
sub %edx, %eax
RETURN
RETURN_END
CFI_PUSH (%ebx)
ALIGN (4)
L(more8bytes):
cmp $16, %ecx

View File

@ -176,6 +176,7 @@ L(first4bytes):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
cfi_remember_state
mov %edx, %edi
mov %eax, %esi
xorl %eax, %eax
@ -241,6 +242,7 @@ L(ret):
#endif
ret
cfi_restore_state
#ifdef USE_AS_STRNCMP
L(more16byteseq):
POP (%esi)
@ -253,6 +255,10 @@ L(eq):
POP (%ebp)
#endif
ret
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(neq):
mov $1, %eax
ja L(neq_bigger)
@ -263,6 +269,9 @@ L(neq_bigger):
#endif
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(less16bytes):
add $0xfefefeff, %ecx
jnc L(less4bytes)
@ -370,8 +379,13 @@ L(more4bytes):
movzbl 7(%eax), %ecx
cmpb %cl, 7(%edx)
jne L(neq)
#if 0
// XXX Bug in the original code: when the je below was not taken, execution fell through past the end of the function into undefined code.
cmpl $0, %ecx
je L(eq)
#else
jmp L(eq)
#endif
END (STRCMP)

View File

@ -160,6 +160,9 @@ L(crosspage):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
#ifdef USE_AS_STRNCMP
cfi_remember_state
#endif
movl %edx, %edi
movl %eax, %ecx
@ -254,7 +257,7 @@ L(loop_ashr_0):
/*
* The following cases will be handled by ashr_1
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
@ -360,7 +363,7 @@ L(ashr_1_exittail):
/*
* The following cases will be handled by ashr_2
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(14~15) n -14 1(15 +(n-14) - n) ashr_2
*/
.p2align 4
@ -467,7 +470,7 @@ L(ashr_2_exittail):
/*
* The following cases will be handled by ashr_3
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(13~15) n -13 2(15 +(n-13) - n) ashr_3
*/
.p2align 4
@ -573,7 +576,7 @@ L(ashr_3_exittail):
/*
* The following cases will be handled by ashr_4
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(12~15) n -12 3(15 +(n-12) - n) ashr_4
*/
.p2align 4
@ -682,7 +685,7 @@ L(ashr_4_exittail):
/*
* The following cases will be handled by ashr_5
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(11~15) n -11 4(15 +(n-11) - n) ashr_5
*/
.p2align 4
@ -788,7 +791,7 @@ L(ashr_5_exittail):
/*
* The following cases will be handled by ashr_6
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(10~15) n -10 5(15 +(n-10) - n) ashr_6
*/
@ -896,7 +899,7 @@ L(ashr_6_exittail):
/*
* The following cases will be handled by ashr_7
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
*/
@ -1006,7 +1009,7 @@ L(ashr_7_exittail):
/*
* The following cases will be handled by ashr_8
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
*/
.p2align 4
@ -1113,7 +1116,7 @@ L(ashr_8_exittail):
/*
* The following cases will be handled by ashr_9
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
*/
.p2align 4
@ -1219,7 +1222,7 @@ L(ashr_9_exittail):
/*
* The following cases will be handled by ashr_10
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
*/
.p2align 4
@ -1325,7 +1328,7 @@ L(ashr_10_exittail):
/*
* The following cases will be handled by ashr_11
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
*/
.p2align 4
@ -1431,7 +1434,7 @@ L(ashr_11_exittail):
/*
* The following cases will be handled by ashr_12
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
*/
.p2align 4
@ -1537,7 +1540,7 @@ L(ashr_12_exittail):
/*
* The following cases will be handled by ashr_13
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
*/
.p2align 4
@ -1643,7 +1646,7 @@ L(ashr_13_exittail):
/*
* The following cases will be handled by ashr_14
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
*/
.p2align 4
@ -1749,7 +1752,7 @@ L(ashr_14_exittail):
/*
* The following cases will be handled by ashr_15
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
*/
@ -1916,6 +1919,9 @@ L(less16bytes):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte0):
#ifdef USE_AS_STRNCMP
cmp $0, %ebp
@ -1931,6 +1937,9 @@ L(Byte0):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte1):
#ifdef USE_AS_STRNCMP
cmp $1, %ebp
@ -1946,6 +1955,9 @@ L(Byte1):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte2):
#ifdef USE_AS_STRNCMP
cmp $2, %ebp
@ -1961,6 +1973,9 @@ L(Byte2):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte3):
#ifdef USE_AS_STRNCMP
cmp $3, %ebp
@ -1976,6 +1991,9 @@ L(Byte3):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte4):
#ifdef USE_AS_STRNCMP
cmp $4, %ebp
@ -1989,7 +2007,11 @@ L(Byte4):
POP (%ebp)
#endif
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte5):
#ifdef USE_AS_STRNCMP
cmp $5, %ebp
@ -2005,6 +2027,9 @@ L(Byte5):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(Byte6):
#ifdef USE_AS_STRNCMP
cmp $6, %ebp
@ -2020,6 +2045,9 @@ L(Byte6):
ret
.p2align 4
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(2next_8_bytes):
add $8, %eax
add $8, %edx
@ -2063,6 +2091,9 @@ L(2next_8_bytes):
#endif
ret
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
#endif
L(neq):
mov $1, %eax
ja L(neq_bigger)
@ -2074,6 +2105,7 @@ L(neq_bigger):
ret
#ifdef USE_AS_STRNCMP
cfi_remember_state
L(more8byteseq):
POP (%esi)
POP (%edi)
@ -2087,7 +2119,9 @@ L(eq):
#endif
xorl %eax, %eax
ret
#ifdef USE_AS_STRNCMP
CFI_PUSH (%ebp)
L(less16bytes_sncmp):
test %ebp, %ebp
jz L(eq)