mirror of
git://sourceware.org/git/glibc.git
synced 2025-01-24 12:25:35 +08:00
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before defining.
This commit is contained in:
parent
91613ed9d8
commit
beb03cee27
@ -1,3 +1,11 @@
|
||||
2003-04-04 Steven Munroe <sjmunroe@us.ibm.com>
|
||||
|
||||
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
|
||||
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
|
||||
|
||||
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
|
||||
defining.
|
||||
|
||||
2003-04-04 Alexandre Oliva <aoliva@redhat.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized strchr implementation for PowerPC64.
|
||||
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -29,6 +29,11 @@ ENTRY (BP_SYM (strchr))
|
||||
|
||||
#define rTMP1 r0
|
||||
#define rRTN r3 /* outgoing result */
|
||||
/* Note: The Bounded pointer support in this code is broken. This code
|
||||
was inherited from PPC32 and that support was never completed.
|
||||
Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
|
||||
These artifacts are left in the code as a reminder in case we need
|
||||
bounded pointer support in the future. */
|
||||
#if __BOUNDED_POINTERS__
|
||||
# define rSTR r4
|
||||
# define rCHR r5 /* byte we're looking for, spread over the whole word */
|
||||
@ -39,8 +44,8 @@ ENTRY (BP_SYM (strchr))
|
||||
# define rWORD r5 /* the current word */
|
||||
#endif
|
||||
#define rCLZB rCHR /* leading zero byte count */
|
||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
|
||||
#define rTMP2 r9
|
||||
#define rIGN r10 /* number of bits we should ignore in the first word */
|
||||
#define rMASK r11 /* mask with the bits to ignore set to 0 */
|
||||
@ -49,18 +54,23 @@ ENTRY (BP_SYM (strchr))
|
||||
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
|
||||
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
|
||||
|
||||
dcbt 0,rRTN
|
||||
rlwimi rCHR, rCHR, 8, 16, 23
|
||||
li rMASK, -1
|
||||
rlwimi rCHR, rCHR, 16, 0, 15
|
||||
rlwinm rIGN, rRTN, 3, 27, 28
|
||||
rlwinm rIGN, rRTN, 3, 26, 28
|
||||
insrdi rCHR, rCHR, 32, 0
|
||||
lis rFEFE, -0x101
|
||||
lis r7F7F, 0x7f7f
|
||||
clrrdi rSTR, rRTN, 2
|
||||
clrrdi rSTR, rRTN, 3
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
sldi rTMP1, rFEFE, 32
|
||||
insrdi r7F7F, r7F7F, 32, 0
|
||||
add rFEFE, rFEFE, rTMP1
|
||||
/* Test the first (partial?) word. */
|
||||
lwz rWORD, 0(rSTR)
|
||||
srw rMASK, rMASK, rIGN
|
||||
ld rWORD, 0(rSTR)
|
||||
srd rMASK, rMASK, rIGN
|
||||
orc rWORD, rWORD, rMASK
|
||||
add rTMP1, rFEFE, rWORD
|
||||
nor rTMP2, r7F7F, rWORD
|
||||
@ -71,7 +81,7 @@ ENTRY (BP_SYM (strchr))
|
||||
|
||||
/* The loop. */
|
||||
|
||||
L(loop):lwzu rWORD, 4(rSTR)
|
||||
L(loop):ldu rWORD, 8(rSTR)
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
/* Test for 0. */
|
||||
add rTMP1, rFEFE, rWORD
|
||||
@ -104,12 +114,12 @@ L(missed):
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor rWORD, rMASK, rFEFE
|
||||
nor rTMP2, rIGN, rTMP1
|
||||
cmplw rWORD, rTMP2
|
||||
cmpld rWORD, rTMP2
|
||||
bgtlr
|
||||
cntlzw rCLZB, rTMP2
|
||||
srwi rCLZB, rCLZB, 3
|
||||
cntlzd rCLZB, rTMP2
|
||||
srdi rCLZB, rCLZB, 3
|
||||
add rRTN, rSTR, rCLZB
|
||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
|
||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
|
||||
STORE_RETURN_VALUE (rSTR)
|
||||
blr
|
||||
|
||||
@ -118,11 +128,11 @@ L(foundit):
|
||||
or rIGN, r7F7F, rTMP3
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor rTMP2, rIGN, rTMP1
|
||||
cntlzw rCLZB, rTMP2
|
||||
subi rSTR, rSTR, 4
|
||||
srwi rCLZB, rCLZB, 3
|
||||
cntlzd rCLZB, rTMP2
|
||||
subi rSTR, rSTR, 8
|
||||
srdi rCLZB, rCLZB, 3
|
||||
add rRTN, rSTR, rCLZB
|
||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
|
||||
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
|
||||
STORE_RETURN_VALUE (rSTR)
|
||||
blr
|
||||
END (BP_SYM (strchr))
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized strlen implementation for PowerPC64.
|
||||
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -60,7 +60,12 @@
|
||||
2) How popular are bytes with the high bit set? If they are very rare,
|
||||
on some processors it might be useful to use the simpler expression
|
||||
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
|
||||
ALU), but this fails when any character has its high bit set. */
|
||||
ALU), but this fails when any character has its high bit set.
|
||||
|
||||
Answer:
|
||||
1) Added a Data Cache Block Touch early to prefetch the first 128
|
||||
byte cache line. Adding dcbt instructions to the loop would not be
|
||||
effective since most strings will be shorter than the cache line. */
|
||||
|
||||
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
|
||||
0 and 3 through 12 (so long as we don't call any procedures) without
|
||||
@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
|
||||
#define rSTR r4 /* current string position */
|
||||
#define rPADN r5 /* number of padding bits we prepend to the
|
||||
string to make it start at a word boundary */
|
||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||
#define rWORD1 r8 /* current string word */
|
||||
#define rWORD2 r9 /* next string word */
|
||||
#define rMASK r9 /* mask for first string word */
|
||||
#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
|
||||
#define rWORD1 r8 /* current string doubleword */
|
||||
#define rWORD2 r9 /* next string doubleword */
|
||||
#define rMASK r9 /* mask for first string doubleword */
|
||||
#define rTMP2 r10
|
||||
#define rTMP3 r11
|
||||
#define rTMP4 r12
|
||||
|
||||
/* Note: The Bounded pointer support in this code is broken. This code
|
||||
was inherited from PPC32 and that support was never completed.
|
||||
Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
|
||||
These artifacts are left in the code as a reminder in case we need
|
||||
bounded pointer support in the future. */
|
||||
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
|
||||
|
||||
clrrdi rSTR, rRTN, 2
|
||||
dcbt 0,rRTN
|
||||
clrrdi rSTR, rRTN, 3
|
||||
lis r7F7F, 0x7f7f
|
||||
rlwinm rPADN, rRTN, 3, 27, 28
|
||||
lwz rWORD1, 0(rSTR)
|
||||
li rMASK, -1
|
||||
rlwinm rPADN, rRTN, 3, 26, 28
|
||||
ld rWORD1, 0(rSTR)
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
/* That's the setup done, now do the first pair of words.
|
||||
We make an exception and use method (2) on the first two words, to reduce
|
||||
overhead. */
|
||||
srw rMASK, rMASK, rPADN
|
||||
li rMASK, -1
|
||||
insrdi r7F7F, r7F7F, 32, 0
|
||||
/* That's the setup done, now do the first pair of doublewords.
|
||||
We make an exception and use method (2) on the first two doublewords,
|
||||
to reduce overhead. */
|
||||
srd rMASK, rMASK, rPADN
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
or rTMP2, r7F7F, rWORD1
|
||||
lis rFEFE, -0x101
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
nor rTMP1, rTMP2, rTMP1
|
||||
and. rWORD1, rTMP1, rMASK
|
||||
mtcrf 0x01, rRTN
|
||||
bne L(done0)
|
||||
lis rFEFE, -0x101
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
clrldi rFEFE,rFEFE,32 /* clear upper 32 */
|
||||
sldi rTMP1, rFEFE, 32
|
||||
add rFEFE, rFEFE, rTMP1
|
||||
/* Are we now aligned to a doubleword boundary? */
|
||||
bt 29, L(loop)
|
||||
bt 28, L(loop)
|
||||
|
||||
/* Handle second word of pair. */
|
||||
lwzu rWORD1, 4(rSTR)
|
||||
/* Handle second doubleword of pair. */
|
||||
ldu rWORD1, 8(rSTR)
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
or rTMP2, r7F7F, rWORD1
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor. rWORD1, rTMP2, rTMP1
|
||||
clrldi. rWORD1,rWORD1,32 /* clear upper 32 */
|
||||
bne L(done0)
|
||||
|
||||
/* The loop. */
|
||||
|
||||
L(loop):
|
||||
lwz rWORD1, 4(rSTR)
|
||||
lwzu rWORD2, 8(rSTR)
|
||||
ld rWORD1, 8(rSTR)
|
||||
ldu rWORD2, 16(rSTR)
|
||||
add rTMP1, rFEFE, rWORD1
|
||||
nor rTMP2, r7F7F, rWORD1
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
|
||||
add rTMP3, rFEFE, rWORD2
|
||||
nor rTMP4, r7F7F, rWORD2
|
||||
bne L(done1)
|
||||
and. rTMP1, rTMP3, rTMP4
|
||||
clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
|
||||
beq L(loop)
|
||||
|
||||
and rTMP1, r7F7F, rWORD2
|
||||
@ -146,17 +156,17 @@ L(loop):
|
||||
|
||||
L(done1):
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
subi rSTR, rSTR, 4
|
||||
subi rSTR, rSTR, 8
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
andc rWORD1, rTMP2, rTMP1
|
||||
|
||||
/* When we get to here, rSTR points to the first word in the string that
|
||||
/* When we get to here, rSTR points to the first doubleword in the string that
|
||||
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
||||
byte. */
|
||||
L(done0):
|
||||
cntlzw rTMP3, rWORD1
|
||||
cntlzd rTMP3, rWORD1
|
||||
subf rTMP1, rRTN, rSTR
|
||||
srwi rTMP3, rTMP3, 3
|
||||
srdi rTMP3, rTMP3, 3
|
||||
add rRTN, rTMP1, rTMP3
|
||||
/* GKM FIXME: check high bound. */
|
||||
blr
|
||||
|
Loading…
Reference in New Issue
Block a user