mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs. * sysdeps/powerpc/strchr.S: Likewise. * sysdeps/powerpc/strcmp.S: Likewise. * sysdeps/powerpc/strcpy.S: Likewise. * sysdeps/powerpc/strlen.S: Likewise.
This commit is contained in:
parent
019357d234
commit
1d280d9f1e
@ -1,5 +1,12 @@
|
||||
2000-06-06 Greg McGary <greg@mcgary.org>
|
||||
|
||||
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
|
||||
Use C comments throughout. Line up operands column with tabs.
|
||||
* sysdeps/powerpc/strchr.S: Likewise.
|
||||
* sysdeps/powerpc/strcmp.S: Likewise.
|
||||
* sysdeps/powerpc/strcpy.S: Likewise.
|
||||
* sysdeps/powerpc/strlen.S: Likewise.
|
||||
|
||||
* sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]:
|
||||
Get low part of &__curbrk with @l.
|
||||
|
||||
|
@ -19,124 +19,135 @@
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
EALIGN(memset,5,1)
|
||||
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
|
||||
Returns 's'.
|
||||
|
||||
The memset is done in three sizes: byte (8 bits), word (32 bits),
|
||||
cache line (256 bits). There is a special case for setting cache lines
|
||||
to 0, to take advantage of the dcbz instruction.
|
||||
r6: current address we are storing at
|
||||
r7: number of bytes we are setting now (when aligning) */
|
||||
to 0, to take advantage of the dcbz instruction. */
|
||||
|
||||
EALIGN (memset, 5, 1)
|
||||
|
||||
#define rTMP r0
|
||||
#define rRTN r3 /* initial value of 1st argument */
|
||||
#define rCHR r4 /* char to set in each byte */
|
||||
#define rLEN r5 /* length of region to set */
|
||||
#define rMEMP r6 /* address at which we are storing */
|
||||
#define rALIGN r7 /* number of bytes we are setting now (when aligning) */
|
||||
#define rMEMP2 r8
|
||||
|
||||
#define rPOS32 r7 /* constant +32 for clearing with dcbz */
|
||||
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
|
||||
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
|
||||
|
||||
/* take care of case for size <= 4 */
|
||||
cmplwi cr1,r5,4
|
||||
andi. r7,r3,3
|
||||
mr r6,r3
|
||||
ble- cr1,L(small)
|
||||
cmplwi cr1, rLEN, 4
|
||||
andi. rALIGN, rRTN, 3
|
||||
mr rMEMP, rRTN
|
||||
ble- cr1, L(small)
|
||||
/* align to word boundary */
|
||||
cmplwi cr5,r5,31
|
||||
rlwimi r4,r4,8,16,23
|
||||
beq+ L(aligned) # 8th instruction from .align
|
||||
mtcrf 0x01,r3
|
||||
subfic r7,r7,4
|
||||
add r6,r6,r7
|
||||
sub r5,r5,r7
|
||||
bf+ 31,L(g0)
|
||||
stb r4,0(r3)
|
||||
bt 30,L(aligned)
|
||||
L(g0): sth r4,-2(r6) # 16th instruction from .align
|
||||
cmplwi cr5, rLEN, 31
|
||||
rlwimi rCHR, rCHR, 8, 16, 23
|
||||
beq+ L(aligned) /* 8th instruction from .align */
|
||||
mtcrf 0x01, rRTN
|
||||
subfic rALIGN, rALIGN, 4
|
||||
add rMEMP, rMEMP, rALIGN
|
||||
sub rLEN, rLEN, rALIGN
|
||||
bf+ 31, L(g0)
|
||||
stb rCHR, 0(rRTN)
|
||||
bt 30, L(aligned)
|
||||
L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */
|
||||
/* take care of case for size < 31 */
|
||||
L(aligned):
|
||||
mtcrf 0x01,r5
|
||||
rlwimi r4,r4,16,0,15
|
||||
ble cr5,L(medium)
|
||||
mtcrf 0x01, rLEN
|
||||
rlwimi rCHR, rCHR, 16, 0, 15
|
||||
ble cr5, L(medium)
|
||||
/* align to cache line boundary... */
|
||||
andi. r7,r6,0x1C
|
||||
subfic r7,r7,0x20
|
||||
andi. rALIGN, rMEMP, 0x1C
|
||||
subfic rALIGN, rALIGN, 0x20
|
||||
beq L(caligned)
|
||||
mtcrf 0x01,r7
|
||||
add r6,r6,r7
|
||||
sub r5,r5,r7
|
||||
cmplwi cr1,r7,0x10
|
||||
mr r8,r6
|
||||
bf 28,L(a1)
|
||||
stw r4,-4(r8)
|
||||
stwu r4,-8(r8)
|
||||
L(a1): blt cr1,L(a2)
|
||||
stw r4,-4(r8) # 32nd instruction from .align
|
||||
stw r4,-8(r8)
|
||||
stw r4,-12(r8)
|
||||
stwu r4,-16(r8)
|
||||
L(a2): bf 29,L(caligned)
|
||||
stw r4,-4(r8)
|
||||
mtcrf 0x01, rALIGN
|
||||
add rMEMP, rMEMP, rALIGN
|
||||
sub rLEN, rLEN, rALIGN
|
||||
cmplwi cr1, rALIGN, 0x10
|
||||
mr rMEMP2, rMEMP
|
||||
bf 28, L(a1)
|
||||
stw rCHR, -4(rMEMP2)
|
||||
stwu rCHR, -8(rMEMP2)
|
||||
L(a1): blt cr1, L(a2)
|
||||
stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */
|
||||
stw rCHR, -8(rMEMP2)
|
||||
stw rCHR, -12(rMEMP2)
|
||||
stwu rCHR, -16(rMEMP2)
|
||||
L(a2): bf 29, L(caligned)
|
||||
stw rCHR, -4(rMEMP2)
|
||||
/* now aligned to a cache line. */
|
||||
L(caligned):
|
||||
cmplwi cr1,r4,0
|
||||
clrrwi. r7,r5,5
|
||||
mtcrf 0x01,r5 # 40th instruction from .align
|
||||
beq cr1,L(zloopstart) # special case for clearing memory using dcbz
|
||||
srwi r0,r7,5
|
||||
mtctr r0
|
||||
beq L(medium) # we may not actually get to do a full line
|
||||
clrlwi. r5,r5,27
|
||||
add r6,r6,r7
|
||||
li r8,-0x40
|
||||
bdz L(cloopdone) # 48th instruction from .align
|
||||
cmplwi cr1, rCHR, 0
|
||||
clrrwi. rALIGN, rLEN, 5
|
||||
mtcrf 0x01, rLEN /* 40th instruction from .align */
|
||||
beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
|
||||
srwi rTMP, rALIGN, 5
|
||||
mtctr rTMP
|
||||
beq L(medium) /* we may not actually get to do a full line */
|
||||
clrlwi. rLEN, rLEN, 27
|
||||
add rMEMP, rMEMP, rALIGN
|
||||
li rNEG64, -0x40
|
||||
bdz L(cloopdone) /* 48th instruction from .align */
|
||||
|
||||
L(c3): dcbz r8,r6
|
||||
stw r4,-4(r6)
|
||||
stw r4,-8(r6)
|
||||
stw r4,-12(r6)
|
||||
stw r4,-16(r6)
|
||||
nop # let 601 fetch last 4 instructions of loop
|
||||
stw r4,-20(r6)
|
||||
stw r4,-24(r6) # 56th instruction from .align
|
||||
nop # let 601 fetch first 8 instructions of loop
|
||||
stw r4,-28(r6)
|
||||
stwu r4,-32(r6)
|
||||
L(c3): dcbz rNEG64, rMEMP
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
stw rCHR, -12(rMEMP)
|
||||
stw rCHR, -16(rMEMP)
|
||||
nop /* let 601 fetch last 4 instructions of loop */
|
||||
stw rCHR, -20(rMEMP)
|
||||
stw rCHR, -24(rMEMP) /* 56th instruction from .align */
|
||||
nop /* let 601 fetch first 8 instructions of loop */
|
||||
stw rCHR, -28(rMEMP)
|
||||
stwu rCHR, -32(rMEMP)
|
||||
bdnz L(c3)
|
||||
L(cloopdone):
|
||||
stw r4,-4(r6)
|
||||
stw r4,-8(r6)
|
||||
stw r4,-12(r6)
|
||||
stw r4,-16(r6) # 64th instruction from .align
|
||||
stw r4,-20(r6)
|
||||
cmplwi cr1,r5,16
|
||||
stw r4,-24(r6)
|
||||
stw r4,-28(r6)
|
||||
stwu r4,-32(r6)
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
stw rCHR, -12(rMEMP)
|
||||
stw rCHR, -16(rMEMP) /* 64th instruction from .align */
|
||||
stw rCHR, -20(rMEMP)
|
||||
cmplwi cr1, rLEN, 16
|
||||
stw rCHR, -24(rMEMP)
|
||||
stw rCHR, -28(rMEMP)
|
||||
stwu rCHR, -32(rMEMP)
|
||||
beqlr
|
||||
add r6,r6,r7
|
||||
b L(medium_tail2) # 72nd instruction from .align
|
||||
add rMEMP, rMEMP, rALIGN
|
||||
b L(medium_tail2) /* 72nd instruction from .align */
|
||||
|
||||
.align 5
|
||||
nop
|
||||
/* Clear lines of memory in 128-byte chunks. */
|
||||
L(zloopstart):
|
||||
clrlwi r5,r5,27
|
||||
mtcrf 0x02,r7
|
||||
srwi. r0,r7,7
|
||||
mtctr r0
|
||||
li r7,0x20
|
||||
li r8,-0x40
|
||||
cmplwi cr1,r5,16 # 8
|
||||
bf 26,L(z0)
|
||||
dcbz 0,r6
|
||||
addi r6,r6,0x20
|
||||
L(z0): li r9,-0x20
|
||||
bf 25,L(z1)
|
||||
dcbz 0,r6
|
||||
dcbz r7,r6
|
||||
addi r6,r6,0x40 # 16
|
||||
L(z1): cmplwi cr5,r5,0
|
||||
clrlwi rLEN, rLEN, 27
|
||||
mtcrf 0x02, rALIGN
|
||||
srwi. rTMP, rALIGN, 7
|
||||
mtctr rTMP
|
||||
li rPOS32, 0x20
|
||||
li rNEG64, -0x40
|
||||
cmplwi cr1, rLEN, 16 /* 8 */
|
||||
bf 26, L(z0)
|
||||
dcbz 0, rMEMP
|
||||
addi rMEMP, rMEMP, 0x20
|
||||
L(z0): li rNEG32, -0x20
|
||||
bf 25, L(z1)
|
||||
dcbz 0, rMEMP
|
||||
dcbz rPOS32, rMEMP
|
||||
addi rMEMP, rMEMP, 0x40 /* 16 */
|
||||
L(z1): cmplwi cr5, rLEN, 0
|
||||
beq L(medium)
|
||||
L(zloop):
|
||||
dcbz 0,r6
|
||||
dcbz r7,r6
|
||||
addi r6,r6,0x80
|
||||
dcbz r8,r6
|
||||
dcbz r9,r6
|
||||
dcbz 0, rMEMP
|
||||
dcbz rPOS32, rMEMP
|
||||
addi rMEMP, rMEMP, 0x80
|
||||
dcbz rNEG64, rMEMP
|
||||
dcbz rNEG32, rMEMP
|
||||
bdnz L(zloop)
|
||||
beqlr cr5
|
||||
b L(medium_tail2)
|
||||
@ -144,56 +155,56 @@ L(zloop):
|
||||
.align 5
|
||||
L(small):
|
||||
/* Memset of 4 bytes or less. */
|
||||
cmplwi cr5,r5,1
|
||||
cmplwi cr1,r5,3
|
||||
cmplwi cr5, rLEN, 1
|
||||
cmplwi cr1, rLEN, 3
|
||||
bltlr cr5
|
||||
stb r4,0(r6)
|
||||
stb rCHR, 0(rMEMP)
|
||||
beqlr cr5
|
||||
nop
|
||||
stb r4,1(r6)
|
||||
stb rCHR, 1(rMEMP)
|
||||
bltlr cr1
|
||||
stb r4,2(r6)
|
||||
stb rCHR, 2(rMEMP)
|
||||
beqlr cr1
|
||||
nop
|
||||
stb r4,3(r6)
|
||||
stb rCHR, 3(rMEMP)
|
||||
blr
|
||||
|
||||
/* Memset of 0-31 bytes. */
|
||||
.align 5
|
||||
L(medium):
|
||||
cmplwi cr1,r5,16
|
||||
cmplwi cr1, rLEN, 16
|
||||
L(medium_tail2):
|
||||
add r6,r6,r5
|
||||
add rMEMP, rMEMP, rLEN
|
||||
L(medium_tail):
|
||||
bt- 31,L(medium_31t)
|
||||
bt- 30,L(medium_30t)
|
||||
bt- 31, L(medium_31t)
|
||||
bt- 30, L(medium_30t)
|
||||
L(medium_30f):
|
||||
bt- 29,L(medium_29t)
|
||||
bt- 29, L(medium_29t)
|
||||
L(medium_29f):
|
||||
bge- cr1,L(medium_27t)
|
||||
bge- cr1, L(medium_27t)
|
||||
bflr- 28
|
||||
stw r4,-4(r6) # 8th instruction from .align
|
||||
stw r4,-8(r6)
|
||||
stw rCHR, -4(rMEMP) /* 8th instruction from .align */
|
||||
stw rCHR, -8(rMEMP)
|
||||
blr
|
||||
|
||||
L(medium_31t):
|
||||
stbu r4,-1(r6)
|
||||
bf- 30,L(medium_30f)
|
||||
stbu rCHR, -1(rMEMP)
|
||||
bf- 30, L(medium_30f)
|
||||
L(medium_30t):
|
||||
sthu r4,-2(r6)
|
||||
bf- 29,L(medium_29f)
|
||||
sthu rCHR, -2(rMEMP)
|
||||
bf- 29, L(medium_29f)
|
||||
L(medium_29t):
|
||||
stwu r4,-4(r6)
|
||||
blt- cr1,L(medium_27f) # 16th instruction from .align
|
||||
stwu rCHR, -4(rMEMP)
|
||||
blt- cr1, L(medium_27f) /* 16th instruction from .align */
|
||||
L(medium_27t):
|
||||
stw r4,-4(r6)
|
||||
stw r4,-8(r6)
|
||||
stw r4,-12(r6)
|
||||
stwu r4,-16(r6)
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
stw rCHR, -12(rMEMP)
|
||||
stwu rCHR, -16(rMEMP)
|
||||
L(medium_27f):
|
||||
bflr- 28
|
||||
L(medium_28t):
|
||||
stw r4,-4(r6)
|
||||
stw r4,-8(r6)
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
blr
|
||||
END(memset)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized strchr implementation for PowerPC.
|
||||
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -21,91 +21,95 @@
|
||||
|
||||
/* See strlen.S for comments on how this works. */
|
||||
|
||||
/* char * [r3] strchr (const char *s [r3] , int c [r4] )
|
||||
/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
|
||||
|
||||
r0: a temporary
|
||||
r3: our return result.
|
||||
r4: byte we're looking for, spread over the whole word
|
||||
r5: the current word
|
||||
r6: the constant 0xfefefeff (-0x01010101)
|
||||
r7: the constant 0x7f7f7f7f
|
||||
r8: pointer to the current word.
|
||||
r9: a temporary
|
||||
r10: the number of bits we should ignore in the first word
|
||||
r11: a mask with the bits to ignore set to 0
|
||||
r12: a temporary */
|
||||
ENTRY(strchr)
|
||||
rlwimi r4,r4,8,16,23
|
||||
li r11,-1
|
||||
rlwimi r4,r4,16,0,15
|
||||
lis r6,0xfeff
|
||||
lis r7,0x7f7f
|
||||
clrrwi r8,r3,2
|
||||
addi r7,r7,0x7f7f
|
||||
addi r6,r6,0xfffffeff
|
||||
rlwinm r10,r3,3,27,28
|
||||
ENTRY (strchr)
|
||||
|
||||
#define rTMP1 r0
|
||||
#define rRTN r3 /* outgoing result */
|
||||
#define rSTRin r3 /* incoming string arg */
|
||||
#define rCHR r4 /* byte we're looking for, spread over the whole word */
|
||||
#define rCLZB rCHR /* leading zero byte count */
|
||||
#define rWORD r5 /* the current word */
|
||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||
#define rSTR r8 /* current word pointer */
|
||||
#define rTMP2 r9
|
||||
#define rIGN r10 /* number of bits we should ignore in the first word */
|
||||
#define rMASK r11 /* mask with the bits to ignore set to 0 */
|
||||
#define rTMP3 r12
|
||||
|
||||
rlwimi rCHR, rCHR, 8, 16, 23
|
||||
li rMASK, -1
|
||||
rlwimi rCHR, rCHR, 16, 0, 15
|
||||
lis rFEFE, -0x101
|
||||
lis r7F7F, 0x7f7f
|
||||
clrrwi rSTR, rSTRin, 2
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
rlwinm rIGN, rSTRin, 3, 27, 28
|
||||
/* Test the first (partial?) word. */
|
||||
lwz r5,0(r8)
|
||||
srw r11,r11,r10
|
||||
orc r5,r5,r11
|
||||
add r0,r6,r5
|
||||
nor r9,r7,r5
|
||||
and. r0,r0,r9
|
||||
xor r12,r4,r5
|
||||
orc r12,r12,r11
|
||||
lwz rWORD, 0(rSTR)
|
||||
srw rMASK, rMASK, rIGN
|
||||
orc rWORD, rWORD, rMASK
|
||||
add rTMP1, rFEFE, rWORD
|
||||
nor rTMP2, r7F7F, rWORD
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
xor rTMP3, rCHR, rWORD
|
||||
orc rTMP3, rTMP3, rMASK
|
||||
b L(loopentry)
|
||||
|
||||
/* The loop. */
|
||||
|
||||
L(loop):lwzu r5,4(r8)
|
||||
and. r0,r0,r9
|
||||
L(loop):lwzu rWORD, 4(rSTR)
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
/* Test for 0. */
|
||||
add r0,r6,r5
|
||||
nor r9,r7,r5
|
||||
add rTMP1, rFEFE, rWORD
|
||||
nor rTMP2, r7F7F, rWORD
|
||||
bne L(foundit)
|
||||
and. r0,r0,r9
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
/* Start test for the bytes we're looking for. */
|
||||
xor r12,r4,r5
|
||||
xor rTMP3, rCHR, rWORD
|
||||
L(loopentry):
|
||||
add r0,r6,r12
|
||||
nor r9,r7,r12
|
||||
add rTMP1, rFEFE, rTMP3
|
||||
nor rTMP2, r7F7F, rTMP3
|
||||
beq L(loop)
|
||||
/* There is a zero byte in the word, but may also be a matching byte (either
|
||||
before or after the zero byte). In fact, we may be looking for a
|
||||
zero byte, in which case we return a match. We guess that this hasn't
|
||||
happened, though. */
|
||||
L(missed):
|
||||
and. r0,r0,r9
|
||||
li r3,0
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
li rRTN, 0
|
||||
beqlr
|
||||
/* It did happen. Decide which one was first...
|
||||
I'm not sure if this is actually faster than a sequence of
|
||||
rotates, compares, and branches (we use it anyway because it's shorter). */
|
||||
and r6,r7,r5
|
||||
or r11,r7,r5
|
||||
and r0,r7,r12
|
||||
or r10,r7,r12
|
||||
add r6,r6,r7
|
||||
add r0,r0,r7
|
||||
nor r5,r11,r6
|
||||
nor r9,r10,r0
|
||||
cmplw r5,r9
|
||||
and rFEFE, r7F7F, rWORD
|
||||
or rMASK, r7F7F, rWORD
|
||||
and rTMP1, r7F7F, rTMP3
|
||||
or rIGN, r7F7F, rTMP3
|
||||
add rFEFE, rFEFE, r7F7F
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor rWORD, rMASK, rFEFE
|
||||
nor rTMP2, rIGN, rTMP1
|
||||
cmplw rWORD, rTMP2
|
||||
bgtlr
|
||||
cntlzw r4,r9
|
||||
srwi r4,r4,3
|
||||
add r3,r8,r4
|
||||
cntlzw rCLZB, rTMP2
|
||||
srwi rCLZB, rCLZB, 3
|
||||
add rRTN, rSTR, rCLZB
|
||||
blr
|
||||
|
||||
L(foundit):
|
||||
and r0,r7,r12
|
||||
or r10,r7,r12
|
||||
add r0,r0,r7
|
||||
nor r9,r10,r0
|
||||
cntlzw r4,r9
|
||||
subi r8,r8,4
|
||||
srwi r4,r4,3
|
||||
add r3,r8,r4
|
||||
and rTMP1, r7F7F, rTMP3
|
||||
or rIGN, r7F7F, rTMP3
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor rTMP2, rIGN, rTMP1
|
||||
cntlzw rCLZB, rTMP2
|
||||
subi rSTR, rSTR, 4
|
||||
srwi rCLZB, rCLZB, 3
|
||||
add rRTN, rSTR, rCLZB
|
||||
blr
|
||||
END(strchr)
|
||||
END (strchr)
|
||||
|
||||
weak_alias(strchr,index)
|
||||
weak_alias(strchr, index)
|
||||
|
@ -21,95 +21,93 @@
|
||||
|
||||
/* See strlen.S for comments on how the end-of-string testing works. */
|
||||
|
||||
EALIGN(strcmp,4,0)
|
||||
/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */
|
||||
/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */
|
||||
|
||||
/* General register assignments:
|
||||
r0: temporary
|
||||
r3: pointer to previous word in s1
|
||||
r4: pointer to previous word in s2
|
||||
r5: current word from s1
|
||||
r6: current word from s2
|
||||
r7: 0xfefefeff
|
||||
r8: 0x7f7f7f7f
|
||||
r9: ~(word in s1 | 0x7f7f7f7f) */
|
||||
EALIGN (strcmp, 4, 0)
|
||||
|
||||
/* Register assignments in the prologue:
|
||||
r10: low 2 bits of p2-p1
|
||||
r11: mask to orc with r5/r6 */
|
||||
#define rTMP r0
|
||||
#define rRTN r3 /* return value */
|
||||
#define rSTR1 r3 /* first string arg */
|
||||
#define rSTR2 r4 /* second string arg */
|
||||
#define rWORD1 r5 /* current word in s1 */
|
||||
#define rWORD2 r6 /* current word in s2 */
|
||||
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r8 /* constant 0x7f7f7f7f */
|
||||
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
|
||||
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
|
||||
|
||||
or r0,r4,r3
|
||||
clrlwi. r0,r0,30
|
||||
lis r7,0xfeff
|
||||
or rTMP, rSTR2, rSTR1
|
||||
clrlwi. rTMP, rTMP, 30
|
||||
lis rFEFE, -0x101
|
||||
bne L(unaligned)
|
||||
|
||||
lwz r5,0(r3)
|
||||
lwz r6,0(r4)
|
||||
lis r8,0x7f7f
|
||||
addi r7,r7,-0x101
|
||||
addi r8,r8,0x7f7f
|
||||
lwz rWORD1, 0(rSTR1)
|
||||
lwz rWORD2, 0(rSTR2)
|
||||
lis r7F7F, 0x7f7f
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
b L(g1)
|
||||
|
||||
L(g0): lwzu r5,4(r3)
|
||||
bne cr1,L(different)
|
||||
lwzu r6,4(r4)
|
||||
L(g1): add r0,r7,r5
|
||||
nor r9,r8,r5
|
||||
and. r0,r0,r9
|
||||
cmpw cr1,r5,r6
|
||||
L(g0): lwzu rWORD1, 4(rSTR1)
|
||||
bne cr1, L(different)
|
||||
lwzu rWORD2, 4(rSTR2)
|
||||
L(g1): add rTMP, rFEFE, rWORD1
|
||||
nor rNEG, r7F7F, rWORD1
|
||||
and. rTMP, rTMP, rNEG
|
||||
cmpw cr1, rWORD1, rWORD2
|
||||
beq+ L(g0)
|
||||
L(endstring):
|
||||
/* OK. We've hit the end of the string. We need to be careful that
|
||||
we don't compare two strings as different because of gunk beyond
|
||||
the end of the strings... */
|
||||
and r0,r8,r5
|
||||
beq cr1,L(equal)
|
||||
add r0,r0,r8
|
||||
xor. r10,r5,r6
|
||||
andc r9,r9,r0
|
||||
and rTMP, r7F7F, rWORD1
|
||||
beq cr1, L(equal)
|
||||
add rTMP, rTMP, r7F7F
|
||||
xor. rBITDIF, rWORD1, rWORD2
|
||||
andc rNEG, rNEG, rTMP
|
||||
blt- L(highbit)
|
||||
cntlzw r10,r10
|
||||
cntlzw r9,r9
|
||||
addi r9,r9,7
|
||||
cmpw cr1,r9,r10
|
||||
sub r3,r5,r6
|
||||
cntlzw rBITDIF, rBITDIF
|
||||
cntlzw rNEG, rNEG
|
||||
addi rNEG, rNEG, 7
|
||||
cmpw cr1, rNEG, rBITDIF
|
||||
sub rRTN, rWORD1, rWORD2
|
||||
bgelr+ cr1
|
||||
L(equal):
|
||||
li r3,0
|
||||
li rRTN, 0
|
||||
blr
|
||||
|
||||
L(different):
|
||||
lwz r5,-4(r3)
|
||||
xor. r10,r5,r6
|
||||
sub r3,r5,r6
|
||||
lwz rWORD1, -4(rSTR1)
|
||||
xor. rBITDIF, rWORD1, rWORD2
|
||||
sub rRTN, rWORD1, rWORD2
|
||||
bgelr+
|
||||
L(highbit):
|
||||
ori r3,r6,1
|
||||
ori rRTN, rWORD2, 1
|
||||
blr
|
||||
|
||||
|
||||
/* Oh well. In this case, we just do a byte-by-byte comparison. */
|
||||
.align 4
|
||||
L(unaligned):
|
||||
lbz r5,0(r3)
|
||||
lbz r6,0(r4)
|
||||
lbz rWORD1, 0(rSTR1)
|
||||
lbz rWORD2, 0(rSTR2)
|
||||
b L(u1)
|
||||
|
||||
L(u0): lbzu r5,1(r3)
|
||||
L(u0): lbzu rWORD1, 1(rSTR1)
|
||||
bne- L(u4)
|
||||
lbzu r6,1(r4)
|
||||
L(u1): cmpwi cr1,r5,0
|
||||
beq- cr1,L(u3)
|
||||
cmpw r5,r6
|
||||
lbzu rWORD2, 1(rSTR2)
|
||||
L(u1): cmpwi cr1, rWORD1, 0
|
||||
beq- cr1, L(u3)
|
||||
cmpw rWORD1, rWORD2
|
||||
bne- L(u3)
|
||||
lbzu r5,1(r3)
|
||||
lbzu r6,1(r4)
|
||||
cmpwi cr1,r5,0
|
||||
cmpw r5,r6
|
||||
bne+ cr1,L(u0)
|
||||
L(u3): sub r3,r5,r6
|
||||
lbzu rWORD1, 1(rSTR1)
|
||||
lbzu rWORD2, 1(rSTR2)
|
||||
cmpwi cr1, rWORD1, 0
|
||||
cmpw rWORD1, rWORD2
|
||||
bne+ cr1, L(u0)
|
||||
L(u3): sub rRTN, rWORD1, rWORD2
|
||||
blr
|
||||
L(u4): lbz r5,-1(r3)
|
||||
sub r3,r5,r6
|
||||
L(u4): lbz rWORD1, -1(rSTR1)
|
||||
sub rRTN, rWORD1, rWORD2
|
||||
blr
|
||||
END(strcmp)
|
||||
|
@ -21,80 +21,80 @@
|
||||
|
||||
/* See strlen.S for comments on how the end-of-string testing works. */
|
||||
|
||||
EALIGN(strcpy,4,0)
|
||||
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
||||
|
||||
/* General register assignments:
|
||||
r0: temporary
|
||||
r3: saved `dest'
|
||||
r4: pointer to previous word in src
|
||||
r5: pointer to previous word in dest
|
||||
r6: current word from src
|
||||
r7: 0xfefefeff
|
||||
r8: 0x7f7f7f7f
|
||||
r9: ~(word in src | 0x7f7f7f7f)
|
||||
r10: alternate word from src. */
|
||||
EALIGN(strcpy, 4, 0)
|
||||
|
||||
or r0,r4,r3
|
||||
clrlwi. r0,r0,30
|
||||
addi r5,r3,-4
|
||||
#define rTMP r0
|
||||
#define rRTN r3 /* incoming DEST arg preserved as result */
|
||||
#define rSRC r4 /* pointer to previous word in src */
|
||||
#define rDEST r5 /* pointer to previous word in dest */
|
||||
#define rWORD r6 /* current word from src */
|
||||
#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r8 /* constant 0x7f7f7f7f */
|
||||
#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */
|
||||
#define rALT r10 /* alternate word from src */
|
||||
|
||||
or rTMP, rSRC, rRTN
|
||||
clrlwi. rTMP, rTMP, 30
|
||||
addi rDEST, rRTN, -4
|
||||
bne L(unaligned)
|
||||
|
||||
lis r7,0xfeff
|
||||
lis r8,0x7f7f
|
||||
lwz r6,0(r4)
|
||||
addi r7,r7,-0x101
|
||||
addi r8,r8,0x7f7f
|
||||
lis rFEFE, -0x101
|
||||
lis r7F7F, 0x7f7f
|
||||
lwz rWORD, 0(rSRC)
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
b L(g2)
|
||||
|
||||
L(g0): lwzu r10,4(r4)
|
||||
stwu r6,4(r5)
|
||||
add r0,r7,r10
|
||||
nor r9,r8,r10
|
||||
and. r0,r0,r9
|
||||
L(g0): lwzu rALT, 4(rSRC)
|
||||
stwu rWORD, 4(rDEST)
|
||||
add rTMP, rFEFE, rALT
|
||||
nor rNEG, r7F7F, rALT
|
||||
and. rTMP, rTMP, rNEG
|
||||
bne- L(g1)
|
||||
lwzu r6,4(r4)
|
||||
stwu r10,4(r5)
|
||||
L(g2): add r0,r7,r6
|
||||
nor r9,r8,r6
|
||||
and. r0,r0,r9
|
||||
lwzu rWORD, 4(rSRC)
|
||||
stwu rALT, 4(rDEST)
|
||||
L(g2): add rTMP, rFEFE, rWORD
|
||||
nor rNEG, r7F7F, rWORD
|
||||
and. rTMP, rTMP, rNEG
|
||||
beq+ L(g0)
|
||||
|
||||
mr r10,r6
|
||||
mr rALT, rWORD
|
||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||
L(g1): rlwinm. r0,r10,8,24,31
|
||||
stb r0,4(r5)
|
||||
L(g1): rlwinm. rTMP, rALT, 8, 24, 31
|
||||
stb rTMP, 4(rDEST)
|
||||
beqlr-
|
||||
rlwinm. r0,r10,16,24,31
|
||||
stb r0,5(r5)
|
||||
rlwinm. rTMP, rALT, 16, 24, 31
|
||||
stb rTMP, 5(rDEST)
|
||||
beqlr-
|
||||
rlwinm. r0,r10,24,24,31
|
||||
stb r0,6(r5)
|
||||
rlwinm. rTMP, rALT, 24, 24, 31
|
||||
stb rTMP, 6(rDEST)
|
||||
beqlr-
|
||||
stb r10,7(r5)
|
||||
stb rALT, 7(rDEST)
|
||||
blr
|
||||
|
||||
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
||||
.align 4
|
||||
nop
|
||||
L(unaligned):
|
||||
lbz r6,0(r4)
|
||||
addi r5,r3,-1
|
||||
cmpwi r6,0
|
||||
lbz rWORD, 0(rSRC)
|
||||
addi rDEST, rRTN, -1
|
||||
cmpwi rWORD, 0
|
||||
beq- L(u2)
|
||||
|
||||
L(u0): lbzu r10,1(r4)
|
||||
stbu r6,1(r5)
|
||||
cmpwi r10,0
|
||||
L(u0): lbzu rALT, 1(rSRC)
|
||||
stbu rWORD, 1(rDEST)
|
||||
cmpwi rALT, 0
|
||||
beq- L(u1)
|
||||
nop /* Let 601 load start of loop. */
|
||||
lbzu r6,1(r4)
|
||||
stbu r10,1(r5)
|
||||
cmpwi r6,0
|
||||
lbzu rWORD, 1(rSRC)
|
||||
stbu rALT, 1(rDEST)
|
||||
cmpwi rWORD, 0
|
||||
bne+ L(u0)
|
||||
L(u2): stb r6,1(r5)
|
||||
L(u2): stb rWORD, 1(rDEST)
|
||||
blr
|
||||
L(u1): stb r10,1(r5)
|
||||
L(u1): stb rALT, 1(rDEST)
|
||||
blr
|
||||
|
||||
END(strcpy)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized strlen implementation for PowerPC.
|
||||
Copyright (C) 1997, 1999 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -69,76 +69,86 @@
|
||||
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
|
||||
them, the others we must save. */
|
||||
|
||||
ENTRY(strlen)
|
||||
/* On entry, r3 points to the string, and it's left that way.
|
||||
We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
|
||||
r4 is used to keep the current index into the string; r5 holds
|
||||
the number of padding bits we prepend to the string to make it
|
||||
start at a word boundary. r8 holds the 'current' word.
|
||||
r9-12 are temporaries. r0 is used as a temporary and for discarded
|
||||
results. */
|
||||
clrrwi r4,r3,2
|
||||
lis r7,0x7f7f
|
||||
rlwinm r5,r3,3,27,28
|
||||
lwz r8,0(r4)
|
||||
li r9,-1
|
||||
addi r7,r7,0x7f7f
|
||||
/* int [r3] strlen (char *s [r3]) */
|
||||
|
||||
ENTRY (strlen)
|
||||
|
||||
#define rTMP1 r0
|
||||
#define rRTN r3 /* incoming STR arg, outgoing result */
|
||||
#define rSTR r4 /* current string position */
|
||||
#define rPADN r5 /* number of padding bits we prepend to the
|
||||
string to make it start at a word boundary */
|
||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||
#define rWORD1 r8 /* current string word */
|
||||
#define rWORD2 r9 /* next string word */
|
||||
#define rMASK r9 /* mask for first string word */
|
||||
#define rTMP2 r10
|
||||
#define rTMP3 r11
|
||||
#define rTMP4 r12
|
||||
|
||||
clrrwi rSTR, rRTN, 2
|
||||
lis r7F7F, 0x7f7f
|
||||
rlwinm rPADN, rRTN, 3, 27, 28
|
||||
lwz rWORD1, 0(rSTR)
|
||||
li rMASK, -1
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
/* That's the setup done, now do the first pair of words.
|
||||
We make an exception and use method (2) on the first two words, to reduce
|
||||
overhead. */
|
||||
srw r9,r9,r5
|
||||
and r0,r7,r8
|
||||
or r10,r7,r8
|
||||
add r0,r0,r7
|
||||
nor r0,r10,r0
|
||||
and. r8,r0,r9
|
||||
mtcrf 0x01,r3
|
||||
srw rMASK, rMASK, rPADN
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
or rTMP2, r7F7F, rWORD1
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor rTMP1, rTMP2, rTMP1
|
||||
and. rWORD1, rTMP1, rMASK
|
||||
mtcrf 0x01, rRTN
|
||||
bne L(done0)
|
||||
lis r6,0xfeff
|
||||
addi r6,r6,-0x101
|
||||
lis rFEFE, -0x101
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
/* Are we now aligned to a doubleword boundary? */
|
||||
bt 29,L(loop)
|
||||
bt 29, L(loop)
|
||||
|
||||
/* Handle second word of pair. */
|
||||
lwzu r8,4(r4)
|
||||
and r0,r7,r8
|
||||
or r10,r7,r8
|
||||
add r0,r0,r7
|
||||
nor. r8,r10,r0
|
||||
lwzu rWORD1, 4(rSTR)
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
or rTMP2, r7F7F, rWORD1
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor. rWORD1, rTMP2, rTMP1
|
||||
bne L(done0)
|
||||
|
||||
/* The loop. */
|
||||
|
||||
L(loop):
|
||||
lwz r8,4(r4)
|
||||
lwzu r9,8(r4)
|
||||
add r0,r6,r8
|
||||
nor r10,r7,r8
|
||||
and. r0,r0,r10
|
||||
add r11,r6,r9
|
||||
nor r12,r7,r9
|
||||
lwz rWORD1, 4(rSTR)
|
||||
lwzu rWORD2, 8(rSTR)
|
||||
add rTMP1, rFEFE, rWORD1
|
||||
nor rTMP2, r7F7F, rWORD1
|
||||
and. rTMP1, rTMP1, rTMP2
|
||||
add rTMP3, rFEFE, rWORD2
|
||||
nor rTMP4, r7F7F, rWORD2
|
||||
bne L(done1)
|
||||
and. r0,r11,r12
|
||||
and. rTMP1, rTMP3, rTMP4
|
||||
beq L(loop)
|
||||
|
||||
and r0,r7,r9
|
||||
add r0,r0,r7
|
||||
andc r8,r12,r0
|
||||
and rTMP1, r7F7F, rWORD2
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
andc rWORD1, rTMP4, rTMP1
|
||||
b L(done0)
|
||||
|
||||
L(done1):
|
||||
and r0,r7,r8
|
||||
subi r4,r4,4
|
||||
add r0,r0,r7
|
||||
andc r8,r10,r0
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
subi rSTR, rSTR, 4
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
andc rWORD1, rTMP2, rTMP1
|
||||
|
||||
/* When we get to here, r4 points to the first word in the string that
|
||||
contains a zero byte, and the most significant set bit in r8 is in that
|
||||
/* When we get to here, rSTR points to the first word in the string that
|
||||
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
||||
byte. */
|
||||
L(done0):
|
||||
cntlzw r11,r8
|
||||
subf r0,r3,r4
|
||||
srwi r11,r11,3
|
||||
add r3,r0,r11
|
||||
cntlzw rTMP3, rWORD1
|
||||
subf rTMP1, rRTN, rSTR
|
||||
srwi rTMP3, rTMP3, 3
|
||||
add rRTN, rTMP1, rTMP3
|
||||
blr
|
||||
END(strlen)
|
||||
END (strlen)
|
||||
|
Loading…
Reference in New Issue
Block a user