glibc/sysdeps/x86_64/multiarch/strcpy-ssse3.S

3552 lines
62 KiB
ArmAsm
Raw Normal View History

/* strcpy with SSSE3
Copyright (C) 2011-2013 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifndef NOT_IN_libc
/* Entry point and short-string fast path.  Everything down to the matching
   endif is assembled only when USE_AS_STRCAT is not defined.  */
# ifndef USE_AS_STRCAT
# include <sysdep.h>
# ifndef STRCPY
/* Default symbol name, used when the includer did not define STRCPY.  */
# define STRCPY __strcpy_ssse3
# endif
.section .text.ssse3,"ax",@progbits
/* Register roles established below:
     rcx = copy of %rsi (presumably the source pointer, SysV 2nd argument)
     rdx = copy of %rdi (presumably the destination pointer, SysV 1st argument)
     r8  = copy of the incoming %rdx, USE_AS_STRNCPY only (presumably the
	   byte count n).  */
ENTRY (STRCPY)
2011-12-24 01:02:15 +08:00
mov %rsi, %rcx
# ifdef USE_AS_STRNCPY
/* Save the third argument before %rdx is overwritten just below.  */
mov %rdx, %r8
# endif
mov %rdi, %rdx
# ifdef USE_AS_STRNCPY
/* r8 == 0 branches to L(Exit0); r8 <= 8 (unsigned) takes a dedicated path.
   Both targets are defined outside this section.  */
test %r8, %r8
jz L(Exit0)
cmp $8, %r8
jbe L(StrncpyExit8Bytes)
2011-12-24 01:02:15 +08:00
# endif
/* Probe source bytes 0..7 one at a time; a NUL at offset k branches to
   L(Exit<k+1>).  */
cmpb $0, (%rcx)
jz L(Exit1)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmpb $0, 6(%rcx)
jz L(Exit7)
cmpb $0, 7(%rcx)
jz L(Exit8)
2011-12-24 01:02:15 +08:00
# ifdef USE_AS_STRNCPY
/* No NUL in the first 8 bytes and r8 < 16 (unsigned): leave the byte probes.  */
cmp $16, %r8
jb L(StrncpyExit15Bytes)
2011-12-24 01:02:15 +08:00
# endif
/* Probe source bytes 8..14 the same way.  */
cmpb $0, 8(%rcx)
jz L(Exit9)
cmpb $0, 9(%rcx)
jz L(Exit10)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmpb $0, 13(%rcx)
jz L(Exit14)
cmpb $0, 14(%rcx)
jz L(Exit15)
2011-12-24 01:02:15 +08:00
# ifdef USE_AS_STRNCPY
/* r8 == 16 with no NUL in bytes 0..14: go to L(Exit16) without reading
   byte 15.  */
cmp $16, %r8
je L(Exit16)
2011-12-24 01:02:15 +08:00
# endif
cmpb $0, 15(%rcx)
jz L(Exit16)
# endif
/* Copy the first 16 bytes, align the destination pointer rdx to 16 bytes,
   advance rcx by the same distance, and dispatch on the resulting source
   misalignment (rcx & 15) to L(Align16Both) or L(Shl1)..L(Shl15).  */
# ifdef USE_AS_STRNCPY
mov %rcx, %rsi
sub $16, %r8
and $0xf, %rsi
2011-12-24 01:02:15 +08:00
/* r8 now becomes (previous r8) - 16 + (rcx & 15).  */
add %rsi, %r8
# endif
/* rsi = first 16-byte boundary strictly above rcx.  */
lea 16(%rcx), %rsi
and $-16, %rsi
/* xmm0 = 0, the reference value for the NUL-byte compares.  */
pxor %xmm0, %xmm0
/* Copy source bytes 0..15 as two 8-byte moves through r9, interleaved with
   the NUL check of the aligned 16-byte block at rsi.  */
mov (%rcx), %r9
mov %r9, (%rdx)
pcmpeqb (%rsi), %xmm0
mov 8(%rcx), %r9
mov %r9, 8(%rdx)
/* convert byte mask in xmm0 to bit mask */
pmovmskb %xmm0, %rax
/* rsi = distance from rcx to that aligned block (1..16).  */
sub %rcx, %rsi
# ifdef USE_AS_STRNCPY
/* jbe is taken when r8 was <= 16 (unsigned) before the subtraction.  */
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* No NUL in that block, so every byte of xmm0 is zero again.
   rdx = next 16-byte boundary above the old rdx;
   rax = old rdx - new rdx (a value in -16..-1).  */
mov %rdx, %rax
lea 16(%rdx), %rdx
and $-16, %rdx
sub %rdx, %rax
# ifdef USE_AS_STRNCPY
/* esi keeps only bit 31 of (rsi + rax - 1), i.e. its sign as a 32-bit value.
   When that bit is clear, 16 is added back to r8; otherwise r8 is left
   alone.  */
add %rax, %rsi
lea -1(%rsi), %rsi
and $1<<31, %esi
test %rsi, %rsi
jnz L(ContinueCopy)
lea 16(%r8), %r8
L(ContinueCopy):
# endif
/* Advance rcx by the same distance rdx moved (rax is negative).  */
sub %rax, %rcx
/* rax = rcx & 15.  The mov that clears rsi does not modify the flags, so the
   jz below still tests the result of this and; it must stay a mov.  */
mov %rcx, %rax
and $0xf, %rax
mov $0, %rsi
2011-12-24 01:02:15 +08:00
/* case: rcx_offset == rdx_offset */
jz L(Align16Both)
/* rax is 1..15 here: compare chain selecting L(Shl<rax>).  */
cmp $8, %rax
jae L(ShlHigh8)
cmp $1, %rax
je L(Shl1)
cmp $2, %rax
je L(Shl2)
cmp $3, %rax
je L(Shl3)
cmp $4, %rax
je L(Shl4)
cmp $5, %rax
je L(Shl5)
cmp $6, %rax
je L(Shl6)
jmp L(Shl7)
L(ShlHigh8):
/* This je still uses the flags of "cmp $8, %rax" above (jae left them
   untouched).  */
je L(Shl8)
cmp $9, %rax
je L(Shl9)
cmp $10, %rax
je L(Shl10)
cmp $11, %rax
je L(Shl11)
cmp $12, %rax
je L(Shl12)
cmp $13, %rax
je L(Shl13)
cmp $14, %rax
je L(Shl14)
jmp L(Shl15)
/* L(Align16Both): reached when (rcx & 15) == 0 after rdx was aligned, so all
   movaps loads and stores below are 16-byte aligned.  rsi starts at 0 and is
   the running byte offset.  Six unrolled steps each load the next 16 source
   bytes, store the previously loaded block, and test the new block for a NUL
   against xmm0.  */
L(Align16Both):
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
/* rax = bit mask of NUL bytes in xmm2.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* Steps 2..6 repeat the pattern: load 16(%rcx, %rsi), store the previous
   block at (%rdx, %rsi), test, advance rsi by 16.  */
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* No NUL so far: store the last block, then set up the 64-byte loop.
   New rcx = (rcx + rsi + 16) rounded down to a 64-byte boundary;
   rax = old rcx - new rcx; subtracting rax from rdx moves rdx by the same
   distance as rcx.  */
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
and $-0x40, %rcx
sub %rcx, %rax
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
2011-12-24 01:02:15 +08:00
/* r8 = r8 + rax + 112.  */
lea 112(%r8, %rax), %r8
# endif
/* rsi = -64 on entry to the loop that follows.  */
mov $-0x40, %rsi
2011-12-24 01:02:15 +08:00
/* L(Aligned64Loop): main loop for the mutually aligned case.  Each iteration
   loads 64 source bytes from rcx with aligned loads, checks all of them for
   a NUL at once, and stores them with aligned stores only if none was
   found.  */
.p2align 4
L(Aligned64Loop):
/* xmm4..xmm7 keep the four 16-byte blocks unmodified for the stores; xmm2
   and xmm3 are working copies that the pminub folds destroy.  */
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
movaps 32(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 48(%rcx), %xmm7
/* Byte-wise unsigned minimum over the four blocks: a result byte is zero
   whenever any of the four blocks has a zero in that byte position.  */
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
/* Compare against xmm0 (expected to be all zero here); rax != 0 means the
   64 bytes contain a NUL.  xmm0 itself is not modified.  */
pcmpeqb %xmm0, %xmm3
pmovmskb %xmm3, %rax
/* Pointers are advanced before the stores, hence the negative offsets
   below.  */
lea 64(%rdx), %rdx
lea 64(%rcx), %rcx
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeaveCase2OrCase3)
# endif
test %rax, %rax
jnz L(Aligned64Leave)
movaps %xmm4, -64(%rdx)
movaps %xmm5, -48(%rdx)
movaps %xmm6, -32(%rdx)
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
/* L(Aligned64Leave): entered from L(Aligned64Loop) when the 64 bytes held in
   xmm4..xmm7 contain a NUL and rdx/rcx were already advanced by 64.  The four
   blocks are tested in order; every block found NUL-free is stored, rsi is
   advanced by 16 per block passed, and control goes to L(CopyFrom1To16Bytes)
   (defined outside this section) with rax holding the NUL bit mask of the
   current block.  */
L(Aligned64Leave):
# ifdef USE_AS_STRNCPY
/* Give back 48 of the 64 bytes the loop charged to r8; 16 more are charged
   for each further block tested below.  */
lea 48(%r8), %r8
# endif
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
/* First block is NUL-free (so xmm0 is all zero again): test the second.  */
pcmpeqb %xmm5, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
movaps %xmm4, -64(%rdx)
/* lea does not change the flags set by test, so jnz still sees them.  */
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
/* The NUL was not in the first three blocks, so no test is made on the
   fourth: rax is computed and the jump is unconditional.  */
movaps %xmm6, -32(%rdx)
pcmpeqb %xmm7, %xmm0
# ifdef USE_AS_STRNCPY
lea -16(%r8), %r8
# endif
pmovmskb %xmm0, %rax
lea 16(%rsi), %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl1): copy path selected when (rcx & 15) == 1, so -1(%rcx) and 15(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $1 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl1):
/* xmm1 = aligned 16 bytes at rcx-1, xmm2 = the following aligned 16 bytes.  */
movaps -1(%rcx), %xmm1
movaps 15(%rcx), %xmm2
/* Also the re-entry point from L(Shl1LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl1Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
/* xmm2 = (xmm2:xmm1) >> 1 byte = the 16 source bytes starting at rcx.  */
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
2011-12-24 01:02:15 +08:00
palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit1Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl1LoopExit)
2011-12-24 01:02:15 +08:00
palignr $1, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 1-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 31(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -15(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -1(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl1LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 15(%rcx), %xmm2
movaps 31(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 47(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 63(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $1, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $1, %xmm3, %xmm4
jnz L(Shl1Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave1)
# endif
palignr $1, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $1, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl1LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 16 bytes starting at
   rcx-1 with unaligned moves, which covers the 15 bytes rcx..rcx+14, set
   rsi = 15 and continue at L(CopyFrom1To16Bytes), defined outside this
   section.  */
L(Shl1LoopExit):
2011-12-24 01:02:15 +08:00
movdqu -1(%rcx), %xmm1
mov $15, %rsi
2011-12-24 01:02:15 +08:00
movdqu %xmm1, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl2): copy path selected when (rcx & 15) == 2, so -2(%rcx) and 14(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $2 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl2):
/* xmm1 = aligned 16 bytes at rcx-2, xmm2 = the following aligned 16 bytes.  */
movaps -2(%rcx), %xmm1
movaps 14(%rcx), %xmm2
/* Also the re-entry point from L(Shl2LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl2Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
/* xmm2 = (xmm2:xmm1) >> 2 bytes = the 16 source bytes starting at rcx.  */
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
2011-12-24 01:02:15 +08:00
palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit2Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl2LoopExit)
2011-12-24 01:02:15 +08:00
palignr $2, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 2-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 30(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -14(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -2(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl2LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 14(%rcx), %xmm2
movaps 30(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 46(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 62(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $2, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $2, %xmm3, %xmm4
jnz L(Shl2Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave2)
# endif
palignr $2, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $2, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl2LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 16 bytes starting at
   rcx-2 with unaligned moves, which covers the 14 bytes rcx..rcx+13, set
   rsi = 14 and continue at L(CopyFrom1To16Bytes), defined outside this
   section.  */
L(Shl2LoopExit):
2011-12-24 01:02:15 +08:00
movdqu -2(%rcx), %xmm1
mov $14, %rsi
2011-12-24 01:02:15 +08:00
movdqu %xmm1, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl3): copy path selected when (rcx & 15) == 3, so -3(%rcx) and 13(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $3 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl3):
/* xmm1 = aligned 16 bytes at rcx-3, xmm2 = the following aligned 16 bytes.  */
movaps -3(%rcx), %xmm1
movaps 13(%rcx), %xmm2
/* Also the re-entry point from L(Shl3LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl3Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
/* xmm2 = (xmm2:xmm1) >> 3 bytes = the 16 source bytes starting at rcx.  */
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
2011-12-24 01:02:15 +08:00
palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl3LoopExit)
2011-12-24 01:02:15 +08:00
palignr $3, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 3-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 29(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -13(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -3(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl3LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 13(%rcx), %xmm2
movaps 29(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 45(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 61(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $3, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $3, %xmm3, %xmm4
jnz L(Shl3Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave3)
# endif
palignr $3, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $3, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl3LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 16 bytes starting at
   rcx-3 with unaligned moves, which covers the 13 bytes rcx..rcx+12, set
   rsi = 13 and continue at L(CopyFrom1To16Bytes), defined outside this
   section.  */
L(Shl3LoopExit):
2011-12-24 01:02:15 +08:00
movdqu -3(%rcx), %xmm1
mov $13, %rsi
2011-12-24 01:02:15 +08:00
movdqu %xmm1, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl4): copy path selected when (rcx & 15) == 4, so -4(%rcx) and 12(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $4 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl4):
/* xmm1 = aligned 16 bytes at rcx-4, xmm2 = the following aligned 16 bytes.  */
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
/* Also the re-entry point from L(Shl4LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl4Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
/* xmm2 = (xmm2:xmm1) >> 4 bytes = the 16 source bytes starting at rcx.  */
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
2011-12-24 01:02:15 +08:00
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl4LoopExit)
2011-12-24 01:02:15 +08:00
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 4-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -12(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -4(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl4LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $4, %xmm3, %xmm4
jnz L(Shl4Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave4)
# endif
palignr $4, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl4LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 16 bytes starting at
   rcx-4 with unaligned moves, which covers the 12 bytes rcx..rcx+11, set
   rsi = 12 and continue at L(CopyFrom1To16Bytes), defined outside this
   section.  */
L(Shl4LoopExit):
2011-12-24 01:02:15 +08:00
movdqu -4(%rcx), %xmm1
mov $12, %rsi
2011-12-24 01:02:15 +08:00
movdqu %xmm1, -4(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl5): copy path selected when (rcx & 15) == 5, so -5(%rcx) and 11(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $5 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl5):
/* xmm1 = aligned 16 bytes at rcx-5, xmm2 = the following aligned 16 bytes.  */
movaps -5(%rcx), %xmm1
movaps 11(%rcx), %xmm2
/* Also the re-entry point from L(Shl5LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl5Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
/* xmm2 = (xmm2:xmm1) >> 5 bytes = the 16 source bytes starting at rcx.  */
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
2011-12-24 01:02:15 +08:00
palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl5LoopExit)
2011-12-24 01:02:15 +08:00
palignr $5, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 5-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 27(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -11(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -5(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl5LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 11(%rcx), %xmm2
movaps 27(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 43(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 59(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $5, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $5, %xmm3, %xmm4
jnz L(Shl5Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave5)
# endif
palignr $5, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $5, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl5LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 16 bytes starting at
   rcx-5 with unaligned moves, which covers the 11 bytes rcx..rcx+10, set
   rsi = 11 and continue at L(CopyFrom1To16Bytes), defined outside this
   section.  */
L(Shl5LoopExit):
2011-12-24 01:02:15 +08:00
movdqu -5(%rcx), %xmm1
mov $11, %rsi
2011-12-24 01:02:15 +08:00
movdqu %xmm1, -5(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl6): copy path selected when (rcx & 15) == 6, so -6(%rcx) and 10(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $6 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl6):
/* xmm1 = aligned 16 bytes at rcx-6, xmm2 = the following aligned 16 bytes.  */
movaps -6(%rcx), %xmm1
movaps 10(%rcx), %xmm2
/* Also the re-entry point from L(Shl6LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl6Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
/* xmm2 = (xmm2:xmm1) >> 6 bytes = the 16 source bytes starting at rcx.  */
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
2011-12-24 01:02:15 +08:00
palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl6LoopExit)
2011-12-24 01:02:15 +08:00
palignr $6, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 6-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 26(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -10(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -6(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl6LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 10(%rcx), %xmm2
movaps 26(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 42(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 58(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $6, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $6, %xmm3, %xmm4
jnz L(Shl6Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave6)
# endif
palignr $6, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $6, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl6LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 10 bytes
   rcx..rcx+9 as an 8-byte move at offset 0 plus an overlapping 4-byte move
   at offset 6 (no byte below rcx is read), set rsi = 10 and continue at
   L(CopyFrom1To16Bytes), defined outside this section.  esi is used as a
   scratch register before rsi receives its final value.  */
L(Shl6LoopExit):
2011-12-24 01:02:15 +08:00
mov (%rcx), %r9
mov 6(%rcx), %esi
mov %r9, (%rdx)
mov %esi, 6(%rdx)
mov $10, %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl7): copy path selected when (rcx & 15) == 7, so -7(%rcx) and 9(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $7 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl7):
/* xmm1 = aligned 16 bytes at rcx-7, xmm2 = the following aligned 16 bytes.  */
movaps -7(%rcx), %xmm1
movaps 9(%rcx), %xmm2
/* Also the re-entry point from L(Shl7LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl7Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
/* xmm2 = (xmm2:xmm1) >> 7 bytes = the 16 source bytes starting at rcx.  */
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
2011-12-24 01:02:15 +08:00
palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl7LoopExit)
2011-12-24 01:02:15 +08:00
palignr $7, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 7-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 25(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -9(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -7(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl7LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 9(%rcx), %xmm2
movaps 25(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 41(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 57(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $7, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $7, %xmm3, %xmm4
jnz L(Shl7Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave7)
# endif
palignr $7, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $7, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl7LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 9 bytes
   rcx..rcx+8 as an 8-byte move at offset 0 plus an overlapping 4-byte move
   at offset 5 (no byte below rcx is read), set rsi = 9 and continue at
   L(CopyFrom1To16Bytes), defined outside this section.  esi is used as a
   scratch register before rsi receives its final value.  */
L(Shl7LoopExit):
2011-12-24 01:02:15 +08:00
mov (%rcx), %r9
mov 5(%rcx), %esi
mov %r9, (%rdx)
mov %esi, 5(%rdx)
mov $9, %rsi
jmp L(CopyFrom1To16Bytes)
/* L(Shl8): copy path selected when (rcx & 15) == 8, so -8(%rcx) and 8(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $8 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl8):
/* xmm1 = aligned 16 bytes at rcx-8, xmm2 = the following aligned 16 bytes.  */
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
/* Also the re-entry point from L(Shl8LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl8Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
/* xmm2 = (xmm2:xmm1) >> 8 bytes = the 16 source bytes starting at rcx.  */
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
2011-12-24 01:02:15 +08:00
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl8LoopExit)
2011-12-24 01:02:15 +08:00
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 8-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -8(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -8(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl8LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $8, %xmm3, %xmm4
jnz L(Shl8Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave8)
# endif
palignr $8, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl8LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 8 bytes
   rcx..rcx+7 with one 8-byte move through r9, set rsi = 8 and continue at
   L(CopyFrom1To16Bytes), defined outside this section.  */
L(Shl8LoopExit):
2011-12-24 01:02:15 +08:00
mov (%rcx), %r9
mov $8, %rsi
2011-12-24 01:02:15 +08:00
mov %r9, (%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl9): copy path selected when (rcx & 15) == 9, so -9(%rcx) and 7(%rcx)
   are 16-byte aligned and can be loaded with movaps.  Each step merges two
   neighbouring aligned source blocks with palignr $9 and writes the result
   with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl9):
/* xmm1 = aligned 16 bytes at rcx-9, xmm2 = the following aligned 16 bytes.  */
movaps -9(%rcx), %xmm1
movaps 7(%rcx), %xmm2
/* Also the re-entry point from L(Shl9LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl9Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
/* xmm2 = (xmm2:xmm1) >> 9 bytes = the 16 source bytes starting at rcx.  */
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
2011-12-24 01:02:15 +08:00
palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl9LoopExit)
2011-12-24 01:02:15 +08:00
palignr $9, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 9-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 23(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -7(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -9(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl9LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 7(%rcx), %xmm2
movaps 23(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 39(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 55(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $9, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $9, %xmm3, %xmm4
jnz L(Shl9Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave9)
# endif
palignr $9, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $9, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl9LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 8 bytes starting at
   rcx-1 through r9, which covers the 7 bytes rcx..rcx+6, set rsi = 7 and
   continue at L(CopyFrom1To16Bytes), defined outside this section.  */
L(Shl9LoopExit):
2011-12-24 01:02:15 +08:00
mov -1(%rcx), %r9
mov $7, %rsi
2011-12-24 01:02:15 +08:00
mov %r9, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl10): copy path selected when (rcx & 15) == 10, so -10(%rcx) and
   6(%rcx) are 16-byte aligned and can be loaded with movaps.  Each step
   merges two neighbouring aligned source blocks with palignr $10 and writes
   the result with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl10):
/* xmm1 = aligned 16 bytes at rcx-10, xmm2 = the following aligned 16 bytes.  */
movaps -10(%rcx), %xmm1
movaps 6(%rcx), %xmm2
/* Also the re-entry point from L(Shl10LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl10Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
/* xmm2 = (xmm2:xmm1) >> 10 bytes = the 16 source bytes starting at rcx.  */
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
2011-12-24 01:02:15 +08:00
palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl10LoopExit)
2011-12-24 01:02:15 +08:00
palignr $10, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 10-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 22(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -6(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -10(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl10LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 6(%rcx), %xmm2
movaps 22(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 38(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 54(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $10, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $10, %xmm3, %xmm4
jnz L(Shl10Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave10)
# endif
palignr $10, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $10, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl10LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 8 bytes starting at
   rcx-2 through r9, which covers the 6 bytes rcx..rcx+5, set rsi = 6 and
   continue at L(CopyFrom1To16Bytes), defined outside this section.  */
L(Shl10LoopExit):
2011-12-24 01:02:15 +08:00
mov -2(%rcx), %r9
mov $6, %rsi
2011-12-24 01:02:15 +08:00
mov %r9, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl11): copy path selected when (rcx & 15) == 11, so -11(%rcx) and
   5(%rcx) are 16-byte aligned and can be loaded with movaps.  Each step
   merges two neighbouring aligned source blocks with palignr $11 and writes
   the result with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl11):
/* xmm1 = aligned 16 bytes at rcx-11, xmm2 = the following aligned 16 bytes.  */
movaps -11(%rcx), %xmm1
movaps 5(%rcx), %xmm2
/* Also the re-entry point from L(Shl11LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl11Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
/* xmm2 = (xmm2:xmm1) >> 11 bytes = the 16 source bytes starting at rcx.  */
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
2011-12-24 01:02:15 +08:00
palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl11LoopExit)
2011-12-24 01:02:15 +08:00
palignr $11, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 11-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 21(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -5(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -11(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl11LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 5(%rcx), %xmm2
movaps 21(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 37(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 53(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $11, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $11, %xmm3, %xmm4
jnz L(Shl11Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave11)
# endif
palignr $11, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $11, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl11LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 8 bytes starting at
   rcx-3 through r9, which covers the 5 bytes rcx..rcx+4, set rsi = 5 and
   continue at L(CopyFrom1To16Bytes), defined outside this section.  */
L(Shl11LoopExit):
2011-12-24 01:02:15 +08:00
mov -3(%rcx), %r9
mov $5, %rsi
2011-12-24 01:02:15 +08:00
mov %r9, -3(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl12): copy path selected when (rcx & 15) == 12, so -12(%rcx) and
   4(%rcx) are 16-byte aligned and can be loaded with movaps.  Each step
   merges two neighbouring aligned source blocks with palignr $12 and writes
   the result with an aligned store to (%rdx).  xmm0 is the reference for NUL
   detection.  */
.p2align 4
L(Shl12):
/* xmm1 = aligned 16 bytes at rcx-12, xmm2 = the following aligned 16 bytes.  */
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
/* Also the re-entry point from L(Shl12LoopStart) once its 64-byte window
   contains a NUL.  */
L(Shl12Start):
/* rax = bit mask of NUL bytes in xmm2; xmm3 keeps an unmodified copy.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to r8; jbe is taken when r8 was <= 16 (unsigned).  */
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
/* xmm2 = (xmm2:xmm1) >> 12 bytes = the 16 source bytes starting at rcx.  */
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
2011-12-24 01:02:15 +08:00
/* Steps 2..4 repeat the pattern; xmm1 and xmm3 alternate as the saved copy
   of the previous aligned block.  */
movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
2011-12-24 01:02:15 +08:00
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl12LoopExit)
2011-12-24 01:02:15 +08:00
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* No NUL in four steps.  Point rcx at the next aligned source block, round
   it down to a 64-byte boundary (rax = bytes dropped by the rounding), then
   restore the 12-byte misalignment bias and move rdx back (and, for strncpy,
   r8 forward) by rax.  */
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -4(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
/* xmm1 = aligned block preceding the first 64-byte window.  */
movaps -12(%rcx), %xmm1
2011-12-24 01:02:15 +08:00
/* 64 bytes loop */
.p2align 4
L(Shl12LoopStart):
/* Load four aligned blocks into xmm2..xmm5 and fold them with pminub into
   xmm7 so that a zero byte anywhere in the 64 bytes yields a zero byte
   there.  */
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%rcx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* xmm7 = untouched copy of the last block; it becomes xmm1 for the next
   iteration.  */
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
/* palignr does not modify the flags, so jnz still acts on this test.  */
test %rax, %rax
palignr $12, %xmm3, %xmm4
jnz L(Shl12Start)
# ifdef USE_AS_STRNCPY
/* Charge 64 bytes to r8; jbe is taken when r8 was <= 64 (unsigned).  */
sub $64, %r8
jbe L(StrncpyLeave12)
# endif
palignr $12, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl12LoopStart)
/* NUL found in xmm2 (rax holds its bit mask): copy the 4 bytes
   rcx..rcx+3 with one 4-byte move through r9d, set rsi = 4 and continue at
   L(CopyFrom1To16Bytes), defined outside this section.  */
L(Shl12LoopExit):
2011-12-24 01:02:15 +08:00
mov (%rcx), %r9d
mov $4, %rsi
2011-12-24 01:02:15 +08:00
mov %r9d, (%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl13): copy path for a source cursor that is 13 bytes past a 16-byte
   boundary while the destination cursor is 16-byte aligned (required by
   the movaps operands below).  Output vectors are built with palignr $13
   from two neighbouring aligned source vectors.
   %rcx = source cursor, %rdx = destination cursor,
   %rax = NUL mask of the newest source vector,
   %r8  = remaining byte budget (USE_AS_STRNCPY builds only).
   NOTE(review): %xmm0 is presumably all-zero on entry (set outside this
   part of the file); comparing it with a NUL-free vector leaves it zero.
   Stray revision-timestamp lines that had been pasted between the
   instructions were removed; they are not valid assembler input.  */
.p2align 4
L(Shl13):
movaps -13(%rcx), %xmm1
movaps 3(%rcx), %xmm2
/* Also entered from L(Shl13LoopStart) to locate a NUL vector by vector.  */
L(Shl13Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx

movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)

palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl13LoopExit)

palignr $13, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Round the source down to a 64-byte boundary for the main loop; move the
   destination back and raise the strncpy budget by the same distance.  */
lea 19(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -3(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -13(%rcx), %xmm1

/* 64 bytes loop */
.p2align 4
L(Shl13LoopStart):
movaps 3(%rcx), %xmm2
movaps 19(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 35(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 51(%rcx), %xmm5
/* Byte-wise minimum of the four vectors, so one compare against %xmm0
   detects a NUL anywhere in the 64 bytes.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; it becomes %xmm1 next round.  */
movaps %xmm5, %xmm7
palignr $13, %xmm4, %xmm5
test %rax, %rax
palignr $13, %xmm3, %xmm4
/* NUL somewhere in the group: redo it vector by vector.  */
jnz L(Shl13Start)
# ifdef USE_AS_STRNCPY
/* Budget ends within this group.  */
sub $64, %r8
jbe L(StrncpyLeave13)
# endif
palignr $13, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $13, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl13LoopStart)
/* A NUL is in the vector at 3(%rcx).  Copy 4 bytes starting one byte
   before the cursors (covering the 3 bytes in front of that vector), then
   L(CopyFrom1To16Bytes) advances both cursors by %rsi and finishes
   according to the mask in %rax.  */
L(Shl13LoopExit):
mov -1(%rcx), %r9d
mov $3, %rsi
mov %r9d, -1(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl14): copy path for a source cursor that is 14 bytes past a 16-byte
   boundary while the destination cursor is 16-byte aligned (required by
   the movaps operands below).  Output vectors are built with palignr $14
   from two neighbouring aligned source vectors.
   %rcx = source cursor, %rdx = destination cursor,
   %rax = NUL mask of the newest source vector,
   %r8  = remaining byte budget (USE_AS_STRNCPY builds only).
   NOTE(review): %xmm0 is presumably all-zero on entry (set outside this
   part of the file); comparing it with a NUL-free vector leaves it zero.
   Stray revision-timestamp lines that had been pasted between the
   instructions were removed; they are not valid assembler input.  */
.p2align 4
L(Shl14):
movaps -14(%rcx), %xmm1
movaps 2(%rcx), %xmm2
/* Also entered from L(Shl14LoopStart) to locate a NUL vector by vector.  */
L(Shl14Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx

movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)

palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl14LoopExit)

palignr $14, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Round the source down to a 64-byte boundary for the main loop; move the
   destination back and raise the strncpy budget by the same distance.  */
lea 18(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -2(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -14(%rcx), %xmm1

/* 64 bytes loop */
.p2align 4
L(Shl14LoopStart):
movaps 2(%rcx), %xmm2
movaps 18(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 34(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 50(%rcx), %xmm5
/* Byte-wise minimum of the four vectors, so one compare against %xmm0
   detects a NUL anywhere in the 64 bytes.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; it becomes %xmm1 next round.  */
movaps %xmm5, %xmm7
palignr $14, %xmm4, %xmm5
test %rax, %rax
palignr $14, %xmm3, %xmm4
/* NUL somewhere in the group: redo it vector by vector.  */
jnz L(Shl14Start)
# ifdef USE_AS_STRNCPY
/* Budget ends within this group.  */
sub $64, %r8
jbe L(StrncpyLeave14)
# endif
palignr $14, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $14, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl14LoopStart)
/* A NUL is in the vector at 2(%rcx).  Copy 4 bytes starting two bytes
   before the cursors (covering the 2 bytes in front of that vector), then
   L(CopyFrom1To16Bytes) advances both cursors by %rsi and finishes
   according to the mask in %rax.  */
L(Shl14LoopExit):
mov -2(%rcx), %r9d
mov $2, %rsi
mov %r9d, -2(%rdx)
jmp L(CopyFrom1To16Bytes)
/* L(Shl15): copy path for a source cursor that is 15 bytes past a 16-byte
   boundary while the destination cursor is 16-byte aligned (required by
   the movaps operands below).  Output vectors are built with palignr $15
   from two neighbouring aligned source vectors.
   %rcx = source cursor, %rdx = destination cursor,
   %rax = NUL mask of the newest source vector,
   %r8  = remaining byte budget (USE_AS_STRNCPY builds only).
   NOTE(review): %xmm0 is presumably all-zero on entry (set outside this
   part of the file); comparing it with a NUL-free vector leaves it zero.
   Stray revision-timestamp lines that had been pasted between the
   instructions were removed; they are not valid assembler input.  */
.p2align 4
L(Shl15):
movaps -15(%rcx), %xmm1
movaps 1(%rcx), %xmm2
/* Also entered from L(Shl15LoopStart) to locate a NUL vector by vector.  */
L(Shl15Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx

movaps %xmm2, %xmm1
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)

palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%rdx), %rdx
pmovmskb %xmm0, %rax
lea 16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %rax, %rax
jnz L(Shl15LoopExit)

palignr $15, %xmm3, %xmm2
movaps %xmm2, (%rdx)
/* Round the source down to a 64-byte boundary for the main loop; move the
   destination back and raise the strncpy budget by the same distance.  */
lea 17(%rcx), %rcx
lea 16(%rdx), %rdx
mov %rcx, %rax
and $-0x40, %rcx
sub %rcx, %rax
lea -1(%rcx), %rcx
sub %rax, %rdx
# ifdef USE_AS_STRNCPY
add %rax, %r8
# endif
movaps -15(%rcx), %xmm1

/* 64 bytes loop */
.p2align 4
L(Shl15LoopStart):
movaps 1(%rcx), %xmm2
movaps 17(%rcx), %xmm3
movaps %xmm3, %xmm6
movaps 33(%rcx), %xmm4
movaps %xmm4, %xmm7
movaps 49(%rcx), %xmm5
/* Byte-wise minimum of the four vectors, so one compare against %xmm0
   detects a NUL anywhere in the 64 bytes.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %rax
/* Unshifted copy of the last vector; it becomes %xmm1 next round.  */
movaps %xmm5, %xmm7
palignr $15, %xmm4, %xmm5
test %rax, %rax
palignr $15, %xmm3, %xmm4
/* NUL somewhere in the group: redo it vector by vector.  */
jnz L(Shl15Start)
# ifdef USE_AS_STRNCPY
/* Budget ends within this group.  */
sub $64, %r8
jbe L(StrncpyLeave15)
# endif
palignr $15, %xmm2, %xmm3
lea 64(%rcx), %rcx
palignr $15, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
movaps %xmm2, (%rdx)
lea 64(%rdx), %rdx
jmp L(Shl15LoopStart)
/* A NUL is in the vector at 1(%rcx).  Copy 4 bytes starting three bytes
   before the cursors (covering the single byte in front of that vector);
   %rsi = 1 is the distance L(CopyFrom1To16Bytes) adds to both cursors.  */
L(Shl15LoopExit):
mov -3(%rcx), %r9d
mov $1, %rsi
mov %r9d, -3(%rdx)
/* When USE_AS_STRCAT is not defined, L(CopyFrom1To16Bytes) is emitted
   directly below and is reached by falling through; with USE_AS_STRCAT
   that code is not emitted here, so an explicit jump is needed.  */
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
# ifndef USE_AS_STRCAT
/* L(CopyFrom1To16Bytes): a NUL byte lies in the 16-byte source vector whose
   pmovmskb mask is in %rax; %rsi is the distance from the cursors
   (%rcx source, %rdx destination) to that vector.  Advances both cursors
   by %rsi and jumps to L(ExitN), N being the 1-based position of the
   lowest set mask bit.
   A stray revision-timestamp line that preceded this routine was removed;
   it is not valid assembler input.  */
.p2align 4
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
/* Give back the 16 bytes that the callers visible in this file subtracted
   from the budget before testing the mask.  */
add $16, %r8
# endif
add %rsi, %rdx
add %rsi, %rcx
/* No NUL in the low 8 bytes: look at mask bits 8..15.  */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* %al is non-zero and bits 0..6 are clear, so bit 7 is set: fall through
   into L(Exit8), which follows.  */
/* L(Exit8): copy the final 8 bytes from (%rcx) to (%rdx) and return.
   Also reached by fall-through from L(CopyFrom1To16Bytes).
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 7(%rdx).  */
.p2align 4
L(Exit8):
mov (%rcx), %rax
mov %rax, (%rdx)
# ifdef USE_AS_STPCPY
lea 7(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 8 bytes to the budget.  lea leaves the flags of the sub intact,
   so jnz is taken when budget remains; the rest is then zero-filled
   starting at %rcx.  */
sub $8, %r8
lea 8(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(ExitHigh): continuation of L(CopyFrom1To16Bytes) when mask bits 0..7
   are clear.  Tests mask bits 8..14 (in %ah) from the lowest up and jumps
   to L(Exit9)..L(Exit15); if none of them is set, execution falls through
   into L(Exit16), which follows.  */
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* L(Exit16): copy the final 16 bytes from (%rcx) to (%rdx) with two 8-byte
   moves and return.  Also reached by fall-through from L(ExitHigh).
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 15(%rdx).  */
.p2align 4
L(Exit16):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
# ifdef USE_AS_STPCPY
lea 15(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 16 bytes to the budget; lea keeps the flags of the sub, so jnz is
   taken when budget remains and the rest is zero-filled from %rcx.  */
sub $16, %r8
lea 16(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
# ifdef USE_AS_STRNCPY
/* L(CopyFrom1To16BytesCase2) (USE_AS_STRNCPY only): the vector whose mask
   is in %rax holds a NUL.  The visible callers arrive here after
   "sub $16, %r8; jbe", i.e. the byte budget also ends within this vector.
   Copying stops at whichever limit comes first: for each N the budget
   (%r8 == N) is checked before mask bit N-1.
   In: %rsi = distance from the cursors to the vector, %rcx/%rdx = cursors.  */
.p2align 4
L(CopyFrom1To16BytesCase2):
/* Undo the caller's subtraction; %r8 is now the number of bytes left.  */
add $16, %r8
add %rsi, %rcx
/* %rsi temporarily holds the advanced destination so that %rdx can be used
   as scratch.  */
lea (%rsi, %rdx), %rsi
/* %dh = bit 15 of (%r8 - 9), which is set when that difference is negative
   (budget below 9), OR-ed with the low mask byte.  A zero result means
   neither limit falls in the first 8 bytes.
   NOTE(review): relies on %r8 being in 1..16 here -- confirm.  */
lea -9(%r8), %rdx
and $1<<7, %dh
or %al, %dh
test %dh, %dh
/* lea does not change flags: restore the destination before branching.  */
lea (%rsi), %rdx
jz L(ExitHighCase2)
cmp $1, %r8
je L(Exit1)
test $0x01, %al
jnz L(Exit1)
cmp $2, %r8
je L(Exit2)
test $0x02, %al
jnz L(Exit2)
cmp $3, %r8
je L(Exit3)
test $0x04, %al
jnz L(Exit3)
cmp $4, %r8
je L(Exit4)
test $0x08, %al
jnz L(Exit4)
cmp $5, %r8
je L(Exit5)
test $0x10, %al
jnz L(Exit5)
cmp $6, %r8
je L(Exit6)
test $0x20, %al
jnz L(Exit6)
cmp $7, %r8
je L(Exit7)
test $0x40, %al
jnz L(Exit7)
jmp L(Exit8)
/* L(ExitHighCase2) (USE_AS_STRNCPY only): continuation of
   L(CopyFrom1To16BytesCase2) when neither the budget nor a NUL ends the
   copy within the first 8 bytes.  For N = 9..15 the budget (%r8 == N) is
   checked first, then mask bit N-1 (held in %ah); if nothing matches, 16
   bytes are copied.  */
.p2align 4
L(ExitHighCase2):
cmp $9, %r8
je L(Exit9)
test $0x01, %ah
jnz L(Exit9)
cmp $10, %r8
je L(Exit10)
test $0x02, %ah
jnz L(Exit10)
cmp $11, %r8
je L(Exit11)
test $0x04, %ah
jnz L(Exit11)
cmp $12, %r8
je L(Exit12)
test $0x8, %ah
jnz L(Exit12)
cmp $13, %r8
je L(Exit13)
test $0x10, %ah
jnz L(Exit13)
cmp $14, %r8
je L(Exit14)
test $0x20, %ah
jnz L(Exit14)
cmp $15, %r8
je L(Exit15)
test $0x40, %ah
jnz L(Exit15)
jmp L(Exit16)
/* L(CopyFrom1To16BytesCase2OrCase3) (USE_AS_STRNCPY only): pick Case2 when
   the mask in %rax is non-zero (a NUL was seen), otherwise fall into
   Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
/* L(CopyFrom1To16BytesCase3): no NUL is involved; only the byte budget
   ends the copy.  Restores the budget (+16), advances both cursors by
   %rsi and branches to L(ExitN) with N = %r8 through a small compare tree.
   NOTE(review): the tree assumes %r8 is in 1..16 at this point (values
   below 2 all go to L(Exit1)) -- confirm with the callers.  */
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %r8
add %rsi, %rdx
add %rsi, %rcx
cmp $16, %r8
je L(Exit16)
cmp $8, %r8
je L(Exit8)
jg L(More8Case3)
cmp $4, %r8
je L(Exit4)
jg L(More4Case3)
cmp $2, %r8
jl L(Exit1)
je L(Exit2)
jg L(Exit3)
L(More8Case3): /* but less than 16 */
cmp $12, %r8
je L(Exit12)
jl L(Less12Case3)
cmp $14, %r8
jl L(Exit13)
je L(Exit14)
jg L(Exit15)
L(More4Case3): /* but less than 8 */
cmp $6, %r8
jl L(Exit5)
je L(Exit6)
jg L(Exit7)
L(Less12Case3): /* but more than 8 */
cmp $10, %r8
jl L(Exit9)
je L(Exit10)
jg L(Exit11)
# endif
/* L(Exit1): copy the final byte from (%rcx) to (%rdx) and return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the byte just written.  */
.p2align 4
L(Exit1):
movb (%rcx), %al
movb %al, (%rdx)
# ifdef USE_AS_STPCPY
lea (%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 1 byte to the budget; lea keeps the flags of the sub, so jnz is
   taken when budget remains and the rest is zero-filled from %rcx.  */
sub $1, %r8
lea 1(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit2): copy the final 2 bytes from (%rcx) to (%rdx) and return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 1(%rdx).  */
.p2align 4
L(Exit2):
movw (%rcx), %ax
movw %ax, (%rdx)
# ifdef USE_AS_STPCPY
lea 1(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 2 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $2, %r8
lea 2(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit3): copy the final 3 bytes from (%rcx) to (%rdx) as a 2-byte move
   plus a 1-byte move, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 2(%rdx).  */
.p2align 4
L(Exit3):
movw (%rcx), %ax
movw %ax, (%rdx)
movb 2(%rcx), %al
movb %al, 2(%rdx)
# ifdef USE_AS_STPCPY
lea 2(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 3 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $3, %r8
lea 3(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit4): copy the final 4 bytes from (%rcx) to (%rdx) and return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 3(%rdx).
   A stray revision-timestamp line inside the STPCPY branch was removed;
   it is not valid assembler input.  */
.p2align 4
L(Exit4):
movl (%rcx), %eax
movl %eax, (%rdx)
# ifdef USE_AS_STPCPY
lea 3(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 4 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $4, %r8
lea 4(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit5): copy the final 5 bytes from (%rcx) to (%rdx) as a 4-byte move
   plus a 1-byte move, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 4(%rdx).
   A stray revision-timestamp line inside the STPCPY branch was removed;
   it is not valid assembler input.  */
.p2align 4
L(Exit5):
movl (%rcx), %eax
movl %eax, (%rdx)
movb 4(%rcx), %al
movb %al, 4(%rdx)
# ifdef USE_AS_STPCPY
lea 4(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 5 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $5, %r8
lea 5(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit6): copy the final 6 bytes from (%rcx) to (%rdx) as a 4-byte move
   plus a 2-byte move, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 5(%rdx).
   A stray revision-timestamp line inside the STPCPY branch was removed;
   it is not valid assembler input.  */
.p2align 4
L(Exit6):
movl (%rcx), %eax
movl %eax, (%rdx)
movw 4(%rcx), %ax
movw %ax, 4(%rdx)
# ifdef USE_AS_STPCPY
lea 5(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 6 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $6, %r8
lea 6(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit7): copy the final 7 bytes from (%rcx) to (%rdx) with two 4-byte
   moves that overlap in one byte (offsets 0 and 3), then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 6(%rdx).  */
.p2align 4
L(Exit7):
movl (%rcx), %eax
movl %eax, (%rdx)
movl 3(%rcx), %eax
movl %eax, 3(%rdx)
# ifdef USE_AS_STPCPY
lea 6(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 7 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $7, %r8
lea 7(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit9): copy the final 9 bytes from (%rcx) to (%rdx) as an 8-byte move
   plus an overlapping 4-byte move at offset 5, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 8(%rdx).  */
.p2align 4
L(Exit9):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 5(%rcx), %eax
mov %eax, 5(%rdx)
# ifdef USE_AS_STPCPY
lea 8(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 9 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $9, %r8
lea 9(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit10): copy the final 10 bytes from (%rcx) to (%rdx) as an 8-byte
   move plus an overlapping 4-byte move at offset 6, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 9(%rdx).  */
.p2align 4
L(Exit10):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 6(%rcx), %eax
mov %eax, 6(%rdx)
# ifdef USE_AS_STPCPY
lea 9(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 10 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $10, %r8
lea 10(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit11): copy the final 11 bytes from (%rcx) to (%rdx) as an 8-byte
   move plus an overlapping 4-byte move at offset 7, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 10(%rdx).  */
.p2align 4
L(Exit11):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %eax
mov %eax, 7(%rdx)
# ifdef USE_AS_STPCPY
lea 10(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 11 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $11, %r8
lea 11(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit12): copy the final 12 bytes from (%rcx) to (%rdx) as an 8-byte
   move plus a 4-byte move, then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 11(%rdx).
   A stray revision-timestamp line inside the STPCPY branch was removed;
   it is not valid assembler input.  */
.p2align 4
L(Exit12):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
# ifdef USE_AS_STPCPY
lea 11(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 12 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $12, %r8
lea 12(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit13): copy the final 13 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap (offsets 0 and 5), then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 12(%rdx).  */
.p2align 4
L(Exit13):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 5(%rcx), %rax
mov %rax, 5(%rdx)
# ifdef USE_AS_STPCPY
lea 12(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 13 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $13, %r8
lea 13(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit14): copy the final 14 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap (offsets 0 and 6), then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 13(%rdx).  */
.p2align 4
L(Exit14):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 6(%rcx), %rax
mov %rax, 6(%rdx)
# ifdef USE_AS_STPCPY
lea 13(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 14 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $14, %r8
lea 14(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
/* L(Exit15): copy the final 15 bytes from (%rcx) to (%rdx) with two 8-byte
   moves that overlap in one byte (offsets 0 and 7), then return.
   Return value: %rdi in non-STPCPY builds; with USE_AS_STPCPY the address
   of the last byte written, 14(%rdx).  */
.p2align 4
L(Exit15):
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
# ifdef USE_AS_STPCPY
lea 14(%rdx), %rax
# else
mov %rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
/* Charge 15 bytes to the budget; jnz uses the flags of the sub (lea does
   not touch them) and zero-fills the rest from %rcx.  */
sub $15, %r8
lea 15(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %rax += 1 - CF, where CF is set only if the last byte written is NUL.  */
cmpb $1, (%rax)
sbb $-1, %rax
# endif
# endif
ret
# ifdef USE_AS_STRNCPY
/* L(Fill0) .. L(Fill16) (USE_AS_STRNCPY only): store exactly N bytes taken
   from %rdx at (%rcx) and return; %rax is left untouched.  On every path
   in this file section that reaches them (through L(FillFrom1To16Bytes))
   %rdx has been cleared with xor, so the bytes written are zero.  Sizes
   that are not a power of two use two stores, some of them overlapping.  */
.p2align 4
L(Fill0):
ret
.p2align 4
L(Fill1):
movb %dl, (%rcx)
ret
.p2align 4
L(Fill2):
movw %dx, (%rcx)
ret
.p2align 4
L(Fill3):
movw %dx, (%rcx)
movb %dl, 2(%rcx)
ret
.p2align 4
L(Fill4):
movl %edx, (%rcx)
ret
.p2align 4
L(Fill5):
movl %edx, (%rcx)
movb %dl, 4(%rcx)
ret
.p2align 4
L(Fill6):
movl %edx, (%rcx)
movw %dx, 4(%rcx)
ret
/* 7 bytes: two 4-byte stores overlapping at offset 3.  */
.p2align 4
L(Fill7):
movl %edx, (%rcx)
movl %edx, 3(%rcx)
ret
.p2align 4
L(Fill8):
mov %rdx, (%rcx)
ret
.p2align 4
L(Fill9):
mov %rdx, (%rcx)
movb %dl, 8(%rcx)
ret
.p2align 4
L(Fill10):
mov %rdx, (%rcx)
movw %dx, 8(%rcx)
ret
/* 11 bytes: 8-byte store plus a 4-byte store overlapping at offset 7.  */
.p2align 4
L(Fill11):
mov %rdx, (%rcx)
movl %edx, 7(%rcx)
ret
.p2align 4
L(Fill12):
mov %rdx, (%rcx)
movl %edx, 8(%rcx)
ret
/* 13..15 bytes: two 8-byte stores, the second one overlapping the first.  */
.p2align 4
L(Fill13):
mov %rdx, (%rcx)
mov %rdx, 5(%rcx)
ret
.p2align 4
L(Fill14):
mov %rdx, (%rcx)
mov %rdx, 6(%rcx)
ret
.p2align 4
L(Fill15):
mov %rdx, (%rcx)
mov %rdx, 7(%rcx)
ret
.p2align 4
L(Fill16):
mov %rdx, (%rcx)
mov %rdx, 8(%rcx)
ret
/* L(StrncpyFillExit1): entered with %r8 = (bytes still to clear) - 16;
   adds the 16 back and falls into the dispatcher.
   L(FillFrom1To16Bytes): branch to L(FillN) with N = %r8 through a small
   compare tree (0 and 16 are tested first).
   NOTE(review): the tree assumes %r8 is in 0..16 here; the signed
   branches would misroute anything else -- confirm with the callers.  */
.p2align 4
L(StrncpyFillExit1):
lea 16(%r8), %r8
L(FillFrom1To16Bytes):
test %r8, %r8
jz L(Fill0)
cmp $16, %r8
je L(Fill16)
cmp $8, %r8
je L(Fill8)
jg L(FillMore8)
cmp $4, %r8
je L(Fill4)
jg L(FillMore4)
cmp $2, %r8
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
L(FillMore8): /* but less than 16 */
cmp $12, %r8
je L(Fill12)
jl L(FillLess12)
cmp $14, %r8
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
L(FillMore4): /* but less than 8 */
cmp $6, %r8
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
L(FillLess12): /* but more than 8 */
cmp $10, %r8
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
/* L(StrncpyFillTailWithZero1) (USE_AS_STRNCPY only): write %r8 zero bytes
   starting at (%rcx), then return through one of the L(FillN) stubs.
   %rax (the return value prepared by the L(ExitN) caller) is not modified.
   Clobbers %rcx, %rdx, %r8, %xmm0 and the flags.  */
.p2align 4
L(StrncpyFillTailWithZero1):
/* %rdx is the zero source for the scalar stores here and in L(FillN).  */
xor %rdx, %rdx
sub $16, %r8
/* At most 16 bytes to clear: L(StrncpyFillExit1) undoes the subtraction
   and dispatches on the exact count.  */
jbe L(StrncpyFillExit1)
pxor %xmm0, %xmm0
/* Clear the first 16 bytes with two 8-byte stores (no alignment needed).  */
mov %rdx, (%rcx)
mov %rdx, 8(%rcx)
lea 16(%rcx), %rcx
/* Round %rcx down to a 16-byte boundary for movdqa; the distance stepped
   back is added to the count, so those bytes are cleared a second time.  */
mov %rcx, %rdx
and $0xf, %rdx
sub %rdx, %rcx
add %rdx, %r8
xor %rdx, %rdx
sub $64, %r8
jb L(StrncpyFillLess64)
/* Clear 64 bytes per iteration while at least 64 remain.  */
L(StrncpyFillLoopMovdqa):
movdqa %xmm0, (%rcx)
movdqa %xmm0, 16(%rcx)
movdqa %xmm0, 32(%rcx)
movdqa %xmm0, 48(%rcx)
lea 64(%rcx), %rcx
sub $64, %r8
jae L(StrncpyFillLoopMovdqa)
/* Here %r8 = remaining - 64 (negative).  */
L(StrncpyFillLess64):
add $32, %r8
jl L(StrncpyFillLess32)
/* At least 32 bytes remain.  */
movdqa %xmm0, (%rcx)
movdqa %xmm0, 16(%rcx)
lea 32(%rcx), %rcx
sub $16, %r8
jl L(StrncpyFillExit1)
movdqa %xmm0, (%rcx)
lea 16(%rcx), %rcx
jmp L(FillFrom1To16Bytes)
/* Fewer than 32 bytes remain; %r8 = remaining - 32.  */
L(StrncpyFillLess32):
add $16, %r8
jl L(StrncpyFillExit1)
movdqa %xmm0, (%rcx)
lea 16(%rcx), %rcx
jmp L(FillFrom1To16Bytes)
/* L(Exit0) (USE_AS_STRNCPY only): taken from the function entry when the
   count in %r8 is zero.  Nothing is copied; the destination pointer, which
   the entry code copied from %rdi into %rdx, is returned.  */
.p2align 4
L(Exit0):
mov %rdx, %rax
ret
/* L(StrncpyExit15Bytes) (USE_AS_STRNCPY only): taken from the function
   entry when the count in %r8 is below 16, after the entry code has sent
   counts of 8 or less elsewhere and found source bytes 0..7 to be
   non-NUL.  For N = 9..14 the count (%r8 == N) is checked first, then
   source byte N-1; the first match goes to L(ExitN).  If nothing matches,
   15 bytes are copied here with two overlapping 8-byte moves and the
   function returns without any zero fill.  */
.p2align 4
L(StrncpyExit15Bytes):
cmp $9, %r8
je L(Exit9)
cmpb $0, 8(%rcx)
jz L(Exit9)
cmp $10, %r8
je L(Exit10)
cmpb $0, 9(%rcx)
jz L(Exit10)
cmp $11, %r8
je L(Exit11)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmp $12, %r8
je L(Exit12)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmp $13, %r8
je L(Exit13)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmp $14, %r8
je L(Exit14)
cmpb $0, 13(%rcx)
jz L(Exit14)
mov (%rcx), %rax
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
# ifdef USE_AS_STPCPY
/* Return the address of the last byte written, plus one unless that byte
   is NUL (%rax += 1 - CF).  */
lea 14(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
# else
mov %rdi, %rax
# endif
ret
/* L(StrncpyExit8Bytes) (USE_AS_STRNCPY only): taken from the function
   entry when the count in %r8 is non-zero and at most 8, before any source
   byte has been examined.  For N = 1..7 the count (%r8 == N) is checked
   first, then source byte N-1; the first match goes to L(ExitN).  If
   nothing matches, 8 bytes are copied here and the function returns
   without any zero fill.  */
.p2align 4
L(StrncpyExit8Bytes):
cmp $1, %r8
je L(Exit1)
cmpb $0, (%rcx)
jz L(Exit1)
cmp $2, %r8
je L(Exit2)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmp $3, %r8
je L(Exit3)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmp $4, %r8
je L(Exit4)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmp $5, %r8
je L(Exit5)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmp $6, %r8
je L(Exit6)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmp $7, %r8
je L(Exit7)
cmpb $0, 6(%rcx)
jz L(Exit7)
mov (%rcx), %rax
mov %rax, (%rdx)
# ifdef USE_AS_STPCPY
/* Return the address of the last byte written, plus one unless that byte
   is NUL (%rax += 1 - CF).  */
lea 7(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
# else
mov %rdi, %rax
# endif
ret
# endif
# endif
# ifdef USE_AS_STRNCPY
/* L(StrncpyLeaveCase2OrCase3) (USE_AS_STRNCPY only): leave path of a
   64-byte loop that holds its four source vectors in %xmm4..%xmm7 and has
   not stored them yet; the stores below use negative offsets from %rdx.
   %rax != 0 means a NUL was seen in the group (Case2), %rax == 0 means
   only the byte budget %r8 ends the copy (Case3).  %rsi grows by 16 for
   every vector stored and is later added to both cursors by the
   L(CopyFrom1To16Bytes*) routines.
   NOTE(review): the loop that jumps here and the initial value of %rsi
   are outside this part of the file -- confirm before changing offsets.
   Stray revision-timestamp lines around this routine were removed; they
   are not valid assembler input.  */
.p2align 4
L(StrncpyLeaveCase2OrCase3):
test %rax, %rax
jnz L(Aligned64LeaveCase2)
L(Aligned64LeaveCase3):
/* Restore the budget, then store whole vectors while more than 16 bytes
   of it remain.  */
lea 64(%r8), %r8
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm4, -64(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm5, -48(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm6, -32(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
jmp L(CopyFrom1To16BytesCase3)
L(Aligned64LeaveCase2):
/* Check the vectors one at a time: %rax is the NUL mask of the vector
   under test, %r8 is compared against the budget in 16-byte steps.  */
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rax
add $48, %r8
jle L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm4, -64(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm5, -48(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %rax
movaps %xmm6, -32(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
jmp L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
/* L(StrncpyExit1Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $1 copy path.  Copies 16
   bytes starting one byte before the cursors (the 15 bytes in front of
   that vector plus one byte behind the cursors), sets %rsi = 15 as the
   cursor advance and continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit1Case2OrCase3):
movdqu -1(%rcx), %xmm0
movdqu %xmm0, -1(%rdx)
mov $15, %rsi
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit2Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $2 copy path.  Copies 16
   bytes starting two bytes before the cursors (covering the 14 bytes in
   front of that vector), sets %rsi = 14 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit2Case2OrCase3):
movdqu -2(%rcx), %xmm0
movdqu %xmm0, -2(%rdx)
mov $14, %rsi
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit3Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $3 copy path.  Copies 16
   bytes starting three bytes before the cursors (covering the 13 bytes in
   front of that vector), sets %rsi = 13 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit3Case2OrCase3):
movdqu -3(%rcx), %xmm0
movdqu %xmm0, -3(%rdx)
mov $13, %rsi
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit4Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $4 copy path.  Copies 16
   bytes starting four bytes before the cursors (covering the 12 bytes in
   front of that vector), sets %rsi = 12 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit4Case2OrCase3):
movdqu -4(%rcx), %xmm0
movdqu %xmm0, -4(%rdx)
mov $12, %rsi
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit5Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $5 copy path.  Copies 16
   bytes starting five bytes before the cursors (covering the 11 bytes in
   front of that vector), sets %rsi = 11 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit5Case2OrCase3):
movdqu -5(%rcx), %xmm0
movdqu %xmm0, -5(%rdx)
mov $11, %rsi
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit6Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $6 copy path.  Copies the 10
   bytes in front of that vector (an 8-byte move plus an overlapping 4-byte
   move at offset 6), sets %rsi = 10 as the cursor advance and continues
   with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit6Case2OrCase3):
/* %rsi doubles as a data temporary until it is loaded with the advance.  */
mov (%rcx), %rsi
mov 6(%rcx), %r9d
mov %r9d, 6(%rdx)
mov %rsi, (%rdx)
test %rax, %rax
/* mov leaves the flags of the test untouched.  */
mov $10, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit7Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $7 copy path.  Copies the 9
   bytes in front of that vector (an 8-byte move plus an overlapping 4-byte
   move at offset 5), sets %rsi = 9 as the cursor advance and continues
   with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit7Case2OrCase3):
/* %rsi doubles as a data temporary until it is loaded with the advance.  */
mov (%rcx), %rsi
mov 5(%rcx), %r9d
mov %r9d, 5(%rdx)
mov %rsi, (%rdx)
test %rax, %rax
/* mov leaves the flags of the test untouched.  */
mov $9, %rsi
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit8Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $8 copy path.  Copies the 8
   bytes in front of that vector, sets %rsi = 8 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit8Case2OrCase3):
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit9Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $9 copy path.  Copies 8 bytes
   starting one byte before the cursors (covering the 7 bytes in front of
   that vector), sets %rsi = 7 as the cursor advance and continues with
   Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit9Case2OrCase3):
mov -1(%rcx), %r9
mov $7, %rsi
mov %r9, -1(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit10Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of a palignr $10 copy path.  Copies 8
   bytes starting two bytes before the cursors (covering the 6 bytes in
   front of that vector), sets %rsi = 6 as the cursor advance and continues
   with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit10Case2OrCase3):
mov -2(%rcx), %r9
mov $6, %rsi
mov %r9, -2(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit11Case2OrCase3) (USE_AS_STRNCPY only): the byte budget ends
   within the next source vector of the palignr $11 copy path.  Copies 8
   bytes starting three bytes before the cursors (covering the 5 bytes in
   front of that vector), sets %rsi = 5 as the cursor advance and continues
   with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit11Case2OrCase3):
mov -3(%rcx), %r9
mov $5, %rsi
mov %r9, -3(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit12Case2OrCase3) (USE_AS_STRNCPY only): reached from L(Shl12)
   when the byte budget ends within the source vector just examined.
   Copies the 4 bytes in front of that vector, sets %rsi = 4 as the cursor
   advance and continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit12Case2OrCase3):
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit13Case2OrCase3) (USE_AS_STRNCPY only): reached from L(Shl13)
   when the byte budget ends within the source vector just examined.
   Copies 4 bytes starting one byte before the cursors (covering the 3
   bytes in front of that vector), sets %rsi = 3 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit13Case2OrCase3):
mov -1(%rcx), %r9d
mov $3, %rsi
mov %r9d, -1(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit14Case2OrCase3) (USE_AS_STRNCPY only): reached from L(Shl14)
   when the byte budget ends within the source vector just examined.
   Copies 4 bytes starting two bytes before the cursors (covering the 2
   bytes in front of that vector), sets %rsi = 2 as the cursor advance and
   continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit14Case2OrCase3):
mov -2(%rcx), %r9d
mov $2, %rsi
mov %r9d, -2(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyExit15Case2OrCase3) (USE_AS_STRNCPY only): reached from L(Shl15)
   when the byte budget ends within the source vector just examined.
   Copies 4 bytes starting three bytes before the cursors (covering the
   single byte in front of that vector), sets %rsi = 1 as the cursor
   advance and continues with Case2 (mask %rax non-zero) or Case3.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyExit15Case2OrCase3):
mov -3(%rcx), %r9d
mov $1, %rsi
mov %r9d, -3(%rdx)
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave1) (USE_AS_STRNCPY only): leave path of the palignr $1
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave1):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 31(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)

palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit1)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 15, copy the 15 bytes that end at the new
   cursor position (two overlapping 8-byte moves), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit1):
lea 15(%rdx, %rsi), %rdx
lea 15(%rcx, %rsi), %rcx
mov -15(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -15(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave2) (USE_AS_STRNCPY only): leave path of the palignr $2
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave2):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 30(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)

palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit2)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 14, copy the 14 bytes that end at the new
   cursor position (two overlapping 8-byte moves), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit2):
lea 14(%rdx, %rsi), %rdx
lea 14(%rcx, %rsi), %rcx
mov -14(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -14(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave3) (USE_AS_STRNCPY only): leave path of the palignr $3
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave3):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 29(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)

palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit3)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 13, copy the 13 bytes that end at the new
   cursor position (two overlapping 8-byte moves), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit3):
lea 13(%rdx, %rsi), %rdx
lea 13(%rcx, %rsi), %rcx
mov -13(%rcx), %rsi
mov -8(%rcx), %rax
mov %rsi, -13(%rdx)
mov %rax, -8(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave4) (USE_AS_STRNCPY only): leave path of the palignr $4
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave4):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)

palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit4)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 12, copy the 12 bytes that end at the new
   cursor position (an 8-byte and a 4-byte move), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit4):
lea 12(%rdx, %rsi), %rdx
lea 12(%rcx, %rsi), %rcx
mov -12(%rcx), %rsi
mov -4(%rcx), %eax
mov %rsi, -12(%rdx)
mov %eax, -4(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave5) (USE_AS_STRNCPY only): leave path of the palignr $5
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave5):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 27(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)

palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit5)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 11, copy the 11 bytes that end at the new
   cursor position (an 8-byte move and an overlapping 4-byte move), clear
   %rsi and let L(CopyFrom1To16BytesCase3) copy the last bytes according
   to %r8.  */
L(StrncpyExit5):
lea 11(%rdx, %rsi), %rdx
lea 11(%rcx, %rsi), %rcx
mov -11(%rcx), %rsi
mov -4(%rcx), %eax
mov %rsi, -11(%rdx)
mov %eax, -4(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave6) (USE_AS_STRNCPY only): leave path of the palignr $6
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave6):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 26(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)

palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit6)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 10, copy the 10 bytes that end at the new
   cursor position (an 8-byte and a 2-byte move), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit6):
lea 10(%rdx, %rsi), %rdx
lea 10(%rcx, %rsi), %rcx
mov -10(%rcx), %rsi
movw -2(%rcx), %ax
mov %rsi, -10(%rdx)
movw %ax, -2(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave7) (USE_AS_STRNCPY only): leave path of the palignr $7
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave7):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 25(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)

palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit7)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 9, copy the 9 bytes that end at the new
   cursor position (an 8-byte and a 1-byte move), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit7):
lea 9(%rdx, %rsi), %rdx
lea 9(%rcx, %rsi), %rcx
mov -9(%rcx), %rsi
movb -1(%rcx), %ah
mov %rsi, -9(%rdx)
movb %ah, -1(%rdx)
xor %rsi, %rsi
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave8) (USE_AS_STRNCPY only): leave path of the palignr $8
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave8):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)

palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit8)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 8, copy the 8 bytes that end at the new
   cursor position, clear %rsi and let L(CopyFrom1To16BytesCase3) copy the
   last bytes according to %r8.  */
L(StrncpyExit8):
lea 8(%rdx, %rsi), %rdx
lea 8(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave9) (USE_AS_STRNCPY only): leave path of the palignr $9
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave9):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 23(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)

palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit9)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 7, copy the 8 bytes that end at the new
   cursor position (one more than the advance), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit9):
lea 7(%rdx, %rsi), %rdx
lea 7(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave10) (USE_AS_STRNCPY only): leave path of the palignr $10
   64-byte loop, for a byte budget that ends inside the current group.
   Stores whole 16-byte output vectors while more than 16 bytes of budget
   remain, counting the stored bytes in %rsi.
   NOTE(review): the loop is outside this part of the file; by analogy
   with the visible Shl11..Shl15 loops, %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined, %r8 = budget - 64, and %rsi is expected to be
   0 on entry -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave10):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 22(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)

palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit10)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 6, copy the 8 bytes that end at the new
   cursor position (two more than the advance), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit10):
lea 6(%rdx, %rsi), %rdx
lea 6(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave11) (USE_AS_STRNCPY only): reached from L(Shl11LoopStart)
   when the group holds no NUL and "sub $64, %r8" shows that the byte
   budget ends inside it.  At that point %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined by palignr.  Stores whole 16-byte output
   vectors while more than 16 bytes of budget remain, counting the stored
   bytes in %rsi.
   NOTE(review): %rsi is expected to be 0 on entry; it is not set in this
   part of the file -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave11):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 21(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)

palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit11)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 5, copy the 8 bytes that end at the new
   cursor position (three more than the advance), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit11):
lea 5(%rdx, %rsi), %rdx
lea 5(%rcx, %rsi), %rcx
mov -8(%rcx), %rax
xor %rsi, %rsi
mov %rax, -8(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave12) (USE_AS_STRNCPY only): reached from L(Shl12LoopStart)
   when the group holds no NUL and "sub $64, %r8" shows that the byte
   budget ends inside it.  At that point %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined by palignr.  Stores whole 16-byte output
   vectors while more than 16 bytes of budget remain, counting the stored
   bytes in %rsi.
   NOTE(review): %rsi is expected to be 0 on entry; it is not set in this
   part of the file -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave12):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)

palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit12)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 4, copy the 4 bytes that end at the new
   cursor position, clear %rsi and let L(CopyFrom1To16BytesCase3) copy the
   last bytes according to %r8.  */
L(StrncpyExit12):
lea 4(%rdx, %rsi), %rdx
lea 4(%rcx, %rsi), %rcx
mov -4(%rcx), %eax
xor %rsi, %rsi
mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave13) (USE_AS_STRNCPY only): reached from L(Shl13LoopStart)
   when the group holds no NUL and "sub $64, %r8" shows that the byte
   budget ends inside it.  At that point %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined by palignr.  Stores whole 16-byte output
   vectors while more than 16 bytes of budget remain, counting the stored
   bytes in %rsi.
   NOTE(review): %rsi is expected to be 0 on entry; it is not set in this
   part of the file -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave13):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 19(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)

palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit13)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 3, copy the 4 bytes that end at the new
   cursor position (one more than the advance), clear %rsi and let
   L(CopyFrom1To16BytesCase3) copy the last bytes according to %r8.  */
L(StrncpyExit13):
lea 3(%rdx, %rsi), %rdx
lea 3(%rcx, %rsi), %rcx
mov -4(%rcx), %eax
xor %rsi, %rsi
mov %eax, -4(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave14) (USE_AS_STRNCPY only): reached from L(Shl14LoopStart)
   when the group holds no NUL and "sub $64, %r8" shows that the byte
   budget ends inside it.  At that point %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined by palignr.  Stores whole 16-byte output
   vectors while more than 16 bytes of budget remain, counting the stored
   bytes in %rsi.
   NOTE(review): %rsi is expected to be 0 on entry; it is not set in this
   part of the file -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave14):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 18(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)

palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit14)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 2, copy the 2 bytes that end at the new
   cursor position, clear %rsi and let L(CopyFrom1To16BytesCase3) copy the
   last bytes according to %r8.  */
L(StrncpyExit14):
lea 2(%rdx, %rsi), %rdx
lea 2(%rcx, %rsi), %rcx
movw -2(%rcx), %ax
xor %rsi, %rsi
movw %ax, -2(%rdx)
jmp L(CopyFrom1To16BytesCase3)
/* L(StrncpyLeave15) (USE_AS_STRNCPY only): reached from L(Shl15LoopStart)
   when the group holds no NUL and "sub $64, %r8" shows that the byte
   budget ends inside it.  At that point %xmm1 = previous vector,
   %xmm2 = first vector of the group, %xmm4/%xmm5 = third/fourth output
   vectors already combined by palignr.  Stores whole 16-byte output
   vectors while more than 16 bytes of budget remain, counting the stored
   bytes in %rsi.
   NOTE(review): %rsi is expected to be 0 on entry; it is not set in this
   part of the file -- confirm.
   Stray revision-timestamp lines were removed from this routine.  */
.p2align 4
L(StrncpyLeave15):
movaps %xmm2, %xmm3
add $48, %r8
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%rdx)
movaps 17(%rcx), %xmm2
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)

palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
movaps %xmm4, 32(%rdx)
lea 16(%rsi), %rsi
sub $16, %r8
jbe L(StrncpyExit15)
movaps %xmm5, 48(%rdx)
lea 16(%rsi), %rsi
lea -16(%r8), %r8
/* Advance both cursors by %rsi + 1, copy the single byte in front of the
   new cursor position, clear %rsi and let L(CopyFrom1To16BytesCase3) copy
   the last bytes according to %r8.  */
L(StrncpyExit15):
lea 1(%rdx, %rsi), %rdx
lea 1(%rcx, %rsi), %rcx
movb -1(%rcx), %ah
xor %rsi, %rsi
movb %ah, -1(%rdx)
jmp L(CopyFrom1To16BytesCase3)
# endif
# ifndef USE_AS_STRCAT
END (STRCPY)
# endif
#endif