Optimize x86-64 version of sem_timedwait.

This commit is contained in:
Ulrich Drepper 2009-08-08 17:48:09 -07:00
parent 5d368296ea
commit efa0569d2b
2 changed files with 185 additions and 102 deletions

View File

@ -1,5 +1,8 @@
2009-08-08 Ulrich Drepper <drepper@redhat.com> 2009-08-08 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
(__pthread_cond_wait): Optimize by avoiding use of callee-saved (__pthread_cond_wait): Optimize by avoiding use of callee-saved
register. register.

View File

@ -65,7 +65,113 @@ sem_timedwait:
retq retq
/* Check whether the timeout value is valid. */ /* Check whether the timeout value is valid. */
1: pushq %r12 1: cmpq $1000000000, 8(%rsi)
jae 6f
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
cmpl $0, __have_futex_clock_realtime(%rip)
# else
cmpl $0, __have_futex_clock_realtime
# endif
je .Lreltmo
#endif
/* This push is only needed to store the sem_t pointer for the
exception handler. */
pushq %rdi
cfi_adjust_cfa_offset(8)
movq %rsi, %r10
LOCK
addq $1, NWAITERS(%rdi)
.LcleanupSTART:
13: call __pthread_enable_asynccancel
movl %eax, %r8d
#if VALUE != 0
leaq VALUE(%rdi), %rdi
#endif
movl $0xffffffff, %r9d
movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
orl PRIVATE(%rdi), %esi
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
movq %rax, %r9
#if VALUE != 0
leaq -VALUE(%rdi), %rdi
#endif
xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
movq %r8, %rdi
testq %r9, %r9
je 11f
cmpq $-EWOULDBLOCK, %r9
jne 3f
11:
#if VALUE == 0
movl (%rdi), %eax
#else
movl VALUE(%rdi), %eax
#endif
14: testl %eax, %eax
je 13b
leaq -1(%rax), %rcx
LOCK
#if VALUE == 0
cmpxchgl %ecx, (%rdi)
#else
cmpxchgl %ecx, VALUE(%rdi)
#endif
jne 14b
xorl %eax, %eax
15: LOCK
subq $1, NWAITERS(%rdi)
leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
retq
cfi_adjust_cfa_offset(8)
3: negq %r9
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl %r9d, %fs:(%rdx)
#else
callq __errno_location@plt
movl %r9d, (%rax)
#endif
orl $-1, %eax
jmp 15b
cfi_adjust_cfa_offset(-8)
6:
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl $EINVAL, %fs:(%rdx)
#else
callq __errno_location@plt
movl $EINVAL, (%rax)
#endif
orl $-1, %eax
retq
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
pushq %r12
cfi_adjust_cfa_offset(8) cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0) cfi_rel_offset(%r12, 0)
pushq %r13 pushq %r13
@ -74,6 +180,7 @@ sem_timedwait:
pushq %r14 pushq %r14
cfi_adjust_cfa_offset(8) cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r14, 0) cfi_rel_offset(%r14, 0)
#ifdef __ASSUME_FUTEX_CLOCK_REALTIME #ifdef __ASSUME_FUTEX_CLOCK_REALTIME
# define STACKFRAME 8 # define STACKFRAME 8
#else #else
@ -85,105 +192,9 @@ sem_timedwait:
movq %rdi, %r12 movq %rdi, %r12
movq %rsi, %r13 movq %rsi, %r13
/* Check for invalid nanosecond field. */
cmpq $1000000000, 8(%r13)
movl $EINVAL, %r14d
jae 6f
LOCK LOCK
addq $1, NWAITERS(%r12) addq $1, NWAITERS(%r12)
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
cmpl $0, __have_futex_clock_realtime(%rip)
# else
cmpl $0, __have_futex_clock_realtime
# endif
je .Lreltmo
#endif
.LcleanupSTART:
13: call __pthread_enable_asynccancel
movl %eax, (%rsp)
movq %r13, %r10
#if VALUE == 0
movq %r12, %rdi
#else
leaq VALUE(%r12), %rdi
#endif
movl $0xffffffff, %r9d
movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
orl PRIVATE(%rdi), %esi
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
movq %rax, %r14
movl (%rsp), %edi
call __pthread_disable_asynccancel
.LcleanupEND:
testq %r14, %r14
je 11f
cmpq $-EWOULDBLOCK, %r14
jne 3f
11:
#if VALUE == 0
movl (%r12), %eax
#else
movl VALUE(%r12), %eax
#endif
14: testl %eax, %eax
je 13b
leaq -1(%rax), %rcx
LOCK
#if VALUE == 0
cmpxchgl %ecx, (%r12)
#else
cmpxchgl %ecx, VALUE(%r12)
#endif
jne 14b
10: xorl %eax, %eax
15: LOCK
subq $1, NWAITERS(%r12)
addq $STACKFRAME, %rsp
cfi_adjust_cfa_offset(-STACKFRAME)
popq %r14
cfi_adjust_cfa_offset(-8)
cfi_restore(%r14)
popq %r13
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
retq
cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
cfi_rel_offset(%r14, STACKFRAME)
3: negq %r14
6:
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl %r14d, %fs:(%rdx)
#else
callq __errno_location@plt
movl %r14d, (%rax)
#endif
orl $-1, %eax
jmp 15b
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
7: xorl %esi, %esi 7: xorl %esi, %esi
movq %rsp, %rdi movq %rsp, %rdi
movq $VSYSCALL_ADDR_vgettimeofday, %rax movq $VSYSCALL_ADDR_vgettimeofday, %rax
@ -202,7 +213,7 @@ sem_timedwait:
decq %rdi decq %rdi
5: testq %rdi, %rdi 5: testq %rdi, %rdi
movl $ETIMEDOUT, %r14d movl $ETIMEDOUT, %r14d
js 6b /* Time is already up. */ js 36f /* Time is already up. */
movq %rdi, (%rsp) /* Store relative timeout. */ movq %rdi, (%rsp) /* Store relative timeout. */
movq %rsi, 8(%rsp) movq %rsi, 8(%rsp)
@ -235,7 +246,7 @@ sem_timedwait:
testq %r14, %r14 testq %r14, %r14
je 9f je 9f
cmpq $-EWOULDBLOCK, %r14 cmpq $-EWOULDBLOCK, %r14
jne 3b jne 33f
9: 9:
# if VALUE == 0 # if VALUE == 0
@ -254,15 +265,54 @@ sem_timedwait:
cmpxchgl %ecx, VALUE(%r12) cmpxchgl %ecx, VALUE(%r12)
# endif # endif
jne 8b jne 8b
jmp 10b
xorl %eax, %eax
45: LOCK
subq $1, NWAITERS(%r12)
addq $STACKFRAME, %rsp
cfi_adjust_cfa_offset(-STACKFRAME)
popq %r14
cfi_adjust_cfa_offset(-8)
cfi_restore(%r14)
popq %r13
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
retq
cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
cfi_rel_offset(%r14, STACKFRAME)
33: negq %r14
36:
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl %r14d, %fs:(%rdx)
#else
callq __errno_location@plt
movl %r14d, (%rax)
#endif #endif
orl $-1, %eax
jmp 45b
#endif
cfi_endproc
.size sem_timedwait,.-sem_timedwait .size sem_timedwait,.-sem_timedwait
.type sem_timedwait_cleanup,@function .type sem_timedwait_cleanup,@function
sem_timedwait_cleanup: sem_timedwait_cleanup:
cfi_startproc
cfi_adjust_cfa_offset(8)
movq (%rsp), %rdi
LOCK LOCK
subq $1, NWAITERS(%r12) subq $1, NWAITERS(%rdi)
movq %rax, %rdi movq %rax, %rdi
.LcallUR: .LcallUR:
call _Unwind_Resume@PLT call _Unwind_Resume@PLT
@ -272,6 +322,30 @@ sem_timedwait_cleanup:
.size sem_timedwait_cleanup,.-sem_timedwait_cleanup .size sem_timedwait_cleanup,.-sem_timedwait_cleanup
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
/* Cleanup landing pad; the exception table names it for the
   .LcleanupSTART2 to .LcleanupEND2 range (presumably the .Lreltmo
   code path of sem_timedwait, which keeps the semaphore pointer in
   %r12 -- confirm).  It takes the thread off the waiter count again,
   reloads the registers saved in the frame and resumes unwinding.  */
.type sem_timedwait_cleanup2,@function
sem_timedwait_cleanup2:
cfi_startproc
/* Describe the frame this code is entered with: STACKFRAME bytes of
   locals on top of the three saved registers %r12, %r13 and %r14.  */
cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
cfi_rel_offset(%r14, STACKFRAME)
/* Atomically decrement the NWAITERS field of the object %r12 points
   to.  This has to happen before %r12 is reloaded below.  */
LOCK
subq $1, NWAITERS(%r12)
/* The value found in %rax becomes the first argument of
   _Unwind_Resume (presumably the exception object handed over by
   the unwinder -- confirm).  */
movq %rax, %rdi
/* Reload %r14, %r13 and %r12 from the stack slots described by the
   CFI above.  The stack pointer itself is not adjusted.  */
movq STACKFRAME(%rsp), %r14
movq STACKFRAME+8(%rsp), %r13
movq STACKFRAME+16(%rsp), %r12
/* .LcallUR2 and .LENDCODE2 delimit the call for the exception table,
   which lists this range with a landing pad of 0.  */
.LcallUR2:
call _Unwind_Resume@PLT
/* NOTE(review): _Unwind_Resume is presumably not expected to return;
   the hlt would fault if it ever did.  */
hlt
.LENDCODE2:
cfi_endproc
.size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
#endif
.section .gcc_except_table,"a",@progbits .section .gcc_except_table,"a",@progbits
.LexceptSTART: .LexceptSTART:
.byte DW_EH_PE_omit # @LPStart format .byte DW_EH_PE_omit # @LPStart format
@ -286,13 +360,19 @@ sem_timedwait_cleanup:
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.uleb128 .LcleanupSTART2-.LSTARTCODE .uleb128 .LcleanupSTART2-.LSTARTCODE
.uleb128 .LcleanupEND2-.LcleanupSTART2 .uleb128 .LcleanupEND2-.LcleanupSTART2
.uleb128 sem_timedwait_cleanup-.LSTARTCODE .uleb128 sem_timedwait_cleanup2-.LSTARTCODE
.uleb128 0 .uleb128 0
#endif #endif
.uleb128 .LcallUR-.LSTARTCODE .uleb128 .LcallUR-.LSTARTCODE
.uleb128 .LENDCODE-.LcallUR .uleb128 .LENDCODE-.LcallUR
.uleb128 0 .uleb128 0
.uleb128 0 .uleb128 0
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.uleb128 .LcallUR2-.LSTARTCODE
.uleb128 .LENDCODE2-.LcallUR2
.uleb128 0
.uleb128 0
#endif
.Lcstend: .Lcstend: