{arm64|x86_64}cpuid.pl: add special 16-byte case to OPENSSL_memcmp.

OPENSSL_memcmp is a must in GCM decrypt, and the general-purpose loop takes
quite a portion of the execution time for short inputs: more than GHASH for
few-byte inputs, according to the profiler. The special 16-byte case takes it
off the top-five list in the profiler output.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6312)
This commit is contained in:
Andy Polyakov 2018-05-20 12:13:16 +02:00
parent c1b2569d23
commit 9a708bf982
2 changed files with 25 additions and 0 deletions

View File

@@ -115,6 +115,19 @@ OPENSSL_cleanse:
CRYPTO_memcmp:
eor w3,w3,w3
cbz x2,.Lno_data // len==0?
cmp x2,#16
b.ne .Loop_cmp
ldp x8,x9,[x0]
ldp x10,x11,[x1]
eor x8,x8,x10
eor x9,x9,x11
orr x8,x8,x9
mov x0,#1
cmp x8,#0
csel x0,xzr,x0,eq
ret
.align 4
.Loop_cmp:
ldrb w4,[x0],#1
ldrb w5,[x1],#1

View File

@@ -271,6 +271,18 @@ CRYPTO_memcmp:
xor %r10,%r10
cmp \$0,$arg3
je .Lno_data
cmp \$16,$arg3
jne .Loop_cmp
mov ($arg1),%r10
mov 8($arg1),%r11
mov \$1,$arg3
xor ($arg2),%r10
xor 8($arg2),%r11
or %r11,%r10
cmovnz $arg3,%rax
ret
.align 16
.Loop_cmp:
mov ($arg1),%r10b
lea 1($arg1),$arg1