Mirror of https://github.com/openssl/openssl.git
Fix reported performance degradation on aarch64
This restores the implementation prior to
commit 2621751
("aes/asm/aesv8-armx.pl: avoid 32-bit lane assignment in CTR mode")
for 64bit targets only, since it is reportedly 2-17% slower
and the silicon errata only affects 32bit targets.
The new algorithm is used only for 32bit targets.
Fixes #18445
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/18581)
This commit is contained in:
parent 5cc9ab5cf5
commit 65523758e5
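For context, a minimal sketch of the flavour-gating pattern the diff below relies on: the same Perl source appends heredoc blocks to $code, and the 64-bit-only and 32-bit-only blocks are guarded by matching $flavour against /64/. This is a standalone illustration, not code from aesv8-armx.pl; the script layout, the flavour strings "linux64"/"linux32", and the placeholder assembly comments are assumptions, only the guard expressions mirror the diff.

#!/usr/bin/env perl
# Minimal sketch (not the actual aesv8-armx.pl): the same Perl source emits
# different assembly depending on whether the target flavour is 64-bit.
use strict;
use warnings;

my $flavour = shift || "linux64";   # illustrative flavour string
my $code = "";

# Appended for every target.
$code .= <<___;
	// common CTR-mode prologue
___

# Appended only when assembling for a 64-bit flavour (the restored path).
$code .= <<___ if ($flavour =~ /64/);
	// 64-bit-only counter handling
___

# Appended only for 32-bit flavours (the errata-safe path).
$code .= <<___ if ($flavour !~ /64/);
	// 32-bit-only counter handling
___

print $code;

Running the sketch with a 64-bit versus a 32-bit flavour argument prints a different instruction stream, which is exactly how the diff keeps one source file while splitting the two code paths.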
@@ -1809,6 +1809,21 @@ $code.=<<___;
#ifndef __ARMEB__
	rev $ctr, $ctr
#endif
___
$code.=<<___ if ($flavour =~ /64/);
	vorr $dat1,$dat0,$dat0
	add $tctr1, $ctr, #1
	vorr $dat2,$dat0,$dat0
	add $ctr, $ctr, #2
	vorr $ivec,$dat0,$dat0
	rev $tctr1, $tctr1
	vmov.32 ${dat1}[3],$tctr1
	b.ls .Lctr32_tail
	rev $tctr2, $ctr
	sub $len,$len,#3 // bias
	vmov.32 ${dat2}[3],$tctr2
___
$code.=<<___ if ($flavour !~ /64/);
	add $tctr1, $ctr, #1
	vorr $ivec,$dat0,$dat0
	rev $tctr1, $tctr1
@@ -2015,11 +2030,25 @@ $code.=<<___;
	aese $dat1,q8
	aesmc $tmp1,$dat1
	vld1.8 {$in0},[$inp],#16
___
$code.=<<___ if ($flavour =~ /64/);
	vorr $dat0,$ivec,$ivec
___
$code.=<<___ if ($flavour !~ /64/);
	add $tctr0,$ctr,#1
___
$code.=<<___;
	aese $dat2,q8
	aesmc $dat2,$dat2
	vld1.8 {$in1},[$inp],#16
___
$code.=<<___ if ($flavour =~ /64/);
	vorr $dat1,$ivec,$ivec
___
$code.=<<___ if ($flavour !~ /64/);
	rev $tctr0,$tctr0
___
$code.=<<___;
	aese $tmp0,q9
	aesmc $tmp0,$tmp0
	aese $tmp1,q9
@@ -2028,6 +2057,12 @@ $code.=<<___;
	mov $key_,$key
	aese $dat2,q9
	aesmc $tmp2,$dat2
___
$code.=<<___ if ($flavour =~ /64/);
	vorr $dat2,$ivec,$ivec
	add $tctr0,$ctr,#1
___
$code.=<<___;
	aese $tmp0,q12
	aesmc $tmp0,$tmp0
	aese $tmp1,q12
@@ -2043,22 +2078,47 @@ $code.=<<___;
	aese $tmp1,q13
	aesmc $tmp1,$tmp1
	veor $in2,$in2,$rndlast
___
$code.=<<___ if ($flavour =~ /64/);
	rev $tctr0,$tctr0
	aese $tmp2,q13
	aesmc $tmp2,$tmp2
	vmov.32 ${dat0}[3], $tctr0
___
$code.=<<___ if ($flavour !~ /64/);
	vmov.32 ${ivec}[3], $tctr0
	aese $tmp2,q13
	aesmc $tmp2,$tmp2
	vorr $dat0,$ivec,$ivec
___
$code.=<<___;
	rev $tctr1,$tctr1
	aese $tmp0,q14
	aesmc $tmp0,$tmp0
___
$code.=<<___ if ($flavour !~ /64/);
	vmov.32 ${ivec}[3], $tctr1
	rev $tctr2,$ctr
___
$code.=<<___;
	aese $tmp1,q14
	aesmc $tmp1,$tmp1
___
$code.=<<___ if ($flavour =~ /64/);
	vmov.32 ${dat1}[3], $tctr1
	rev $tctr2,$ctr
	aese $tmp2,q14
	aesmc $tmp2,$tmp2
	vmov.32 ${dat2}[3], $tctr2
___
$code.=<<___ if ($flavour !~ /64/);
	vorr $dat1,$ivec,$ivec
	vmov.32 ${ivec}[3], $tctr2
	aese $tmp2,q14
	aesmc $tmp2,$tmp2
	vorr $dat2,$ivec,$ivec
___
$code.=<<___;
	subs $len,$len,#3
	aese $tmp0,q15
	aese $tmp1,q15