mirror of
https://github.com/openssl/openssl.git
synced 2025-02-17 14:32:04 +08:00
bn/asm/sparcv9-mont.pl: fix squaring code path.
On contemporary processors this module is used only with odd input lengths, i.e. it is not used in normal PKI cases. The problem was "illuminated" by fuzzing tests.
Reviewed-by: Richard Levitte <levitte@openssl.org>
This commit is contained in:
parent
3ba4dac67a
commit
120a9e1a82
@ -300,7 +300,7 @@ ___
|
||||
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
|
||||
######## code without following dedicated squaring procedure.
|
||||
########
|
||||
$sbit="%i2"; # re-use $bp!
|
||||
$sbit="%o5";
|
||||
|
||||
$code.=<<___;
|
||||
.align 32
|
||||
@ -413,7 +413,7 @@ $code.=<<___;
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
ld [$np+$j],$npj ! np[j]
|
||||
@ -422,7 +422,7 @@ $code.=<<___;
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
add $acc0,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
cmp $j,$num
|
||||
@ -436,12 +436,12 @@ $code.=<<___;
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $acc0,$car0,$car0
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc1,$car1,$car1
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
@ -449,7 +449,7 @@ $code.=<<___;
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
@ -509,7 +509,7 @@ $code.=<<___;
|
||||
.Lsqr_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
ld [$ap+$j],$apj ! ap[j]
|
||||
and $car0,$mask,$acc0
|
||||
@ -517,7 +517,7 @@ $code.=<<___;
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
ld [$tp+8],$tpj ! tp[j]
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
add $j,4,$j ! j++
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
@ -532,12 +532,12 @@ $code.=<<___;
|
||||
.Lsqr_no_inner2:
|
||||
mulx $apj,$mul0,$acc0
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$sbit,$sbit
|
||||
add $acc0,$car0,$car0
|
||||
and $car0,$mask,$acc0
|
||||
srlx $car0,32,$car0
|
||||
add $acc0,$acc0,$acc0
|
||||
or $sbit,$acc0,$acc0
|
||||
add $sbit,$acc0,$acc0
|
||||
srlx $acc0,32,$sbit
|
||||
and $acc0,$mask,$acc0
|
||||
add $acc0,$car1,$car1
|
||||
@ -546,7 +546,7 @@ $code.=<<___;
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
@ -591,14 +591,17 @@ $code.=<<___;
|
||||
!.Lsqr_last
|
||||
|
||||
mulx $npj,$mul1,$acc1
|
||||
add $tpj,$car1,$car1
|
||||
add $tpj,$acc0,$acc0
|
||||
srlx $acc0,32,$tmp0
|
||||
and $acc0,$mask,$acc0
|
||||
add $tmp0,$sbit,$sbit
|
||||
add $acc0,$car1,$car1
|
||||
add $acc1,$car1,$car1
|
||||
st $car1,[$tp]
|
||||
srlx $car1,32,$car1
|
||||
|
||||
add $car0,$car0,$car0 ! recover $car0
|
||||
or $sbit,$car0,$car0
|
||||
add $sbit,$car0,$car0
|
||||
add $car0,$car1,$car1
|
||||
add $car2,$car1,$car1
|
||||
st $car1,[$tp+4]
|
||||
|
Loading…
Reference in New Issue
Block a user