mirror of
https://github.com/openssl/openssl.git
synced 2024-11-21 01:15:20 +08:00
LoongArch64 assembly pack: Fix ChaCha20 ABI breakage
The [LP64D ABI][1] requires the floating-point registers f24-f31 (aka fs0-fs7) callee-saved. The low 64 bits of a LSX/LASX vector register aliases with the corresponding FPR, so we must save and restore the callee-saved FPR when we writes into the corresponding vector register. This ABI breakage can be easily demonstrated by injecting the use of a saved FPR into the test in bio_enc_test.c: static int test_bio_enc_chacha20(int idx) { register double fs7 asm("f31") = 114.514; asm("#optimize barrier":"+f"(fs7)); return do_test_bio_cipher(EVP_chacha20(), idx) && fs7 == 114.514; } So fix it. To make the logic simpler, jump into the scalar implementation earlier when LSX and LASX are not enumerated in AT_HWCAP, or the input is too short. [1]: https://github.com/loongson/la-abi-specs/blob/v2.20/lapcs.adoc#floating-point-registers Reviewed-by: Neil Horman <nhorman@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/22817)
This commit is contained in:
parent
dfd986b6f5
commit
b46de72c26
@ -17,6 +17,14 @@ my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$r$_",(4..11));
|
||||
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$x)=map("\$r$_",(12..21));
|
||||
my ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8)=map("\$r$_",(23..31));
|
||||
|
||||
# The saved floating-point registers in the LP64D ABI. In LoongArch
|
||||
# with vector extension, the low 64 bits of a vector register alias with
|
||||
# the corresponding FPR. So we must save and restore the corresponding
|
||||
# FPR if we'll write into a vector register. The ABI only requires
|
||||
# saving and restoring the FPR (i.e. 64 bits of the corresponding vector
|
||||
# register), not the entire vector register.
|
||||
my ($fs0,$fs1,$fs2,$fs3,$fs4,$fs5,$fs6,$fs7)=map("\$f$_",(24..31));
|
||||
|
||||
# Here is the 128-bit vector register layout for LSX extension.
|
||||
my ($vr0,$vr1,$vr2,$vr3,$vr4,$vr5,$vr6,$vr7,$vr8,$vr9,$vr10,
|
||||
$vr11,$vr12,$vr13,$vr14,$vr15,$vr16,$vr17,$vr18,$vr19,
|
||||
@ -66,13 +74,25 @@ ChaCha20_ctr32:
|
||||
la.pcrel $t0,OPENSSL_loongarch_hwcap_P
|
||||
ld.w $t0,$t0,0
|
||||
|
||||
bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64
|
||||
|
||||
andi $t0,$t0,LOONGARCH_HWCAP_LASX | LOONGARCH_HWCAP_LSX
|
||||
beqz $t0,.LChaCha20_1x
|
||||
|
||||
addi.d $sp,$sp,-64
|
||||
fst.d $fs0,$sp,0
|
||||
fst.d $fs1,$sp,8
|
||||
fst.d $fs2,$sp,16
|
||||
fst.d $fs3,$sp,24
|
||||
fst.d $fs4,$sp,32
|
||||
fst.d $fs5,$sp,40
|
||||
fst.d $fs6,$sp,48
|
||||
fst.d $fs7,$sp,56
|
||||
|
||||
andi $t1,$t0,LOONGARCH_HWCAP_LASX
|
||||
bnez $t1,.LChaCha20_8x
|
||||
|
||||
andi $t2,$t0,LOONGARCH_HWCAP_LSX
|
||||
bnez $t2,.LChaCha20_4x
|
||||
|
||||
b .LChaCha20_1x
|
||||
b .LChaCha20_4x
|
||||
|
||||
EOF
|
||||
|
||||
@ -442,8 +462,6 @@ $code .= <<EOF;
|
||||
.align 6
|
||||
.LChaCha20_4x:
|
||||
ori $t3,$zero,64
|
||||
bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64
|
||||
|
||||
addi.d $sp,$sp,-128
|
||||
|
||||
# Save the initial block counter in $t4
|
||||
@ -783,7 +801,7 @@ $code .= <<EOF;
|
||||
|
||||
.Ldone_4x:
|
||||
addi.d $sp,$sp,128
|
||||
b .Lend
|
||||
b .Lrestore_saved_fpr
|
||||
|
||||
EOF
|
||||
}
|
||||
@ -869,8 +887,6 @@ $code .= <<EOF;
|
||||
.align 6
|
||||
.LChaCha20_8x:
|
||||
ori $t3,$zero,64
|
||||
bleu $len,$t3,.LChaCha20_1x # goto 1x when len <= 64
|
||||
|
||||
addi.d $sp,$sp,-128
|
||||
|
||||
# Save the initial block counter in $t4
|
||||
@ -1394,12 +1410,22 @@ $code .= <<EOF;
|
||||
|
||||
.Ldone_8x:
|
||||
addi.d $sp,$sp,128
|
||||
b .Lend
|
||||
b .Lrestore_saved_fpr
|
||||
|
||||
EOF
|
||||
}
|
||||
|
||||
$code .= <<EOF;
|
||||
.Lrestore_saved_fpr:
|
||||
fld.d $fs0,$sp,0
|
||||
fld.d $fs1,$sp,8
|
||||
fld.d $fs2,$sp,16
|
||||
fld.d $fs3,$sp,24
|
||||
fld.d $fs4,$sp,32
|
||||
fld.d $fs5,$sp,40
|
||||
fld.d $fs6,$sp,48
|
||||
fld.d $fs7,$sp,56
|
||||
addi.d $sp,$sp,64
|
||||
.Lno_data:
|
||||
.Lend:
|
||||
jr $ra
|
||||
|
Loading…
Reference in New Issue
Block a user