mirror of
https://github.com/openssl/openssl.git
synced 2025-04-12 20:30:52 +08:00
Make it able to run asm code on OpenBSD (arm64)
In order to get asm code running on OpenBSD (arm64), we must place all constants into .rodata sections.

The change to crypto/perlasm/arm-xlate.pl adjusts Theo's changes to cover the additional assembler variants/flavours we use for building OpenSSL.

Fixes #23312

Reviewed-by: Hugo Landau <hlandau@devever.net>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/24137)
This commit is contained in:
parent
3b7bd871c1
commit
c6e65c1f8e
crypto
aes/asm
bn/asm
chacha/asm
ec/asm
modes/asm
poly1305/asm
sha/asm
@ -107,12 +107,13 @@ my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
.rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.previous
|
||||
.globl ${prefix}_set_encrypt_key
|
||||
.type ${prefix}_set_encrypt_key,%function
|
||||
.align 5
|
||||
@ -139,7 +140,8 @@ $code.=<<___;
|
||||
tst $bits,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adr $ptr,.Lrcon
|
||||
adrp $ptr,.Lrcon
|
||||
add $ptr,$ptr,:lo12:.Lrcon
|
||||
cmp $bits,#192
|
||||
|
||||
veor $zero,$zero,$zero
|
||||
|
@ -55,7 +55,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
|
||||
$code.=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
.rodata
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
.align 7 // totally strategic alignment
|
||||
@ -146,6 +146,9 @@ _vpaes_consts:
|
||||
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
.align 6
|
||||
|
||||
.text
|
||||
|
||||
___
|
||||
|
||||
{
|
||||
@ -165,7 +168,8 @@ $code.=<<___;
|
||||
.type _vpaes_encrypt_preheat,%function
|
||||
.align 4
|
||||
_vpaes_encrypt_preheat:
|
||||
adr x10, .Lk_inv
|
||||
adrp x10, .Lk_inv
|
||||
add x10, x10, :lo12:.Lk_inv
|
||||
movi v17.16b, #0x0f
|
||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
|
||||
@ -193,7 +197,8 @@ _vpaes_encrypt_preheat:
|
||||
_vpaes_encrypt_core:
|
||||
mov x9, $key
|
||||
ldr w8, [$key,#240] // pull rounds
|
||||
adr x11, .Lk_mc_forward+16
|
||||
adrp x11, .Lk_mc_forward+16
|
||||
add x11, x11, :lo12:.Lk_mc_forward+16
|
||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@ -280,7 +285,8 @@ vpaes_encrypt:
|
||||
_vpaes_encrypt_2x:
|
||||
mov x9, $key
|
||||
ldr w8, [$key,#240] // pull rounds
|
||||
adr x11, .Lk_mc_forward+16
|
||||
adrp x11, .Lk_mc_forward+16
|
||||
add x11, x11, :lo12:.Lk_mc_forward+16
|
||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@ -383,9 +389,11 @@ _vpaes_encrypt_2x:
|
||||
.type _vpaes_decrypt_preheat,%function
|
||||
.align 4
|
||||
_vpaes_decrypt_preheat:
|
||||
adr x10, .Lk_inv
|
||||
adrp x10, .Lk_inv
|
||||
add x10, x10, :lo12:.Lk_inv
|
||||
movi v17.16b, #0x0f
|
||||
adr x11, .Lk_dipt
|
||||
adrp x11, .Lk_dipt
|
||||
add x11, x11, :lo12:.Lk_dipt
|
||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
|
||||
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
|
||||
@ -407,10 +415,12 @@ _vpaes_decrypt_core:
|
||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||
adr x10, .Lk_sr
|
||||
adrp x10, .Lk_sr
|
||||
add x10, x10, :lo12:.Lk_sr
|
||||
and x11, x11, #0x30 // and \$0x30, %r11
|
||||
add x11, x11, x10
|
||||
adr x10, .Lk_mc_forward+48
|
||||
adrp x10, .Lk_mc_forward+48
|
||||
add x10, x10, :lo12:.Lk_mc_forward+48
|
||||
|
||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@ -518,10 +528,12 @@ _vpaes_decrypt_2x:
|
||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||
adr x10, .Lk_sr
|
||||
adrp x10, .Lk_sr
|
||||
add x10, x10, :lo12:.Lk_sr
|
||||
and x11, x11, #0x30 // and \$0x30, %r11
|
||||
add x11, x11, x10
|
||||
adr x10, .Lk_mc_forward+48
|
||||
adrp x10, .Lk_mc_forward+48
|
||||
add x10, x10, :lo12:.Lk_mc_forward+48
|
||||
|
||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@ -657,14 +669,18 @@ $code.=<<___;
|
||||
.type _vpaes_key_preheat,%function
|
||||
.align 4
|
||||
_vpaes_key_preheat:
|
||||
adr x10, .Lk_inv
|
||||
adrp x10, .Lk_inv
|
||||
add x10, x10, :lo12:.Lk_inv
|
||||
movi v16.16b, #0x5b // .Lk_s63
|
||||
adr x11, .Lk_sb1
|
||||
adrp x11, .Lk_sb1
|
||||
add x11, x11, :lo12:.Lk_sb1
|
||||
movi v17.16b, #0x0f // .Lk_s0F
|
||||
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
|
||||
adr x10, .Lk_dksd
|
||||
adrp x10, .Lk_dksd
|
||||
add x10, x10, :lo12:.Lk_dksd
|
||||
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
|
||||
adr x11, .Lk_mc_forward
|
||||
adrp x11, .Lk_mc_forward
|
||||
add x11, x11, :lo12:.Lk_mc_forward
|
||||
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
|
||||
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
|
||||
ld1 {v8.2d}, [x10] // .Lk_rcon
|
||||
@ -688,7 +704,8 @@ _vpaes_schedule_core:
|
||||
bl _vpaes_schedule_transform
|
||||
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
|
||||
|
||||
adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
||||
adrp x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
||||
add x10, x10, :lo12:.Lk_sr
|
||||
add x8, x8, x10
|
||||
cbnz $dir, .Lschedule_am_decrypting
|
||||
|
||||
@ -814,12 +831,14 @@ _vpaes_schedule_core:
|
||||
.align 4
|
||||
.Lschedule_mangle_last:
|
||||
// schedule last round key from xmm0
|
||||
adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
adrp x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
add x11, x11, :lo12:.Lk_deskew
|
||||
cbnz $dir, .Lschedule_mangle_last_dec
|
||||
|
||||
// encrypting
|
||||
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
|
||||
adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||
adrp x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||
add x11, x11, :lo12:.Lk_opt
|
||||
add $out, $out, #32 // add \$32, %rdx
|
||||
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
||||
|
||||
|
@ -1898,6 +1898,7 @@ __bn_mul4x_mont:
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
.rodata
|
||||
.asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
||||
|
@ -140,7 +140,7 @@ $code.=<<___;
|
||||
.extern ChaCha20_ctr32_sve
|
||||
#endif
|
||||
|
||||
.text
|
||||
.rodata
|
||||
|
||||
.align 5
|
||||
.Lsigma:
|
||||
@ -151,6 +151,8 @@ $code.=<<___;
|
||||
.long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f
|
||||
.asciz "ChaCha20 for ARMv8, CRYPTOGAMS by \@dot-asm"
|
||||
|
||||
.text
|
||||
|
||||
.globl ChaCha20_ctr32_dflt
|
||||
.type ChaCha20_ctr32_dflt,%function
|
||||
.align 5
|
||||
@ -170,7 +172,8 @@ ChaCha20_ctr32_dflt:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr @x[0],.Lsigma
|
||||
adrp @x[0],.Lsigma
|
||||
add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
@ -473,7 +476,8 @@ ChaCha20_neon:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr @x[0],.Lsigma
|
||||
adrp @x[0],.Lsigma
|
||||
add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
@ -884,7 +888,8 @@ ChaCha20_512_neon:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr @x[0],.Lsigma
|
||||
adrp @x[0],.Lsigma
|
||||
add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
@ -55,7 +55,7 @@ my ($acc6,$acc7)=($ap,$bp); # used in __ecp_nistz256_sqr_mont
|
||||
$code.=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
.rodata
|
||||
___
|
||||
########################################################################
|
||||
# Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
|
||||
@ -117,6 +117,8 @@ $code.=<<___;
|
||||
.quad 0xccd1c8aaee00bc4f
|
||||
.asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
|
||||
.text
|
||||
|
||||
// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
|
||||
.globl ecp_nistz256_to_mont
|
||||
.type ecp_nistz256_to_mont,%function
|
||||
@ -127,12 +129,16 @@ ecp_nistz256_to_mont:
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
|
||||
ldr $bi,.LRR // bp[0]
|
||||
adrp $bi,.LRR
|
||||
ldr $bi,[$bi,:lo12:.LRR] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adr $bp,.LRR // &bp[0]
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
adrp $bp,.LRR // &bp[0]
|
||||
add $bp,$bp,:lo12:.LRR
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@ -155,9 +161,12 @@ ecp_nistz256_from_mont:
|
||||
mov $bi,#1 // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adr $bp,.Lone // &bp[0]
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
adrp $bp,.Lone // &bp[0]
|
||||
add $bp,$bp,:lo12:.Lone
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@ -181,8 +190,10 @@ ecp_nistz256_mul_mont:
|
||||
ldr $bi,[$bp] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@ -204,8 +215,10 @@ ecp_nistz256_sqr_mont:
|
||||
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sqr_mont
|
||||
|
||||
@ -229,8 +242,10 @@ ecp_nistz256_add:
|
||||
ldp $t0,$t1,[$bp]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldp $t2,$t3,[$bp,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_add
|
||||
|
||||
@ -250,8 +265,10 @@ ecp_nistz256_div_by_2:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_div_by_2
|
||||
|
||||
@ -271,8 +288,10 @@ ecp_nistz256_mul_by_2:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
mov $t0,$acc0
|
||||
mov $t1,$acc1
|
||||
mov $t2,$acc2
|
||||
@ -296,8 +315,10 @@ ecp_nistz256_mul_by_3:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
mov $t0,$acc0
|
||||
mov $t1,$acc1
|
||||
mov $t2,$acc2
|
||||
@ -333,8 +354,10 @@ ecp_nistz256_sub:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sub_from
|
||||
|
||||
@ -357,8 +380,10 @@ ecp_nistz256_neg:
|
||||
mov $acc1,xzr
|
||||
mov $acc2,xzr
|
||||
mov $acc3,xzr
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sub_from
|
||||
|
||||
@ -736,9 +761,11 @@ ecp_nistz256_point_double:
|
||||
mov $rp_real,$rp
|
||||
ldp $acc2,$acc3,[$ap,#48]
|
||||
mov $ap_real,$ap
|
||||
ldr $poly1,.Lpoly+8
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
mov $t0,$acc0
|
||||
ldr $poly3,.Lpoly+24
|
||||
ldr $poly3,[$poly3,#24]
|
||||
mov $t1,$acc1
|
||||
ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont
|
||||
mov $t2,$acc2
|
||||
@ -897,8 +924,10 @@ ecp_nistz256_point_add:
|
||||
mov $rp_real,$rp
|
||||
mov $ap_real,$ap
|
||||
mov $bp_real,$bp
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
orr $t0,$a0,$a1
|
||||
orr $t2,$a2,$a3
|
||||
orr $in2infty,$t0,$t2
|
||||
@ -1151,8 +1180,10 @@ ecp_nistz256_point_add_affine:
|
||||
mov $rp_real,$rp
|
||||
mov $ap_real,$ap
|
||||
mov $bp_real,$bp
|
||||
ldr $poly1,.Lpoly+8
|
||||
ldr $poly3,.Lpoly+24
|
||||
adrp $poly3,.Lpoly
|
||||
add $poly3,$poly3,:lo12:.Lpoly
|
||||
ldr $poly1,[$poly3,#8]
|
||||
ldr $poly3,[$poly3,#24]
|
||||
|
||||
ldp $a0,$a1,[$ap,#64] // in1_z
|
||||
ldp $a2,$a3,[$ap,#64+16]
|
||||
@ -1303,7 +1334,8 @@ $code.=<<___;
|
||||
stp $acc2,$acc3,[$rp_real,#$i+16]
|
||||
___
|
||||
$code.=<<___ if ($i == 0);
|
||||
adr $bp_real,.Lone_mont-64
|
||||
adrp $bp_real,.Lone_mont-64
|
||||
add $bp_real,$bp_real,:lo12:.Lone_mont-64
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
@ -1354,7 +1386,8 @@ ecp_nistz256_ord_mul_mont:
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
||||
adr $ordk,.Lord
|
||||
adrp $ordk,.Lord
|
||||
add $ordk,$ordk,:lo12:.Lord
|
||||
ldr $bi,[$bp] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
@ -1497,7 +1530,8 @@ ecp_nistz256_ord_sqr_mont:
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
||||
adr $ordk,.Lord
|
||||
adrp $ordk,.Lord
|
||||
add $ordk,$ordk,:lo12:.Lord
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
|
||||
|
@ -42,7 +42,8 @@ $code.=<<___;
|
||||
adc $t4,xzr,xzr
|
||||
|
||||
// Load polynomial
|
||||
adr x2,$mod
|
||||
adrp x2,$mod
|
||||
add x2,x2,:lo12:$mod
|
||||
ldp $s4,$s5,[x2]
|
||||
ldp $s6,$s7,[x2,#16]
|
||||
|
||||
@ -88,7 +89,8 @@ $code.=<<___;
|
||||
sbc $t4,xzr,xzr
|
||||
|
||||
// Load polynomial
|
||||
adr x2,$mod
|
||||
adrp x2,$mod
|
||||
add x2,x2,:lo12:$mod
|
||||
ldp $s4,$s5,[x2]
|
||||
ldp $s6,$s7,[x2,#16]
|
||||
|
||||
@ -134,7 +136,8 @@ $code.=<<___;
|
||||
lsr $s3,$s3,#1
|
||||
|
||||
// Load mod
|
||||
adr x2,$mod
|
||||
adrp x2,$mod
|
||||
add x2,x2,:lo12:$mod
|
||||
ldp $s4,$s5,[x2]
|
||||
ldp $s6,$s7,[x2,#16]
|
||||
|
||||
@ -161,7 +164,7 @@ ___
|
||||
$code.=<<___;
|
||||
#include "arm_arch.h"
|
||||
.arch armv8-a
|
||||
.text
|
||||
.rodata
|
||||
|
||||
.align 5
|
||||
// The polynomial p
|
||||
@ -177,6 +180,8 @@ $code.=<<___;
|
||||
.Lord_div_2:
|
||||
.quad 0xa9ddfa049ceaa092,0xb901efb590e30295,0xffffffffffffffff,0x7fffffff7fffffff
|
||||
|
||||
.text
|
||||
|
||||
// void bn_rshift1(BN_ULONG *a);
|
||||
.globl bn_rshift1
|
||||
.type bn_rshift1,%function
|
||||
@ -272,7 +277,8 @@ ecp_sm2p256_mul_by_3:
|
||||
mov $t3,$s3
|
||||
|
||||
// Sub polynomial
|
||||
adr x2,.Lpoly
|
||||
adrp x2,.Lpoly
|
||||
add x2,x2,:lo12:.Lpoly
|
||||
ldp $s4,$s5,[x2]
|
||||
ldp $s6,$s7,[x2,#16]
|
||||
subs $s0,$s0,$s4
|
||||
@ -302,7 +308,8 @@ ecp_sm2p256_mul_by_3:
|
||||
mov $t3,$s3
|
||||
|
||||
// Sub polynomial
|
||||
adr x2,.Lpoly
|
||||
adrp x2,.Lpoly
|
||||
add x2,x2,:lo12:.Lpoly
|
||||
ldp $s4,$s5,[x2]
|
||||
ldp $s6,$s7,[x2,#16]
|
||||
subs $s0,$s0,$s4
|
||||
@ -508,7 +515,8 @@ $code.=<<___;
|
||||
mov $s6,$s2
|
||||
mov $s7,$s3
|
||||
|
||||
adr $t0,.Lpoly
|
||||
adrp $t0,.Lpoly
|
||||
add $t0,$t0,:lo12:.Lpoly
|
||||
ldp $t1,$t2,[$t0]
|
||||
ldp $t3,$t4,[$t0,#16]
|
||||
|
||||
|
@ -6035,6 +6035,7 @@ ___
|
||||
}
|
||||
|
||||
$code.=<<___;
|
||||
.rodata
|
||||
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
#endif
|
||||
|
@ -810,6 +810,7 @@ ___
|
||||
}
|
||||
|
||||
$code.=<<___;
|
||||
.rodata
|
||||
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
#endif
|
||||
|
@ -442,7 +442,8 @@ poly1305_blocks_neon:
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
add $in2,$inp,#32
|
||||
adr $zeros,.Lzeros
|
||||
adrp $zeros,.Lzeros
|
||||
add $zeros,$zeros,:lo12:.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
@ -454,7 +455,8 @@ poly1305_blocks_neon:
|
||||
.align 4
|
||||
.Leven_neon:
|
||||
add $in2,$inp,#32
|
||||
adr $zeros,.Lzeros
|
||||
adrp $zeros,.Lzeros
|
||||
add $zeros,$zeros,:lo12:.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
@ -937,6 +939,8 @@ poly1305_emit_neon:
|
||||
ret
|
||||
.size poly1305_emit_neon,.-poly1305_emit_neon
|
||||
|
||||
.rodata
|
||||
|
||||
.align 5
|
||||
.Lzeros:
|
||||
.long 0,0,0,0,0,0,0,0
|
||||
|
@ -82,7 +82,7 @@ my @rhotates = ([ 0, 1, 62, 28, 27 ],
|
||||
$code.=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
.rodata
|
||||
|
||||
.align 8 // strategic alignment and padding that allows to use
|
||||
// address value as loop termination condition...
|
||||
@ -123,11 +123,14 @@ my @A = map([ "x$_", "x".($_+1), "x".($_+2), "x".($_+3), "x".($_+4) ],
|
||||
my @C = map("x$_", (26,27,28,30));
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.type KeccakF1600_int,%function
|
||||
.align 5
|
||||
KeccakF1600_int:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
adr $C[2],iotas
|
||||
adrp $C[2],iotas
|
||||
add $C[2],$C[2],:lo12:iotas
|
||||
stp $C[2],x30,[sp,#16] // 32 bytes on top are mine
|
||||
b .Loop
|
||||
.align 4
|
||||
@ -556,7 +559,8 @@ $code.=<<___;
|
||||
.align 5
|
||||
KeccakF1600_ce:
|
||||
mov x9,#24
|
||||
adr x10,iotas
|
||||
adrp x10,iotas
|
||||
add x10,x10,:lo12:iotas
|
||||
b .Loop_ce
|
||||
.align 4
|
||||
.Loop_ce:
|
||||
|
@ -259,7 +259,8 @@ sha1_block_armv8:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr x4,.Lconst
|
||||
adrp x4,.Lconst
|
||||
add x4,x4,:lo12:.Lconst
|
||||
eor $E,$E,$E
|
||||
ld1.32 {$ABCD},[$ctx],#16
|
||||
ld1.32 {$E}[0],[$ctx]
|
||||
@ -319,6 +320,9 @@ $code.=<<___;
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size sha1_block_armv8,.-sha1_block_armv8
|
||||
|
||||
.rodata
|
||||
|
||||
.align 6
|
||||
.Lconst:
|
||||
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
|
||||
|
@ -235,7 +235,8 @@ $code.=<<___;
|
||||
ldp $E,$F,[$ctx,#4*$SZ]
|
||||
add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
|
||||
ldp $G,$H,[$ctx,#6*$SZ]
|
||||
adr $Ktbl,.LK$BITS
|
||||
adrp $Ktbl,.LK$BITS
|
||||
add $Ktbl,$Ktbl,:lo12:.LK$BITS
|
||||
stp $ctx,$num,[x29,#96]
|
||||
|
||||
.Loop:
|
||||
@ -285,6 +286,8 @@ $code.=<<___;
|
||||
ret
|
||||
.size $func,.-$func
|
||||
|
||||
.rodata
|
||||
|
||||
.align 6
|
||||
.type .LK$BITS,%object
|
||||
.LK$BITS:
|
||||
@ -355,6 +358,8 @@ $code.=<<___;
|
||||
.size .LK$BITS,.-.LK$BITS
|
||||
.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
|
||||
.text
|
||||
___
|
||||
|
||||
if ($SZ==4) {
|
||||
@ -376,7 +381,8 @@ sha256_block_armv8:
|
||||
add x29,sp,#0
|
||||
|
||||
ld1.32 {$ABCD,$EFGH},[$ctx]
|
||||
adr $Ktbl,.LK256
|
||||
adrp $Ktbl,.LK256
|
||||
add $Ktbl,$Ktbl,:lo12:.LK256
|
||||
|
||||
.Loop_hw:
|
||||
ld1 {@MSG[0]-@MSG[3]},[$inp],#64
|
||||
@ -641,7 +647,8 @@ sha256_block_neon:
|
||||
mov x29, sp
|
||||
sub sp,sp,#16*4
|
||||
|
||||
adr $Ktbl,.LK256
|
||||
adrp $Ktbl,.LK256
|
||||
add $Ktbl,$Ktbl,:lo12:.LK256
|
||||
add $num,$inp,$num,lsl#6 // len to point at the end of inp
|
||||
|
||||
ld1.8 {@X[0]},[$inp], #16
|
||||
@ -755,7 +762,8 @@ sha512_block_armv8:
|
||||
ld1 {@MSG[4]-@MSG[7]},[$inp],#64
|
||||
|
||||
ld1.64 {@H[0]-@H[3]},[$ctx] // load context
|
||||
adr $Ktbl,.LK512
|
||||
adrp $Ktbl,.LK512
|
||||
add $Ktbl,$Ktbl,:lo12:.LK512
|
||||
|
||||
rev64 @MSG[0],@MSG[0]
|
||||
rev64 @MSG[1],@MSG[1]
|
||||
|
Loading…
x
Reference in New Issue
Block a user