mirror of
https://github.com/openssl/openssl.git
synced 2025-01-12 13:36:28 +08:00
636 lines
16 KiB
Perl
636 lines
16 KiB
Perl
#! /usr/bin/env perl
# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# This module implements support for SM4 hw support on aarch64
# Oct 2021
#

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first next to this script, then in
# ../../perlasm relative to it.  $dir keeps its trailing path separator.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# $xlate is quoted like the interpreter and output paths so that a source
# tree living under a directory with spaces in its name still works.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Prefix of every symbol emitted by this generator.
$prefix="sm4_v8";
# Vector registers v0..v7 hold the round keys in every cipher routine below.
my @rks=map("v$_",(0..7));
# rev32($dst, $src)
#
# Appends to $code a "rev32 $dst.16b,$src.16b" instruction wrapped in
# "#ifndef __ARMEB__", so the byte swap is only assembled when __ARMEB__ is
# not defined.
#
# The sub takes two arguments, so it must not carry an empty "()" prototype:
# that prototype declares "no arguments" and only went unnoticed because all
# callers use the prototype-bypassing &rev32(...) form.
sub rev32 {
    my ($dst, $src) = @_;
    $code.=<<___;
#ifndef __ARMEB__
rev32 $dst.16b,$src.16b
#endif
___
}
# enc_blk($data)
#
# Appends to $code the instruction sequence that runs one block, held in
# vector register $data, through the cipher: one sm4e per round-key register
# in @rks (rks[0] .. rks[7], in that order), followed by rev64 + ext #8 on
# $data, which together reverse the order of its four 32-bit words.
#
# No prototype: the sub takes one argument (the original "()" prototype
# claimed it took none).
sub enc_blk {
    my ($data) = @_;

    # One sm4e per round-key vector, in key-schedule order.
    $code .= "sm4e $data.4s,$_.4s\n" foreach @rks[0..7];

    $code.=<<___;
rev64 $data.4S,$data.4S
ext $data.16b,$data.16b,$data.16b,#8
___
}
# enc_4blks($data0, $data1, $data2, $data3)
#
# Appends to $code the instruction sequence that runs four blocks through the
# cipher in parallel.  For each of rks[0] .. rks[6] it emits one sm4e per
# block followed by an empty line; for rks[7] the sm4e instructions are
# interleaved with the rev64 / ext #8 pair that reverses the word order of
# each finished block.
#
# No prototype: the sub takes four arguments (the original "()" prototype
# claimed it took none).
sub enc_4blks {
    my ($data0, $data1, $data2, $data3) = @_;

    # Rounds using rks[0] .. rks[6]: same key applied to all four blocks.
    foreach my $rk (@rks[0..6]) {
        $code .= "sm4e $_.4s,$rk.4s\n" foreach ($data0, $data1, $data2, $data3);
        $code .= "\n";
    }

    # Last round key, interleaved with the per-block word reversal.
    $code.=<<___;
sm4e $data0.4s,$rks[7].4s
rev64 $data0.4S,$data0.4S
sm4e $data1.4s,$rks[7].4s
ext $data0.16b,$data0.16b,$data0.16b,#8
rev64 $data1.4S,$data1.4S
sm4e $data2.4s,$rks[7].4s
ext $data1.16b,$data1.16b,$data1.16b,#8
rev64 $data2.4S,$data2.4S
sm4e $data3.4s,$rks[7].4s
ext $data2.16b,$data2.16b,$data2.16b,#8
rev64 $data3.4S,$data3.4S
ext $data3.16b,$data3.16b,$data3.16b,#8
___
}
# Start the generated assembly: architecture header, then the constant
# tables used by the key-schedule routines.
$code=<<___;
#include "arm_arch.h"
.arch armv8-a+crypto
.text
___

{{{
# .Lck holds 32 words that the key-schedule routines load into v16..v23 as
# the third operand of sm4ekey; .Lfk holds 4 words that are XORed into the
# user key before the first sm4ekey.
$code.=<<___;
.align 6
.Lck:
.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
.Lfk:
.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
___
}}}
{{{
# ${prefix}_set_encrypt_key
#
# Register use in the generated routine:
#   x0 ($key)   - address the 16-byte user key is loaded from
#   x1 ($keys)  - address the 8 round-key vectors (128 bytes) are stored to
#   x2 ($tmp)   - scratch pointer into .Lfk / .Lck
#   v0..v7      - round keys, produced and stored in ascending order
#   v16..v23    - the .Lck constants; v24 - the .Lfk constants
my ($key,$keys)=("x0","x1");
my ($tmp)=("x2");
my ($key0,$key1,$key2,$key3,$key4,$key5,$key6,$key7)=map("v$_",(0..7));
my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23));
my ($fkconst) = ("v24");
$code.=<<___;
.globl ${prefix}_set_encrypt_key
.type ${prefix}_set_encrypt_key,%function
.align 5
${prefix}_set_encrypt_key:
AARCH64_VALID_CALL_TARGET
ld1 {$key0.4s},[$key]
adr $tmp,.Lfk
ld1 {$fkconst.4s},[$tmp]
adr $tmp,.Lck
ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64
___
# Byte-swap the user key words (skipped when __ARMEB__ is defined).
&rev32($key0, $key0);
# Each sm4ekey derives the next round-key vector from the previous one; the
# first four are stored while the remaining ones are still being computed.
$code.=<<___;
ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp]
eor $key0.16b,$key0.16b,$fkconst.16b;
sm4ekey $key0.4S,$key0.4S,$const0.4S
sm4ekey $key1.4S,$key0.4S,$const1.4S
sm4ekey $key2.4S,$key1.4S,$const2.4S
sm4ekey $key3.4S,$key2.4S,$const3.4S
sm4ekey $key4.4S,$key3.4S,$const4.4S
st1 {$key0.4s,$key1.4s,$key2.4s,$key3.4s},[$keys],64
sm4ekey $key5.4S,$key4.4S,$const5.4S
sm4ekey $key6.4S,$key5.4S,$const6.4S
sm4ekey $key7.4S,$key6.4S,$const7.4S
st1 {$key4.4s,$key5.4s,$key6.4s,$key7.4s},[$keys]
ret
.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
___
}}}
{{{
# ${prefix}_set_decrypt_key
#
# Same derivation as the encrypt key schedule, but the result is laid out
# back to front: $key0..$key7 are mapped to v7..v0 (note the reversed name
# list below), every round-key vector has its four words reversed with
# rev64 + ext #8, and the vectors are stored from $key7 down to $key0.
#
# Register use in the generated routine:
#   x0 ($key)   - address the 16-byte user key is loaded from
#   x1 ($keys)  - address the 8 round-key vectors (128 bytes) are stored to
#   x2 ($tmp)   - scratch pointer into .Lfk / .Lck
#   v16..v23    - the .Lck constants; v24 - the .Lfk constants
my ($key,$keys)=("x0","x1");
my ($tmp)=("x2");
my ($key7,$key6,$key5,$key4,$key3,$key2,$key1,$key0)=map("v$_",(0..7));
my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23));
my ($fkconst) = ("v24");
$code.=<<___;
.globl ${prefix}_set_decrypt_key
.type ${prefix}_set_decrypt_key,%function
.align 5
${prefix}_set_decrypt_key:
AARCH64_VALID_CALL_TARGET
ld1 {$key0.4s},[$key]
adr $tmp,.Lfk
ld1 {$fkconst.4s},[$tmp]
adr $tmp, .Lck
ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64
___
# Byte-swap the user key words (skipped when __ARMEB__ is defined).
&rev32($key0, $key0);
# A vector is word-reversed only after the sm4ekey that consumes it as its
# source has been issued, which is why the reversals trail the derivation.
$code.=<<___;
ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp]
eor $key0.16b, $key0.16b,$fkconst.16b;
sm4ekey $key0.4S,$key0.4S,$const0.4S
sm4ekey $key1.4S,$key0.4S,$const1.4S
sm4ekey $key2.4S,$key1.4S,$const2.4S
rev64 $key0.4s,$key0.4s
rev64 $key1.4s,$key1.4s
ext $key0.16b,$key0.16b,$key0.16b,#8
ext $key1.16b,$key1.16b,$key1.16b,#8
sm4ekey $key3.4S,$key2.4S,$const3.4S
sm4ekey $key4.4S,$key3.4S,$const4.4S
rev64 $key2.4s,$key2.4s
rev64 $key3.4s,$key3.4s
ext $key2.16b,$key2.16b,$key2.16b,#8
ext $key3.16b,$key3.16b,$key3.16b,#8
sm4ekey $key5.4S,$key4.4S,$const5.4S
sm4ekey $key6.4S,$key5.4S,$const6.4S
rev64 $key4.4s,$key4.4s
rev64 $key5.4s,$key5.4s
ext $key4.16b,$key4.16b,$key4.16b,#8
ext $key5.16b,$key5.16b,$key5.16b,#8
sm4ekey $key7.4S,$key6.4S,$const7.4S
rev64 $key6.4s, $key6.4s
rev64 $key7.4s, $key7.4s
ext $key6.16b,$key6.16b,$key6.16b,#8
ext $key7.16b,$key7.16b,$key7.16b,#8
st1 {$key7.4s,$key6.4s,$key5.4s,$key4.4s},[$keys],64
st1 {$key3.4s,$key2.4s,$key1.4s,$key0.4s},[$keys]
ret
.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
___
}}}
{{{
# gen_block($dir)
#
# Appends to $code a single-block routine named ${prefix}_${dir}crypt.
# $dir only selects the symbol name ("en" or "de"); the emitted instructions
# are the same for both, so the direction is determined by the round keys
# the caller passes in.
#
# Register use in the generated routine:
#   x0 ($inp) - address of the 16-byte input block
#   x1 ($out) - address the 16-byte result is stored to
#   x2 ($rk)  - address of the 8 round-key vectors
#   v16       - the block being processed
#
# No prototype: the sub takes one argument (the original "()" prototype
# claimed it took none).
sub gen_block {
    my ($dir) = @_;
    my ($inp,$out,$rk)=map("x$_",(0..2));
    my ($data)=("v16");
    $code.=<<___;
.globl ${prefix}_${dir}crypt
.type ${prefix}_${dir}crypt,%function
.align 5
${prefix}_${dir}crypt:
AARCH64_VALID_CALL_TARGET
ld1 {$data.4s},[$inp]
ld1 {$rks[0].4s,$rks[1].4s,$rks[2].4s,$rks[3].4s},[$rk],64
ld1 {$rks[4].4s,$rks[5].4s,$rks[6].4s,$rks[7].4s},[$rk]
___
    # Byte-swap on the way in and on the way out around the cipher rounds.
    &rev32($data,$data);
    &enc_blk($data);
    &rev32($data,$data);
    $code.=<<___;
st1 {$data.4s},[$out]
ret
.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}

gen_block("en");
gen_block("de");
}}}
{{{
# ${prefix}_ecb_encrypt
#
# Register use in the generated routine:
#   x0 ($inp) - input pointer, post-incremented
#   x1 ($out) - output pointer, post-incremented
#   x2 ($len) - remaining length in bytes (compared against 64/128 and
#               decremented by 16 per block)
#   x3 ($rk)  - address of the 8 round-key vectors
#   v16..v23  - up to eight blocks in flight
#
# The routine processes 8 blocks at a time while at least 128 bytes remain,
# then 4 blocks while at least 64 remain, then single blocks.  The original
# also declared an \$enc = "w4" local that no emitted instruction referenced;
# it has been dropped.
my ($inp,$out,$len,$rk)=map("x$_",(0..3));
my @dat=map("v$_",(16..23));
$code.=<<___;
.globl ${prefix}_ecb_encrypt
.type ${prefix}_ecb_encrypt,%function
.align 5
${prefix}_ecb_encrypt:
AARCH64_VALID_CALL_TARGET
ld1 {$rks[0].4s,$rks[1].4s,$rks[2].4s,$rks[3].4s},[$rk],#64
ld1 {$rks[4].4s,$rks[5].4s,$rks[6].4s,$rks[7].4s},[$rk]
1:
cmp $len,#64
b.lt 1f
ld1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$inp],#64
cmp $len,#128
b.lt 2f
ld1 {$dat[4].4s,$dat[5].4s,$dat[6].4s,$dat[7].4s},[$inp],#64
// 8 blocks
___
&rev32($_,$_) foreach @dat[0..7];
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&enc_4blks($dat[4],$dat[5],$dat[6],$dat[7]);
# The first four results are stored before the last two blocks are swapped
# back, so the emission order here is deliberate.
&rev32($_,$_) foreach @dat[0..5];
$code.=<<___;
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
___
&rev32($_,$_) foreach @dat[6..7];
$code.=<<___;
st1 {$dat[4].4s,$dat[5].4s,$dat[6].4s,$dat[7].4s},[$out],#64
subs $len,$len,#128
b.gt 1b
ret
// 4 blocks
2:
___
&rev32($_,$_) foreach @dat[0..3];
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&rev32($_,$_) foreach @dat[0..3];
# Single-block tail: the b.ne after the store relies on the flags set by the
# preceding subs, which the vector instructions in between do not modify.
$code.=<<___;
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
subs $len,$len,#64
b.gt 1b
1:
subs $len,$len,#16
b.lt 1f
ld1 {$dat[0].4s},[$inp],#16
___
&rev32($dat[0],$dat[0]);
&enc_blk($dat[0]);
&rev32($dat[0],$dat[0]);
$code.=<<___;
st1 {$dat[0].4s},[$out],#16
b.ne 1b
1:
ret
.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt
___
}}}
{{{
# ${prefix}_cbc_encrypt
#
# Register use in the generated routine:
#   x0 ($inp) - input pointer, post-incremented
#   x1 ($out) - output pointer, post-incremented
#   x2 ($len) - remaining length in bytes
#   x3 ($rk)  - address of the 8 round-key vectors
#   x4 ($ivp) - address of the 16-byte IV; the updated IV is stored back
#   w5 ($enc) - zero selects the .Ldec path, non-zero the encrypt path
#   v16..v23  - blocks in flight; v24..v31 - saved ciphertext (decrypt path)
#   v8        - running IV; d8/d9 are saved on the stack around the routine
#
# Encryption chains block to block, so it handles 4 blocks per iteration
# serially; decryption handles 8 or 4 blocks in parallel and XORs with the
# previous ciphertext afterwards.
my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4));
my ($enc) = ("w5");
my @dat=map("v$_",(16..23));
my @in=map("v$_",(24..31));
my ($ivec) = ("v8");
$code.=<<___;
.globl ${prefix}_cbc_encrypt
.type ${prefix}_cbc_encrypt,%function
.align 5
${prefix}_cbc_encrypt:
AARCH64_VALID_CALL_TARGET
stp d8,d9,[sp, #-16]!

ld1 {$rks[0].4s,$rks[1].4s,$rks[2].4s,$rks[3].4s},[$rk],#64
ld1 {$rks[4].4s,$rks[5].4s,$rks[6].4s,$rks[7].4s},[$rk]
ld1 {$ivec.4s},[$ivp]
cmp $enc,#0
b.eq .Ldec
1:
cmp $len, #64
b.lt 1f
ld1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$inp],#64
eor $dat[0].16b,$dat[0].16b,$ivec.16b
___
# Emission order 1,0,2,3 is kept exactly as in the original.
&rev32($_,$_) foreach @dat[1,0,2,3];
&enc_blk($dat[0]);
# Each block is XORed with the previous result while both are still in the
# byte-swapped form; a result is swapped back only after the next block has
# consumed it.
$code.=<<___;
eor $dat[1].16b,$dat[1].16b,$dat[0].16b
___
&enc_blk($dat[1]);
&rev32($dat[0],$dat[0]);
$code.=<<___;
eor $dat[2].16b,$dat[2].16b,$dat[1].16b
___
&enc_blk($dat[2]);
&rev32($dat[1],$dat[1]);
$code.=<<___;
eor $dat[3].16b,$dat[3].16b,$dat[2].16b
___
&enc_blk($dat[3]);
&rev32($dat[2],$dat[2]);
&rev32($dat[3],$dat[3]);
$code.=<<___;
mov $ivec.16b,$dat[3].16b
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
subs $len,$len,#64
b.ne 1b
1:
subs $len,$len,#16
b.lt 3f
ld1 {$dat[0].4s},[$inp],#16
eor $ivec.16b,$ivec.16b,$dat[0].16b
___
# Single-block encrypt tail works in place on the IV register.
&rev32($ivec,$ivec);
&enc_blk($ivec);
&rev32($ivec,$ivec);
# Decrypt path: every group of blocks is loaded twice from the same address,
# once to be processed (\@dat) and once to keep the ciphertext (\@in) for the
# chaining XOR and the next IV.
$code.=<<___;
st1 {$ivec.16b},[$out],#16
b.ne 1b
b 3f
.Ldec:
1:
cmp $len, #64
b.lt 1f
ld1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$inp]
ld1 {$in[0].4s,$in[1].4s,$in[2].4s,$in[3].4s},[$inp],#64
cmp $len,#128
b.lt 2f
// 8 blocks mode
ld1 {$dat[4].4s,$dat[5].4s,$dat[6].4s,$dat[7].4s},[$inp]
ld1 {$in[4].4s,$in[5].4s,$in[6].4s,$in[7].4s},[$inp],#64
___
&rev32($_,$_) foreach @dat[0..7];
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&enc_4blks($dat[4],$dat[5],$dat[6],$dat[7]);
&rev32($_,$_) foreach @dat[0..7];
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$ivec.16b
eor $dat[1].16b,$dat[1].16b,$in[0].16b
eor $dat[2].16b,$dat[2].16b,$in[1].16b
mov $ivec.16b,$in[7].16b
eor $dat[3].16b,$dat[3].16b,$in[2].16b
eor $dat[4].16b,$dat[4].16b,$in[3].16b
eor $dat[5].16b,$dat[5].16b,$in[4].16b
eor $dat[6].16b,$dat[6].16b,$in[5].16b
eor $dat[7].16b,$dat[7].16b,$in[6].16b
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
st1 {$dat[4].4s,$dat[5].4s,$dat[6].4s,$dat[7].4s},[$out],#64
subs $len,$len,128
b.gt 1b
b 3f
// 4 blocks mode
2:
___
&rev32($_,$_) foreach @dat[0..3];
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&rev32($_,$_) foreach @dat[0..3];
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$ivec.16b
eor $dat[1].16b,$dat[1].16b,$in[0].16b
mov $ivec.16b,$in[3].16b
eor $dat[2].16b,$dat[2].16b,$in[1].16b
eor $dat[3].16b,$dat[3].16b,$in[2].16b
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
subs $len,$len,#64
b.gt 1b
1:
subs $len,$len,#16
b.lt 3f
ld1 {$dat[0].4s},[$inp],#16
mov $in[0].16b,$dat[0].16b
___
&rev32($dat[0],$dat[0]);
&enc_blk($dat[0]);
&rev32($dat[0],$dat[0]);
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$ivec.16b
mov $ivec.16b,$in[0].16b
st1 {$dat[0].16b},[$out],#16
b.ne 1b
3:
// save back IV
st1 {$ivec.16b},[$ivp]
ldp d8,d9,[sp],#16
ret
.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
___
}}}
{{{
# ${prefix}_ctr32_encrypt_blocks
#
# Register use in the generated routine:
#   x0 ($inp) - input pointer, post-incremented
#   x1 ($out) - output pointer, post-incremented
#   x2 ($len) - remaining length in blocks (compared against 4/8 and
#               decremented by 8, 4 or 1)
#   x3 ($rk)  - address of the 8 round-key vectors
#   x4 ($ivp) - address of the 16-byte counter block (read only here)
#   w5 ($ctr) - 32-bit counter, taken from word 3 of the counter block
#   v16..v23  - counter blocks / key stream; v24..v31 - input data
#   v8        - current counter block; d8/d9 are saved on the stack
#
# The counter block is byte-swapped once up front, so the per-block copies
# go straight into the cipher rounds without another rev32.
my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4));
my ($ctr)=("w5");
my @dat=map("v$_",(16..23));
my @in=map("v$_",(24..31));
my ($ivec)=("v8");
$code.=<<___;
.globl ${prefix}_ctr32_encrypt_blocks
.type ${prefix}_ctr32_encrypt_blocks,%function
.align 5
${prefix}_ctr32_encrypt_blocks:
AARCH64_VALID_CALL_TARGET
stp d8,d9,[sp, #-16]!

ld1 {$ivec.4s},[$ivp]
ld1 {$rks[0].4s,$rks[1].4s,$rks[2].4s,$rks[3].4s},[$rk],64
ld1 {$rks[4].4s,$rks[5].4s,$rks[6].4s,$rks[7].4s},[$rk]
___
&rev32($ivec,$ivec);
# Block 0 of each group uses the counter block as is; each following block
# gets the incremented counter patched into word 3.
$code.=<<___;
mov $ctr,$ivec.s[3]
1:
cmp $len,#4
b.lt 1f
ld1 {$in[0].4s,$in[1].4s,$in[2].4s,$in[3].4s},[$inp],#64
mov $dat[0].16b,$ivec.16b
mov $dat[1].16b,$ivec.16b
mov $dat[2].16b,$ivec.16b
mov $dat[3].16b,$ivec.16b
add $ctr,$ctr,#1
mov $dat[1].s[3],$ctr
add $ctr,$ctr,#1
mov $dat[2].s[3],$ctr
add $ctr,$ctr,#1
mov $dat[3].s[3],$ctr
cmp $len,#8
b.lt 2f
ld1 {$in[4].4s,$in[5].4s,$in[6].4s,$in[7].4s},[$inp],#64
mov $dat[4].16b,$ivec.16b
mov $dat[5].16b,$ivec.16b
mov $dat[6].16b,$ivec.16b
mov $dat[7].16b,$ivec.16b
add $ctr,$ctr,#1
mov $dat[4].s[3],$ctr
add $ctr,$ctr,#1
mov $dat[5].s[3],$ctr
add $ctr,$ctr,#1
mov $dat[6].s[3],$ctr
add $ctr,$ctr,#1
mov $dat[7].s[3],$ctr
___
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&enc_4blks($dat[4],$dat[5],$dat[6],$dat[7]);
&rev32($_,$_) foreach @dat[0..7];
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$in[0].16b
eor $dat[1].16b,$dat[1].16b,$in[1].16b
eor $dat[2].16b,$dat[2].16b,$in[2].16b
eor $dat[3].16b,$dat[3].16b,$in[3].16b
eor $dat[4].16b,$dat[4].16b,$in[4].16b
eor $dat[5].16b,$dat[5].16b,$in[5].16b
eor $dat[6].16b,$dat[6].16b,$in[6].16b
eor $dat[7].16b,$dat[7].16b,$in[7].16b
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
st1 {$dat[4].4s,$dat[5].4s,$dat[6].4s,$dat[7].4s},[$out],#64
subs $len,$len,#8
b.eq 3f
add $ctr,$ctr,#1
mov $ivec.s[3],$ctr
b 1b
2:
___
&enc_4blks($dat[0],$dat[1],$dat[2],$dat[3]);
&rev32($_,$_) foreach @dat[0..3];
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$in[0].16b
eor $dat[1].16b,$dat[1].16b,$in[1].16b
eor $dat[2].16b,$dat[2].16b,$in[2].16b
eor $dat[3].16b,$dat[3].16b,$in[3].16b
st1 {$dat[0].4s,$dat[1].4s,$dat[2].4s,$dat[3].4s},[$out],#64
subs $len,$len,#4
b.eq 3f
add $ctr,$ctr,#1
mov $ivec.s[3],$ctr
b 1b
1:
subs $len,$len,#1
b.lt 3f
mov $dat[0].16b,$ivec.16b
ld1 {$in[0].4s},[$inp],#16
___
&enc_blk($dat[0]);
&rev32($dat[0],$dat[0]);
# The b.eq after the store relies on the flags set by "subs $len,$len,#1"
# above, which the vector instructions in between do not modify.
$code.=<<___;
eor $dat[0].16b,$dat[0].16b,$in[0].16b
st1 {$dat[0].4s},[$out],#16
b.eq 3f
add $ctr,$ctr,#1
mov $ivec.s[3],$ctr
b 1b
3:
ldp d8,d9,[sp],#16
ret
.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
___
}}}
########################################
{   # Base encodings; register numbers are OR-ed in by unsm4() below.
    my %opcode = (
        "sm4e"    => 0xcec08400,
        "sm4ekey" => 0xce60c800);

    # unsm4($mnemonic, $arg)
    #
    # Turns an "sm4e"/"sm4ekey" instruction into a raw ".inst" word so the
    # output assembles without assembler support for these mnemonics.
    #   $mnemonic - instruction name as matched by the output loop
    #   $arg      - operand text: two or three q/v registers, comma separated
    # Returns ".inst\t0x%08x\t//<mnemonic> <operands>" with the first register
    # in bits 0-4, the second in bits 5-9 and the optional third in bits
    # 16-20.
    #
    # If the mnemonic is not in %opcode or the operands do not parse, the
    # instruction text is returned unchanged.  Previously such a line was
    # replaced by an empty string (or encoded with opcode 0), silently
    # dropping or corrupting the instruction.
    sub unsm4 {
        my ($mnemonic,$arg)=@_;

        if (exists $opcode{$mnemonic} &&
            $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/) {
            # sm4e has no third operand; treat the missing capture as 0.
            my ($rd,$rn,$rm) = ($1, $2, defined($3) ? $3 : 0);
            return sprintf ".inst\t0x%08x\t//%s %s",
                           $opcode{$mnemonic}|$rd|($rn<<5)|($rm<<16),
                           $mnemonic,$arg;
        }

        return "$mnemonic\t$arg";
    }
}
# Copy this script's leading comment block into the output, turning each
# leading "#" into "//".  The "#!" line is skipped and copying stops at the
# first line that is neither a comment nor empty.  This is best effort: if
# the script cannot be re-opened, only a warning is issued.
if (open(my $self, '<', $0)) {
    while(<$self>) {
        next if (/^#!/);
        last if (!s/^#/\/\// and !/^$/);
        print;
    }
    close $self;
} else {
    warn "can't open $0 to copy the header comment: $!";
}

# Emit the accumulated assembly line by line: evaluate any `...` expressions,
# then replace sm4* instructions by their .inst encoding via unsm4().
foreach(split("\n",$code)) {
    s/\`([^\`]*)\`/eval($1)/ge;

    s/\b(sm4\w+)\s+([qv].*)/unsm4($1,$2)/ge;
    print $_,"\n";
}

# STDOUT is the pipe to arm-xlate.pl; a failing close reports its errors.
close STDOUT or die "error closing STDOUT: $!";