mirror of
https://github.com/openssl/openssl.git
synced 2024-12-21 06:09:35 +08:00
da1c088f59
Reviewed-by: Richard Levitte <levitte@openssl.org> Release: yes
281 lines
8.0 KiB
Perl
281 lines
8.0 KiB
Perl
#! /usr/bin/env perl
|
|
# Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
#
|
|
# This module implements support for Armv8 SM3 instructions
|
|
|
|
# $output is the last argument if it looks like a file (it has an extension)
|
|
# $flavour is the first argument if it doesn't look like a file
|
|
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
|
|
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
|
|
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
|
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
|
die "can't locate arm-xlate.pl";
|
|
|
|
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
|
|
or die "can't call $xlate: $!";
|
|
*STDOUT=*OUT;
|
|
|
|
# Message expanding:
|
|
# Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
|
|
# Input: s0, s1, s2, s3
|
|
# s0 = w0 | w1 | w2 | w3
|
|
# s1 = w4 | w5 | w6 | w7
|
|
# s2 = w8 | w9 | w10 | w11
|
|
# s3 = w12 | w13 | w14 | w15
|
|
# Output: s4
|
|
sub msg_exp () {
|
|
my $s0 = shift;
|
|
my $s1 = shift;
|
|
my $s2 = shift;
|
|
my $s3 = shift;
|
|
my $s4 = shift;
|
|
my $vtmp1 = shift;
|
|
my $vtmp2 = shift;
|
|
$code.=<<___;
|
|
// s4 = w7 | w8 | w9 | w10
|
|
ext $s4.16b, $s1.16b, $s2.16b, #12
|
|
// vtmp1 = w3 | w4 | w5 | w6
|
|
ext $vtmp1.16b, $s0.16b, $s1.16b, #12
|
|
// vtmp2 = w10 | w11 | w12 | w13
|
|
ext $vtmp2.16b, $s2.16b, $s3.16b, #8
|
|
sm3partw1 $s4.4s, $s0.4s, $s3.4s
|
|
sm3partw2 $s4.4s, $vtmp2.4s, $vtmp1.4s
|
|
___
|
|
}
|
|
|
|
# A round of compresson function
|
|
# Input:
|
|
# ab - choose instruction among sm3tt1a, sm3tt1b, sm3tt2a, sm3tt2b
|
|
# vstate0 - vstate1, store digest status(A - H)
|
|
# vconst0 - vconst1, interleaved used to store Tj <<< j
|
|
# vtmp - temporary register
|
|
# vw - for sm3tt1ab, vw = s0 eor s1
|
|
# s0 - for sm3tt2ab, just be s0
|
|
# i, choose wj' or wj from vw
|
|
sub round () {
|
|
my $ab = shift;
|
|
my $vstate0 = shift;
|
|
my $vstate1 = shift;
|
|
my $vconst0 = shift;
|
|
my $vconst1 = shift;
|
|
my $vtmp = shift;
|
|
my $vw = shift;
|
|
my $s0 = shift;
|
|
my $i = shift;
|
|
$code.=<<___;
|
|
sm3ss1 $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
|
|
shl $vconst1.4s, $vconst0.4s, #1
|
|
sri $vconst1.4s, $vconst0.4s, #31
|
|
sm3tt1$ab $vstate0.4s, $vtmp.4s, $vw.4s[$i]
|
|
sm3tt2$ab $vstate1.4s, $vtmp.4s, $s0.4s[$i]
|
|
___
|
|
}
|
|
|
|
sub qround () {
|
|
my $ab = shift;
|
|
my $vstate0 = shift;
|
|
my $vstate1 = shift;
|
|
my $vconst0 = shift;
|
|
my $vconst1 = shift;
|
|
my $vtmp1 = shift;
|
|
my $vtmp2 = shift;
|
|
my $s0 = shift;
|
|
my $s1 = shift;
|
|
my $s2 = shift;
|
|
my $s3 = shift;
|
|
my $s4 = shift;
|
|
if($s4) {
|
|
&msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2);
|
|
}
|
|
$code.=<<___;
|
|
eor $vtmp1.16b, $s0.16b, $s1.16b
|
|
___
|
|
&round($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp2,
|
|
$vtmp1, $s0, 0);
|
|
&round($ab, $vstate0, $vstate1, $vconst1, $vconst0, $vtmp2,
|
|
$vtmp1, $s0, 1);
|
|
&round($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp2,
|
|
$vtmp1, $s0, 2);
|
|
&round($ab, $vstate0, $vstate1, $vconst1, $vconst0, $vtmp2,
|
|
$vtmp1, $s0, 3);
|
|
}
|
|
|
|
$code=<<___;
|
|
#include "arm_arch.h"
|
|
.text
|
|
___
|
|
|
|
{{{
|
|
my ($pstate,$pdata,$num)=("x0","x1","w2");
|
|
my ($state1,$state2)=("v5","v6");
|
|
my ($sconst1, $sconst2)=("s16","s17");
|
|
my ($vconst1, $vconst2)=("v16","v17");
|
|
my ($s0,$s1,$s2,$s3,$s4)=map("v$_",(0..4));
|
|
my ($bkstate1,$bkstate2)=("v18","v19");
|
|
my ($vconst_tmp1,$vconst_tmp2)=("v20","v21");
|
|
my ($vtmp1,$vtmp2)=("v22","v23");
|
|
my $constaddr="x8";
|
|
# void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
|
|
$code.=<<___;
|
|
.globl ossl_hwsm3_block_data_order
|
|
.type ossl_hwsm3_block_data_order,%function
|
|
.align 5
|
|
ossl_hwsm3_block_data_order:
|
|
AARCH64_VALID_CALL_TARGET
|
|
// load state
|
|
ld1 {$state1.4s-$state2.4s}, [$pstate]
|
|
rev64 $state1.4s, $state1.4s
|
|
rev64 $state2.4s, $state2.4s
|
|
ext $state1.16b, $state1.16b, $state1.16b, #8
|
|
ext $state2.16b, $state2.16b, $state2.16b, #8
|
|
|
|
adr $constaddr, .Tj
|
|
ldp $sconst1, $sconst2, [$constaddr]
|
|
|
|
.Loop:
|
|
// load input
|
|
ld1 {$s0.16b-$s3.16b}, [$pdata], #64
|
|
sub $num, $num, #1
|
|
|
|
mov $bkstate1.16b, $state1.16b
|
|
mov $bkstate2.16b, $state2.16b
|
|
|
|
#ifndef __ARMEB__
|
|
rev32 $s0.16b, $s0.16b
|
|
rev32 $s1.16b, $s1.16b
|
|
rev32 $s2.16b, $s2.16b
|
|
rev32 $s3.16b, $s3.16b
|
|
#endif
|
|
|
|
ext $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
|
|
___
|
|
&qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s0,$s1,$s2,$s3,$s4);
|
|
&qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s1,$s2,$s3,$s4,$s0);
|
|
&qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s2,$s3,$s4,$s0,$s1);
|
|
&qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s3,$s4,$s0,$s1,$s2);
|
|
|
|
$code.=<<___;
|
|
ext $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
|
|
___
|
|
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s4,$s0,$s1,$s2,$s3);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s0,$s1,$s2,$s3,$s4);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s1,$s2,$s3,$s4,$s0);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s2,$s3,$s4,$s0,$s1);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s3,$s4,$s0,$s1,$s2);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s4,$s0,$s1,$s2,$s3);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s0,$s1,$s2,$s3,$s4);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s1,$s2,$s3,$s4,$s0);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s2,$s3,$s4,$s0,$s1);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s3,$s4);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s4,$s0);
|
|
&qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
$s0,$s1);
|
|
|
|
$code.=<<___;
|
|
eor $state1.16b, $state1.16b, $bkstate1.16b
|
|
eor $state2.16b, $state2.16b, $bkstate2.16b
|
|
|
|
// any remained blocks?
|
|
cbnz $num, .Loop
|
|
|
|
// save state
|
|
rev64 $state1.4s, $state1.4s
|
|
rev64 $state2.4s, $state2.4s
|
|
ext $state1.16b, $state1.16b, $state1.16b, #8
|
|
ext $state2.16b, $state2.16b, $state2.16b, #8
|
|
st1 {$state1.4s-$state2.4s}, [$pstate]
|
|
ret
|
|
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
|
|
|
|
.align 3
|
|
.Tj:
|
|
.word 0x79cc4519, 0x9d8a7a87
|
|
___
|
|
}}}
|
|
|
|
#########################################
|
|
my %sm3partopcode = (
|
|
"sm3partw1" => 0xce60C000,
|
|
"sm3partw2" => 0xce60C400);
|
|
|
|
my %sm3ss1opcode = (
|
|
"sm3ss1" => 0xce400000);
|
|
|
|
my %sm3ttopcode = (
|
|
"sm3tt1a" => 0xce408000,
|
|
"sm3tt1b" => 0xce408400,
|
|
"sm3tt2a" => 0xce408800,
|
|
"sm3tt2b" => 0xce408C00);
|
|
|
|
sub unsm3part {
|
|
my ($mnemonic,$arg)=@_;
|
|
|
|
$arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/o
|
|
&&
|
|
sprintf ".inst\t0x%08x\t//%s %s",
|
|
$sm3partopcode{$mnemonic}|$1|($2<<5)|($3<<16),
|
|
$mnemonic,$arg;
|
|
}
|
|
|
|
sub unsm3ss1 {
|
|
my ($mnemonic,$arg)=@_;
|
|
|
|
$arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/o
|
|
&&
|
|
sprintf ".inst\t0x%08x\t//%s %s",
|
|
$sm3ss1opcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<10),
|
|
$mnemonic,$arg;
|
|
}
|
|
|
|
sub unsm3tt {
|
|
my ($mnemonic,$arg)=@_;
|
|
|
|
$arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/o
|
|
&&
|
|
sprintf ".inst\t0x%08x\t//%s %s",
|
|
$sm3ttopcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<12),
|
|
$mnemonic,$arg;
|
|
}
|
|
|
|
open SELF,$0;
|
|
while(<SELF>) {
|
|
next if (/^#!/);
|
|
last if (!s/^#/\/\// and !/^$/);
|
|
print;
|
|
}
|
|
close SELF;
|
|
|
|
foreach(split("\n",$code)) {
|
|
s/\`([^\`]*)\`/eval($1)/ge;
|
|
|
|
s/\b(sm3partw[1-2])\s+([qv].*)/unsm3part($1,$2)/ge;
|
|
s/\b(sm3ss1)\s+([qv].*)/unsm3ss1($1,$2)/ge;
|
|
s/\b(sm3tt[1-2][a-b])\s+([qv].*)/unsm3tt($1,$2)/ge;
|
|
print $_,"\n";
|
|
}
|
|
|
|
close STDOUT or die "error closing STDOUT: $!";
|