mirror of
https://github.com/openssl/openssl.git
synced 2024-12-15 06:01:37 +08:00
6181a33367
To allow re-use of the already reviewed openSSL crypto code for RISC-V in other projects - like the Linux kernel, add a second license (2-clause BSD) to the 32+64bit aes implementations using the Zkn extension. Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Paul Dale <pauli@openssl.org> Reviewed-by: Todd Short <todd.short@me.com> (Merged from https://github.com/openssl/openssl/pull/21018)
1089 lines
28 KiB
Perl
1089 lines
28 KiB
Perl
#! /usr/bin/env perl
|
|
# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
# This file is dual-licensed and is also available under the following
|
|
# terms:
|
|
#
|
|
# Copyright (c) 2022, Hongren (Zenithal) Zheng <i@zenithal.me>
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested output file.  The three-argument form of
# open keeps characters in the file name from being read as part of the open
# mode, and a failed open is now reported instead of letting the generated
# assembly go to the original STDOUT unnoticed.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
|
|
|
|
################################################################################
|
|
# Utility functions to help with keeping track of which registers to stack/
|
|
# unstack when entering / exiting routines.
|
|
################################################################################
|
|
{
    # Callee-saved registers
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Caller-saved registers
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    # Callee-saved registers claimed since the last clear_regs(), in order of
    # first use.  save_regs()/load_regs() spill and reload exactly this list.
    my @must_save;

    # Claim one register (given as "xN") for use in generated code.
    # Returns the register name unchanged, so the call can be used directly
    # in an assignment.  Dies if the name is in neither register list.
    sub use_reg {
        my $reg = shift;
        # Names are compared with eq: interpolating the name into a regex
        # would let a string such as "x." pass as a valid register.
        if (grep { $_ eq $reg } @callee_saved) {
            # Record each register once, so that claiming it twice does not
            # reserve and store a second stack slot for it.
            push(@must_save, $reg) unless grep { $_ eq $reg } @must_save;
        } elsif (!grep { $_ eq $reg } @caller_saved) {
            # Register is not usable!
            die("Unusable register ".$reg);
        }
        return $reg;
    }

    # Claim several registers given by number; returns the list of "xN" names.
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }

    # Return the prologue text: lower sp and store every claimed callee-saved
    # register.  Each register gets an 8-byte slot and the reservation is
    # rounded up to a multiple of 16 bytes.
    sub save_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        $ret.="    addi    sp,sp,-$stack_reservation\n";
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    sw      $_,$stack_offset(sp)\n";
        }
        return $ret;
    }

    # Return the epilogue text: reload the registers stored by save_regs()
    # from the same slots, then release the stack reservation.
    sub load_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    lw      $_,$stack_offset(sp)\n";
        }
        $ret.="    addi    sp,sp,$stack_reservation\n";
        return $ret;
    }

    # Forget all claimed registers; call between groups of routines that
    # have different register needs.
    sub clear_regs {
        @must_save = ();
    }
}
|
|
|
|
################################################################################
|
|
# util for encoding scalar crypto extension instructions
|
|
################################################################################
|
|
|
|
my @regs = map("x$_",(0..31));
# ABI mnemonics, listed in the same order as @regs (x0 .. x31)
my @regaliases = ('zero','ra','sp','gp','tp','t0','t1','t2','s0','s1',
    map("a$_",(0..7)),
    map("s$_",(2..11)),
    map("t$_",(3..6))
);
my %reglookup;
@reglookup{@regs} = @regs;
# Accept the ABI names as well, so that the "possibly an alias" promise made
# for read_reg() below actually holds.
@reglookup{@regaliases} = @regs;
# fp is a second name for s0/x8
$reglookup{'fp'} = 'x8';

# Takes a register name, possibly an alias, and converts it to a register index
# from 0 to 31
#
# The name is matched case-insensitively.  Dies with "Unknown register" when
# the name is neither xN (N = 0..31) nor one of the ABI names above.
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    # Every table value has the form xN; the capture is the index.
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}
|
|
|
|
# Common encoder for the four RV32 AES instructions below.
#
#   $name      mnemonic, used only in the error message
#   $template  instruction word with the bs, rs2, rs1 and rd fields zero
#   $rd, $rs1, $rs2  register names as accepted by read_reg()
#   $bs        byte select, 0..3
#
# Returns a ".word N" directive holding the encoded instruction.  Dies on an
# unknown register (via read_reg) or on a byte select outside 0..3: the field
# is two bits wide and sits in bits 31:30, so a larger value would produce a
# word wider than 32 bits.
sub _aes32_encode {
    my ($name, $template, $rd, $rs1, $rs2, $bs) = @_;
    $rd  = read_reg($rd);
    $rs1 = read_reg($rs1);
    $rs2 = read_reg($rs2);
    if (!defined($bs) || $bs !~ /\A[0-3]\z/) {
        die("Invalid byte select for ".$name.": ".(defined($bs) ? $bs : 'undef'));
    }
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32dsi {
    # Encoding for aes32dsi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10101_00000_00000_000_00000_0110011;
    return _aes32_encode('aes32dsi', $template, @_);
}

sub aes32dsmi {
    # Encoding for aes32dsmi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10111_00000_00000_000_00000_0110011;
    return _aes32_encode('aes32dsmi', $template, @_);
}

sub aes32esi {
    # Encoding for aes32esi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10001_00000_00000_000_00000_0110011;
    return _aes32_encode('aes32esi', $template, @_);
}

sub aes32esmi {
    # Encoding for aes32esmi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10011_00000_00000_000_00000_0110011;
    return _aes32_encode('aes32esmi', $template, @_);
}
|
|
|
|
sub rori {
    # Encoding for rori rd, rs1, shamt instruction on RV32
    #                XXXXXXX_shamt_ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg(shift);
    my $rs1 = read_reg(shift);
    my $shamt = shift;

    # The shift amount field is five bits wide (bits 24:20).  A larger value
    # would spill into the fixed bits above it and silently encode some other
    # instruction, so reject it while generating.
    if (!defined($shamt) || $shamt !~ /\A[0-9]+\z/ || $shamt > 31) {
        die("Invalid shift amount for rori: ".(defined($shamt) ? $shamt : 'undef'));
    }

    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}
|
|
|
|
################################################################################
|
|
# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
|
|
################################################################################
|
|
|
|
# AES state words (called s0-s3 or y0-y3 elsewhere) live in x6-x9
my ($Q0,$Q1,$Q2,$Q3) = map(use_reg("x$_"), (6..9));

# Function arguments (x10-x12 are a0-a2 in the ABI):
# input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = map(use_reg("x$_"), (10..12));

# Round key words; the round code swaps the roles of T0-T3 and Q0-Q3 from one
# round to the next
my ($T0,$T1,$T2,$T3) = map(use_reg("x$_"), (13..16));

# Loop counter
my ($loopcntr) = map(use_reg("x$_"), (30));
|
|
|
|
################################################################################
|
|
# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
|
|
################################################################################
|
|
|
|
# outer product of whole state into one column of key
#
#   $inst    code reference to an instruction encoder (rd, rs1, rs2, bs)
#   $key     register holding the round key word; it is both the first source
#            and the destination of every emitted instruction
#   @state   the four state registers; word i is used with byte select i
#
# Returns four lines of text, one encoded instruction per line.
sub outer {
    my ($inst, $key, @state) = @_;
    my $ret = '';
    foreach my $bs (0..3) {
        $ret .= "    @{[ $inst->($key, $key, $state[$bs], $bs) ]}\n";
    }
    return $ret;
}
|
|
|
|
# The four subs below bind outer() to one instruction encoder each.
# Arguments: key register followed by the four state registers.

# Encryption with aes32esmi (used for all rounds but the last)
sub aes32esmi4 {
    my @operands = @_;
    return outer(\&aes32esmi, @operands);
}

# Encryption with aes32esi (used for the last round, "no mix column")
sub aes32esi4 {
    my @operands = @_;
    return outer(\&aes32esi, @operands);
}

# Decryption with aes32dsmi (used for all rounds but the last)
sub aes32dsmi4 {
    my @operands = @_;
    return outer(\&aes32dsmi, @operands);
}

# Decryption with aes32dsi (used for the last round, "no mix column")
sub aes32dsi4 {
    my @operands = @_;
    return outer(\&aes32dsi, @operands);
}
|
|
|
|
################################################################################
|
|
# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
|
|
# const AES_KEY *key);
|
|
################################################################################
|
|
# Symbol header.  $code collects the whole generated file from here on.
my $code = <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___

# Prologue, then the cipher body.  Each round writes its result over the
# round key registers, so the state alternates between Q0-Q3 and T0-T3.
$code .= save_regs() . <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

@{[aes32esmi4($T0,$Q0,$Q1,$Q2,$Q3)]}
@{[aes32esmi4($T1,$Q1,$Q2,$Q3,$Q0)]}
@{[aes32esmi4($T2,$Q2,$Q3,$Q0,$Q1)]}
@{[aes32esmi4($T3,$Q3,$Q0,$Q1,$Q2)]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

@{[aes32esmi4($Q0,$T0,$T1,$T2,$T3)]}
@{[aes32esmi4($Q1,$T1,$T2,$T3,$T0)]}
@{[aes32esmi4($Q2,$T2,$T3,$T0,$T1)]}
@{[aes32esmi4($Q3,$T3,$T0,$T1,$T2)]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

@{[aes32esmi4($T0,$Q0,$Q1,$Q2,$Q3)]}
@{[aes32esmi4($T1,$Q1,$Q2,$Q3,$Q0)]}
@{[aes32esmi4($T2,$Q2,$Q3,$Q0,$Q1)]}
@{[aes32esmi4($T3,$Q3,$Q0,$Q1,$Q2)]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
@{[aes32esi4($Q0,$T0,$T1,$T2,$T3)]}
@{[aes32esi4($Q1,$T1,$T2,$T3,$T0)]}
@{[aes32esi4($Q2,$T2,$T3,$T0,$T1)]}
@{[aes32esi4($Q3,$T3,$T0,$T1,$T2)]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

# Epilogue
$code .= load_regs() . <<___;
    ret
___
|
|
|
|
################################################################################
|
|
# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
|
|
# const AES_KEY *key);
|
|
################################################################################
|
|
# Symbol header
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

# Prologue, then the cipher body.  The key schedule is walked backwards from
# the last round key, and the state registers are passed to each instruction
# group in the order 0,3,2,1 (rotated per column).
$code .= save_regs() . <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # Load the last key
    # use T0 as temporary now
    slli    $T0,$loopcntr,4
    add     $KEYP,$KEYP,$T0
    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

@{[aes32dsmi4($T0,$Q0,$Q3,$Q2,$Q1)]}
@{[aes32dsmi4($T1,$Q1,$Q0,$Q3,$Q2)]}
@{[aes32dsmi4($T2,$Q2,$Q1,$Q0,$Q3)]}
@{[aes32dsmi4($T3,$Q3,$Q2,$Q1,$Q0)]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

@{[aes32dsmi4($Q0,$T0,$T3,$T2,$T1)]}
@{[aes32dsmi4($Q1,$T1,$T0,$T3,$T2)]}
@{[aes32dsmi4($Q2,$T2,$T1,$T0,$T3)]}
@{[aes32dsmi4($Q3,$T3,$T2,$T1,$T0)]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

@{[aes32dsmi4($T0,$Q0,$Q3,$Q2,$Q1)]}
@{[aes32dsmi4($T1,$Q1,$Q0,$Q3,$Q2)]}
@{[aes32dsmi4($T2,$Q2,$Q1,$Q0,$Q3)]}
@{[aes32dsmi4($T3,$Q3,$Q2,$Q1,$Q0)]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
@{[aes32dsi4($Q0,$T0,$T3,$T2,$T1)]}
@{[aes32dsi4($Q1,$T1,$T0,$T3,$T2)]}
@{[aes32dsi4($Q2,$T2,$T1,$T0,$T3)]}
@{[aes32dsi4($Q3,$T3,$T2,$T1,$T0)]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

# Epilogue
$code .= load_regs() . <<___;
    ret
___
|
|
|
|
# The cipher routines are complete; drop their list of registers to save
# before registers are claimed for the key schedule routines.
clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI):
# pointer to user key, number of bits in key, key pointer
my ($UKEY,$BITS,$KEYP) = map(use_reg("x$_"), (10..12));

# Temporaries
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = map(use_reg("x$_"), (13..17,28..31));

################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################

# Round constants, indexed by key expansion step
my @rcon = map(hex($_), qw(01 02 04 08 10 20 40 80 1b 36));
|
|
|
|
# do 4 sbox on 4 bytes of rs, (possibly mix), then xor with rd
#
#   $inst  code reference to an instruction encoder (rd, rs1, rs2, bs)
#   $rd    register that is both the first source and the destination
#   $rs    register whose four bytes are selected in turn (bs = 0..3)
#
# Returns four lines of text, one encoded instruction per line.
sub sbox4 {
    my ($inst, $rd, $rs) = @_;
    my $ret = '';
    foreach my $bs (0..3) {
        $ret .= "    @{[ $inst->($rd, $rd, $rs, $bs) ]}\n";
    }
    return $ret;
}
|
|
|
|
# sbox4() bound to aes32esi.  Arguments: rd, rs.
sub fwdsbox4 {
    my @operands = @_;
    return sbox4(\&aes32esi, @operands);
}
|
|
|
|
# Return the text that expands a 128-bit user key into the encryption key
# schedule: the user key is copied as round key 0, then ten further round
# keys are derived and stored, advancing KEYP by 16 each time.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke128enc {
    my ($zbkb) = @_;

    # T4 <- T3 rotated right by 8.  The text does not depend on the round,
    # so it is built once.  The shift variant uses T5 as scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T4,$T3,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
    }

    # Round key 0 is the user key itself
    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___

    foreach my $rc (@rcon[0..9]) {
        $ret .= <<___;
    # use T4 to store rcon
    li      $T4,$rc
    # as xor is associative and commutative
    # we fist xor T0 with RCON, then use T0 to
    # xor the result of each SBOX result of T3
    xor     $T0,$T0,$T4
    # use T4 to store rotated T3
___
        $ret .= $rotate;
        $ret .= <<___;
    # update T0
@{[fwdsbox4($T0,$T4)]}

    # update new T1~T3
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,16
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    }
    return $ret;
}
|
|
|
|
# Return the text that expands a 192-bit user key into the encryption key
# schedule.  Six words are produced per step and KEYP advances by 24; the
# last step stores only four words.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke192enc {
    my ($zbkb) = @_;
    my $last_step = 7;

    # T6 <- T5 rotated right by 8, built once; the shift variant uses T7 as
    # scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T6,$T5,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
    }

    # The first six schedule words are the user key itself
    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___

    foreach my $rnum (0..$last_step) {
        # Nine 24-byte groups would be 216 bytes, while the 13 round keys
        # occupy 208 bytes, so the last step neither computes nor stores
        # T4/T5.
        my $six_words = ($rnum != $last_step);

        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T6)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
___
        if ($six_words) {
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
___
        }
        $ret .= <<___;
    add     $KEYP,$KEYP,24
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($six_words) {
            $ret .= <<___;
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
        }
    }
    return $ret;
}
|
|
|
|
# Return the text that expands a 256-bit user key into the encryption key
# schedule.  Eight words are produced per step and KEYP advances by 32; the
# last step stores only four words.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke256enc {
    my ($zbkb) = @_;
    my $last_step = 6;

    # T8 <- T7 rotated right by 8, built once.  The shift variant uses the
    # BITS register as scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T8,$T7,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
    }

    # The first eight schedule words are the user key itself
    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___

    foreach my $rnum (0..$last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T8)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        # Eight 32-byte groups would be 256 bytes, while the 15 round keys
        # occupy 240 bytes, so the last step stops after the first four words.
        next if $rnum == $last_step;

        $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
@{[fwdsbox4($T4,$T3)]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    }
    return $ret;
}
|
|
|
|
################################################################################
|
|
# int rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
#                                AES_KEY *key)
|
|
################################################################################
|
|
# Return the body shared by all set_*_key routines.
#
#   $ke128, $ke192, $ke256  text of the key expansion for each key size
#
# The emitted code leaves the result in a0: -1 when the user key pointer or
# the key pointer is NULL, -2 when the bit count is not 128, 192 or 256, and
# 0 on success.  It stores the round count (10, 12 or 14) at offset 240 of
# the key structure before running the expansion.  Every path ends at label
# 5, so the caller must append its epilogue and "ret" directly after this
# text.
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
    # NULL check: the previous "bnez UKEY,1f; bnez KEYP,1f" sequence continued
    # whenever either pointer was non-NULL, i.e. it rejected the call only
    # when both were NULL.  A NULL user key now branches straight to the
    # error path, and a NULL key pointer falls into it.  The error path jumps
    # to label 5 instead of returning in place, so it leaves through the same
    # epilogue as the other exits.
    $ret .= <<___;
    beqz    $UKEY,6f        # if (!userKey || !key) return -1;
    bnez    $KEYP,1f
6:
    li      a0,-1
    j       5f
1:
    # Determine number of rounds from key size in bits
    li      $T0,128
    bne     $BITS,$T0,1f
    li      $T1,10          # key->rounds = 10 if bits == 128
    sw      $T1,240($KEYP)  # store key->rounds
$ke128
    j       4f
1:
    li      $T0,192
    bne     $BITS,$T0,2f
    li      $T1,12          # key->rounds = 12 if bits == 192
    sw      $T1,240($KEYP)  # store key->rounds
$ke192
    j       4f
2:
    li      $T1,14          # key->rounds = 14 if bits == 256
    li      $T0,256
    beq     $BITS,$T0,3f
    li      a0,-2           # If bits != 128, 192, or 256, return -2
    j       5f
3:
    sw      $T1,240($KEYP)  # store key->rounds
$ke256
4:  # return 0
    li      a0,0
5:  # return a0
___
    return $ret;
}
|
|
# rv32i_zkne_set_encrypt_key: key expansion with shift-based rotates
{
    my $symbol = 'rv32i_zkne_set_encrypt_key';
    $code .= <<___;
.text
.balign 16
.globl ${symbol}
.type   ${symbol},\@function
${symbol}:
___
    # prologue, body (selects the expansion by key size), epilogue
    $code .= join('',
        save_regs(),
        AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0)),
        load_regs(),
        "    ret\n");
}
|
|
|
|
################################################################################
|
|
# int rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
#                                     const int bits, AES_KEY *key)
|
|
################################################################################
|
|
# rv32i_zbkb_zkne_set_encrypt_key: key expansion with rori-based rotates
{
    my $symbol = 'rv32i_zbkb_zkne_set_encrypt_key';
    $code .= <<___;
.text
.balign 16
.globl ${symbol}
.type   ${symbol},\@function
${symbol}:
___
    # prologue, body (selects the expansion by key size), epilogue
    $code .= join('',
        save_regs(),
        AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1)),
        load_regs(),
        "    ret\n");
}
|
|
|
|
################################################################################
|
|
# utility functions for rv32i_zknd_zkne_set_decrypt_key
|
|
################################################################################
|
|
|
|
# fwd sbox then inv sbox then mix column
# the result is only mix column
# this simulates aes64im T0
#
#   $rd   destination register, zeroed first
#   $tmp  scratch register, zeroed first; receives the aes32esi result of $rs
#   $rs   source register (not written)
#
# Returns the instruction text.
sub invm4 {
    my ($rd, $tmp, $rs) = @_;
    my $forward = fwdsbox4($tmp, $rs);
    my $inverse = sbox4(\&aes32dsmi, $rd, $tmp);
    return <<___;
    li      $tmp,0
    li      $rd,0
$forward
$inverse
___
}
|
|
|
|
# Return the text that expands a 128-bit user key into the decryption key
# schedule.  The words are derived as in ke128enc, but round keys 1..9 are
# passed through invm4() before being stored; round keys 0 and 10 are stored
# unchanged.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke128dec {
    my ($zbkb) = @_;

    # T4 <- T3 rotated right by 8, built once; the shift variant uses T5 as
    # scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T4,$T3,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
    }

    # Text that stores invm4(word) for each listed word register at
    # consecutive offsets from KEYP, starting at $offset.
    my $store_mixed = sub {
        my ($offset, @words) = @_;
        my $asm = '';
        foreach my $word (@words) {
            $asm .= invm4($T5,$T4,$word)."\n";
            $asm .= "    sw      $T5,$offset($KEYP)\n";
            $offset += 4;
        }
        return $asm;
    };

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___

    foreach my $rnum (0..9) {
        $ret .= <<___;
    # see comments in ke128enc
    li      $T4,$rcon[$rnum]
    xor     $T0,$T0,$T4
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T4)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
___
        # Only round keys 1..N-1 get the mix-column transform.  aes32dsmi
        # substitutes first and mixes columns second, the reverse of the
        # order plain decryption would use, so the transform is applied to
        # the stored round keys instead.
        if ($rnum < 9) {
            $ret .= "    # T4 and T5 are temp variables\n";
            $ret .= $store_mixed->(0, $T0, $T1, $T2, $T3);
        } else {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
    }
    return $ret;
}
|
|
|
|
# Return the text that expands a 192-bit user key into the decryption key
# schedule.  The words are derived as in ke192enc; every word except the
# first four and the last four is passed through invm4() before being
# stored.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke192dec {
    my ($zbkb) = @_;
    my $last_step = 7;

    # T6 <- T5 rotated right by 8, built once; the shift variant uses T7 as
    # scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T6,$T5,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
    }

    # Text that stores invm4(word) for each listed word register at
    # consecutive offsets from KEYP, starting at $offset.
    my $store_mixed = sub {
        my ($offset, @words) = @_;
        my $asm = '';
        foreach my $word (@words) {
            $asm .= invm4($T7,$T6,$word)."\n";
            $asm .= "    sw      $T7,$offset($KEYP)\n";
            $offset += 4;
        }
        return $asm;
    };

    # First six schedule words: T0-T3 are stored as loaded, T4/T5 are stored
    # transformed.
    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
___
    $ret .= $store_mixed->(16, $T4, $T5);

    foreach my $rnum (0..$last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T6)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,24
___
        if ($rnum < $last_step) {
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4

    # see the comment in ke128dec
    # T7 and T6 are temp variables
___
            $ret .= $store_mixed->(0, $T0, $T1, $T2, $T3, $T4, $T5);
        } else {
            # last step: four words only, stored without the transform
            $ret .= <<___;
    # the reason for dropping T4/T5 is in ke192enc
    # the reason for not invm4 is in ke128dec
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
    }
    return $ret;
}
|
|
|
|
# Return the text that expands a 256-bit user key into the decryption key
# schedule.  The words are derived as in ke256enc; every word except the
# first four and the last four is passed through invm4() before being
# stored.  The BITS register is used as scratch throughout.
#
#   $zbkb  true to rotate with a single rori, false to use shifts and an or
sub ke256dec {
    my ($zbkb) = @_;
    my $last_step = 6;

    # T8 <- T7 rotated right by 8, built once; the shift variant uses BITS
    # as scratch.
    my $rotate;
    if ($zbkb) {
        $rotate = "    @{[ rori($T8,$T7,8) ]}\n";
    } else {
        $rotate = <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
    }

    # Text that stores invm4(word) for each listed word register at
    # consecutive offsets from KEYP, starting at $offset.
    my $store_mixed = sub {
        my ($offset, @words) = @_;
        my $asm = '';
        foreach my $word (@words) {
            $asm .= invm4($T8,$BITS,$word)."\n";
            $asm .= "    sw      $T8,$offset($KEYP)\n";
            $offset += 4;
        }
        return $asm;
    };

    # First eight schedule words: T0-T3 are stored as loaded, T4-T7 are
    # stored transformed.
    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # BITS and T8 are temp variables
    # BITS are not used anymore
___
    $ret .= $store_mixed->(16, $T4, $T5, $T6, $T7);

    foreach my $rnum (0..$last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        $ret .= $rotate;
        $ret .= <<___;
@{[fwdsbox4($T0,$T8)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
___
        if ($rnum < $last_step) {
            $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
@{[fwdsbox4($T4,$T3)]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6

    # see the comment in ke128dec
    # T8 and BITS are temp variables
___
            $ret .= $store_mixed->(0, $T0, $T1, $T2, $T3, $T4, $T5, $T6, $T7);
        } else {
            # last step: four words only, stored without the transform
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # last 16 bytes are dropped
    # see the comment in ke256enc
___
        }
    }
    return $ret;
}
|
|
|
|
################################################################################
|
|
# int rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#                                     const int bits, AES_KEY *key)
|
|
################################################################################
|
|
# a note on naming: set_decrypt_key needs aes32esi thus add zkne on name
|
|
# rv32i_zknd_zkne_set_decrypt_key: key expansion with shift-based rotates
{
    my $symbol = 'rv32i_zknd_zkne_set_decrypt_key';
    $code .= <<___;
.text
.balign 16
.globl ${symbol}
.type   ${symbol},\@function
${symbol}:
___
    # prologue, body (selects the expansion by key size), epilogue
    $code .= join('',
        save_regs(),
        AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0)),
        load_regs(),
        "    ret\n");
}
|
|
|
|
################################################################################
|
|
# int rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#                                          const int bits, AES_KEY *key)
|
|
################################################################################
|
|
# rv32i_zbkb_zknd_zkne_set_decrypt_key: key expansion with rori-based rotates
{
    my $symbol = 'rv32i_zbkb_zknd_zkne_set_decrypt_key';
    $code .= <<___;
.text
.balign 16
.globl ${symbol}
.type   ${symbol},\@function
${symbol}:
___
    # prologue, body (selects the expansion by key size), epilogue
    $code .= join('',
        save_regs(),
        AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1)),
        load_regs(),
        "    ret\n");
}
|
|
|
|
|
|
|
|
# Write out the accumulated assembly.  Closing STDOUT explicitly lets an
# error on the output handle be reported here.
print STDOUT $code;
close(STDOUT) or die "error closing STDOUT: $!";
|