mirror of
https://github.com/openssl/openssl.git
synced 2024-12-15 06:01:37 +08:00
3e56c0efe7
To accelerate the performance of the AES-XTS mode, in this patch, we have the specialized multi-block implementation for AES-128-XTS and AES-256-XTS. Signed-off-by: Jerry Shih <jerry.shih@sifive.com> Signed-off-by: Phoebe Chen <phoebe.chen@sifive.com> Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Paul Dale <pauli@openssl.org> Reviewed-by: Hugo Landau <hlandau@openssl.org> (Merged from https://github.com/openssl/openssl/pull/21923)
1006 lines
30 KiB
Perl
1006 lines
30 KiB
Perl
#! /usr/bin/env perl
|
|
# This file is dual-licensed, meaning that you can use it under your
|
|
# choice of either of the following two licenses:
|
|
#
|
|
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You can obtain
|
|
# a copy in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
#
|
|
# or
|
|
#
|
|
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
|
|
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
|
|
# Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
# Probe for Devel::StackTrace once at load time: $have_stacktrace is 1 when
# the module can be required and 0 otherwise.
my $have_stacktrace = eval { require Devel::StackTrace; 1 } ? 1 : 0;
|
|
|
|
# Canonical integer register names x0 .. x31.
my @regs = map { "x$_" } 0 .. 31;

# RISC-V psABI mnemonic names, listed in register-number order so that
# $regaliases[$n] is the alias of x$n.
my @regaliases = (
    qw(zero ra sp gp tp t0 t1 t2 s0 s1),
    (map { "a$_" } 0 .. 7),
    (map { "s$_" } 2 .. 11),
    (map { "t$_" } 3 .. 6),
);

# Maps every accepted spelling (canonical name or ABI alias) to the canonical
# "x<N>" name.
my %reglookup;
for my $n (0 .. $#regs) {
    $reglookup{$regs[$n]}       = $regs[$n];
    $reglookup{$regaliases[$n]} = $regs[$n];
}
|
|
|
|
# Resolves an integer register name (canonical "x<N>" or psABI alias, matched
# case-insensitively) to its register index, 0 to 31.
# Dies, with a stack trace when Devel::StackTrace is available, if the name is
# not in %reglookup.
sub read_reg {
    my ($name) = @_;
    my $reg = lc $name;

    unless (exists $reglookup{$reg}) {
        my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
        die("Unknown register ".$reg."\n".$trace);
    }

    # Table values have the form "x<N>"; the captured digits are the index.
    if ($reglookup{$reg} =~ /^x([0-9]+)$/) {
        return $1;
    }

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Could not process register ".$reg."\n".$trace);
}
|
|
|
|
# Converts a SEW setting string ("e8", "e16", "e32" or "e64") to its vsew
# encoding (0 to 3).  Dies on any other value.
sub read_sew {
    my ($sew_setting) = @_;

    my %vsew_for = (
        e8  => 0,
        e16 => 1,
        e32 => 2,
        e64 => 3,
    );
    return $vsew_for{$sew_setting} if exists $vsew_for{$sew_setting};

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Unsupported SEW setting:".$sew_setting."\n".$trace);
}
|
|
|
|
# Converts an LMUL setting string to its vlmul encoding.  The fractional
# settings "mf8", "mf4", "mf2" map to 5, 6, 7 and the integer settings
# "m1", "m2", "m4", "m8" map to 0, 1, 2, 3.  Dies on any other value.
sub read_lmul {
    my ($lmul_setting) = @_;

    my %vlmul_for = (
        mf8 => 5,
        mf4 => 6,
        mf2 => 7,
        m1  => 0,
        m2  => 1,
        m4  => 2,
        m8  => 3,
    );
    return $vlmul_for{$lmul_setting} if exists $vlmul_for{$lmul_setting};

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Unsupported LMUL setting:".$lmul_setting."\n".$trace);
}
|
|
|
|
# Converts a tail policy string to its vta encoding: "ta" gives 1 and "tu"
# gives 0.  Dies on any other value.
sub read_tail_policy {
    my ($tail_setting) = @_;

    my %vta_for = (ta => 1, tu => 0);
    return $vta_for{$tail_setting} if exists $vta_for{$tail_setting};

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Unsupported tail policy setting:".$tail_setting."\n".$trace);
}
|
|
|
|
# Converts a mask policy string to its vma encoding: "ma" gives 1 and "mu"
# gives 0.  Dies on any other value.
sub read_mask_policy {
    my ($mask_setting) = @_;

    my %vma_for = (ma => 1, mu => 0);
    return $vma_for{$mask_setting} if exists $vma_for{$mask_setting};

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Unsupported mask policy setting:".$mask_setting."\n".$trace);
}
|
|
|
|
# Vector register names v0 .. v31.  Each name maps to itself, so the hash is
# used purely as a validity check.
my @vregs = map { "v$_" } 0 .. 31;
my %vreglookup = map { $_ => $_ } @vregs;
|
|
|
|
# Resolves a vector register name ("v0" .. "v31", matched case-insensitively)
# to its register index.  Dies, with a stack trace when Devel::StackTrace is
# available, if the name is not in %vreglookup.
sub read_vreg {
    my ($name) = @_;
    my $vreg = lc $name;

    unless (exists $vreglookup{$vreg}) {
        my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
        die("Unknown vector register ".$vreg."\n".$trace);
    }

    # The captured digits after the leading "v" are the index.
    if ($vreg =~ /^v([0-9]+)$/) {
        return $1;
    }

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("Could not process vector register ".$vreg."\n".$trace);
}
|
|
|
|
# Converts an optional mask operand to the value of the instruction's vm bit.
# Returns 1 (unmasked) when no operand is given and 0 when the operand is v0.
# Dies for any other vector register, because only v0 can be the mask.
sub read_mask_vreg {
    my ($vreg) = @_;

    # No mask operand supplied: the default is unmasked.
    return 1 unless defined $vreg;

    return 0 if read_vreg($vreg) == 0;

    my $trace = $have_stacktrace ? Devel::StackTrace->new->as_string : "";
    die("The ".$vreg." is not the mask register v0.\n".$trace);
}
|
|
|
|
# Helper functions
|
|
|
|
sub brev8_rv64i {
    # Software replacement for the `brev8` instruction (only in Zbkb).
    # Returns assembly text that rewrites the first argument in place.
    # Arguments, all register names:
    #   1. register holding the value (overwritten with the result)
    #   2. scratch register that receives each mask
    #   3. scratch register for the left-shifted half
    #   4. scratch register that receives the address of Lbrev8_const
    # The three stages swap neighbouring groups of 1, 2 and 4 bits using the
    # 64-bit masks loaded from offsets 0, 8 and 16 of Lbrev8_const.
    # NOTE(review): Lbrev8_const is not defined here; the generated code
    # assumes it holds the mask values shown in the trailing comments.
    my ($reg, $mask, $hi, $table) = @_;
    my $asm = <<___;
    la $table, Lbrev8_const

    ld $mask, 0($table) # 0xAAAAAAAAAAAAAAAA
    slli $hi, $reg, 1
    and $hi, $hi, $mask
    and $reg, $reg, $mask
    srli $reg, $reg, 1
    or $reg, $hi, $reg

    ld $mask, 8($table) # 0xCCCCCCCCCCCCCCCC
    slli $hi, $reg, 2
    and $hi, $hi, $mask
    and $reg, $reg, $mask
    srli $reg, $reg, 2
    or $reg, $hi, $reg

    ld $mask, 16($table) # 0xF0F0F0F0F0F0F0F0
    slli $hi, $reg, 4
    and $hi, $hi, $mask
    and $reg, $reg, $mask
    srli $reg, $reg, 4
    or $reg, $hi, $reg
___
    return $asm;
}
|
|
|
|
sub sd_rev8_rv64i {
    # Software replacement for `rev8` + `sd` (`rev8` is only in Zbb or Zbkb).
    # Returns assembly text that stores the 64-bit value byte-reversed.
    # Arguments:
    #   1. register holding the value (left unmodified)
    #   2. base address register
    #   3. numeric byte offset from the base address
    #   4. scratch register
    my ($val, $addr, $off, $tmp) = @_;

    # The lowest byte of the value goes to the highest address.
    my $last = $off + 7;
    my $seq = "    sb $val, $last($addr)\n";

    # Each following byte is shifted down into the scratch register and
    # stored one address lower than the previous one.
    for my $byte (1 .. 7) {
        my $shift = 8 * $byte;
        my $dest = $off + 7 - $byte;
        $seq .= "    srli $tmp, $val, $shift\n";
        $seq .= "    sb $tmp, $dest($addr)\n";
    }
    return $seq;
}
|
|
|
|
# Scalar crypto instructions
|
|
|
|
sub aes64ds {
    # aes64ds rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0011101_00000_00000_000_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64dsm {
    # aes64dsm rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0011111_00000_00000_000_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64es {
    # aes64es rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0011001_00000_00000_000_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64esm {
    # aes64esm rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0011011_00000_00000_000_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64im {
    # aes64im rd, rs1 (RV64), emitted as a raw .word directive.
    # Register fields: rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b001100000000_00000_001_00000_0010011;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64ks1i {
    # aes64ks1i rd, rs1, rnum (RV64), emitted as a raw .word directive.
    # rnum is a number placed at bit 20; rs1 goes at bit 15 and rd at bit 7.
    my ($rd_name, $rs1_name, $rnum) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b00110001_0000_00000_001_00000_0010011;
    $insn |= $rnum << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub aes64ks2 {
    # aes64ks2 rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0111111_00000_00000_000_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub brev8 {
    # brev8 rd, rs, emitted as a raw .word directive.
    # Register fields: rs at bit 15, rd at bit 7.
    my ($rd_name, $rs_name) = @_;
    my $rd = read_reg($rd_name);
    my $rs = read_reg($rs_name);

    my $insn = 0b011010000111_00000_101_00000_0010011;
    $insn |= $rs << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub clmul {
    # clmul rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_001_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub clmulh {
    # clmulh rd, rs1, rs2 (RV64), emitted as a raw .word directive.
    # Register fields: rs2 at bit 20, rs1 at bit 15, rd at bit 7.
    my ($rd_name, $rs1_name, $rs2_name) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_011_00000_0110011;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub rev8 {
    # rev8 rd, rs (RV64), emitted as a raw .word directive.
    # Register fields: rs at bit 15, rd at bit 7.
    my ($rd_name, $rs_name) = @_;
    my $rd = read_reg($rd_name);
    my $rs = read_reg($rs_name);

    my $insn = 0b011010111000_00000_101_00000_0010011;
    $insn |= $rs << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
# Vector instructions
|
|
|
|
sub vadd_vv {
    # vadd.vv vd, vs2, vs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $vs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_000_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vadd_vx {
    # vadd.vx vd, vs2, rs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_100_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsub_vv {
    # vsub.vv vd, vs2, vs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $vs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000010_0_00000_00000_000_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsub_vx {
    # vsub.vx vd, vs2, rs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000010_0_00000_00000_100_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vid_v {
    # vid.v vd -- emitted as a raw .word directive.
    # Only the destination register (bit 7) varies; the template has the
    # vm bit set, so the unmasked form is emitted.
    my ($vd_name) = @_;
    my $vd = read_vreg($vd_name);

    my $insn = 0b0101001_00000_10001_010_00000_1010111;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub viota_m {
    # viota.m vd, vs2, vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b010100_0_00000_10000_010_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vle8_v {
    # vle8.v vd, (rs1), vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_000_00000_0000111;
    $insn |= $vm << 25;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vle32_v {
    # vle32.v vd, (rs1), vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_110_00000_0000111;
    $insn |= $vm << 25;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vle64_v {
    # vle64.v vd, (rs1) -- emitted as a raw .word directive.
    # No mask operand is accepted; the template has the vm bit set
    # (unmasked).
    my ($vd_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0000001_00000_00000_111_00000_0000111;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vlse32_v {
    # vlse32.v vd, (rs1), rs2 -- emitted as a raw .word directive.
    # rs1 is the base address register and rs2 the third operand; the
    # template has the vm bit set (unmasked).
    my ($vd_name, $rs1_name, $rs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_110_00000_0000111;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vlsseg_nf_e32_v {
    # vlsseg<nf>e32.v vd, (rs1), rs2 -- emitted as a raw .word directive.
    # The first argument is the field count <nf>; it is stored as nf - 1
    # starting at bit 29.  The template has the vm bit set (unmasked).
    my ($nf, $vd_name, $rs1_name, $rs2_name) = @_;
    my $nf_field = $nf - 1;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_110_00000_0000111;
    $insn |= $nf_field << 29;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vlse64_v {
    # vlse64.v vd, (rs1), rs2 -- emitted as a raw .word directive.
    # rs1 is the base address register and rs2 the third operand; the
    # template has the vm bit set (unmasked).
    my ($vd_name, $rs1_name, $rs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_111_00000_0000111;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vluxei8_v {
    # vluxei8.v vd, (rs1), vs2, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $rs1_name, $vs2_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000001_0_00000_00000_000_00000_0000111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmerge_vim {
    # vmerge.vim vd, vs2, imm, v0 -- emitted as a raw .word directive.
    # Arguments: destination vector register, source vector register and a
    # numeric immediate.  The template has the vm bit clear, so v0 is
    # always the mask and is not passed as an argument.
    my ($vd_name, $vs2_name, $imm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    # The immediate field is five bits wide (bits 19:15).  Keep only the low
    # five bits so that a negative immediate such as -1 becomes 0b11111
    # instead of setting every higher bit of the instruction word.
    my $imm5 = $imm & 0x1f;

    my $insn = 0b0101110_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $imm5 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmerge_vvm {
    # vmerge.vvm vd, vs2, vs1 -- emitted as a raw .word directive.
    # The template has the vm bit clear, so v0 is always the mask and is not
    # passed as an argument.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b0101110_00000_00000_000_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmseq_vi {
    # vmseq.vi vd, vs2, imm -- emitted as a raw .word directive.
    # Arguments: destination vector register, source vector register (placed
    # in the field at bit 20) and a numeric immediate.  The template has the
    # vm bit set (unmasked).
    my ($vd_name, $vs2_name, $imm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    # The immediate field is five bits wide (bits 19:15).  Keep only the low
    # five bits so that a negative immediate such as -1 becomes 0b11111
    # instead of setting every higher bit of the instruction word.
    my $imm5 = $imm & 0x1f;

    my $insn = 0b0110001_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $imm5 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmsgtu_vx {
    # vmsgtu.vx vd, vs2, rs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b011110_0_00000_00000_100_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmv_v_i {
    # vmv.v.i vd, imm -- emitted as a raw .word directive.
    # Arguments: destination vector register and a numeric immediate.
    my ($vd_name, $imm) = @_;
    my $vd = read_vreg($vd_name);

    # The immediate field is five bits wide (bits 19:15).  Keep only the low
    # five bits so that a negative immediate such as -1 becomes 0b11111
    # instead of setting every higher bit of the instruction word.
    my $imm5 = $imm & 0x1f;

    my $insn = 0b0101111_00000_00000_011_00000_1010111;
    $insn |= $imm5 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmv_v_x {
    # vmv.v.x vd, rs1 -- emitted as a raw .word directive.
    # Register fields: rs1 at bit 15, vd at bit 7.
    my ($vd_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0101111_00000_00000_100_00000_1010111;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vmv_v_v {
    # vmv.v.v vd, vs1 -- emitted as a raw .word directive.
    # Register fields: vs1 at bit 15, vd at bit 7.
    my ($vd_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b0101111_00000_00000_000_00000_1010111;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vor_vv_v0t {
    # vor.vv vd, vs2, vs1, v0.t -- emitted as a raw .word directive.
    # The template leaves the vm bit (bit 25) clear, which is the v0.t
    # masked form; the mask register is therefore not an argument.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b0010100_00000_00000_000_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vse8_v {
    # vse8.v vd, (rs1), vm -- emitted as a raw .word directive.
    # The first argument is the vector register to store (field at bit 7).
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_000_00000_0100111;
    $insn |= $vm << 25;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vse32_v {
    # vse32.v vd, (rs1), vm -- emitted as a raw .word directive.
    # The first argument is the vector register to store (field at bit 7).
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $rs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000000_0_00000_00000_110_00000_0100111;
    $insn |= $vm << 25;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vssseg_nf_e32_v {
    # vssseg<nf>e32.v vs3, (rs1), rs2 -- emitted as a raw .word directive.
    # The first argument is the field count <nf>; it is stored as nf - 1
    # starting at bit 29.  The template has the vm bit set (unmasked).
    my ($nf, $vs3_name, $rs1_name, $rs2_name) = @_;
    my $nf_field = $nf - 1;
    my $vs3 = read_vreg($vs3_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_110_00000_0100111;
    $insn |= $nf_field << 29;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vs3 << 7;
    return ".word $insn";
}
|
|
|
|
sub vsuxei8_v {
    # vsuxei8.v vs3, (rs1), vs2, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vs3_name, $rs1_name, $vs2_name, $mask_name) = @_;
    my $vs3 = read_vreg($vs3_name);
    my $rs1 = read_reg($rs1_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b000001_0_00000_00000_000_00000_0100111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vs3 << 7;
    return ".word $insn";
}
|
|
|
|
sub vse64_v {
    # vse64.v vd, (rs1) -- emitted as a raw .word directive.
    # The first argument is the vector register to store (field at bit 7).
    # No mask operand is accepted; the template has the vm bit set.
    my ($vd_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0000001_00000_00000_111_00000_0100111;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsetivli__x0_2_e64_m1_tu_mu {
    # vsetivli x0, 2, e64, m1, tu, mu -- fixed encoding, printed in hex.
    my $base   = 0xc0007057;    # all operand fields zero, rd = x0
    my $vtypei = 0b0_0_011_000; # mu, tu, e64, m1
    my $avl    = 2;
    return sprintf(".word 0x%08x", $base | ($vtypei << 20) | ($avl << 15));
}
|
|
|
|
sub vsetivli__x0_4_e32_m1_tu_mu {
    # vsetivli x0, 4, e32, m1, tu, mu -- fixed encoding, printed in hex.
    my $base   = 0xc0007057;    # all operand fields zero, rd = x0
    my $vtypei = 0b0_0_010_000; # mu, tu, e32, m1
    my $avl    = 4;
    return sprintf(".word 0x%08x", $base | ($vtypei << 20) | ($avl << 15));
}
|
|
|
|
sub vsetivli__x0_4_e64_m1_tu_mu {
    # vsetivli x0, 4, e64, m1, tu, mu -- fixed encoding, printed in hex.
    my $base   = 0xc0007057;    # all operand fields zero, rd = x0
    my $vtypei = 0b0_0_011_000; # mu, tu, e64, m1
    my $avl    = 4;
    return sprintf(".word 0x%08x", $base | ($vtypei << 20) | ($avl << 15));
}
|
|
|
|
sub vsetivli__x0_8_e32_m1_tu_mu {
    # vsetivli x0, 8, e32, m1, tu, mu -- fixed encoding, printed in hex.
    my $base   = 0xc0007057;    # all operand fields zero, rd = x0
    my $vtypei = 0b0_0_010_000; # mu, tu, e32, m1
    my $avl    = 8;
    return sprintf(".word 0x%08x", $base | ($vtypei << 20) | ($avl << 15));
}
|
|
|
|
sub vsetvli {
    # vsetvli rd, rs1, vtypei -- emitted as a raw .word directive.
    # Arguments: rd, rs1, then the four vtype settings as strings:
    # SEW ("e8".."e64"), LMUL ("mf8".."m8"), tail policy ("ta"/"tu") and
    # mask policy ("ma"/"mu").
    my ($rd_name, $rs1_name, $sew_name, $lmul_name, $tail_name, $mask_name) = @_;
    my $rd   = read_reg($rd_name);
    my $rs1  = read_reg($rs1_name);
    my $sew  = read_sew($sew_name);
    my $lmul = read_lmul($lmul_name);
    my $vta  = read_tail_policy($tail_name);
    my $vma  = read_mask_policy($mask_name);

    # vtypei layout: vma at bit 7, vta at bit 6, vsew at bit 3, vlmul at bit 0.
    my $vtypei = $lmul;
    $vtypei |= $sew << 3;
    $vtypei |= $vta << 6;
    $vtypei |= $vma << 7;

    my $insn = 0b0_00000000000_00000_111_00000_1010111;
    $insn |= $vtypei << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsetivli {
    # vsetivli rd, uimm, vtypei -- emitted as a raw .word directive.
    # Arguments: rd, a numeric immediate (placed at bit 15), then the four
    # vtype settings as strings: SEW ("e8".."e64"), LMUL ("mf8".."m8"),
    # tail policy ("ta"/"tu") and mask policy ("ma"/"mu").
    my ($rd_name, $uimm, $sew_name, $lmul_name, $tail_name, $mask_name) = @_;
    my $rd   = read_reg($rd_name);
    my $sew  = read_sew($sew_name);
    my $lmul = read_lmul($lmul_name);
    my $vta  = read_tail_policy($tail_name);
    my $vma  = read_mask_policy($mask_name);

    # vtypei layout: vma at bit 7, vta at bit 6, vsew at bit 3, vlmul at bit 0.
    my $vtypei = $lmul;
    $vtypei |= $sew << 3;
    $vtypei |= $vta << 6;
    $vtypei |= $vma << 7;

    my $insn = 0b11_0000000000_00000_111_00000_1010111;
    $insn |= $vtypei << 20;
    $insn |= $uimm << 15;
    $insn |= $rd << 7;
    return ".word $insn";
}
|
|
|
|
sub vslidedown_vi {
    # vslidedown.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; the template has the vm bit set
    # (unmasked).
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b0011111_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vslidedown_vx {
    # vslidedown.vx vd, vs2, rs1 -- emitted as a raw .word directive.
    # The template has the vm bit set (unmasked).
    my ($vd_name, $vs2_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0011111_00000_00000_100_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vslideup_vi_v0t {
    # vslideup.vi vd, vs2, uimm, v0.t -- emitted as a raw .word directive.
    # The template leaves the vm bit (bit 25) clear, which is the v0.t
    # masked form; the mask register is therefore not an argument.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b0011100_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vslideup_vi {
    # vslideup.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; the template has the vm bit set
    # (unmasked).
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b0011101_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsll_vi {
    # vsll.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # No mask operand is accepted: the template has the vm bit set, so only
    # the unmasked form can be emitted.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b1001011_00000_00000_011_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsrl_vx {
    # vsrl.vx vd, vs2, rs1 -- emitted as a raw .word directive.
    # The template has the vm bit set (unmasked).
    my ($vd_name, $vs2_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b1010001_00000_00000_100_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsse32_v {
    # vsse32.v vs3, (rs1), rs2 -- emitted as a raw .word directive.
    # vs3 is the vector register to store, rs1 the base address register and
    # rs2 the third operand; the template has the vm bit set (unmasked).
    my ($vs3_name, $rs1_name, $rs2_name) = @_;
    my $vs3 = read_vreg($vs3_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_110_00000_0100111;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vs3 << 7;
    return ".word $insn";
}
|
|
|
|
sub vsse64_v {
    # vsse64.v vs3, (rs1), rs2 -- emitted as a raw .word directive.
    # vs3 is the vector register to store, rs1 the base address register and
    # rs2 the third operand; the template has the vm bit set (unmasked).
    my ($vs3_name, $rs1_name, $rs2_name) = @_;
    my $vs3 = read_vreg($vs3_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    my $insn = 0b0000101_00000_00000_111_00000_0100111;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vs3 << 7;
    return ".word $insn";
}
|
|
|
|
sub vxor_vv_v0t {
    # vxor.vv vd, vs2, vs1, v0.t -- emitted as a raw .word directive.
    # The template leaves the vm bit (bit 25) clear, which is the v0.t
    # masked form; the mask register is therefore not an argument.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b0010110_00000_00000_000_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vxor_vv {
    # vxor.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # The template has the vm bit set (unmasked).
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b0010111_00000_00000_000_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vzext_vf2 {
    # vzext.vf2 vd, vs2, vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b010010_0_00000_00110_010_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
# Vector crypto instructions
|
|
|
|
## Zvbb and Zvkb instructions
|
|
##
|
|
## vandn (also in zvkb)
|
|
## vbrev
|
|
## vbrev8 (also in zvkb)
|
|
## vrev8 (also in zvkb)
|
|
## vclz
|
|
## vctz
|
|
## vcpop
|
|
## vrol (also in zvkb)
|
|
## vror (also in zvkb)
|
|
## vwsll
|
|
|
|
sub vbrev8_v {
    # vbrev8.v vd, vs2, vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b010010_0_00000_01000_010_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vrev8_v {
    # vrev8.v vd, vs2, vm -- emitted as a raw .word directive.
    # The optional third argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b010010_0_00000_01001_010_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vror_vi {
    # vror.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # The immediate is split across two fields: its low five bits go to
    # bits 19:15 and the remaining upper part is placed at bit 26.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $uimm_high = $uimm >> 5;
    my $uimm_low  = $uimm & 0b11111;

    my $insn = 0b01010_0_1_00000_00000_011_00000_1010111;
    $insn |= $uimm_high << 26;
    $insn |= $vs2 << 20;
    $insn |= $uimm_low << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vwsll_vv {
    # vwsll.vv vd, vs2, vs1, vm -- emitted as a raw .word directive.
    # The optional fourth argument must be v0 and selects the masked form;
    # when it is omitted the vm bit (bit 25) is set, i.e. unmasked.
    my ($vd_name, $vs2_name, $vs1_name, $mask_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);
    my $vm  = read_mask_vreg($mask_name);

    my $insn = 0b110101_0_00000_00000_000_00000_1010111;
    $insn |= $vm << 25;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvbc instructions
|
|
|
|
sub vclmulh_vx {
    # vclmulh.vx vd, vs2, rs1 -- emitted as a raw .word directive.
    # The template has the vm bit set (unmasked).
    my ($vd_name, $vs2_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0011011_00000_00000_110_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vclmul_vx_v0t {
    # vclmul.vx vd, vs2, rs1, v0.t -- emitted as a raw .word directive.
    # The template leaves the vm bit (bit 25) clear, which is the v0.t
    # masked form; the mask register is therefore not an argument.
    my ($vd_name, $vs2_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0011000_00000_00000_110_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vclmul_vx {
    # vclmul.vx vd, vs2, rs1 -- emitted as a raw .word directive.
    # The template has the vm bit set (unmasked).
    my ($vd_name, $vs2_name, $rs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $rs1 = read_reg($rs1_name);

    my $insn = 0b0011001_00000_00000_110_00000_1010111;
    $insn |= $vs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvkg instructions
|
|
|
|
sub vghsh_vv {
    # vghsh.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vs1 at bit 15, vd at bit 7.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b1011001_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vgmul_vv {
    # vgmul.vv vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b1010001_00000_10001_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvkned instructions
|
|
|
|
sub vaesdf_vs {
    # vaesdf.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101001_1_00000_00001_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaesdm_vs {
    # vaesdm.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101001_1_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaesef_vs {
    # vaesef.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101001_1_00000_00011_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaesem_vs {
    # vaesem.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101001_1_00000_00010_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaeskf1_vi {
    # vaeskf1.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; vs2 goes at bit 20, vd at bit 7.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b100010_1_00000_00000_010_00000_1110111;
    $insn |= $uimm << 15;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaeskf2_vi {
    # vaeskf2.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; vs2 goes at bit 20, vd at bit 7.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101010_1_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vaesz_vs {
    # vaesz.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b101001_1_00000_00111_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvknha and Zvknhb instructions
|
|
|
|
sub vsha2ms_vv {
    # vsha2ms.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vs1 at bit 15, vd at bit 7.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b1011011_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsha2ch_vv {
    # vsha2ch.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vs1 at bit 15, vd at bit 7.
    # The template literal is grouped on the same field boundaries as the
    # neighbouring encoders; its value is 0xBA002077.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b1011101_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsha2cl_vv {
    # vsha2cl.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vs1 at bit 15, vd at bit 7.
    # The template literal is grouped on the same field boundaries as the
    # neighbouring encoders; its value is 0xBE002077.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b1011111_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvksed instructions
|
|
|
|
sub vsm4k_vi {
    # vsm4k.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; vs2 goes at bit 20, vd at bit 7.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b1000011_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsm4r_vs {
    # vsm4r.vs vd, vs2 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vd at bit 7; bits 19:15 are fixed by
    # the template.
    my ($vd_name, $vs2_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b1010011_00000_10000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
## Zvksh instructions
|
|
|
|
sub vsm3c_vi {
    # vsm3c.vi vd, vs2, uimm -- emitted as a raw .word directive.
    # uimm is a number placed at bit 15; vs2 goes at bit 20, vd at bit 7.
    my ($vd_name, $vs2_name, $uimm) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);

    my $insn = 0b1010111_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $uimm << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
sub vsm3me_vv {
    # vsm3me.vv vd, vs2, vs1 -- emitted as a raw .word directive.
    # Register fields: vs2 at bit 20, vs1 at bit 15, vd at bit 7.
    my ($vd_name, $vs2_name, $vs1_name) = @_;
    my $vd  = read_vreg($vd_name);
    my $vs2 = read_vreg($vs2_name);
    my $vs1 = read_vreg($vs1_name);

    my $insn = 0b1000001_00000_00000_010_00000_1110111;
    $insn |= $vs2 << 20;
    $insn |= $vs1 << 15;
    $insn |= $vd << 7;
    return ".word $insn";
}
|
|
|
|
1;
|