openssl/crypto/bn/asm/c64xplus-gf2m.pl

#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of a low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. The comparison is, however,
# totally unfair, because this module utilizes the Galois Field
# Multiply instruction.

# The last command-line argument, if present, names the output file and
# STDOUT is redirected to it, so the final "print $code" lands there.
# With no argument the generated assembly goes to the original STDOUT.
# The three-argument open keeps the file name from being parsed as a
# mode, and a failed open now aborts instead of silently printing to
# the wrong place.
$output = pop and (open STDOUT, ">", $output or die "can't open $output: $!");

# Register allocation.  Every variable below holds the *name* of a C64x+
# register; the names are interpolated into the assembly templates.

# argument vector: result pointer followed by the four input words
($rp,$a1,$a0,$b1,$b0)=qw(A4 B4 A6 B6 A8);

# Low halfword of the current A operand and its four partial products
# live in A16..A20, the high halfword and its products in B16..B20.
($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map { "A$_" } (16..20);
($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map { "B$_" } (16..20);

# The four bytes of the current B operand.
($B_0,$B_1,$B_2,$B_3)=qw(B5 A5 A7 B7);

# Staging registers for the operands handed to mul_1x1_merged; they
# deliberately coincide with $Alo and $B_1.
$A=$Alo;
$B=$B_1;

# Register that is loaded with the 0xFF byte mask.
$xFF='B1';

# mul_1x1_upper($A,$B)
#
# Appends to $code the opening half of one 32x32-bit carry-less
# (polynomial) multiplication of the registers named by $A and $B.
# $B is split into four bytes ($B_0..$B_3) and $A into two halfwords
# ($Alo,$Ahi); eight XORMPY instructions then form the 16x8-bit partial
# products $Alox0..$Alox3 and $Ahix0..$Ahix3.  Nothing is combined here:
# the partial products are folded into a two-word result by the code
# that mul_1x1_merged or mul_1x1_lower emits afterwards.
#
# The lexical $A/$B declared below shadow the file-level $A/$B, so the
# operands are read from whatever registers the caller names.
# Returns nothing useful; the only effect is the text appended to $code.
sub mul_1x1_upper {
my ($A,$B)=@_;
# Lines starting with "||" are issued in parallel with the preceding
# instruction, so the template is grouped into five execute packets.
$code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
||	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
	XORMPY	$Alo,$B_0,$Alox0
||	XORMPY	$Ahi,$B_0,$Ahix0
	XORMPY	$Alo,$B_3,$Alox3
||	XORMPY	$Ahi,$B_3,$Ahix3
	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
___
}
# mul_1x1_merged($OUTlo,$OUThi,$A,$B)
#
# Appends to $code a software-pipelined template that does two things
# at once:
#   * folds the partial products left in $Alox0..3/$Ahix0..3 by the
#     PREVIOUS multiplication into the register pair $OUTlo/$OUThi
#     (same sequence as mul_1x1_lower), and
#   * starts the NEXT multiplication of $A by $B (same instructions as
#     mul_1x1_upper).
# In the template, instructions whose mnemonic is preceded by an extra
# space belong to the next multiplication; the others finish the
# previous one.
#
# $A may name the same register as $Alo and $B the same as $B_1 (the
# main sequence calls it that way): $A is read by both halfword
# extractions in a single execute packet, and byte 1 of $B is extracted
# into $B_1 only after the other three bytes have been taken.
#
# The new $Alo x $B_0 product is written to A1 rather than $Alox0,
# because the previous $Alox0 is still consumed further down; the final
# MV copies A1 into $Alox0.  A1 is therefore clobbered.
# NOTE(review): the exact interleaving presumably relies on XORMPY
# result latency - confirm against the C64x+ instruction set guide
# before reordering anything.
# Returns nothing useful; the only effect is the text appended to $code.
sub mul_1x1_merged {
my ($OUTlo,$OUThi,$A,$B)=@_;
$code.=<<___;
	 EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	 AND	$B,$xFF,$B_0
||	 SHRU	$B,24,$B_3
	 SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	 EXTU	$A,16,16,$Alo

	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
||	 XORMPY	$Alo,$B_2,$Alox2
	 XORMPY	$Ahi,$B_2,$Ahix2
||	 EXTU	$B,16,24,$B_1
||	 XORMPY	$Alo,$B_0,A1		; $Alox0
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	 XORMPY	$Ahi,$B_0,$Ahix0
||	 XORMPY	$Alo,$B_3,$Alox3
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	 XORMPY	$Ahi,$B_3,$Ahix3
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
||	 XORMPY	$Alo,$B_1,$Alox1
||	 XORMPY	$Ahi,$B_1,$Ahix1
||	 MV	A1,$Alox0
___
}
# mul_1x1_lower($OUTlo,$OUThi)
#
# Appends to $code the closing half of a multiplication: the eight
# partial products left in $Alox0..3/$Ahix0..3 are XORed together at
# their proper bit offsets into the two-word result $OUTlo (low word)
# and $OUThi (high word).  The offsets implied by the shifts are:
#
#   $Alox0 << 0    $Alox1 << 8    $Alox2 << 16   $Alox3 << 24
#   $Ahix0 << 16   $Ahix1 << 24   $Ahix2 << 32   $Ahix3 << 40
#
# Products sharing an offset ($Ahix0/$Alox2 and $Ahix1/$Alox3) are XORed
# first, and the sum is then split across the two output words with an
# SHL/SHRU pair.  $Alox1, $Ahix0, $Ahix1 and $Ahix3 are overwritten in
# the process.
#
# The first template line, ";NOP", is an assembly comment (a disabled
# NOP), whereas the NOP after the first packet is real.
# NOTE(review): that NOP presumably waits for outstanding XORMPY
# results - confirm before removing it.
# Returns nothing useful; the only effect is the text appended to $code.
sub mul_1x1_lower {
my ($OUTlo,$OUThi)=@_;
$code.=<<___;
	;NOP
	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
	NOP
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
___
}
# Main emission sequence: builds the complete bn_GF2m_mul_2x2 routine in
# $code and prints it.  The 2x2-word product is assembled from three
# one-word multiplications, a0·b0, a1·b1 and (a0+a1)·(b0+b1), where "+"
# is XOR.  The order of the fragments below is significant: a fragment
# that begins with "||" joins the last execute packet of the fragment
# emitted just before it.

# Prologue.  __TI_EABI__ is defined as 0 for assemblers older than
# version 7000000; when it is non-zero the underscore-prefixed symbol is
# substituted by the plain name.  $xFF is loaded with the 0xFF mask used
# for byte extraction.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
	.endif

	.global	_bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
	.asmfunc
	MVK	0xFF,$xFF
___
	&mul_1x1_upper($a0,$b0);		# a0·b0
# Stage the next operand pair (a1,b1) in $B/$A; the first MV runs in
# parallel with the last XORMPY packet emitted above.
$code.=<<___;
||	MV	$b1,$B
	MV	$a1,$A
___
# Finish a0·b0 into A28 (low word) / B28 (high word) while starting a1·b1.
	&mul_1x1_merged("A28","B28",$A,$B);	# a0·b0/a1·b1
# Stage the operand pair (a0+a1, b0+b1).
$code.=<<___;
||	XOR	$b0,$b1,$B
	XOR	$a0,$a1,$A
___
# Finish a1·b1 into A31/B31 while starting (a0+a1)·(b0+b1).
	&mul_1x1_merged("A31","B31",$A,$B);	# a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
	XOR	A28,A31,A29
||	XOR	B28,B31,B29			; a0·b0+a1·b1
___
# Finish (a0+a1)·(b0+b1) into A30/B30.
	&mul_1x1_lower("A30","B30");		# (a0+a1)·(b0+b1)
# Epilogue.  BNOP B3 is issued alongside the last packet of the fold
# and is followed by five execute packets that compute the middle term
# and store the four result words: r[0]=A28, r[1]=B28^mid_lo,
# r[2]=A31^mid_hi, r[3]=B31.
# NOTE(review): B3 is presumably the return address and the five
# packets presumably fill the branch delay slots - confirm against the
# C64x+ calling convention and instruction set guide.
$code.=<<___;
||	BNOP	B3
	XOR	A29,A30,A30
||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
	XOR	B28,A30,A30
||	STW	A28,*${rp}[0]
	XOR	B30,A31,A31
||	STW	A30,*${rp}[1]
	STW	A31,*${rp}[2]
	STW	B31,*${rp}[3]
	.endasmfunc
___

# Write out the generated assembly; buffered write errors surface at
# close, hence the explicit check.
print $code;
close STDOUT or die "error closing STDOUT: $!";
Add OpenSSL copyright to .pl files Reviewed-by: Richard Levitte <levitte@openssl.org> 2016-05-21 20:23:39 +08:00			`#! /usr/bin/env perl`
Update copyright year Reviewed-by: Richard Levitte <levitte@openssl.org> (Merged from https://github.com/openssl/openssl/pull/11616) 2020-04-23 20:55:52 +08:00			`# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.`
Add OpenSSL copyright to .pl files Reviewed-by: Richard Levitte <levitte@openssl.org> 2016-05-21 20:23:39 +08:00			`#`
Following the license change, modify the boilerplates in crypto/bn/ [skip ci] Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/7777) 2018-12-06 20:22:12 +08:00			`# Licensed under the Apache License 2.0 (the "License"). You may not use`
Add OpenSSL copyright to .pl files Reviewed-by: Richard Levitte <levitte@openssl.org> 2016-05-21 20:23:39 +08:00			`# this file except in compliance with the License. You can obtain a copy`
			`# in the file LICENSE in the source distribution or at`
			`# https://www.openssl.org/source/license.html`

C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`#`
			`# ====================================================================`
			`# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL`
			`# project. The module is, however, dual licensed under OpenSSL and`
			`# CRYPTOGAMS licenses depending on where you obtain it. For further`
			`# details see http://www.openssl.org/~appro/cryptogams/.`
			`# ====================================================================`
			`#`
			`# February 2012`
			`#`
			`# The module implements bn_GF2m_mul_2x2 polynomial multiplication`
			`# used in bn_gf2m.c. It's kind of low-hanging mechanical port from`
			`# C for the time being... The subroutine runs in 37 cycles, which is`
			`# 4.5x faster than compiler-generated code. Though comparison is`
			`# totally unfair, because this module utilizes Galois Field Multiply`
			`# instruction.`

Unify all assembler file generators They now generally conform to the following argument sequence: script.pl "$(PERLASM_SCHEME)" [ C preprocessor arguments ... ] \ $(PROCESSOR) <output file> However, in the spirit of being able to use these scripts manually, they also allow for no argument, or for only the flavour, or for only the output file. This is done by only using the last argument as output file if it's a file (it has an extension), and only using the first argument as flavour if it isn't a file (it doesn't have an extension). While we're at it, we make all $xlate calls the same, i.e. the $output argument is always quoted, and we always die on error when trying to start $xlate. There's a perl lesson in this, regarding operator priority... This will always succeed, even when it fails: open FOO, "something" \|\| die "ERR: $!"; The reason is that '\|\|' has higher priority than list operators (a function is essentially a list operator and gobbles up everything following it that isn't lower priority), and since a non-empty string is always true, so that ends up being exactly the same as: open FOO, "something"; This, however, will fail if "something" can't be opened: open FOO, "something" or die "ERR: $!"; The reason is that 'or' has lower priority that list operators, i.e. it's performed after the 'open' call. Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/9884) 2019-09-13 06:06:46 +08:00			`$output = pop and open STDOUT,">$output";`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00
			`($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector`

			`($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));`
			`($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));`
			`($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");`
			`($A,$B)=($Alo,$B_1);`
			`$xFF="B1";`

			`sub mul_1x1_upper {`
			`my ($A,$B)=@_;`
			`$code.=<<___;`
			`EXTU $B,8,24,$B_2 ; smash $B to 4 bytes`
			`\|\| AND $B,$xFF,$B_0`
			`\|\| SHRU $B,24,$B_3`
			`SHRU $A,16, $Ahi ; smash $A to two halfwords`
			`\|\| EXTU $A,16,16,$Alo`

Many spelling fixes/typo's corrected. Around 138 distinct errors found and fixed; thanks! Reviewed-by: Kurt Roeckx <kurt@roeckx.be> Reviewed-by: Tim Hudson <tjh@openssl.org> Reviewed-by: Rich Salz <rsalz@openssl.org> (Merged from https://github.com/openssl/openssl/pull/3459) 2017-11-12 08:03:10 +08:00			`XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`\|\| XORMPY $Ahi,$B_2,$Ahix2`
			`\|\| EXTU $B,16,24,$B_1`
			`XORMPY $Alo,$B_0,$Alox0`
			`\|\| XORMPY $Ahi,$B_0,$Ahix0`
			`XORMPY $Alo,$B_3,$Alox3`
			`\|\| XORMPY $Ahi,$B_3,$Ahix3`
			`XORMPY $Alo,$B_1,$Alox1`
			`\|\| XORMPY $Ahi,$B_1,$Ahix1`
			`___`
			`}`
			`sub mul_1x1_merged {`
			`my ($OUTlo,$OUThi,$A,$B)=@_;`
			`$code.=<<___;`
			`EXTU $B,8,24,$B_2 ; smash $B to 4 bytes`
			`\|\| AND $B,$xFF,$B_0`
			`\|\| SHRU $B,24,$B_3`
			`SHRU $A,16, $Ahi ; smash $A to two halfwords`
			`\|\| EXTU $A,16,16,$Alo`

			`XOR $Ahix0,$Alox2,$Ahix0`
			`\|\| MV $Ahix2,$OUThi`
			`\|\| XORMPY $Alo,$B_2,$Alox2`
			`XORMPY $Ahi,$B_2,$Ahix2`
			`\|\| EXTU $B,16,24,$B_1`
			`\|\| XORMPY $Alo,$B_0,A1 ; $Alox0`
			`XOR $Ahix1,$Alox3,$Ahix1`
			`\|\| SHL $Ahix0,16,$OUTlo`
			`\|\| SHRU $Ahix0,16,$Ahix0`
			`XOR $Alox0,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix0,$OUThi,$OUThi`
			`\|\| XORMPY $Ahi,$B_0,$Ahix0`
			`\|\| XORMPY $Alo,$B_3,$Alox3`
			`\|\| SHL $Alox1,8,$Alox1`
			`\|\| SHL $Ahix3,8,$Ahix3`
			`XOR $Alox1,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix3,$OUThi,$OUThi`
			`\|\| XORMPY $Ahi,$B_3,$Ahix3`
			`\|\| SHL $Ahix1,24,$Alox1`
			`\|\| SHRU $Ahix1,8, $Ahix1`
			`XOR $Alox1,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix1,$OUThi,$OUThi`
			`\|\| XORMPY $Alo,$B_1,$Alox1`
			`\|\| XORMPY $Ahi,$B_1,$Ahix1`
			`\|\| MV A1,$Alox0`
			`___`
			`}`
			`sub mul_1x1_lower {`
			`my ($OUTlo,$OUThi)=@_;`
			`$code.=<<___;`
			`;NOP`
			`XOR $Ahix0,$Alox2,$Ahix0`
			`\|\| MV $Ahix2,$OUThi`
			`NOP`
			`XOR $Ahix1,$Alox3,$Ahix1`
			`\|\| SHL $Ahix0,16,$OUTlo`
			`\|\| SHRU $Ahix0,16,$Ahix0`
			`XOR $Alox0,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix0,$OUThi,$OUThi`
			`\|\| SHL $Alox1,8,$Alox1`
			`\|\| SHL $Ahix3,8,$Ahix3`
			`XOR $Alox1,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix3,$OUThi,$OUThi`
			`\|\| SHL $Ahix1,24,$Alox1`
			`\|\| SHRU $Ahix1,8, $Ahix1`
			`XOR $Alox1,$OUTlo,$OUTlo`
			`\|\| XOR $Ahix1,$OUThi,$OUThi`
			`___`
			`}`
			`$code.=<<___;`
			`.text`
C64x+ assembly pack: make it work with older toolchain. 2014-05-04 22:38:32 +08:00
			`.if .ASSEMBLER_VERSION<7000000`
			`.asg 0,__TI_EABI__`
			`.endif`
C64x+ assembly pack: improve EABI support. 2012-11-28 21:19:10 +08:00			`.if __TI_EABI__`
			`.asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2`
			`.endif`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00
			`.global _bn_GF2m_mul_2x2`
			`_bn_GF2m_mul_2x2:`
			`.asmfunc`
			`MVK 0xFF,$xFF`
			`___`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`&mul_1x1_upper($a0,$b0); # a0·b0`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`$code.=<<___;`
			`\|\| MV $b1,$B`
			`MV $a1,$A`
			`___`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`&mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`$code.=<<___;`
			`\|\| XOR $b0,$b1,$B`
			`XOR $a0,$a1,$A`
			`___`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`&mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`$code.=<<___;`
			`XOR A28,A31,A29`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`\|\| XOR B28,B31,B29 ; a0·b0+a1·b1`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`___`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`&mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`$code.=<<___;`
			`\|\| BNOP B3`
			`XOR A29,A30,A30`
Conversion to UTF-8 where needed This leaves behind files with names ending with '.iso-8859-1'. These should be safe to remove. If something went wrong when re-encoding, there will be some files with names ending with '.utf8' left behind. Reviewed-by: Rich Salz <rsalz@openssl.org> 2015-07-13 22:53:37 +08:00			`\|\| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1`
C64x+ assembler pack. linux-c64xplus build is not tested nor can it be tested, because kernel is not in shape to handle it yet. The code is committed mostly to stimulate the kernel development. 2012-04-18 21:01:36 +08:00			`XOR B28,A30,A30`
			`\|\| STW A28,*${rp}[0]`
			`XOR B30,A31,A31`
			`\|\| STW A30,*${rp}[1]`
			`STW A31,*${rp}[2]`
			`STW B31,*${rp}[3]`
			`.endasmfunc`
			`___`

			`print $code;`
Also check for errors in x86_64-xlate.pl. In https://github.com/openssl/openssl/pull/10883, I'd meant to exclude the perlasm drivers since they aren't opening pipes and do not particularly need it, but I only noticed x86_64-xlate.pl, so arm-xlate.pl and ppc-xlate.pl got the change. That seems to have been fine, so be consistent and also apply the change to x86_64-xlate.pl. Checking for errors is generally a good idea. Reviewed-by: Richard Levitte <levitte@openssl.org> Reviewed-by: David Benjamin <davidben@google.com> (Merged from https://github.com/openssl/openssl/pull/10930) 2020-02-17 10:17:53 +08:00			`close STDOUT or die "error closing STDOUT: $!";`