mirror of
https://github.com/openssl/openssl.git
synced 2024-12-03 05:41:46 +08:00
1aa89a7a3a
They now generally conform to the following argument sequence: script.pl "$(PERLASM_SCHEME)" [ C preprocessor arguments ... ] \ $(PROCESSOR) <output file> However, in the spirit of being able to use these scripts manually, they also allow for no argument, or for only the flavour, or for only the output file. This is done by only using the last argument as output file if it's a file (it has an extension), and only using the first argument as flavour if it isn't a file (it doesn't have an extension). While we're at it, we make all $xlate calls the same, i.e. the $output argument is always quoted, and we always die on error when trying to start $xlate. There's a perl lesson in this, regarding operator priority... This will always succeed, even when it fails: open FOO, "something" || die "ERR: $!"; The reason is that '||' has higher priority than list operators (a function is essentially a list operator and gobbles up everything following it that isn't lower priority), and since a non-empty string is always true, so that ends up being exactly the same as: open FOO, "something"; This, however, will fail if "something" can't be opened: open FOO, "something" or die "ERR: $!"; The reason is that 'or' has lower priority that list operators, i.e. it's performed after the 'open' call. Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/9884)
356 lines
8.2 KiB
Raku
Executable File
356 lines
8.2 KiB
Raku
Executable File
#! /usr/bin/env perl
|
|
# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
|
|
# I let hardware handle unaligned input(*), except on page boundaries
|
|
# (see below for details). Otherwise straightforward implementation
|
|
# with X vector in register bank.
|
|
#
|
|
# (*) this means that this module is inappropriate for PPC403? Does
|
|
# anybody know if pre-POWER3 can sustain unaligned load?
|
|
|
|
# -m64 -m32
|
|
# ----------------------------------
|
|
# PPC970,gcc-4.0.0 +76% +59%
|
|
# Power6,xlc-7 +68% +33%
|
|
|
|
# $output is the last argument if it looks like a file (it has an extension)
|
|
# $flavour is the first argument if it doesn't look like a file
|
|
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
|
|
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
|
|
|
|
if ($flavour =~ /64/) {
|
|
$SIZE_T =8;
|
|
$LRSAVE =2*$SIZE_T;
|
|
$UCMP ="cmpld";
|
|
$STU ="stdu";
|
|
$POP ="ld";
|
|
$PUSH ="std";
|
|
} elsif ($flavour =~ /32/) {
|
|
$SIZE_T =4;
|
|
$LRSAVE =$SIZE_T;
|
|
$UCMP ="cmplw";
|
|
$STU ="stwu";
|
|
$POP ="lwz";
|
|
$PUSH ="stw";
|
|
} else { die "nonsense $flavour"; }
|
|
|
|
# Define endianness based on flavour
|
|
# i.e.: linux64le
|
|
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
|
|
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
|
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
|
die "can't locate ppc-xlate.pl";
|
|
|
|
open STDOUT,"| $^X $xlate $flavour \"$output\""
|
|
or die "can't call $xlate: $!";
|
|
|
|
$FRAME=24*$SIZE_T+64;
|
|
$LOCALS=6*$SIZE_T;
|
|
|
|
$K ="r0";
|
|
$sp ="r1";
|
|
$toc="r2";
|
|
$ctx="r3";
|
|
$inp="r4";
|
|
$num="r5";
|
|
$t0 ="r15";
|
|
$t1 ="r6";
|
|
|
|
$A ="r7";
|
|
$B ="r8";
|
|
$C ="r9";
|
|
$D ="r10";
|
|
$E ="r11";
|
|
$T ="r12";
|
|
|
|
@V=($A,$B,$C,$D,$E,$T);
|
|
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
|
|
"r24","r25","r26","r27","r28","r29","r30","r31");
|
|
|
|
sub loadbe {
|
|
my ($dst, $src, $temp_reg) = @_;
|
|
$code.=<<___ if (!$LITTLE_ENDIAN);
|
|
lwz $dst,$src
|
|
___
|
|
$code.=<<___ if ($LITTLE_ENDIAN);
|
|
lwz $temp_reg,$src
|
|
rotlwi $dst,$temp_reg,8
|
|
rlwimi $dst,$temp_reg,24,0,7
|
|
rlwimi $dst,$temp_reg,24,16,23
|
|
___
|
|
}
|
|
|
|
sub BODY_00_19 {
|
|
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
|
my $j=$i+1;
|
|
|
|
# Since the last value of $f is discarded, we can use
|
|
# it as a temp reg to swap byte-order when needed.
|
|
loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
|
|
loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
|
|
$code.=<<___ if ($i<15);
|
|
add $f,$K,$e
|
|
rotlwi $e,$a,5
|
|
add $f,$f,@X[$i]
|
|
and $t0,$c,$b
|
|
add $f,$f,$e
|
|
andc $t1,$d,$b
|
|
rotlwi $b,$b,30
|
|
or $t0,$t0,$t1
|
|
add $f,$f,$t0
|
|
___
|
|
$code.=<<___ if ($i>=15);
|
|
add $f,$K,$e
|
|
rotlwi $e,$a,5
|
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
|
add $f,$f,@X[$i%16]
|
|
and $t0,$c,$b
|
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
|
add $f,$f,$e
|
|
andc $t1,$d,$b
|
|
rotlwi $b,$b,30
|
|
or $t0,$t0,$t1
|
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
|
add $f,$f,$t0
|
|
rotlwi @X[$j%16],@X[$j%16],1
|
|
___
|
|
}
|
|
|
|
sub BODY_20_39 {
|
|
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
|
my $j=$i+1;
|
|
$code.=<<___ if ($i<79);
|
|
add $f,$K,$e
|
|
xor $t0,$b,$d
|
|
rotlwi $e,$a,5
|
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
|
add $f,$f,@X[$i%16]
|
|
xor $t0,$t0,$c
|
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
|
add $f,$f,$t0
|
|
rotlwi $b,$b,30
|
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
|
add $f,$f,$e
|
|
rotlwi @X[$j%16],@X[$j%16],1
|
|
___
|
|
$code.=<<___ if ($i==79);
|
|
add $f,$K,$e
|
|
xor $t0,$b,$d
|
|
rotlwi $e,$a,5
|
|
lwz r16,0($ctx)
|
|
add $f,$f,@X[$i%16]
|
|
xor $t0,$t0,$c
|
|
lwz r17,4($ctx)
|
|
add $f,$f,$t0
|
|
rotlwi $b,$b,30
|
|
lwz r18,8($ctx)
|
|
lwz r19,12($ctx)
|
|
add $f,$f,$e
|
|
lwz r20,16($ctx)
|
|
___
|
|
}
|
|
|
|
sub BODY_40_59 {
|
|
my ($i,$a,$b,$c,$d,$e,$f)=@_;
|
|
my $j=$i+1;
|
|
$code.=<<___;
|
|
add $f,$K,$e
|
|
rotlwi $e,$a,5
|
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
|
|
add $f,$f,@X[$i%16]
|
|
and $t0,$b,$c
|
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
|
|
add $f,$f,$e
|
|
or $t1,$b,$c
|
|
rotlwi $b,$b,30
|
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
|
|
and $t1,$t1,$d
|
|
or $t0,$t0,$t1
|
|
rotlwi @X[$j%16],@X[$j%16],1
|
|
add $f,$f,$t0
|
|
___
|
|
}
|
|
|
|
$code=<<___;
|
|
.machine "any"
|
|
.text
|
|
|
|
.globl .sha1_block_data_order
|
|
.align 4
|
|
.sha1_block_data_order:
|
|
$STU $sp,-$FRAME($sp)
|
|
mflr r0
|
|
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
|
|
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
|
|
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
|
|
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
|
|
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
|
|
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
|
|
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
|
|
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
|
|
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
|
|
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
|
|
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
|
|
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
|
|
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
|
|
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
|
|
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
|
|
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
|
|
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
|
|
$PUSH r0,`$FRAME+$LRSAVE`($sp)
|
|
lwz $A,0($ctx)
|
|
lwz $B,4($ctx)
|
|
lwz $C,8($ctx)
|
|
lwz $D,12($ctx)
|
|
lwz $E,16($ctx)
|
|
andi. r0,$inp,3
|
|
bne Lunaligned
|
|
Laligned:
|
|
mtctr $num
|
|
bl Lsha1_block_private
|
|
b Ldone
|
|
|
|
; PowerPC specification allows an implementation to be ill-behaved
|
|
; upon unaligned access which crosses page boundary. "Better safe
|
|
; than sorry" principle makes me treat it specially. But I don't
|
|
; look for particular offending word, but rather for 64-byte input
|
|
; block which crosses the boundary. Once found that block is aligned
|
|
; and hashed separately...
|
|
.align 4
|
|
Lunaligned:
|
|
subfic $t1,$inp,4096
|
|
andi. $t1,$t1,4095 ; distance to closest page boundary
|
|
srwi. $t1,$t1,6 ; t1/=64
|
|
beq Lcross_page
|
|
$UCMP $num,$t1
|
|
ble Laligned ; didn't cross the page boundary
|
|
mtctr $t1
|
|
subfc $num,$t1,$num
|
|
bl Lsha1_block_private
|
|
Lcross_page:
|
|
li $t1,16
|
|
mtctr $t1
|
|
addi r20,$sp,$LOCALS ; spot within the frame
|
|
Lmemcpy:
|
|
lbz r16,0($inp)
|
|
lbz r17,1($inp)
|
|
lbz r18,2($inp)
|
|
lbz r19,3($inp)
|
|
addi $inp,$inp,4
|
|
stb r16,0(r20)
|
|
stb r17,1(r20)
|
|
stb r18,2(r20)
|
|
stb r19,3(r20)
|
|
addi r20,r20,4
|
|
bdnz Lmemcpy
|
|
|
|
$PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
|
|
li $t1,1
|
|
addi $inp,$sp,$LOCALS
|
|
mtctr $t1
|
|
bl Lsha1_block_private
|
|
$POP $inp,`$FRAME-$SIZE_T*18`($sp)
|
|
addic. $num,$num,-1
|
|
bne Lunaligned
|
|
|
|
Ldone:
|
|
$POP r0,`$FRAME+$LRSAVE`($sp)
|
|
$POP r15,`$FRAME-$SIZE_T*17`($sp)
|
|
$POP r16,`$FRAME-$SIZE_T*16`($sp)
|
|
$POP r17,`$FRAME-$SIZE_T*15`($sp)
|
|
$POP r18,`$FRAME-$SIZE_T*14`($sp)
|
|
$POP r19,`$FRAME-$SIZE_T*13`($sp)
|
|
$POP r20,`$FRAME-$SIZE_T*12`($sp)
|
|
$POP r21,`$FRAME-$SIZE_T*11`($sp)
|
|
$POP r22,`$FRAME-$SIZE_T*10`($sp)
|
|
$POP r23,`$FRAME-$SIZE_T*9`($sp)
|
|
$POP r24,`$FRAME-$SIZE_T*8`($sp)
|
|
$POP r25,`$FRAME-$SIZE_T*7`($sp)
|
|
$POP r26,`$FRAME-$SIZE_T*6`($sp)
|
|
$POP r27,`$FRAME-$SIZE_T*5`($sp)
|
|
$POP r28,`$FRAME-$SIZE_T*4`($sp)
|
|
$POP r29,`$FRAME-$SIZE_T*3`($sp)
|
|
$POP r30,`$FRAME-$SIZE_T*2`($sp)
|
|
$POP r31,`$FRAME-$SIZE_T*1`($sp)
|
|
mtlr r0
|
|
addi $sp,$sp,$FRAME
|
|
blr
|
|
.long 0
|
|
.byte 0,12,4,1,0x80,18,3,0
|
|
.long 0
|
|
___
|
|
|
|
# This is private block function, which uses tailored calling
|
|
# interface, namely upon entry SHA_CTX is pre-loaded to given
|
|
# registers and counter register contains amount of chunks to
|
|
# digest...
|
|
$code.=<<___;
|
|
.align 4
|
|
Lsha1_block_private:
|
|
___
|
|
$code.=<<___; # load K_00_19
|
|
lis $K,0x5a82
|
|
ori $K,$K,0x7999
|
|
___
|
|
for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
|
|
$code.=<<___; # load K_20_39
|
|
lis $K,0x6ed9
|
|
ori $K,$K,0xeba1
|
|
___
|
|
for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
|
$code.=<<___; # load K_40_59
|
|
lis $K,0x8f1b
|
|
ori $K,$K,0xbcdc
|
|
___
|
|
for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
|
|
$code.=<<___; # load K_60_79
|
|
lis $K,0xca62
|
|
ori $K,$K,0xc1d6
|
|
___
|
|
for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
|
|
$code.=<<___;
|
|
add r16,r16,$E
|
|
add r17,r17,$T
|
|
add r18,r18,$A
|
|
add r19,r19,$B
|
|
add r20,r20,$C
|
|
stw r16,0($ctx)
|
|
mr $A,r16
|
|
stw r17,4($ctx)
|
|
mr $B,r17
|
|
stw r18,8($ctx)
|
|
mr $C,r18
|
|
stw r19,12($ctx)
|
|
mr $D,r19
|
|
stw r20,16($ctx)
|
|
mr $E,r20
|
|
addi $inp,$inp,`16*4`
|
|
bdnz Lsha1_block_private
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x14,0,0,0,0,0
|
|
.size .sha1_block_data_order,.-.sha1_block_data_order
|
|
___
|
|
$code.=<<___;
|
|
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
|
|
___
|
|
|
|
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
|
print $code;
|
|
close STDOUT;
|