mirror of
https://github.com/openssl/openssl.git
synced 2024-12-03 05:41:46 +08:00
1aa89a7a3a
They now generally conform to the following argument sequence: script.pl "$(PERLASM_SCHEME)" [ C preprocessor arguments ... ] \ $(PROCESSOR) <output file> However, in the spirit of being able to use these scripts manually, they also allow for no argument, or for only the flavour, or for only the output file. This is done by only using the last argument as output file if it's a file (it has an extension), and only using the first argument as flavour if it isn't a file (it doesn't have an extension). While we're at it, we make all $xlate calls the same, i.e. the $output argument is always quoted, and we always die on error when trying to start $xlate. There's a perl lesson in this, regarding operator priority... This will always succeed, even when it fails: open FOO, "something" || die "ERR: $!"; The reason is that '||' has higher priority than list operators (a function is essentially a list operator and gobbles up everything following it that isn't lower priority), and since a non-empty string is always true, that ends up being exactly the same as: open FOO, "something"; This, however, will fail if "something" can't be opened: open FOO, "something" or die "ERR: $!"; The reason is that 'or' has lower priority than list operators, i.e. it's performed after the 'open' call. Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/9884)
586 lines
13 KiB
Perl
586 lines
13 KiB
Perl
#! /usr/bin/env perl
|
|
# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
|
|
# September 2011
|
|
#
|
|
# Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
|
|
# details.
|
|
|
|
# Command-line handling.
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 argument order is selected for nasm/masm/mingw64 flavours, or when
# the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first in the directory of this script, then in
# ../../crypto/perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The low-priority "or" is deliberate: with "||" the die would bind to the
# (always true) command string and never fire.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
|
|
|
|
# Accumulator for the generated assembly; printed once at the very end.
$code=".text\n";

# Per-mode prefetch distance in bytes used by the page-boundary checks in
# generate_mode; modes without an entry get no such checks.
%PADLOCK_PREFETCH=(ecb=>128, cbc=>64, ctr32=>32);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20

# Register roles used by the generated code.
$ctx="%rdx";
$out="%rdi";
$inp="%rsi";
$len="%rcx";
$chunk="%rbx";

# Argument registers for the routines declared \@abi-omnipotent, which read
# their arguments straight from the native calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
|
|
|
|
$code.=<<___;
|
|
# padlock_capability: probe CPUID for a PadLock-capable CPU.
# Takes no arguments.  Returns in eax:
#   0 if the vendor string is not recognised or the highest extended
#     leaf reported by 0xC0000000 is below 0xC0000001;
#   otherwise the edx feature word of leaf 0xC0000001 with bit 4 forced on.
# Recognised vendors: "CentaurHauls" and Zhaoxin's "  Shanghai  " (the
# latter is padded with two spaces on each side, so the ebx and ecx
# constants below must each contain two spaces).
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8		# cpuid overwrites ebx, park rbx in r8
	xor	%eax,%eax
	cpuid				# leaf 0: vendor in ebx, edx, ecx
	xor	%eax,%eax		# default return value
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lzhaoxin
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmp	\$`"0x".unpack("H*",'hS  ')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'hgna')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'  ia')`,%ecx
	jne	.Lnoluck
.LzhaoxinEnd:
	mov	\$0xC0000000,%eax
	cpuid				# eax = highest extended leaf
	mov	%eax,%edx
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid				# edx = feature flags
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx		# restore callee-saved rbx
	ret
.size	padlock_capability,.-padlock_capability
|
|
|
|
# padlock_key_bswap: byte-swap an array of 32-bit words in place.
# In: arg1 = pointer to the array.  The 32-bit word count is read from
#     offset 240 of the same object (presumably the rounds field of an
#     AES_KEY that the caller has loaded with the word count - TODO confirm
#     against the caller).
# Clobbers eax, edx, arg1 and flags.
.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx		# number of words to swap
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1		# advance without touching flags
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap
|
|
|
|
# padlock_verify_context: public wrapper around _padlock_verify_ctx.
# In: arg1 = context pointer (moved to rdx, the ctx register).
# Pushes EFLAGS so the helper finds the image at 8(rsp), passes the
# address of .Lpadlock_saved_context in rax, then drops the EFLAGS slot
# with lea so that the current flags are not reloaded.
.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp		# discard pushed EFLAGS
	ret
.size	padlock_verify_context,.-padlock_verify_context
|
|
|
|
# _padlock_verify_ctx: internal helper shared by padlock_verify_context
# and the generated padlock_*_encrypt routines.
# In:  rdx    = context pointer,
#      rax    = address of the saved-context slot,
#      8(rsp) = EFLAGS image pushed by the caller right before the call.
# If bit 30 of that image is set and the saved pointer differs from rdx,
# a pushf/popf pair is executed (NOTE(review): presumably this is what
# makes the PadLock unit reload its key - confirm with the PadLock
# programming guide).  The slot is always updated to rdx.
# Clobbers r8 and flags.
.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8		# caller's EFLAGS image
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx		# same context as last time?
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)		# remember current context
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx
|
|
|
|
# padlock_reload_key: takes no arguments; executes a pushf/popf pair and
# returns.  NOTE(review): presumably used to force the PadLock unit to
# reload the key on the next xcrypt - confirm with the PadLock
# programming guide.
.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key
|
|
|
|
# padlock_aes_block: run rep xcryptecb with a block count of one.
# Declared with 3 arguments, used here as rdi (out), rsi (inp) and
# rdx (ctx); rdi and rsi are not touched before the instruction.
# The control word is taken from ctx+16 and the key from ctx+32.
# rbx is callee-saved and is preserved through r8.
.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block
|
|
|
|
# padlock_xstore: execute the xstore instruction.
# Declared with 2 arguments: rdi is left untouched for the instruction
# (presumably the destination buffer - confirm), and the low 32 bits of
# the second argument (esi) are moved to edx beforehand.
# Whatever the instruction leaves in rax is returned to the caller.
.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore
|
|
|
|
# padlock_sha1_oneshot: run rep xsha1 with rax = 0 on a stack copy of the
# caller's context.
# In:  rdi = context; 20 bytes are copied in and copied back out,
#      rsi = second argument, not touched here (presumably the input
#            pointer consumed by the instruction - confirm),
#      rdx = third argument, moved to rcx before the instruction.
# rdi is repointed at the stack copy for the instruction, while rdx keeps
# the caller's context pointer for the copy-out.  The stack copy is
# accessed with movaps, so rsp must be 16-byte aligned after the
# adjustment.
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
|
|
|
|
# padlock_sha1_blocks: same sequence as padlock_sha1_oneshot, except that
# rax is set to -1 instead of 0 before rep xsha1.
# In:  rdi = context; 20 bytes are copied in and copied back out,
#      rsi = second argument, not touched here (presumably the input
#            pointer consumed by the instruction - confirm),
#      rdx = third argument, moved to rcx before the instruction.
# The context is bounced through a stack copy that is accessed with
# movaps, so rsp must be 16-byte aligned after the adjustment.
.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,\@function,3
.align	16
padlock_sha1_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_blocks,.-padlock_sha1_blocks
|
|
|
|
# padlock_sha256_oneshot: run rep xsha256 with rax = 0 on a stack copy of
# the caller's context.
# In:  rdi = context; 32 bytes are copied in and copied back out,
#      rsi = second argument, not touched here (presumably the input
#            pointer consumed by the instruction - confirm),
#      rdx = third argument, moved to rcx before the instruction.
# The stack copy is accessed with movaps, so rsp must be 16-byte aligned
# after the adjustment.
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
|
|
|
|
# padlock_sha256_blocks: same sequence as padlock_sha256_oneshot, except
# that rax is set to -1 instead of 0 before rep xsha256.
# In:  rdi = context; 32 bytes are copied in and copied back out,
#      rsi = second argument, not touched here (presumably the input
#            pointer consumed by the instruction - confirm),
#      rdx = third argument, moved to rcx before the instruction.
# The stack copy is accessed with movaps, so rsp must be 16-byte aligned
# after the adjustment.
.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,\@function,3
.align	16
padlock_sha256_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_blocks,.-padlock_sha256_blocks
|
|
|
|
# padlock_sha512_blocks: run rep xsha512 on a stack copy of the caller's
# context.  Unlike the sha1/sha256 routines, rax is not initialised here.
# In:  rdi = context; 64 bytes are copied in and copied back out,
#      rsi = second argument, not touched here (presumably the input
#            pointer consumed by the instruction - confirm),
#      rdx = third argument, moved to rcx before the instruction.
# The stack copy is accessed with movaps, so rsp must be 16-byte aligned
# after the adjustment.
.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,\@function,3
.align	16
padlock_sha512_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
	.byte	0xf3,0x0f,0xa6,0xe0	# rep xsha512
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	ret
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
|
|
___
|
|
|
|
# generate_mode($mode, $opcode)
#
# Appends to $code the assembly for padlock_${mode}_encrypt.
#   $mode   - mode name used in the symbol and in all local labels; it
#             also selects the ctr32-only and prefetch-errata-only
#             fragments below (via %PADLOCK_PREFETCH).
#   $opcode - last byte of the "rep xcrypt*" instruction for this mode.
#
# The generated routine returns 0 in eax when ctx or len is not a
# multiple of 16, and 1 otherwise.  It has two main paths:
#   * aligned:   taken when bit 5 of the control word is set or when both
#                out and inp are 16-byte aligned; data is processed in
#                place.
#   * unaligned: data is processed in pieces of at most PADLOCK_CHUNK
#                bytes, bouncing through a stack buffer when out is
#                misaligned; the used stack area is zeroed before return.
sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#		struct padlock_cipher_data *ctx, size_t len);

# Prologue: argument checks, context verification, choice between the
# aligned and unaligned paths, and stack buffer allocation.  %rbp keeps
# the original %rsp for the whole unaligned path.
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	mov	\$$PADLOCK_CHUNK,%rax
	cmovz	%rax,$chunk			# chunk=chunk?:PADLOCK_CHUNK
___
# ctr32 only: size the first chunk from the 32-bit big-endian counter
# stored just below the control word.
$code.=<<___ if ($mode eq "ctr32");
.L${mode}_reenter:
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$`$PADLOCK_CHUNK/16-1`,%eax
	mov	\$$PADLOCK_CHUNK,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross PADLOCK_CHUNK
	cmovbe	$len,$chunk
___
# Prefetch errata: when this chunk is the last one and the data ends
# closer to a page boundary than the prefetch distance, trim the chunk;
# if nothing is left, go straight to the tail handler.
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	cmp	$chunk,$len
	ja	.L${mode}_loop
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax		# mask=distance<prefetch?-prefetch:-1
	and	%rax,$chunk
	jz	.L${mode}_unaligned_tail
___
# Unaligned main loop: parameters are parked in r8-r11, misaligned output
# is redirected to the stack buffer, misaligned input is first copied to
# the destination and then processed in place.
$code.=<<___;
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	cmp	$len,$chunk		# ctr32 artefact
	cmova	$len,$chunk		# ctr32 artefact
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
# Modes other than ecb and ctr32 copy the iv back into the context.
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
# ctr32 only: when the 16 bits tested below are all zero, add 0x10000 to
# the byte-swapped counter, i.e. carry into its upper half.
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	test	\$0xffff0000,%eax
	jnz	.L${mode}_no_carry
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)
.L${mode}_no_carry:
___
# Copy the result out of the stack buffer if the output was misaligned,
# then advance the pointers and the remaining length.
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	lea	(%rsp),$inp
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk
___
					if (!$PADLOCK_PREFETCH{$mode}) {
# No prefetch errata for this mode: simply loop until len reaches zero.
$code.=<<___;
	jnz	.L${mode}_loop
___
					} else {
# Prefetch errata: full chunks loop back directly; a final partial chunk
# goes through the page-boundary check and, if needed, the tail handler.
$code.=<<___;
	jz	.L${mode}_break
	cmp	$chunk,$len
	jae	.L${mode}_loop
___
$code.=<<___ if ($mode eq "ctr32");
	mov	$len,$chunk
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax
	and	%rax,$chunk
	jnz	.L${mode}_loop
___
# Tail handler: copy the remaining input onto the stack (allocating space
# only if no buffer was allocated earlier) and re-enter the loop with the
# stack copy as input.
$code.=<<___;
.L${mode}_unaligned_tail:
	xor	%eax,%eax
	cmp	%rsp,%rbp
	cmove	$len,%rax
	mov	$out,%r8		# save parameters
	mov	$len,$chunk
	sub	%rax,%rsp		# alloca
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	mov	%rsp,$inp
	mov	%r8, $out		# restore parameters
	mov	$chunk,$len
	jmp	.L${mode}_loop
.align	16
.L${mode}_break:
___
					}
# End of the unaligned path: zero the used stack area between %rsp and
# %rbp, release it, and leave through the common exit.  The aligned path
# starts right after.
$code.=<<___;
	cmp	%rbp,%rsp
	je	.L${mode}_done

	pxor	%xmm0,%xmm0
	lea	(%rsp),%rax
.L${mode}_bzero:
	movaps	%xmm0,(%rax)
	lea	16(%rax),%rax
	cmp	%rax,%rbp
	ja	.L${mode}_bzero

.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
___
# ctr32 only, aligned path: process pieces that stop where the low 16
# bits of the counter would wrap, adding 0x10000 to the counter after
# each piece.
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$0xffff,%eax
	mov	\$`16*0x10000`,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross 2^16
	cmovbe	$len,$chunk
	jbe	.L${mode}_aligned_skip

.L${mode}_aligned_loop:
	mov	$len,%r10		# save parameters
	mov	$chunk,$len
	mov	$chunk,%r11

	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*

	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)

	mov	%r10,$len		# restore parameters
	sub	%r11,$len
	mov	\$`16*0x10000`,$chunk
	jz	.L${mode}_exit
	cmp	$chunk,$len
	jae	.L${mode}_aligned_loop

.L${mode}_aligned_skip:
___
# Prefetch errata, aligned path: if the input ends closer to a page
# boundary than the prefetch distance, split off a remainder (kept in
# %rbp) to be handled through the stack by the aligned tail.
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	lea	($inp,$len),%rbp
	neg	%rbp
	and	\$0xfff,%rbp		# distance to page boundary
	xor	%eax,%eax
	cmp	\$$PADLOCK_PREFETCH{$mode},%rbp
	mov	\$$PADLOCK_PREFETCH{$mode}-1,%rbp
	cmovae	%rax,%rbp
	and	$len,%rbp		# remainder
	sub	%rbp,$len
	jz	.L${mode}_aligned_tail
___
# Aligned path proper: a single rep xcrypt* over the data.
$code.=<<___;
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
# Aligned tail: copy the remainder to freshly allocated stack space and
# hand it to the unaligned loop, which also zeroes and frees that space.
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	test	%rbp,%rbp		# check remainder
	jz	.L${mode}_exit

.L${mode}_aligned_tail:
	mov	$out,%r8
	mov	%rbp,$chunk
	mov	%rbp,$len
	lea	(%rsp),%rbp
	sub	$len,%rsp
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	lea	(%r8),$out
	lea	(%rsp),$inp
	mov	$chunk,$len
	jmp	.L${mode}_loop
___
# Common exit: return 1 and drop the EFLAGS slot pushed in the prologue.
# The abort label is reached before that pushf, with eax still 0.
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}
|
|
|
|
# Emit one padlock_<mode>_encrypt routine per cipher mode; the second
# argument is the last opcode byte of the matching rep xcrypt* instruction.
&generate_mode("ecb",0xc8);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...

# Identification string, then the writable slot that _padlock_verify_ctx
# uses to remember the last context pointer.
$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___
# Evaluate every `...` expression embedded in the generated text.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT is a pipe to x86_64-xlate.pl: closing it waits for the child and
# reports its failure, which must not go unnoticed by the build.
close STDOUT or die "error closing STDOUT: $!";
|