mirror of
https://github.com/openssl/openssl.git
synced 2025-02-17 14:32:04 +08:00
aes/asm/aesp8-ppc.pl: implement "tweak chaining".
This is useful in Linux kernel context, in cases data happens to be fragmented and processing can take multiple calls. Reviewed-by: Rich Salz <rsalz@openssl.org>
This commit is contained in:
parent
74daca72a5
commit
627c953376
@ -1909,6 +1909,15 @@ ___
|
||||
|
||||
#########################################################################
|
||||
{{{ # XTS procedures #
|
||||
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
|
||||
# const AES_KEY *key1, const AES_KEY *key2, #
|
||||
# [const] unsigned char iv[16]); #
|
||||
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
|
||||
# input tweak value is assumed to be encrypted already, and last tweak #
|
||||
# value, one suitable for consecutive call on same chunk of data, is #
|
||||
# written back to original buffer. In addition, in "tweak chaining" #
|
||||
# mode only complete input blocks are processed. #
|
||||
|
||||
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
|
||||
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
|
||||
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
|
||||
@ -1943,18 +1952,21 @@ $code.=<<___;
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
vperm $tweak,$tweak,$inptail,$inpperm
|
||||
|
||||
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
||||
lwz $rounds,240($key2)
|
||||
srwi $rounds,$rounds,1
|
||||
subi $rounds,$rounds,1
|
||||
li $idx,16
|
||||
|
||||
neg r11,$inp
|
||||
lvsr $inpperm,0,r11 # prepare for unaligned load
|
||||
lvx $inout,0,$inp
|
||||
addi $inp,$inp,15 # 15 is not typo
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
|
||||
${UCMP}i $key2,0 # key2==NULL?
|
||||
beq Lxts_enc_no_key2
|
||||
|
||||
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
||||
lwz $rounds,240($key2)
|
||||
srwi $rounds,$rounds,1
|
||||
subi $rounds,$rounds,1
|
||||
li $idx,16
|
||||
|
||||
lvx $rndkey0,0,$key2
|
||||
lvx $rndkey1,$idx,$key2
|
||||
addi $idx,$idx,16
|
||||
@ -1978,10 +1990,18 @@ Ltweak_xts_enc:
|
||||
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
||||
vcipher $tweak,$tweak,$rndkey1
|
||||
lvx $rndkey1,$idx,$key2
|
||||
li $idx,16
|
||||
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
||||
vcipherlast $tweak,$tweak,$rndkey0
|
||||
|
||||
li $ivp,0 # don't chain the tweak
|
||||
b Lxts_enc
|
||||
|
||||
Lxts_enc_no_key2:
|
||||
li $idx,-16
|
||||
and $len,$len,$idx # in "tweak chaining"
|
||||
# mode only complete
|
||||
# blocks are processed
|
||||
Lxts_enc:
|
||||
lvx $inptail,0,$inp
|
||||
addi $inp,$inp,16
|
||||
|
||||
@ -2097,6 +2117,19 @@ Loop_xts_enc_steal:
|
||||
b Loop_xts_enc # one more time...
|
||||
|
||||
Lxts_enc_done:
|
||||
${UCMP}i $ivp,0
|
||||
beq Lxts_enc_ret
|
||||
|
||||
vsrab $tmp,$tweak,$seven # next tweak value
|
||||
vaddubm $tweak,$tweak,$tweak
|
||||
vsldoi $tmp,$tmp,$tmp,15
|
||||
vand $tmp,$tmp,$eighty7
|
||||
vxor $tweak,$tweak,$tmp
|
||||
|
||||
le?vperm $tweak,$tweak,$tweak,$leperm
|
||||
stvx_u $tweak,0,$ivp
|
||||
|
||||
Lxts_enc_ret:
|
||||
mtspr 256,r12 # restore vrsave
|
||||
li r3,0
|
||||
blr
|
||||
@ -2135,18 +2168,21 @@ Lxts_enc_done:
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
vperm $tweak,$tweak,$inptail,$inpperm
|
||||
|
||||
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
||||
lwz $rounds,240($key2)
|
||||
srwi $rounds,$rounds,1
|
||||
subi $rounds,$rounds,1
|
||||
li $idx,16
|
||||
|
||||
neg r11,$inp
|
||||
lvsr $inpperm,0,r11 # prepare for unaligned load
|
||||
lvx $inout,0,$inp
|
||||
addi $inp,$inp,15 # 15 is not typo
|
||||
le?vxor $inpperm,$inpperm,$tmp
|
||||
|
||||
${UCMP}i $key2,0 # key2==NULL?
|
||||
beq Lxts_dec_no_key2
|
||||
|
||||
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
||||
lwz $rounds,240($key2)
|
||||
srwi $rounds,$rounds,1
|
||||
subi $rounds,$rounds,1
|
||||
li $idx,16
|
||||
|
||||
lvx $rndkey0,0,$key2
|
||||
lvx $rndkey1,$idx,$key2
|
||||
addi $idx,$idx,16
|
||||
@ -2170,10 +2206,19 @@ Ltweak_xts_dec:
|
||||
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
||||
vcipher $tweak,$tweak,$rndkey1
|
||||
lvx $rndkey1,$idx,$key2
|
||||
li $idx,16
|
||||
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
||||
vcipherlast $tweak,$tweak,$rndkey0
|
||||
|
||||
li $ivp,0 # don't chain the tweak
|
||||
b Lxts_dec
|
||||
|
||||
Lxts_dec_no_key2:
|
||||
neg $idx,$len
|
||||
andi. $idx,$idx,15
|
||||
add $len,$len,$idx # in "tweak chaining"
|
||||
# mode only complete
|
||||
# blocks are processed
|
||||
Lxts_dec:
|
||||
lvx $inptail,0,$inp
|
||||
addi $inp,$inp,16
|
||||
|
||||
@ -2328,6 +2373,19 @@ Loop_xts_dec_steal:
|
||||
b Loop_xts_dec # one more time...
|
||||
|
||||
Lxts_dec_done:
|
||||
${UCMP}i $ivp,0
|
||||
beq Lxts_dec_ret
|
||||
|
||||
vsrab $tmp,$tweak,$seven # next tweak value
|
||||
vaddubm $tweak,$tweak,$tweak
|
||||
vsldoi $tmp,$tmp,$tmp,15
|
||||
vand $tmp,$tmp,$eighty7
|
||||
vxor $tweak,$tweak,$tmp
|
||||
|
||||
le?vperm $tweak,$tweak,$tweak,$leperm
|
||||
stvx_u $tweak,0,$ivp
|
||||
|
||||
Lxts_dec_ret:
|
||||
mtspr 256,r12 # restore vrsave
|
||||
li r3,0
|
||||
blr
|
||||
@ -2338,8 +2396,8 @@ Lxts_dec_done:
|
||||
___
|
||||
#########################################################################
|
||||
{{ # Optimized XTS procedures #
|
||||
my $key_="r11";
|
||||
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
|
||||
my $key_=$key2;
|
||||
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
|
||||
$x00=0 if ($flavour =~ /osx/);
|
||||
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
|
||||
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
|
||||
@ -2353,33 +2411,32 @@ $code.=<<___;
|
||||
.align 5
|
||||
_aesp8_xts_encrypt6x:
|
||||
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
||||
mflr r0
|
||||
mflr r11
|
||||
li r7,`$FRAME+8*16+15`
|
||||
li r8,`$FRAME+8*16+31`
|
||||
$PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
||||
li r3,`$FRAME+8*16+31`
|
||||
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
||||
stvx v20,r7,$sp # ABI says so
|
||||
addi r7,r7,32
|
||||
stvx v21,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v21,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v22,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v23,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v23,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v24,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v25,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v25,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v26,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v27,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v27,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v28,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v29,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v29,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v30,r7,$sp
|
||||
stvx v31,r8,$sp
|
||||
mr r7,r0
|
||||
stvx v31,r3,$sp
|
||||
li r0,-1
|
||||
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
||||
li $x10,0x10
|
||||
@ -2842,12 +2899,12 @@ Lxts_enc6x_steal:
|
||||
vperm $out0,$out0,$out1,$inpperm
|
||||
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
|
||||
|
||||
subi r3,$out,17
|
||||
subi r30,$out,17
|
||||
subi $out,$out,16
|
||||
mtctr $taillen
|
||||
Loop_xts_enc6x_steal:
|
||||
lbzu r0,1(r3)
|
||||
stb r0,16(r3)
|
||||
lbzu r0,1(r30)
|
||||
stb r0,16(r30)
|
||||
bdnz Loop_xts_enc6x_steal
|
||||
|
||||
li $taillen,0
|
||||
@ -2856,7 +2913,15 @@ Loop_xts_enc6x_steal:
|
||||
|
||||
.align 4
|
||||
Lxts_enc6x_done:
|
||||
mtlr r7
|
||||
${UCMP}i $ivp,0
|
||||
beq Lxts_enc6x_ret
|
||||
|
||||
vxor $tweak,$twk0,$rndkey0
|
||||
le?vperm $tweak,$tweak,$tweak,$leperm
|
||||
stvx_u $tweak,0,$ivp
|
||||
|
||||
Lxts_enc6x_ret:
|
||||
mtlr r11
|
||||
li r10,`$FRAME+15`
|
||||
li r11,`$FRAME+31`
|
||||
stvx $seven,r10,$sp # wipe copies of round keys
|
||||
@ -2998,33 +3063,32 @@ _aesp8_xts_enc5x:
|
||||
.align 5
|
||||
_aesp8_xts_decrypt6x:
|
||||
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
||||
mflr r0
|
||||
mflr r11
|
||||
li r7,`$FRAME+8*16+15`
|
||||
li r8,`$FRAME+8*16+31`
|
||||
$PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
||||
li r3,`$FRAME+8*16+31`
|
||||
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
||||
stvx v20,r7,$sp # ABI says so
|
||||
addi r7,r7,32
|
||||
stvx v21,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v21,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v22,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v23,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v23,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v24,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v25,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v25,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v26,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v27,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v27,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v28,r7,$sp
|
||||
addi r7,r7,32
|
||||
stvx v29,r8,$sp
|
||||
addi r8,r8,32
|
||||
stvx v29,r3,$sp
|
||||
addi r3,r3,32
|
||||
stvx v30,r7,$sp
|
||||
stvx v31,r8,$sp
|
||||
mr r7,r0
|
||||
stvx v31,r3,$sp
|
||||
li r0,-1
|
||||
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
||||
li $x10,0x10
|
||||
@ -3524,11 +3588,11 @@ Lxts_dec6x_steal:
|
||||
vsel $out0,$in0,$tmp,$out0
|
||||
vxor $out0,$out0,$twk0
|
||||
|
||||
subi r3,$out,1
|
||||
subi r30,$out,1
|
||||
mtctr $taillen
|
||||
Loop_xts_dec6x_steal:
|
||||
lbzu r0,1(r3)
|
||||
stb r0,16(r3)
|
||||
lbzu r0,1(r30)
|
||||
stb r0,16(r30)
|
||||
bdnz Loop_xts_dec6x_steal
|
||||
|
||||
li $taillen,0
|
||||
@ -3537,7 +3601,15 @@ Loop_xts_dec6x_steal:
|
||||
|
||||
.align 4
|
||||
Lxts_dec6x_done:
|
||||
mtlr r7
|
||||
${UCMP}i $ivp,0
|
||||
beq Lxts_dec6x_ret
|
||||
|
||||
vxor $tweak,$twk0,$rndkey0
|
||||
le?vperm $tweak,$tweak,$tweak,$leperm
|
||||
stvx_u $tweak,0,$ivp
|
||||
|
||||
Lxts_dec6x_ret:
|
||||
mtlr r11
|
||||
li r10,`$FRAME+15`
|
||||
li r11,`$FRAME+31`
|
||||
stvx $seven,r10,$sp # wipe copies of round keys
|
||||
|
Loading…
Reference in New Issue
Block a user