mirror of
https://github.com/openssl/openssl.git
synced 2025-02-17 14:32:04 +08:00
bn/asm/ppc.pl to use ppc-xlate.pl.
This commit is contained in:
parent
11d0ebc841
commit
31439046e0
@ -151,91 +151,15 @@ if ($opf =~ /32\.s/) {
|
||||
$TR= "td"; # conditional trap
|
||||
} else { die "nonsense $opf"; }
|
||||
|
||||
( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!";
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
||||
die "can't locate ppc-xlate.pl";
|
||||
|
||||
# function entry points from the AIX code
|
||||
#
|
||||
# There are other, more elegant, ways to handle this. We (IBM) chose
|
||||
# this approach as it plays well with scripts we run to 'namespace'
|
||||
# OpenSSL, i.e. we add a prefix to all the public symbols so we can
|
||||
# co-exist in the same process with other implementations of OpenSSL.
|
||||
# 'cleverer' ways of doing these substitutions tend to hide data we
|
||||
# need to be obvious.
|
||||
#
|
||||
my @items = ("bn_sqr_comba4",
|
||||
"bn_sqr_comba8",
|
||||
"bn_mul_comba4",
|
||||
"bn_mul_comba8",
|
||||
"bn_sub_words",
|
||||
"bn_add_words",
|
||||
"bn_div_words",
|
||||
"bn_sqr_words",
|
||||
"bn_mul_words",
|
||||
"bn_mul_add_words");
|
||||
( defined shift || open STDOUT,"| $^X $xlate $opf" ) ||
|
||||
die "can't call $xlate: $!";
|
||||
|
||||
if ($opf =~ /linux/) { do_linux(); }
|
||||
elsif ($opf =~ /aix/) { do_aix(); }
|
||||
elsif ($opf =~ /osx/) { do_osx(); }
|
||||
else { do_bsd(); }
|
||||
|
||||
# Emit the assembly for Linux targets: take the text returned by data(),
# rewrite the entry-point symbols listed in @items, prefix the internal
# Lppcasm_ labels with '.', and print the result.
# Takes no arguments; reads the globals $BITS and @items.
sub do_linux {
|
||||
$d=&data();
|
||||
|
||||
# 64-bit: each ".name:" label is replaced by an entry in the ".opd"
# section named "name" whose .quad refers to ".name", followed by the
# original ".name:" label marked as a global function symbol
# (presumably the 64-bit PowerPC ELF function-descriptor layout --
# TODO confirm against the ABI).
if ($BITS==64) {
|
||||
foreach $t (@items) {
|
||||
$d =~ s/\.$t:/\
|
||||
\t.section\t".opd","aw"\
|
||||
\t.align\t3\
|
||||
\t.globl\t$t\
|
||||
$t:\
|
||||
\t.quad\t.$t,.TOC.\@tocbase,0\
|
||||
\t.size\t$t,24\
|
||||
\t.previous\n\
|
||||
\t.type\t.$t,\@function\
|
||||
\t.globl\t.$t\
|
||||
.$t:/g;
|
||||
}
|
||||
}
|
||||
else {
|
||||
# Otherwise: simply drop the leading '.' from every occurrence of
# each entry-point name.
foreach $t (@items) {
|
||||
$d=~s/\.$t/$t/g;
|
||||
}
|
||||
}
|
||||
# hide internal labels to avoid pollution of name table...
|
||||
$d=~s/Lppcasm_/.Lppcasm_/gm;
|
||||
print $d;
|
||||
}
|
||||
|
||||
# Emit the assembly for AIX targets: print the text returned by data()
# without any symbol rewriting. Takes no arguments.
sub do_aix {
|
||||
# AIX assembler is smart enough to please the linker without
|
||||
# making us do something special...
|
||||
print &data();
|
||||
}
|
||||
|
||||
# MacOSX 32 bit
|
||||
# Emit the assembly for OS X targets: take the text returned by data(),
# rewrite entry-point names, the .machine directive and the comment
# character, then print the result. Takes no arguments; reads @items.
sub do_osx {
|
||||
$d=&data();
|
||||
# Change the bn symbol prefix from '.' to '_'
|
||||
foreach $t (@items) {
|
||||
$d=~s/\.$t/_$t/g;
|
||||
}
|
||||
# Change .machine to something OS X asm will accept
|
||||
# (".machine" and the rest of that line are replaced by ".text")
$d=~s/\.machine.*/.text/g;
|
||||
# NOTE: this substitution replaces every '#' in the text, not only
# those that start a comment.
$d=~s/\#/;/g; # change comment from '#' to ';'
|
||||
print $d;
|
||||
}
|
||||
|
||||
# BSD (Untested)
|
||||
# Fallback emitter (used when the output name matches none of linux/aix/osx):
# take the text returned by data(), change the '.' prefix of every
# entry-point name listed in @items to '_', and print the result.
# Takes no arguments.
sub do_bsd {
|
||||
$d=&data();
|
||||
foreach $t (@items) {
|
||||
# '.name' -> '_name' for every occurrence in the text
$d=~s/\.$t/_$t/g;
|
||||
}
|
||||
print $d;
|
||||
}
|
||||
|
||||
sub data {
|
||||
local($data)=<<EOF;
|
||||
$data=<<EOF;
|
||||
#--------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
@ -297,33 +221,20 @@ sub data {
|
||||
#
|
||||
# Defines to be used in the assembly code.
|
||||
#
|
||||
.set r0,0 # we use it as storage for value of 0
|
||||
.set SP,1 # preserved
|
||||
.set RTOC,2 # preserved
|
||||
.set r3,3 # 1st argument/return value
|
||||
.set r4,4 # 2nd argument/volatile register
|
||||
.set r5,5 # 3rd argument/volatile register
|
||||
.set r6,6 # ...
|
||||
.set r7,7
|
||||
.set r8,8
|
||||
.set r9,9
|
||||
.set r10,10
|
||||
.set r11,11
|
||||
.set r12,12
|
||||
.set r13,13 # not used, nor any other "below" it...
|
||||
|
||||
.set BO_IF_NOT,4
|
||||
.set BO_IF,12
|
||||
.set BO_dCTR_NZERO,16
|
||||
.set BO_dCTR_ZERO,18
|
||||
.set BO_ALWAYS,20
|
||||
.set CR0_LT,0;
|
||||
.set CR0_GT,1;
|
||||
.set CR0_EQ,2
|
||||
.set CR1_FX,4;
|
||||
.set CR1_FEX,5;
|
||||
.set CR1_VX,6
|
||||
.set LR,8
|
||||
#.set r0,0 # we use it as storage for value of 0
|
||||
#.set SP,1 # preserved
|
||||
#.set RTOC,2 # preserved
|
||||
#.set r3,3 # 1st argument/return value
|
||||
#.set r4,4 # 2nd argument/volatile register
|
||||
#.set r5,5 # 3rd argument/volatile register
|
||||
#.set r6,6 # ...
|
||||
#.set r7,7
|
||||
#.set r8,8
|
||||
#.set r9,9
|
||||
#.set r10,10
|
||||
#.set r11,11
|
||||
#.set r12,12
|
||||
#.set r13,13 # not used, nor any other "below" it...
|
||||
|
||||
# Declare function names to be global
|
||||
# NOTE: For gcc these names MUST be changed to remove
|
||||
@ -478,7 +389,7 @@ sub data {
|
||||
|
||||
$ST r9,`6*$BNSZ`(r3) #r[6]=c1
|
||||
$ST r10,`7*$BNSZ`(r3) #r[7]=c2
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -903,7 +814,7 @@ sub data {
|
||||
$ST r9, `15*$BNSZ`(r3) #r[15]=c1;
|
||||
|
||||
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
|
||||
.long 0x00000000
|
||||
|
||||
@ -1055,7 +966,7 @@ sub data {
|
||||
|
||||
$ST r10,`6*$BNSZ`(r3) #r[6]=c1
|
||||
$ST r11,`7*$BNSZ`(r3) #r[7]=c2
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -1591,7 +1502,7 @@ sub data {
|
||||
adde r10,r10,r9
|
||||
$ST r12,`14*$BNSZ`(r3) #r[14]=c3;
|
||||
$ST r10,`15*$BNSZ`(r3) #r[15]=c1;
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -1623,7 +1534,7 @@ sub data {
|
||||
subfc. r7,r0,r6 # If r6 is 0 then result is 0.
|
||||
# if r6 > 0 then result !=0
|
||||
# In either case carry bit is set.
|
||||
bc BO_IF,CR0_EQ,Lppcasm_sub_adios
|
||||
beq Lppcasm_sub_adios
|
||||
addi r4,r4,-$BNSZ
|
||||
addi r3,r3,-$BNSZ
|
||||
addi r5,r5,-$BNSZ
|
||||
@ -1635,11 +1546,11 @@ Lppcasm_sub_mainloop:
|
||||
# if carry = 1 this is r7-r8. Else it
|
||||
# is r7-r8 -1 as we need.
|
||||
$STU r6,$BNSZ(r3)
|
||||
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
|
||||
bdnz- Lppcasm_sub_mainloop
|
||||
Lppcasm_sub_adios:
|
||||
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
|
||||
andi. r3,r3,1 # keep only last bit.
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
|
||||
@ -1670,7 +1581,7 @@ Lppcasm_sub_adios:
|
||||
# check for r6 = 0. Is this needed?
|
||||
#
|
||||
addic. r6,r6,0 #test r6 and clear carry bit.
|
||||
bc BO_IF,CR0_EQ,Lppcasm_add_adios
|
||||
beq Lppcasm_add_adios
|
||||
addi r4,r4,-$BNSZ
|
||||
addi r3,r3,-$BNSZ
|
||||
addi r5,r5,-$BNSZ
|
||||
@ -1680,10 +1591,10 @@ Lppcasm_add_mainloop:
|
||||
$LDU r8,$BNSZ(r5)
|
||||
adde r8,r7,r8
|
||||
$STU r8,$BNSZ(r3)
|
||||
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
|
||||
bdnz- Lppcasm_add_mainloop
|
||||
Lppcasm_add_adios:
|
||||
addze r3,r0 #return carry bit.
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -1707,24 +1618,24 @@ Lppcasm_add_adios:
|
||||
# r5 = d
|
||||
|
||||
$UCMPI 0,r5,0 # compare r5 and 0
|
||||
bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0
|
||||
bne Lppcasm_div1 # proceed if d!=0
|
||||
li r3,-1 # d=0 return -1
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
Lppcasm_div1:
|
||||
xor r0,r0,r0 #r0=0
|
||||
li r8,$BITS
|
||||
$CNTLZ. r7,r5 #r7 = num leading 0s in d.
|
||||
bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros
|
||||
beq Lppcasm_div2 #proceed if no leading zeros
|
||||
subf r8,r7,r8 #r8 = BN_num_bits_word(d)
|
||||
$SHR. r9,r3,r8 #are there any bits above r8'th?
|
||||
$TR 16,r9,r0 #if there're, signal to dump core...
|
||||
Lppcasm_div2:
|
||||
$UCMP 0,r3,r5 #h>=d?
|
||||
bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not
|
||||
blt Lppcasm_div3 #goto Lppcasm_div3 if not
|
||||
subf r3,r5,r3 #h-=d ;
|
||||
Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
|
||||
cmpi 0,0,r7,0 # is (i == 0)?
|
||||
bc BO_IF,CR0_EQ,Lppcasm_div4
|
||||
beq Lppcasm_div4
|
||||
$SHL r3,r3,r7 # h = (h<< i)
|
||||
$SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
|
||||
$SHL r5,r5,r7 # d<<=i
|
||||
@ -1741,7 +1652,7 @@ Lppcasm_divouterloop:
|
||||
$SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
|
||||
# compute here for innerloop.
|
||||
$UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
|
||||
bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not
|
||||
bne Lppcasm_div5 # goto Lppcasm_div5 if not
|
||||
|
||||
li r8,-1
|
||||
$CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
|
||||
@ -1762,9 +1673,9 @@ Lppcasm_divinnerloop:
|
||||
# the following 2 instructions do that
|
||||
$SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
|
||||
or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
|
||||
$UCMP 1,r6,r7 # compare (tl <= r7)
|
||||
bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
|
||||
bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
|
||||
$UCMP cr1,r6,r7 # compare (tl <= r7)
|
||||
bne Lppcasm_divinnerexit
|
||||
ble cr1,Lppcasm_divinnerexit
|
||||
addi r8,r8,-1 #q--
|
||||
subf r12,r9,r12 #th -=dh
|
||||
$CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
|
||||
@ -1773,14 +1684,14 @@ Lppcasm_divinnerloop:
|
||||
Lppcasm_divinnerexit:
|
||||
$SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
|
||||
$SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
|
||||
$UCMP 1,r4,r11 # compare l and tl
|
||||
$UCMP cr1,r4,r11 # compare l and tl
|
||||
add r12,r12,r10 # th+=t
|
||||
bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
|
||||
bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
|
||||
addi r12,r12,1 # th++
|
||||
Lppcasm_div7:
|
||||
subf r11,r11,r4 #r11=l-tl
|
||||
$UCMP 1,r3,r12 #compare h and th
|
||||
bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
|
||||
$UCMP cr1,r3,r12 #compare h and th
|
||||
bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
|
||||
addi r8,r8,-1 # q--
|
||||
add r3,r5,r3 # h+=d
|
||||
Lppcasm_div8:
|
||||
@ -1791,12 +1702,12 @@ Lppcasm_div8:
|
||||
# the following 2 instructions will do this.
|
||||
$INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
|
||||
$ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
|
||||
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
|
||||
bdz Lppcasm_div9 #if (count==0) break ;
|
||||
$SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
|
||||
b Lppcasm_divouterloop
|
||||
Lppcasm_div9:
|
||||
or r3,r8,r0
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -1822,7 +1733,7 @@ Lppcasm_div9:
|
||||
# No unrolling done here. Not performance critical.
|
||||
|
||||
addic. r5,r5,0 #test r5.
|
||||
bc BO_IF,CR0_EQ,Lppcasm_sqr_adios
|
||||
beq Lppcasm_sqr_adios
|
||||
addi r4,r4,-$BNSZ
|
||||
addi r3,r3,-$BNSZ
|
||||
mtctr r5
|
||||
@ -1833,9 +1744,9 @@ Lppcasm_sqr_mainloop:
|
||||
$UMULH r8,r6,r6
|
||||
$STU r7,$BNSZ(r3)
|
||||
$STU r8,$BNSZ(r3)
|
||||
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
|
||||
bdnz- Lppcasm_sqr_mainloop
|
||||
Lppcasm_sqr_adios:
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
|
||||
@ -1858,7 +1769,7 @@ Lppcasm_sqr_adios:
|
||||
xor r0,r0,r0
|
||||
xor r12,r12,r12 # used for carry
|
||||
rlwinm. r7,r5,30,2,31 # num >> 2
|
||||
bc BO_IF,CR0_EQ,Lppcasm_mw_REM
|
||||
beq Lppcasm_mw_REM
|
||||
mtctr r7
|
||||
Lppcasm_mw_LOOP:
|
||||
#mul(rp[0],ap[0],w,c1);
|
||||
@ -1896,11 +1807,11 @@ Lppcasm_mw_LOOP:
|
||||
|
||||
addi r3,r3,`4*$BNSZ`
|
||||
addi r4,r4,`4*$BNSZ`
|
||||
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
|
||||
bdnz- Lppcasm_mw_LOOP
|
||||
|
||||
Lppcasm_mw_REM:
|
||||
andi. r5,r5,0x3
|
||||
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
|
||||
beq Lppcasm_mw_OVER
|
||||
#mul(rp[0],ap[0],w,c1);
|
||||
$LD r8,`0*$BNSZ`(r4)
|
||||
$UMULL r9,r6,r8
|
||||
@ -1912,7 +1823,7 @@ Lppcasm_mw_REM:
|
||||
|
||||
addi r5,r5,-1
|
||||
cmpli 0,0,r5,0
|
||||
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
|
||||
beq Lppcasm_mw_OVER
|
||||
|
||||
|
||||
#mul(rp[1],ap[1],w,c1);
|
||||
@ -1926,7 +1837,7 @@ Lppcasm_mw_REM:
|
||||
|
||||
addi r5,r5,-1
|
||||
cmpli 0,0,r5,0
|
||||
bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
|
||||
beq Lppcasm_mw_OVER
|
||||
|
||||
#mul_add(rp[2],ap[2],w,c1);
|
||||
$LD r8,`2*$BNSZ`(r4)
|
||||
@ -1939,7 +1850,7 @@ Lppcasm_mw_REM:
|
||||
|
||||
Lppcasm_mw_OVER:
|
||||
addi r3,r12,0
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
|
||||
#
|
||||
@ -1964,7 +1875,7 @@ Lppcasm_mw_OVER:
|
||||
xor r0,r0,r0 #r0 = 0
|
||||
xor r12,r12,r12 #r12 = 0 . used for carry
|
||||
rlwinm. r7,r5,30,2,31 # num >> 2
|
||||
bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
|
||||
beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
|
||||
mtctr r7
|
||||
Lppcasm_maw_mainloop:
|
||||
#mul_add(rp[0],ap[0],w,c1);
|
||||
@ -2017,11 +1928,11 @@ Lppcasm_maw_mainloop:
|
||||
$ST r11,`3*$BNSZ`(r3)
|
||||
addi r3,r3,`4*$BNSZ`
|
||||
addi r4,r4,`4*$BNSZ`
|
||||
bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
|
||||
bdnz- Lppcasm_maw_mainloop
|
||||
|
||||
Lppcasm_maw_leftover:
|
||||
andi. r5,r5,0x3
|
||||
bc BO_IF,CR0_EQ,Lppcasm_maw_adios
|
||||
beq Lppcasm_maw_adios
|
||||
addi r3,r3,-$BNSZ
|
||||
addi r4,r4,-$BNSZ
|
||||
#mul_add(rp[0],ap[0],w,c1);
|
||||
@ -2036,7 +1947,7 @@ Lppcasm_maw_leftover:
|
||||
addze r12,r10
|
||||
$ST r9,0(r3)
|
||||
|
||||
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
|
||||
bdz Lppcasm_maw_adios
|
||||
#mul_add(rp[1],ap[1],w,c1);
|
||||
$LDU r8,$BNSZ(r4)
|
||||
$UMULL r9,r6,r8
|
||||
@ -2048,7 +1959,7 @@ Lppcasm_maw_leftover:
|
||||
addze r12,r10
|
||||
$ST r9,0(r3)
|
||||
|
||||
bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
|
||||
bdz Lppcasm_maw_adios
|
||||
#mul_add(rp[2],ap[2],w,c1);
|
||||
$LDU r8,$BNSZ(r4)
|
||||
$UMULL r9,r6,r8
|
||||
@ -2062,17 +1973,10 @@ Lppcasm_maw_leftover:
|
||||
|
||||
Lppcasm_maw_adios:
|
||||
addi r3,r12,0
|
||||
bclr BO_ALWAYS,CR0_LT
|
||||
blr
|
||||
.long 0x00000000
|
||||
.align 4
|
||||
EOF
|
||||
$data =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
|
||||
# if some assembler chokes on some simplified mnemonic,
|
||||
# this is the spot to fix it up, e.g.:
|
||||
# GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
|
||||
$data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
|
||||
# assembler X doesn't accept li, load immediate value
|
||||
#$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
|
||||
return($data);
|
||||
}
|
||||
$data =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
print $data;
|
||||
close STDOUT;
|
||||
|
Loading…
Reference in New Issue
Block a user