mirror of
https://github.com/openssl/openssl.git
synced 2024-11-21 01:15:20 +08:00
Sparc v8plus assembler.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
This commit is contained in:
parent
d872c55c20
commit
4f5fac8011
3
CHANGES
3
CHANGES
@ -5,6 +5,9 @@
|
||||
|
||||
Changes between 0.9.2b and 0.9.3
|
||||
|
||||
*) Sparc v8plus assembler for the bignum library.
|
||||
[Andy Polyakov <appro@fy.chalmers.se>]
|
||||
|
||||
*) Accept any -xxx and +xxx compiler options in Configure.
|
||||
[Ulf Möller]
|
||||
|
||||
|
@ -115,8 +115,8 @@ my %table=(
|
||||
# Don't use -xtarget=ultra with SC4.2. It is broken, and will break exptest.
|
||||
# SC5.0 with the compiler common patch works.
|
||||
"solaris-sparc-sc4","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
|
||||
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
|
||||
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
|
||||
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
|
||||
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::",
|
||||
|
||||
# Sunos configs, assuming sparc for the gcc one.
|
||||
##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::",
|
||||
|
@ -101,6 +101,9 @@ asm/co86unix.cpp: asm/co-586.pl
|
||||
asm/sparcv8.o: asm/sparcv8.S
|
||||
$(CC) -c -o asm/sparcv8.o asm/sparcv8.S
|
||||
|
||||
asm/sparcv8plus.o: asm/sparcv8plus.S
|
||||
$(CC) -c -xarch=v8plus -o asm/sparcv8plus.o asm/sparcv8plus.S
|
||||
|
||||
# MIPS 64 bit assembler
|
||||
asm/mips3.o: asm/mips3.s
|
||||
/usr/bin/as -mips3 -O2 -o asm/mips3.o asm/mips3.s
|
||||
|
@ -1,4 +1,4 @@
|
||||
.ident "sparcv8.s, Version 1.1"
|
||||
.ident "sparcv8.s, Version 1.2"
|
||||
.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
|
||||
|
||||
/*
|
||||
@ -24,14 +24,14 @@
|
||||
/*
|
||||
* Revision history.
|
||||
*
|
||||
* 1.1 - new loop unrolling model(*)
|
||||
* - 10% performance boost(*)
|
||||
* 1.1 - new loop unrolling model(*);
|
||||
* 1.2 - made gas friendly;
|
||||
*
|
||||
* (*) see bn_asm.sparc.v8plus.S for details
|
||||
*/
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
.file "sparcv8.s"
|
||||
.file "bn_asm.sparc.v8.S"
|
||||
|
||||
.align 32
|
||||
|
||||
@ -546,48 +546,38 @@ bn_sub_words:
|
||||
.type bn_sub_words,#function
|
||||
.size bn_sub_words,(.-bn_sub_words)
|
||||
|
||||
#define FRAME_SIZE -96
|
||||
#define FRAME_SIZE -96
|
||||
|
||||
/*
|
||||
* Here is register usage map for *all* routines below.
|
||||
*/
|
||||
#define a_0 %l0
|
||||
#define a_0_ [%i1]
|
||||
#define a_1 %l1
|
||||
#define a_1_ [%i1+4]
|
||||
#define a_2 %l2
|
||||
#define a_2_ [%i1+8]
|
||||
#define a_3 %l3
|
||||
#define a_3_ [%i1+12]
|
||||
#define a_4 %l4
|
||||
#define a_4_ [%i1+16]
|
||||
#define a_5 %l5
|
||||
#define a_5_ [%i1+20]
|
||||
#define a_6 %l6
|
||||
#define a_6_ [%i1+24]
|
||||
#define a_7 %l7
|
||||
#define a_7_ [%i1+28]
|
||||
#define b_0 %g1
|
||||
#define b_0_ [%i2]
|
||||
#define b_1 %g2
|
||||
#define b_1_ [%i2+4]
|
||||
#define b_2 %g3
|
||||
#define b_2_ [%i2+8]
|
||||
#define b_3 %g4
|
||||
#define b_3_ [%i2+12]
|
||||
#define b_4 %i3
|
||||
#define b_4_ [%i2+16]
|
||||
#define b_5 %i4
|
||||
#define b_5_ [%i2+20]
|
||||
#define b_6 %i5
|
||||
#define b_6_ [%i2+24]
|
||||
#define b_7 %o5
|
||||
#define b_7_ [%i2+28]
|
||||
#define t_1 %o0
|
||||
#define t_2 %o1
|
||||
#define c_1 %o2
|
||||
#define c_2 %o3
|
||||
#define c_3 %o4
|
||||
#define t_1 %o0
|
||||
#define t_2 %o1
|
||||
|
||||
#define a(I) [%i1+4*I]
|
||||
#define b(I) [%i2+4*I]
|
||||
#define r(I) [%i0+4*I]
|
||||
|
||||
#define a_0 %l0
|
||||
#define a_1 %l1
|
||||
#define a_2 %l2
|
||||
#define a_3 %l3
|
||||
#define a_4 %l4
|
||||
#define a_5 %l5
|
||||
#define a_6 %l6
|
||||
#define a_7 %l7
|
||||
|
||||
#define b_0 %i3
|
||||
#define b_1 %i4
|
||||
#define b_2 %i5
|
||||
#define b_3 %o5
|
||||
#define b_4 %g1
|
||||
#define b_5 %g2
|
||||
#define b_6 %g3
|
||||
#define b_7 %g4
|
||||
|
||||
.align 32
|
||||
.global bn_mul_comba8
|
||||
@ -597,25 +587,25 @@ bn_sub_words:
|
||||
*/
|
||||
bn_mul_comba8:
|
||||
save %sp,FRAME_SIZE,%sp
|
||||
ld a_0_,a_0
|
||||
ld b_0_,b_0
|
||||
ld a(0),a_0
|
||||
ld b(0),b_0
|
||||
umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
|
||||
ld b_1_,b_1
|
||||
ld b(1),b_1
|
||||
rd %y,c_2
|
||||
st c_1,[%i0] !r[0]=c1;
|
||||
st c_1,r(0) !r[0]=c1;
|
||||
|
||||
umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
|
||||
ld a_1_,a_1
|
||||
ld a(1),a_1
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
addxcc %g0,t_2,c_3 !=
|
||||
addx %g0,%g0,c_1
|
||||
ld a_2_,a_2
|
||||
ld a(2),a_2
|
||||
umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2 !=
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
st c_2,[%i0+4] !r[1]=c2;
|
||||
st c_2,r(1) !r[1]=c2;
|
||||
addx c_1,%g0,c_1 !=
|
||||
|
||||
umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
|
||||
@ -623,19 +613,19 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx %g0,%g0,c_2
|
||||
ld b_2_,b_2
|
||||
ld b(2),b_2
|
||||
umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
ld b_3_,b_3
|
||||
ld b(3),b_3
|
||||
addx c_2,%g0,c_2 !=
|
||||
umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+8] !r[2]=c3;
|
||||
st c_3,r(2) !r[2]=c3;
|
||||
|
||||
umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
@ -647,19 +637,19 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
ld a_3_,a_3
|
||||
ld a(3),a_3
|
||||
umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2 !=
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3
|
||||
ld a_4_,a_4
|
||||
ld a(4),a_4
|
||||
umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+12] !r[3]=c1;
|
||||
st c_1,r(3) !r[3]=c1;
|
||||
|
||||
umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -676,19 +666,19 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1 !=
|
||||
ld b_4_,b_4
|
||||
ld b(4),b_4
|
||||
umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
ld b_5_,b_5
|
||||
ld b(5),b_5
|
||||
umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1 !=
|
||||
st c_2,[%i0+16] !r[4]=c2;
|
||||
st c_2,r(4) !r[4]=c2;
|
||||
|
||||
umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -710,19 +700,19 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx c_2,%g0,c_2
|
||||
ld a_5_,a_5
|
||||
ld a(5),a_5
|
||||
umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
ld a_6_,a_6
|
||||
ld a(6),a_6
|
||||
addx c_2,%g0,c_2 !=
|
||||
umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+20] !r[5]=c3;
|
||||
st c_3,r(5) !r[5]=c3;
|
||||
|
||||
umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
@ -748,19 +738,19 @@ bn_mul_comba8:
|
||||
addcc c_1,t_1,c_1 !=
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
ld b_6_,b_6
|
||||
ld b(6),b_6
|
||||
addx c_3,%g0,c_3 !=
|
||||
umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2 !=
|
||||
addx c_3,%g0,c_3
|
||||
ld b_7_,b_7
|
||||
ld b(7),b_7
|
||||
umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
st c_1,[%i0+24] !r[6]=c1;
|
||||
st c_1,r(6) !r[6]=c1;
|
||||
addx c_3,%g0,c_3 !=
|
||||
|
||||
umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
|
||||
@ -793,7 +783,7 @@ bn_mul_comba8:
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
ld a_7_,a_7
|
||||
ld a(7),a_7
|
||||
umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
@ -804,7 +794,7 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3 !=
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+28] !r[7]=c2;
|
||||
st c_2,r(7) !r[7]=c2;
|
||||
|
||||
umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3 !=
|
||||
@ -841,7 +831,7 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+32] !r[8]=c3;
|
||||
st c_3,r(8) !r[8]=c3;
|
||||
|
||||
umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
@ -873,7 +863,7 @@ bn_mul_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+36] !r[9]=c1;
|
||||
st c_1,r(9) !r[9]=c1;
|
||||
|
||||
umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -900,7 +890,7 @@ bn_mul_comba8:
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+40] !r[10]=c2;
|
||||
st c_2,r(10) !r[10]=c2;
|
||||
|
||||
umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -921,7 +911,7 @@ bn_mul_comba8:
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+44] !r[11]=c3;
|
||||
st c_3,r(11) !r[11]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
|
||||
@ -938,7 +928,7 @@ bn_mul_comba8:
|
||||
addcc c_1,t_1,c_1 !=
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
st c_1,[%i0+48] !r[12]=c1;
|
||||
st c_1,r(12) !r[12]=c1;
|
||||
addx c_3,%g0,c_3 !=
|
||||
|
||||
umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
|
||||
@ -951,15 +941,15 @@ bn_mul_comba8:
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+52] !r[13]=c2;
|
||||
st c_2,r(13) !r[13]=c2;
|
||||
|
||||
umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
nop !=
|
||||
st c_3,[%i0+56] !r[14]=c3;
|
||||
st c_1,[%i0+60] !r[15]=c1;
|
||||
st c_3,r(14) !r[14]=c3;
|
||||
st c_1,r(15) !r[15]=c1;
|
||||
|
||||
ret
|
||||
restore %g0,%g0,%o0
|
||||
@ -976,45 +966,45 @@ bn_mul_comba8:
|
||||
*/
|
||||
bn_mul_comba4:
|
||||
save %sp,FRAME_SIZE,%sp
|
||||
ld a_0_,a_0
|
||||
ld b_0_,b_0
|
||||
ld a(0),a_0
|
||||
ld b(0),b_0
|
||||
umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
|
||||
ld b_1_,b_1
|
||||
ld b(1),b_1
|
||||
rd %y,c_2
|
||||
st c_1,[%i0] !r[0]=c1;
|
||||
st c_1,r(0) !r[0]=c1;
|
||||
|
||||
umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
|
||||
ld a_1_,a_1
|
||||
ld a(1),a_1
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2 !=
|
||||
addxcc %g0,t_2,c_3
|
||||
addx %g0,%g0,c_1
|
||||
ld a_2_,a_2
|
||||
ld a(2),a_2
|
||||
umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1 !=
|
||||
st c_2,[%i0+4] !r[1]=c2;
|
||||
st c_2,r(1) !r[1]=c2;
|
||||
|
||||
umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2 !=
|
||||
addxcc c_1,t_2,c_1
|
||||
addx %g0,%g0,c_2
|
||||
ld b_2_,b_2
|
||||
ld b(2),b_2
|
||||
umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
addx c_2,%g0,c_2 !=
|
||||
ld b_3_,b_3
|
||||
ld b(3),b_3
|
||||
umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2 !=
|
||||
addxcc c_1,t_2,c_1
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+8] !r[2]=c3;
|
||||
st c_3,r(2) !r[2]=c3;
|
||||
|
||||
umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
@ -1026,7 +1016,7 @@ bn_mul_comba4:
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2 !=
|
||||
addx c_3,%g0,c_3
|
||||
ld a_3_,a_3
|
||||
ld a(3),a_3
|
||||
umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
rd %y,t_2
|
||||
@ -1037,7 +1027,7 @@ bn_mul_comba4:
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+12] !r[3]=c1;
|
||||
st c_1,r(3) !r[3]=c1;
|
||||
|
||||
umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -1054,7 +1044,7 @@ bn_mul_comba4:
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1 !=
|
||||
st c_2,[%i0+16] !r[4]=c2;
|
||||
st c_2,r(4) !r[4]=c2;
|
||||
|
||||
umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -1065,15 +1055,15 @@ bn_mul_comba4:
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+20] !r[5]=c3;
|
||||
st c_3,r(5) !r[5]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2 !=
|
||||
st c_1,[%i0+24] !r[6]=c1;
|
||||
st c_2,[%i0+28] !r[7]=c2;
|
||||
st c_1,r(6) !r[6]=c1;
|
||||
st c_2,r(7) !r[7]=c2;
|
||||
|
||||
ret
|
||||
restore %g0,%g0,%o0
|
||||
@ -1086,13 +1076,13 @@ bn_mul_comba4:
|
||||
.global bn_sqr_comba8
|
||||
bn_sqr_comba8:
|
||||
save %sp,FRAME_SIZE,%sp
|
||||
ld a_0_,a_0
|
||||
ld a_1_,a_1
|
||||
ld a(0),a_0
|
||||
ld a(1),a_1
|
||||
umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
|
||||
rd %y,c_2
|
||||
st c_1,[%i0] !r[0]=c1;
|
||||
st c_1,r(0) !r[0]=c1;
|
||||
|
||||
ld a_2_,a_2
|
||||
ld a(2),a_2
|
||||
umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
@ -1100,7 +1090,7 @@ bn_sqr_comba8:
|
||||
addx %g0,%g0,c_1 !=
|
||||
addcc c_2,t_1,c_2
|
||||
addxcc c_3,t_2,c_3
|
||||
st c_2,[%i0+4] !r[1]=c2;
|
||||
st c_2,r(1) !r[1]=c2;
|
||||
addx c_1,%g0,c_1 !=
|
||||
|
||||
umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
|
||||
@ -1111,13 +1101,13 @@ bn_sqr_comba8:
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1
|
||||
addx c_2,%g0,c_2 !=
|
||||
ld a_3_,a_3
|
||||
ld a(3),a_3
|
||||
umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2 !=
|
||||
addxcc c_1,t_2,c_1
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+8] !r[2]=c3;
|
||||
st c_3,r(2) !r[2]=c3;
|
||||
|
||||
umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
@ -1126,7 +1116,7 @@ bn_sqr_comba8:
|
||||
addx %g0,%g0,c_3 !=
|
||||
addcc c_1,t_1,c_1
|
||||
addxcc c_2,t_2,c_2
|
||||
ld a_4_,a_4
|
||||
ld a(4),a_4
|
||||
addx c_3,%g0,c_3 !=
|
||||
umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
@ -1136,7 +1126,7 @@ bn_sqr_comba8:
|
||||
addcc c_1,t_1,c_1
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+12] !r[3]=c1;
|
||||
st c_1,r(3) !r[3]=c1;
|
||||
|
||||
umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -1154,12 +1144,12 @@ bn_sqr_comba8:
|
||||
addcc c_2,t_1,c_2
|
||||
addxcc c_3,t_2,c_3 !=
|
||||
addx c_1,%g0,c_1
|
||||
ld a_5_,a_5
|
||||
ld a(5),a_5
|
||||
umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2 !=
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
st c_2,[%i0+16] !r[4]=c2;
|
||||
st c_2,r(4) !r[4]=c2;
|
||||
addx c_1,%g0,c_1 !=
|
||||
|
||||
umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
|
||||
@ -1178,7 +1168,7 @@ bn_sqr_comba8:
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1
|
||||
addx c_2,%g0,c_2 !=
|
||||
ld a_6_,a_6
|
||||
ld a(6),a_6
|
||||
umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2 !=
|
||||
@ -1187,7 +1177,7 @@ bn_sqr_comba8:
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx c_2,%g0,c_2
|
||||
st c_3,[%i0+20] !r[5]=c3;
|
||||
st c_3,r(5) !r[5]=c3;
|
||||
|
||||
umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
|
||||
addcc c_1,t_1,c_1 !=
|
||||
@ -1213,13 +1203,13 @@ bn_sqr_comba8:
|
||||
addcc c_1,t_1,c_1 !=
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3
|
||||
ld a_7_,a_7
|
||||
ld a(7),a_7
|
||||
umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+24] !r[6]=c1;
|
||||
st c_1,r(6) !r[6]=c1;
|
||||
|
||||
umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -1253,7 +1243,7 @@ bn_sqr_comba8:
|
||||
addcc c_2,t_1,c_2
|
||||
addxcc c_3,t_2,c_3 !=
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+28] !r[7]=c2;
|
||||
st c_2,r(7) !r[7]=c2;
|
||||
|
||||
umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3 !=
|
||||
@ -1283,7 +1273,7 @@ bn_sqr_comba8:
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+32] !r[8]=c3;
|
||||
st c_3,r(8) !r[8]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
|
||||
@ -1310,7 +1300,7 @@ bn_sqr_comba8:
|
||||
addcc c_1,t_1,c_1
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+36] !r[9]=c1;
|
||||
st c_1,r(9) !r[9]=c1;
|
||||
|
||||
umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -1333,7 +1323,7 @@ bn_sqr_comba8:
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+40] !r[10]=c2;
|
||||
st c_2,r(10) !r[10]=c2;
|
||||
|
||||
umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -1350,7 +1340,7 @@ bn_sqr_comba8:
|
||||
addx c_2,%g0,c_2 !=
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+44] !r[11]=c3;
|
||||
st c_3,r(11) !r[11]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
|
||||
@ -1366,7 +1356,7 @@ bn_sqr_comba8:
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2 !=
|
||||
addx c_3,%g0,c_3
|
||||
st c_1,[%i0+48] !r[12]=c1;
|
||||
st c_1,r(12) !r[12]=c1;
|
||||
|
||||
umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2 !=
|
||||
@ -1376,15 +1366,15 @@ bn_sqr_comba8:
|
||||
addcc c_2,t_1,c_2 !=
|
||||
rd %y,t_2
|
||||
addxcc c_3,t_2,c_3
|
||||
st c_2,[%i0+52] !r[13]=c2;
|
||||
st c_2,r(13) !r[13]=c2;
|
||||
addx c_1,%g0,c_1 !=
|
||||
|
||||
umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
st c_3,[%i0+56] !r[14]=c3;
|
||||
st c_1,[%i0+60] !r[15]=c1;
|
||||
st c_3,r(14) !r[14]=c3;
|
||||
st c_1,r(15) !r[15]=c1;
|
||||
|
||||
ret
|
||||
restore %g0,%g0,%o0
|
||||
@ -1401,23 +1391,23 @@ bn_sqr_comba8:
|
||||
*/
|
||||
bn_sqr_comba4:
|
||||
save %sp,FRAME_SIZE,%sp
|
||||
ld a_0_,a_0
|
||||
ld a(0),a_0
|
||||
umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
|
||||
ld a_1_,a_1 !=
|
||||
ld a(1),a_1 !=
|
||||
rd %y,c_2
|
||||
st c_1,[%i0] !r[0]=c1;
|
||||
st c_1,r(0) !r[0]=c1;
|
||||
|
||||
ld a_1_,a_1
|
||||
ld a(1),a_1
|
||||
umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
rd %y,t_2
|
||||
addxcc %g0,t_2,c_3
|
||||
addx %g0,%g0,c_1 !=
|
||||
ld a_2_,a_2
|
||||
ld a(2),a_2
|
||||
addcc c_2,t_1,c_2
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1 !=
|
||||
st c_2,[%i0+4] !r[1]=c2;
|
||||
st c_2,r(1) !r[1]=c2;
|
||||
|
||||
umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -1427,12 +1417,12 @@ bn_sqr_comba4:
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1 !=
|
||||
addx c_2,%g0,c_2
|
||||
ld a_3_,a_3
|
||||
ld a(3),a_3
|
||||
umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3 !=
|
||||
rd %y,t_2
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+8] !r[2]=c3;
|
||||
st c_3,r(2) !r[2]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
|
||||
@ -1451,7 +1441,7 @@ bn_sqr_comba4:
|
||||
addcc c_1,t_1,c_1
|
||||
addxcc c_2,t_2,c_2
|
||||
addx c_3,%g0,c_3 !=
|
||||
st c_1,[%i0+12] !r[3]=c1;
|
||||
st c_1,r(3) !r[3]=c1;
|
||||
|
||||
umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
|
||||
addcc c_2,t_1,c_2
|
||||
@ -1466,7 +1456,7 @@ bn_sqr_comba4:
|
||||
rd %y,t_2 !=
|
||||
addxcc c_3,t_2,c_3
|
||||
addx c_1,%g0,c_1
|
||||
st c_2,[%i0+16] !r[4]=c2;
|
||||
st c_2,r(4) !r[4]=c2;
|
||||
|
||||
umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
|
||||
addcc c_3,t_1,c_3
|
||||
@ -1475,20 +1465,20 @@ bn_sqr_comba4:
|
||||
addx %g0,%g0,c_2 !=
|
||||
addcc c_3,t_1,c_3
|
||||
addxcc c_1,t_2,c_1
|
||||
st c_3,[%i0+20] !r[5]=c3;
|
||||
st c_3,r(5) !r[5]=c3;
|
||||
addx c_2,%g0,c_2 !=
|
||||
|
||||
umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
|
||||
addcc c_1,t_1,c_1
|
||||
rd %y,t_2
|
||||
addxcc c_2,t_2,c_2 !=
|
||||
st c_1,[%i0+24] !r[6]=c1;
|
||||
st c_2,[%i0+28] !r[7]=c2;
|
||||
st c_1,r(6) !r[6]=c1;
|
||||
st c_2,r(7) !r[7]=c2;
|
||||
|
||||
ret
|
||||
restore %g0,%g0,%o0
|
||||
|
||||
.type bn_sqr_comba4,#function
|
||||
.size bn_sqr_comba4,(.-bn_sqr_comba4)
|
||||
.align 32
|
||||
|
||||
.align 32
|
||||
|
1569
crypto/bn/asm/sparcv8plus.S
Normal file
1569
crypto/bn/asm/sparcv8plus.S
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user