openssl/crypto/bn/build.info
fangming.fang 1064616012 Optimize RSA on armv8
Add Neon path for RSA on armv8, this optimisation targets to A72
and N1 that are ones of important cores of infrastructure. Other
platforms are not impacted.

A72
                        old		new             improved
rsa  512 sign		9828.6		9738.7		-1%
rsa  512 verify		121497.2	122367.7	1%
rsa 1024 sign		1818		1816.9		0%
rsa 1024 verify		37175.6		37161.3		0%
rsa 2048 sign		267.3		267.4		0%
rsa 2048 verify		10127.6		10119.6		0%
rsa 3072 sign		86.8		87		0%
rsa 3072 verify		4604.2		4956.2		8%
rsa 4096 sign		38.3		38.5		1%
rsa 4096 verify		2619.8		2972.1		13%
rsa 7680 sign		5		7		40%
rsa 7680 verify		756	     	929.4		23%
rsa 15360 sign		0.8	     	1		25%
rsa 15360 verify	190.4	 	246		29%

N1
                        old		new             improved
rsa  512 sign		12599.2		12596.7		0%
rsa  512 verify		148636.1	148656.2	0%
rsa 1024 sign		2150.6		2148.9		0%
rsa 1024 verify		42353.5		42265.2		0%
rsa 2048 sign		305.5		305.3		0%
rsa 2048 verify		11209.7		11205.2		0%
rsa 3072 sign		97.8		98.2		0%
rsa 3072 verify		5061.3		5990.7		18%
rsa 4096 sign		42.8		43		0%
rsa 4096 verify		2867.6		3509.8		22%
rsa 7680 sign		5.5		8.4		53%
rsa 7680 verify		823.5		1058.3		29%
rsa 15360 sign		0.9		1.1		22%
rsa 15360 verify	207		273.9		32%

CustomizedGitHooks: yes
Change-Id: I01c732cc429d793c4eb5ffd27ccd30ff9cebf8af
Jira: SECLIB-540

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/14761)
2021-05-09 23:15:07 +10:00

181 lines
6.0 KiB
Plaintext

LIBS=../../libcrypto
$BNASM=bn_asm.c
IF[{- !$disabled{asm} -}]
# Define source files and macros per asm architecture
# Known macros are:
#
# OPENSSL_BN_ASM_PART_WORDS For any collection with /-586/ file names
# OPENSSL_BN_ASM_MONT For any collection with /-mont/ file names
# OPENSSL_BN_ASM_MONT5 For any collection with /-mont5/ file names
# OPENSSL_BN_ASM_GF2m For any collection with /-gf2m/ file names
# OPENSSL_IA32_SSE2 For any collection with /86/ file names
# when sse2 is enabled
# BN_DIV3W For any collection with /-div3w/ file names
#
# All variables are named in such a way that they can be "indexed" with
# $target{asm_arch}
$BNASM_x86=bn-586.s co-586.s x86-mont.s x86-gf2m.s
# bn-586 is the only one implementing bn_*_part_words
# => OPENSSL_BN_ASM_PART_WORDS
$BNDEF_x86=OPENSSL_BN_ASM_PART_WORDS OPENSSL_BN_ASM_MONT OPENSSL_BN_ASM_GF2m
$BNDEF_x86_sse2=OPENSSL_IA32_SSE2
$BNASM_x86_64=\
x86_64-mont.s x86_64-mont5.s x86_64-gf2m.s rsaz_exp.c rsaz-x86_64.s \
rsaz-avx2.s rsaz_exp_x2.c rsaz-avx512.s
IF[{- $config{target} !~ /^VC/ -}]
$BNASM_x86_64=asm/x86_64-gcc.c $BNASM_x86_64
ELSE
$BNASM_x86_64=bn_asm.c $BNASM_x86_64
ENDIF
$BNDEF_x86_64=OPENSSL_BN_ASM_MONT OPENSSL_BN_ASM_MONT5 OPENSSL_BN_ASM_GF2m
$BNDEF_x86_64_sse2=OPENSSL_IA32_SSE2
IF[{- $config{target} !~ /^VC/ -}]
$BNASM_ia64=bn-ia64.s ia64-mont.s
ELSE
$BNASM_ia64=bn_asm.c ia64-mont.s
ENDIF
$BNASM_sparcv9=asm/sparcv8plus.S sparcv9-mont.S sparcv9a-mont.S vis3-mont.S \
sparct4-mont.S
$BNDEF_sparcv9=OPENSSL_BN_ASM_MONT
$BNASM_sparcv9_ec2m=sparcv9-gf2m.S
$BNDEF_sparcv9_ec2m=OPENSSL_BN_ASM_GF2m
$BNASM_sparcv8=asm/sparcv8.S
$BNASM_alpha=bn_asm.c alpha-mont.S
$BNDEF_alpha=OPENSSL_BN_ASM_MONT
$BNASM_mips32=bn-mips.S mips-mont.S
$BNDEF_mips32=OPENSSL_BN_ASM_MONT
$BNASM_mips64=$BNASM_mips32
$BNDEF_mips64=$BNDEF_mips32
IF[{- ($target{perlasm_scheme} // '') eq '31' -}]
$BNASM_s390x=bn_asm.c s390x-mont.S
ELSE
$BNASM_s390x=asm/s390x.S s390x-mont.S
ENDIF
$BNDEF_s390x=OPENSSL_BN_ASM_MONT
$BNASM_s390x_ec2m=s390x-gf2m.s
$BNDEF_s390x_ec2m=OPENSSL_BN_ASM_GF2m
$BNASM_armv4=bn_asm.c armv4-mont.S
$BNDEF_armv4=OPENSSL_BN_ASM_MONT
$BNASM_armv4_ec2m=armv4-gf2m.S
$BNDEF_armv4_ec2m=OPENSSL_BN_ASM_GF2m
$BNASM_aarch64=bn_asm.c armv8-mont.S
$BNDEF_aarch64=OPENSSL_BN_ASM_MONT
$BNASM_parisc11=bn_asm.c parisc-mont.s
$BNDEF_parisc11=OPENSSL_BN_ASM_MONT
$BNASM_parisc20_64=$BNASM_parisc11
$BNDEF_parisc20_64=$BNDEF_parisc11
$BNASM_ppc32=bn-ppc.s ppc-mont.s
$BNDEF_ppc32=OPENSSL_BN_ASM_MONT
$BNASM_ppc64=$BNASM_ppc32 ppc64-mont-fixed.s
$BNDEF_ppc64=$BNDEF_ppc32
$BNASM_c64xplus=asm/bn-c64xplus.asm
$BNASM_c64xplus_ec2m=c64xplus-gf2m.s
$BNDEF_c64xplus_ec2m=OPENSSL_BN_ASM_GF2m
# Now that we have defined all the arch specific variables, use the
# appropriate ones, and define the appropriate macros
IF[$BNASM_{- $target{asm_arch} -}]
$BNASM=$BNASM_{- $target{asm_arch} -}
$BNDEF=$BNDEF_{- $target{asm_arch} -}
IF[{- !$disabled{ec2m} -}]
$BNASM=$BNASM $BNASM_{- $target{asm_arch} -}_ec2m
$BNDEF=$BNDEF $BNDEF_{- $target{asm_arch} -}_ec2m
ENDIF
IF[{- !$disabled{sse2} -}]
$BNDEF=$BNDEF $BNDEF_{- $target{asm_arch} -}_sse2
ENDIF
ENDIF
ENDIF
$COMMON=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \
bn_mod.c bn_conv.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_sqr.c \
bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
bn_intern.c bn_dh.c bn_rsa_fips186_4.c bn_const.c
SOURCE[../../libcrypto]=$COMMON $BNASM bn_print.c bn_err.c bn_srp.c
DEFINE[../../libcrypto]=$BNDEF
IF[{- !$disabled{'deprecated-3.0'} -}]
SOURCE[../../libcrypto]=bn_depr.c bn_x931p.c
ENDIF
SOURCE[../../providers/libfips.a]=$COMMON $BNASM
DEFINE[../../providers/libfips.a]=$BNDEF
# Because some CPUID implementations use some BN assembler (!!!), we
# must include assembler code into the legacy provider under the same
# conditions as CPUID code is included. See ../build.info
SOURCE[../../providers/liblegacy.a]=$BNASM
DEFINE[../../providers/liblegacy.a]=$BNDEF
# Implementations are now spread across several libraries, so the defines
# need to be applied to all affected libraries and modules.
DEFINE[../../providers/libcommon.a]=$BNDEF
INCLUDE[bn_exp.o]=..
GENERATE[bn-586.s]=asm/bn-586.pl
DEPEND[bn-586.s]=../perlasm/x86asm.pl
GENERATE[co-586.s]=asm/co-586.pl
DEPEND[co-586.s]=../perlasm/x86asm.pl
GENERATE[x86-mont.s]=asm/x86-mont.pl
DEPEND[x86-mont.s]=../perlasm/x86asm.pl
GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl
DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl
GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl
INCLUDE[sparcv9a-mont.o]=..
GENERATE[sparcv9-mont.S]=asm/sparcv9-mont.pl
INCLUDE[sparcv9-mont.o]=..
GENERATE[vis3-mont.S]=asm/vis3-mont.pl
INCLUDE[vis3-mont.o]=..
GENERATE[sparct4-mont.S]=asm/sparct4-mont.pl
INCLUDE[sparct4-mont.o]=..
GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl
INCLUDE[sparcv9-gf2m.o]=..
GENERATE[bn-mips.S]=asm/mips.pl
INCLUDE[bn-mips.o]=..
GENERATE[mips-mont.S]=asm/mips-mont.pl
INCLUDE[mips-mont.o]=..
GENERATE[s390x-mont.S]=asm/s390x-mont.pl
GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl
GENERATE[x86_64-mont.s]=asm/x86_64-mont.pl
GENERATE[x86_64-mont5.s]=asm/x86_64-mont5.pl
GENERATE[x86_64-gf2m.s]=asm/x86_64-gf2m.pl
GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl
GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl
GENERATE[rsaz-avx512.s]=asm/rsaz-avx512.pl
GENERATE[bn-ia64.s]=asm/ia64.S
GENERATE[ia64-mont.s]=asm/ia64-mont.pl
GENERATE[parisc-mont.s]=asm/parisc-mont.pl
# ppc - AIX, Linux, MacOS X...
GENERATE[bn-ppc.s]=asm/ppc.pl
GENERATE[ppc-mont.s]=asm/ppc-mont.pl
GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl
GENERATE[ppc64-mont-fixed.s]=asm/ppc64-mont-fixed.pl
GENERATE[alpha-mont.S]=asm/alpha-mont.pl
GENERATE[armv4-mont.S]=asm/armv4-mont.pl
INCLUDE[armv4-mont.o]=..
GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl
INCLUDE[armv4-gf2m.o]=..
GENERATE[armv8-mont.S]=asm/armv8-mont.pl
INCLUDE[armv8-mont.o]=..