x86[_64] assembly pack: add Silvermont performance data.

Reviewed-by: Rich Salz <rsalz@openssl.org>
2025-03-19 19:50:42 +08:00 · 2014-08-30 19:13:49 +02:00 · 2014-08-30 19:13:49 +02:00 · b59f92e75d
commit b59f92e75d
parent 0f957287df
8 changed files with 13 additions and 2 deletions
--- a/crypto/aes/asm/bsaes-x86_64.pl
+++ b/crypto/aes/asm/bsaes-x86_64.pl
@ -40,6 +40,7 @@
 # Core 2    	9.30		8.69		+7%
 # Nehalem(**) 	7.63		6.88		+11%
 # Atom	    	17.1		16.4		+4%
+# Silvermont	-		12.9
 #
 # (*)	Comparison is not completely fair, because "this" is ECB,
 #	i.e. no extra processing such as counter values calculation
@ -78,6 +79,7 @@
 # Core 2	9.98
 # Nehalem	7.80
 # Atom		17.9
+# Silvermont	14.0
 #
 # November 2011.
 #
--- a/crypto/aes/asm/vpaes-x86.pl
+++ b/crypto/aes/asm/vpaes-x86.pl
@ -30,6 +30,7 @@
 # Core 2(**)	28.1/41.4/18.3		21.9/25.2(***)
 # Nehalem	27.9/40.4/18.1		10.2/11.9
 # Atom		70.7/92.1/60.1		61.1/75.4(***)
+# Silvermont	45.4/62.9/24.1		49.2/61.1(***)
 #
 # (*)	"Hyper-threading" in the context refers rather to cache shared
 #	among multiple cores, than to specifically Intel HTT. As vast
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@ -30,6 +30,7 @@
 # Core 2(**)	29.6/41.1/14.3		21.9/25.2(***)
 # Nehalem	29.6/40.3/14.6		10.0/11.8
 # Atom		57.3/74.2/32.1		60.9/77.2(***)
+# Silvermont	52.7/64.0/19.5		48.8/60.8(***)
 #
 # (*)	"Hyper-threading" in the context refers rather to cache shared
 #	among multiple cores, than to specifically Intel HTT. As vast
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@ -64,6 +64,7 @@
 # Ivy Bridge	1.80(+7%)
 # Haswell	0.55(+93%) (if system doesn't support AVX)
 # Bulldozer	1.49(+27%)
+# Silvermont	2.88(+13%)

 # March 2013
 #
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@ -93,16 +93,19 @@
 # P4		10.6		-
 # AMD K8	7.1		-
 # Core2		7.3		6.0/+22%	-
-# Atom		12.5		9.3(*)/+35%	-
 # Westmere	7.3		5.5/+33%	-
 # Sandy Bridge	8.8		6.2/+40%	5.1(**)/+73%
 # Ivy Bridge	7.2		4.8/+51%	4.7(**)/+53%
 # Haswell	6.5		4.3/+51%	4.1(**)/+58%
 # Bulldozer	11.6		6.0/+92%
 # VIA Nano	10.6		7.5/+41%
+# Atom		12.5		9.3(*)/+35%
+# Silvermont	14.5		9.9(*)/+46%
 #
 # (*)	Loop is 1056 instructions long and expected result is ~8.25.
-#	It remains mystery [to me] why ILP is limited to 1.7.
+#	The discrepancy is because of front-end limitations, so
+#	called MS-ROM penalties, and on Silvermont even rotate's
+#	limited parallelism.
 #
 # (**)	As per above comment, the result is for AVX *plus* sh[rl]d.

--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@ -53,6 +53,7 @@
 # Bulldozer	36	-	27/22		17.0	13.6
 # VIA Nano	36	-	25/22		16.8	16.5
 # Atom		50	-	30/25		21.9	18.9
+# Silvermont	40	-	34/31		22.9	20.6
 #
 # (*)	numbers after slash are for unrolled loop, where applicable;
 # (**)	x86_64 assembly performance is presented for reference
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@ -28,6 +28,7 @@
 # Bulldozer	121	-	50	14.0	13.5
 # VIA Nano	91	-	52	33	14.7
 # Atom		126	-	68	48(***)	14.7
+# Silvermont	97	-	58	42(***)	17.5
 #
 # (*)	whichever best applicable.
 # (**)	x86_64 assembler performance is presented for reference
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@ -89,6 +89,7 @@
 # Bulldozer	21.1	13.6(+54%)  13.6(+54%(***)) 13.5    8.58(+57%)
 # VIA Nano	23.0	16.5(+39%)  -		    14.7    -
 # Atom		23.0	18.9(+22%)  -		    14.7    -
+# Silvermont	27.4	20.6(+33%)  -               17.5    -
 #
 # (*)	whichever best applicable;
 # (**)	switch from ror to shrd stands for fair share of improvement;