summaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2016-10-14 13:25:06 +0200
committerAndy Polyakov <appro@openssl.org>2016-10-24 13:01:13 +0200
commitace05265d2d599e350cf84ed60955b7f2b173bc9 (patch)
tree5c5ef8cf19ac7aa60e3eca068bd3ca8e1558129c /crypto
parentsha/keccak1600.c: add known answer and verify result with memcmp. (diff)
downloadopenssl-ace05265d2d599e350cf84ed60955b7f2b173bc9.tar.xz
openssl-ace05265d2d599e350cf84ed60955b7f2b173bc9.zip
x86_64 assembly pack: add Goldmont performance results.
Reviewed-by: Richard Levitte <levitte@openssl.org>
Diffstat (limited to 'crypto')
-rw-r--r--crypto/aes/asm/aesni-x86_64.pl1
-rw-r--r--crypto/aes/asm/bsaes-x86_64.pl2
-rw-r--r--crypto/aes/asm/vpaes-x86_64.pl1
-rwxr-xr-xcrypto/chacha/asm/chacha-x86.pl1
-rwxr-xr-xcrypto/chacha/asm/chacha-x86_64.pl1
-rw-r--r--crypto/modes/asm/ghash-x86_64.pl1
-rwxr-xr-xcrypto/poly1305/asm/poly1305-x86.pl1
-rwxr-xr-xcrypto/poly1305/asm/poly1305-x86_64.pl1
-rwxr-xr-xcrypto/sha/asm/sha1-x86_64.pl2
-rw-r--r--crypto/sha/asm/sha512-586.pl1
-rwxr-xr-xcrypto/sha/asm/sha512-x86_64.pl3
11 files changed, 14 insertions, 1 deletions
diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl
index 397e82f8c7..443f2f7542 100644
--- a/crypto/aes/asm/aesni-x86_64.pl
+++ b/crypto/aes/asm/aesni-x86_64.pl
@@ -179,6 +179,7 @@
# Haswell 4.44/0.63 0.63 0.73 0.63 0.70
# Skylake 2.62/0.63 0.63 0.63 0.63
# Silvermont 5.75/3.54 3.56 4.12 3.87(*) 4.11
+# Goldmont 3.82/1.26 1.26 1.29 1.29 1.50
# Bulldozer 5.77/0.70 0.72 0.90 0.70 0.95
#
# (*) Atom Silvermont ECB result is suboptimal because of penalties
diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl
index d257a3efe6..0d49947d81 100644
--- a/crypto/aes/asm/bsaes-x86_64.pl
+++ b/crypto/aes/asm/bsaes-x86_64.pl
@@ -48,6 +48,7 @@
# Nehalem(**) 7.63 6.88 +11%
# Atom 17.1 16.4 +4%
# Silvermont - 12.9
+# Goldmont - 8.85
#
# (*) Comparison is not completely fair, because "this" is ECB,
# i.e. no extra processing such as counter values calculation
@@ -87,6 +88,7 @@
# Nehalem 7.80
# Atom 17.9
# Silvermont 14.0
+# Goldmont 10.2
#
# November 2011.
#
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
index dd1f13a271..b715aca167 100644
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -38,6 +38,7 @@
# Nehalem 29.6/40.3/14.6 10.0/11.8
# Atom 57.3/74.2/32.1 60.9/77.2(***)
# Silvermont 52.7/64.0/19.5 48.8/60.8(***)
+# Goldmont 38.9/49.0/17.8 10.6/12.6
#
# (*) "Hyper-threading" in the context refers rather to cache shared
# among multiple cores, than to specifically Intel HTT. As vast
diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl
index 3c6e67d9c8..f00b7d2935 100755
--- a/crypto/chacha/asm/chacha-x86.pl
+++ b/crypto/chacha/asm/chacha-x86.pl
@@ -29,6 +29,7 @@
# Sandy Bridge 10.5/+47% 3.20
# Haswell 8.15/+50% 2.83
# Silvermont 17.4/+36% 8.35
+# Goldmont 13.4/+40% 4.36
# Sledgehammer 10.2/+54%
# Bulldozer 13.4/+50% 4.38(*)
#
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 4b1750cd5d..347dfcb3e5 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -29,6 +29,7 @@
# Ivy Bridge 6.71/+46% 5.40/6.49 2.41
# Haswell 5.92/+43% 5.20/6.45 2.42 1.23
# Silvermont 12.0/+33% 7.75/7.40 7.03(iii)
+# Goldmont 10.6/+17% 5.10/- 3.28
# Sledgehammer 7.28/+52% -/14.2(ii) -
# Bulldozer 9.66/+28% 9.85/11.1 3.06(iv)
# VIA Nano 10.5/+46% 6.72/8.60 6.05
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index 6941b3c234..c782edcbb8 100644
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -74,6 +74,7 @@
# Skylake 0.44(+110%)(if system doesn't support AVX)
# Bulldozer 1.49(+27%)
# Silvermont 2.88(+13%)
+# Goldmont 1.08(+24%)
# March 2013
#
diff --git a/crypto/poly1305/asm/poly1305-x86.pl b/crypto/poly1305/asm/poly1305-x86.pl
index ecc0ee62ea..ab24dfcfad 100755
--- a/crypto/poly1305/asm/poly1305-x86.pl
+++ b/crypto/poly1305/asm/poly1305-x86.pl
@@ -30,6 +30,7 @@
# Sandy Bridge 3.90/+100% 1.36
# Haswell 3.88/+70% 1.18 0.72
# Silvermont 11.0/+40% 4.80
+# Goldmont 4.10/+200% 2.10
# VIA Nano 6.71/+90% 2.47
# Sledgehammer 3.51/+180% 4.27
# Bulldozer 4.53/+140% 1.31
diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl
index 784ff4b758..4c22ded580 100755
--- a/crypto/poly1305/asm/poly1305-x86_64.pl
+++ b/crypto/poly1305/asm/poly1305-x86_64.pl
@@ -29,6 +29,7 @@
# Haswell 1.14/+175% 1.11 0.65
# Skylake 1.13/+120% 0.96 0.51
# Silvermont 2.83/+95% -
+# Goldmont 1.70/+180% -
# VIA Nano 1.82/+150% -
# Sledgehammer 1.38/+160% -
# Bulldozer 2.30/+130% 0.97
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index 66054ceeae..f06fa515a2 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -85,9 +85,11 @@
# VIA Nano 9.32 7.15/+30%
# Atom 10.3 9.17/+12%
# Silvermont 13.1(*) 9.37/+40%
+# Goldmont 8.13 6.42/+27% 1.70/+380%(**)
#
# (*) obviously suboptimal result, nothing was done about it,
# because SSSE3 code is compiled unconditionally;
+# (**) SHAEXT result
$flavour = shift;
$output = shift;
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl
index 94cc0114f8..448ac73e06 100644
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -36,6 +36,7 @@
# VIA Nano 91 - 52 33 14.7
# Atom 126 - 68 48(***) 14.7
# Silvermont 97 - 58 42(***) 17.5
+# Goldmont 80 - 48 19.5 12.0
#
# (*) whichever best applicable.
# (**) x86_64 assembler performance is presented for reference
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 01bbb7775f..5a1cbcf0ca 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -98,8 +98,9 @@
# VIA Nano 23.0 16.5(+39%) - 14.7 -
# Atom 23.0 18.9(+22%) - 14.7 -
# Silvermont 27.4 20.6(+33%) - 17.5 -
+# Goldmont 18.9 14.3(+32%) 4.16(+350%) 12.0 -
#
-# (*) whichever best applicable;
+# (*) whichever best applicable, including SHAEXT;
# (**) switch from ror to shrd stands for fair share of improvement;
# (***) execution time is fully determined by remaining integer-only
# part, body_00_15; reducing the amount of SIMD instructions