diff options
author | Andy Polyakov <appro@openssl.org> | 2016-10-14 13:25:06 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2016-10-24 13:01:13 +0200 |
commit | ace05265d2d599e350cf84ed60955b7f2b173bc9 (patch) | |
tree | 5c5ef8cf19ac7aa60e3eca068bd3ca8e1558129c /crypto | |
parent | sha/keccak1600.c: add known answer and verify result with memcmp. (diff) | |
download | openssl-ace05265d2d599e350cf84ed60955b7f2b173bc9.tar.xz openssl-ace05265d2d599e350cf84ed60955b7f2b173bc9.zip |
x86_64 assembly pack: add Goldmont performance results.
Reviewed-by: Richard Levitte <levitte@openssl.org>
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/aes/asm/aesni-x86_64.pl | 1 | ||||
-rw-r--r-- | crypto/aes/asm/bsaes-x86_64.pl | 2 | ||||
-rw-r--r-- | crypto/aes/asm/vpaes-x86_64.pl | 1 | ||||
-rwxr-xr-x | crypto/chacha/asm/chacha-x86.pl | 1 | ||||
-rwxr-xr-x | crypto/chacha/asm/chacha-x86_64.pl | 1 | ||||
-rw-r--r-- | crypto/modes/asm/ghash-x86_64.pl | 1 | ||||
-rwxr-xr-x | crypto/poly1305/asm/poly1305-x86.pl | 1 | ||||
-rwxr-xr-x | crypto/poly1305/asm/poly1305-x86_64.pl | 1 | ||||
-rwxr-xr-x | crypto/sha/asm/sha1-x86_64.pl | 2 | ||||
-rw-r--r-- | crypto/sha/asm/sha512-586.pl | 1 | ||||
-rwxr-xr-x | crypto/sha/asm/sha512-x86_64.pl | 3 |
11 files changed, 14 insertions, 1 deletions
diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl index 397e82f8c7..443f2f7542 100644 --- a/crypto/aes/asm/aesni-x86_64.pl +++ b/crypto/aes/asm/aesni-x86_64.pl @@ -179,6 +179,7 @@ # Haswell 4.44/0.63 0.63 0.73 0.63 0.70 # Skylake 2.62/0.63 0.63 0.63 0.63 # Silvermont 5.75/3.54 3.56 4.12 3.87(*) 4.11 +# Goldmont 3.82/1.26 1.26 1.29 1.29 1.50 # Bulldozer 5.77/0.70 0.72 0.90 0.70 0.95 # # (*) Atom Silvermont ECB result is suboptimal because of penalties diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl index d257a3efe6..0d49947d81 100644 --- a/crypto/aes/asm/bsaes-x86_64.pl +++ b/crypto/aes/asm/bsaes-x86_64.pl @@ -48,6 +48,7 @@ # Nehalem(**) 7.63 6.88 +11% # Atom 17.1 16.4 +4% # Silvermont - 12.9 +# Goldmont - 8.85 # # (*) Comparison is not completely fair, because "this" is ECB, # i.e. no extra processing such as counter values calculation @@ -87,6 +88,7 @@ # Nehalem 7.80 # Atom 17.9 # Silvermont 14.0 +# Goldmont 10.2 # # November 2011. # diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index dd1f13a271..b715aca167 100644 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -38,6 +38,7 @@ # Nehalem 29.6/40.3/14.6 10.0/11.8 # Atom 57.3/74.2/32.1 60.9/77.2(***) # Silvermont 52.7/64.0/19.5 48.8/60.8(***) +# Goldmont 38.9/49.0/17.8 10.6/12.6 # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl index 3c6e67d9c8..f00b7d2935 100755 --- a/crypto/chacha/asm/chacha-x86.pl +++ b/crypto/chacha/asm/chacha-x86.pl @@ -29,6 +29,7 @@ # Sandy Bridge 10.5/+47% 3.20 # Haswell 8.15/+50% 2.83 # Silvermont 17.4/+36% 8.35 +# Goldmont 13.4/+40% 4.36 # Sledgehammer 10.2/+54% # Bulldozer 13.4/+50% 4.38(*) # diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl index 4b1750cd5d..347dfcb3e5 100755 --- a/crypto/chacha/asm/chacha-x86_64.pl +++ b/crypto/chacha/asm/chacha-x86_64.pl @@ -29,6 +29,7 @@ # Ivy Bridge 6.71/+46% 5.40/6.49 2.41 # Haswell 5.92/+43% 5.20/6.45 2.42 1.23 # Silvermont 12.0/+33% 7.75/7.40 7.03(iii) +# Goldmont 10.6/+17% 5.10/- 3.28 # Sledgehammer 7.28/+52% -/14.2(ii) - # Bulldozer 9.66/+28% 9.85/11.1 3.06(iv) # VIA Nano 10.5/+46% 6.72/8.60 6.05 diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 6941b3c234..c782edcbb8 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -74,6 +74,7 @@ # Skylake 0.44(+110%)(if system doesn't support AVX) # Bulldozer 1.49(+27%) # Silvermont 2.88(+13%) +# Goldmont 1.08(+24%) # March 2013 # diff --git a/crypto/poly1305/asm/poly1305-x86.pl b/crypto/poly1305/asm/poly1305-x86.pl index ecc0ee62ea..ab24dfcfad 100755 --- a/crypto/poly1305/asm/poly1305-x86.pl +++ b/crypto/poly1305/asm/poly1305-x86.pl @@ -30,6 +30,7 @@ # Sandy Bridge 3.90/+100% 1.36 # Haswell 3.88/+70% 1.18 0.72 # Silvermont 11.0/+40% 4.80 +# Goldmont 4.10/+200% 2.10 # VIA Nano 6.71/+90% 2.47 # Sledgehammer 3.51/+180% 4.27 # Bulldozer 4.53/+140% 1.31 diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl index 784ff4b758..4c22ded580 100755 --- a/crypto/poly1305/asm/poly1305-x86_64.pl +++ b/crypto/poly1305/asm/poly1305-x86_64.pl @@ -29,6 +29,7 @@ # Haswell 1.14/+175% 1.11 0.65 # Skylake 1.13/+120% 0.96 0.51 # Silvermont 2.83/+95% - +# Goldmont 1.70/+180% - # VIA Nano 1.82/+150% - # Sledgehammer 1.38/+160% - # Bulldozer 2.30/+130% 0.97 diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl index 66054ceeae..f06fa515a2 100755 --- a/crypto/sha/asm/sha1-x86_64.pl +++ b/crypto/sha/asm/sha1-x86_64.pl @@ -85,9 +85,11 @@ # VIA Nano 9.32 7.15/+30% # Atom 10.3 9.17/+12% # Silvermont 13.1(*) 9.37/+40% +# Goldmont 8.13 6.42/+27% 1.70/+380%(**) # # (*) obviously suboptimal result, nothing was done about it, # because SSSE3 code is compiled unconditionally; +# (**) SHAEXT result $flavour = shift; $output = shift; diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl index 94cc0114f8..448ac73e06 100644 --- a/crypto/sha/asm/sha512-586.pl +++ b/crypto/sha/asm/sha512-586.pl @@ -36,6 +36,7 @@ # VIA Nano 91 - 52 33 14.7 # Atom 126 - 68 48(***) 14.7 # Silvermont 97 - 58 42(***) 17.5 +# Goldmont 80 - 48 19.5 12.0 # # (*) whichever best applicable. # (**) x86_64 assembler performance is presented for reference diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl index 01bbb7775f..5a1cbcf0ca 100755 --- a/crypto/sha/asm/sha512-x86_64.pl +++ b/crypto/sha/asm/sha512-x86_64.pl @@ -98,8 +98,9 @@ # VIA Nano 23.0 16.5(+39%) - 14.7 - # Atom 23.0 18.9(+22%) - 14.7 - # Silvermont 27.4 20.6(+33%) - 17.5 - +# Goldmont 18.9 14.3(+32%) 4.16(+350%) 12.0 - # -# (*) whichever best applicable; +# (*) whichever best applicable, including SHAEXT; # (**) switch from ror to shrd stands for fair share of improvement; # (***) execution time is fully determined by remaining integer-only # part, body_00_15; reducing the amount of SIMD instructions |