summary | refs | log | tree | commit | diff | stats
path: root/crypto
diff options
context:
space:
mode:
author Andy Polyakov <appro@openssl.org> 2018-05-09 12:24:05 +0200
committer Andy Polyakov <appro@openssl.org> 2018-05-10 11:44:21 +0200
commit 13f6857db107b1b6f68daa7fc4a6dd1293428bb1 (patch)
tree 0d2ed90ee0160b2d5ba88a3e409d0ca8e99db0de /crypto
parent .travis.yml: add pair of linux-ppc64le targets. (diff)
download openssl-13f6857db107b1b6f68daa7fc4a6dd1293428bb1.tar.xz
openssl-13f6857db107b1b6f68daa7fc4a6dd1293428bb1.zip
PPC assembly pack: add POWER9 results.
Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto')
-rwxr-xr-x crypto/aes/asm/aesp8-ppc.pl 1
-rwxr-xr-x crypto/chacha/asm/chacha-ppc.pl 1
-rwxr-xr-x crypto/modes/asm/ghashp8-ppc.pl 1
-rwxr-xr-x crypto/poly1305/asm/poly1305-ppc.pl 1
-rwxr-xr-x crypto/poly1305/asm/poly1305-ppcfp.pl 1
-rw-r--r-- crypto/poly1305/poly1305_ieee754.c 1
-rwxr-xr-x crypto/sha/asm/keccak1600-ppc64.pl 1
-rwxr-xr-x crypto/sha/asm/keccak1600p8-ppc.pl 2
-rwxr-xr-x crypto/sha/asm/sha512p8-ppc.pl 6
9 files changed, 11 insertions, 4 deletions
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index 7463df6c17..e1be23ab75 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -40,6 +40,7 @@
# CBC en-/decrypt CTR XTS
# POWER8[le] 3.96/0.72 0.74 1.1
# POWER8[be] 3.75/0.65 0.66 1.0
+# POWER9[le] 3.05/0.65 0.65 0.80
$flavour = shift;
diff --git a/crypto/chacha/asm/chacha-ppc.pl b/crypto/chacha/asm/chacha-ppc.pl
index f972ee471a..af2f037c15 100755
--- a/crypto/chacha/asm/chacha-ppc.pl
+++ b/crypto/chacha/asm/chacha-ppc.pl
@@ -27,6 +27,7 @@
# PPC970/G5 9.29/+160% 4.60
# POWER7 8.62/+61% 4.27
# POWER8 8.70/+51% 3.96
+# POWER9 6.61/+29% 3.67
$flavour = shift;
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
index 45c6438497..a1d5789cc8 100755
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/modes/asm/ghashp8-ppc.pl
@@ -30,6 +30,7 @@
# 2x aggregated reduction improves performance by 50% (resulting
# performance on POWER8 is 1 cycle per processed byte), and 4x
# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.40 cpb.
$flavour=shift;
$output =shift;
diff --git a/crypto/poly1305/asm/poly1305-ppc.pl b/crypto/poly1305/asm/poly1305-ppc.pl
index ab65910282..8e105d77fb 100755
--- a/crypto/poly1305/asm/poly1305-ppc.pl
+++ b/crypto/poly1305/asm/poly1305-ppc.pl
@@ -28,6 +28,7 @@
# PPC970 7.00/+114% 3.51/+205%
# POWER7 3.75/+260% 1.93/+100%
# POWER8 - 2.03/+200%
+# POWER9 - 1.56/+150%
#
# Do we need floating-point implementation for PPC? Results presented
# in poly1305_ieee754.c are tricky to compare to, because they are for
diff --git a/crypto/poly1305/asm/poly1305-ppcfp.pl b/crypto/poly1305/asm/poly1305-ppcfp.pl
index 49f70a8c03..fc62baa222 100755
--- a/crypto/poly1305/asm/poly1305-ppcfp.pl
+++ b/crypto/poly1305/asm/poly1305-ppcfp.pl
@@ -26,6 +26,7 @@
# PPC970 6.03/+80%
# POWER7 3.50/+30%
# POWER8 3.75/+10%
+# POWER9 2.80/+12%
$flavour = shift;
diff --git a/crypto/poly1305/poly1305_ieee754.c b/crypto/poly1305/poly1305_ieee754.c
index 995a02e5c1..1a06e03558 100644
--- a/crypto/poly1305/poly1305_ieee754.c
+++ b/crypto/poly1305/poly1305_ieee754.c
@@ -38,6 +38,7 @@
* POWER6 4.92
* POWER7 4.50
* POWER8 4.10
+ * POWER9 3.14
*
* z10 11.2
* z196+ 7.30
diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl
index f89f71c825..60ed2f2326 100755
--- a/crypto/sha/asm/keccak1600-ppc64.pl
+++ b/crypto/sha/asm/keccak1600-ppc64.pl
@@ -30,6 +30,7 @@
# PPC970/G5 14.6/+120%
# POWER7 10.3/+100%
# POWER8 11.5/+85%
+# POWER9 7.2/+45%
#
# (*) Corresponds to SHA3-256. Percentage after slash is improvement
# over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
diff --git a/crypto/sha/asm/keccak1600p8-ppc.pl b/crypto/sha/asm/keccak1600p8-ppc.pl
index feec68839f..95e6242f99 100755
--- a/crypto/sha/asm/keccak1600p8-ppc.pl
+++ b/crypto/sha/asm/keccak1600p8-ppc.pl
@@ -23,7 +23,7 @@
# buffer for r=1088, which matches SHA3-256. This is 17% better than
# scalar PPC64 code. It probably should be noted that if POWER8's
# successor can achieve higher scalar instruction issue rate, then
-# this module will loose...
+# this module will lose... And it does on POWER9 with 8.8 vs. 7.2.
$flavour = shift;
diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
index 93dfef20a9..e6e9467905 100755
--- a/crypto/sha/asm/sha512p8-ppc.pl
+++ b/crypto/sha/asm/sha512p8-ppc.pl
@@ -36,9 +36,9 @@
# little-endian system]. Numbers in square brackets are for 64-bit
# build of sha512-ppc.pl, presented for reference.
#
-# POWER8
-# SHA256 9.9 [15.8]
-# SHA512 6.3 [10.3]
+# POWER8 POWER9
+# SHA256 9.9 [15.8] 9.2 [9.3]
+# SHA512 6.3 [10.3] 5.8 [5.9]
$flavour=shift;
$output =shift;