summary | refs | log | tree | commit | diff | stats
path: root/crypto/poly1305/asm/poly1305-armv8.pl
diff options
context:
space:
mode:
author Andy Polyakov <appro@openssl.org> 2016-03-29 10:02:45 +0200
committer Andy Polyakov <appro@openssl.org> 2016-04-04 16:56:20 +0200
commit 4b8736a22e758c371bc2f8b3534dc0c274acf42c (patch)
tree 8b86fdcae6ab06f1308d63ccf44432e0d029787c /crypto/poly1305/asm/poly1305-armv8.pl
parent Fix memory leaks in ASN.1 (diff)
download openssl-4b8736a22e758c371bc2f8b3534dc0c274acf42c.tar.xz
openssl-4b8736a22e758c371bc2f8b3534dc0c274acf42c.zip
crypto/poly1305: don't break carry chains.
RT#4483 [poly1305-armv4.pl: remove redundant #ifdef __thumb2__] [poly1305-ppc*.pl: presumably more accurate benchmark results] Reviewed-by: Richard Levitte <levitte@openssl.org>
Diffstat (limited to 'crypto/poly1305/asm/poly1305-armv8.pl')
-rwxr-xr-x crypto/poly1305/asm/poly1305-armv8.pl 18
1 files changed, 11 insertions, 7 deletions
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index f1359fd44a..2e1dae3df2 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -16,10 +16,10 @@
# IALU/gcc-4.9 NEON
#
# Apple A7 1.86/+5% 0.72
-# Cortex-A53 2.63/+58% 1.47
+# Cortex-A53 2.69/+58% 1.47
# Cortex-A57 2.70/+7% 1.14
-# Denver 1.39/+50% 1.18(*)
-# X-Gene 2.00/+68% 2.19
+# Denver 1.64/+50% 1.18(*)
+# X-Gene 2.13/+68% 2.19
#
# (*) estimate based on resources availability is less than 1.0,
# i.e. measured result is worse than expected, presumably binary
@@ -151,7 +151,8 @@ poly1305_blocks:
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
- adc $h1,$d1,xzr
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
cbnz $len,.Loop
@@ -235,7 +236,8 @@ poly1305_mult:
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
- adc $h1,$d1,xzr
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
ret
.size poly1305_mult,.-poly1305_mult
@@ -310,7 +312,8 @@ poly1305_blocks_neon:
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$h0,$t0
- adc $h1,$h1,xzr
+ adcs $h1,$h1,xzr
+ adc $h2,$h2,xzr
#ifdef __ARMEB__
rev $d0,$d0
@@ -870,7 +873,8 @@ poly1305_emit_neon:
add $d0,$d0,$h2,lsr#2
and $h2,$h2,#3
adds $h0,$h0,$d0
- adc $h1,$h1,xzr
+ adcs $h1,$h1,xzr
+ adc $h2,$h2,xzr
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr