summaryrefslogtreecommitdiffstats
path: root/crypto/arm64cpuid.pl
diff options
context:
space:
mode:
authorXiaokangQian <xiaokang.qian@arm.com>2021-06-09 08:35:46 +0200
committerPauli <pauli@openssl.org>2022-01-25 04:30:00 +0100
commit954f45ba4c504570206ff5bed811e512cf92dc8e (patch)
tree6d2521f79615afd4c8b35cb2c6794a57aded5602 /crypto/arm64cpuid.pl
parentAES-GCM performance optimzation with stitched method for p9+ ppc64le (diff)
downloadopenssl-954f45ba4c504570206ff5bed811e512cf92dc8e.tar.xz
openssl-954f45ba4c504570206ff5bed811e512cf92dc8e.zip
Optimize AES-GCM for uarchs with unroll and new instructions
Increase the block numbers to 8 for every iteration. Increase the hash table capacity. Make use of EOR3 instruction to improve the performance. This can improve performance 25-40% on out-of-order microarchitectures with a large number of fast execution units, such as Neoverse V1. We also see 20-30% performance improvements on other architectures such as the M1. Assembly code reviewed by Tom Cosgrove (ARM). Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> Reviewed-by: Paul Dale <pauli@openssl.org> (Merged from https://github.com/openssl/openssl/pull/15916)
Diffstat (limited to 'crypto/arm64cpuid.pl')
-rwxr-xr-xcrypto/arm64cpuid.pl8
1 files changed, 8 insertions, 0 deletions
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index 1841c0cc04..ebea4be59c 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -96,6 +96,14 @@ _armv8_sha512_probe:
ret
.size _armv8_sha512_probe,.-_armv8_sha512_probe
+.globl _armv8_eor3_probe
+.type _armv8_eor3_probe,%function
+_armv8_eor3_probe:
+ AARCH64_VALID_CALL_TARGET
+ .long 0xce010800 // eor3 v0.16b, v0.16b, v1.16b, v2.16b
+ ret
+.size _armv8_eor3_probe,.-_armv8_eor3_probe
+
.globl _armv8_cpuid_probe
.type _armv8_cpuid_probe,%function
_armv8_cpuid_probe: