diff options
author | Andy Polyakov <appro@openssl.org> | 2010-11-29 21:52:43 +0100 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2010-11-29 21:52:43 +0100 |
commit | e822c756b66024d49ab936bf77b745206660fcd2 (patch) | |
tree | c5c6cd2bec509720a0753e245bd3731e99c6de83 /crypto/rc4/asm | |
parent | apply J-PKAKE fix to HEAD (original by Ben) (diff) | |
download | openssl-e822c756b66024d49ab936bf77b745206660fcd2.tar.xz openssl-e822c756b66024d49ab936bf77b745206660fcd2.zip |
s390x assembler pack: adapt for -m31 build, see commentary in Configure
for more details.
Diffstat (limited to 'crypto/rc4/asm')
-rw-r--r-- | crypto/rc4/asm/rc4-s390x.pl | 35 |
1 file changed, 30 insertions, 5 deletions
diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl
index f26c515e78..1aa754820c 100644
--- a/crypto/rc4/asm/rc4-s390x.pl
+++ b/crypto/rc4/asm/rc4-s390x.pl
@@ -13,6 +13,26 @@
 # "cluster" Address Generation Interlocks, so that one pipeline stall
 # resolves several dependencies.
 
+# November 2010.
+#
+# Adapt for -m31 build. If kernel supports what's called "highgprs"
+# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
+# instructions and achieve "64-bit" performance even in 31-bit legacy
+# application context. The feature is not specific to any particular
+# processor, as long as it's "z-CPU". Latter implies that the code
+# remains z/Architecture specific. On z990 it was measured to perform
+# 50% better than code generated by gcc 4.3.
+
+$flavour = shift;
+
+if ($flavour =~ /3[12]/) {
+ $SIZE_T=4;
+ $g="";
+} else {
+ $SIZE_T=8;
+ $g="g";
+}
+
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
@@ -42,7 +62,12 @@ $code.=<<___;
 .type RC4,\@function
 .align 64
 RC4:
- stmg %r6,%r11,48($sp)
+ stm${g} %r6,%r11,6*$SIZE_T($sp)
+___
+$code.=<<___ if ($flavour =~ /3[12]/);
+ llgfr $len,$len
+___
+$code.=<<___;
  llgc $XX[0],0($key)
  llgc $YY,1($key)
  la $XX[0],1($XX[0])
@@ -93,7 +118,7 @@ $code.=<<___;
  xgr $acc,$TX[1]
  stg $acc,0($out)
  la $out,8($out)
- brct $cnt,.Loop8
+ brctg $cnt,.Loop8
 
 .Lshort:
  lghi $acc,7
@@ -125,7 +150,7 @@ $code.=<<___;
  ahi $XX[0],-1
  stc $XX[0],0($key)
  stc $YY,1($key)
- lmg %r6,%r11,48($sp)
+ lm${g} %r6,%r11,6*$SIZE_T($sp)
  br $rp
 .size RC4,.-RC4
 .string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
@@ -150,7 +175,7 @@ $code.=<<___;
 .type RC4_set_key,\@function
 .align 64
 RC4_set_key:
- stmg %r6,%r8,48($sp)
+ stm${g} %r6,%r8,6*$SIZE_T($sp)
  lhi $cnt,256
  la $idx,0(%r0)
  sth $idx,0($key)
@@ -183,7 +208,7 @@ RC4_set_key:
  la $iinp,0(%r0)
  j .L2ndloop
 .Ldone:
- lmg %r6,%r8,48($sp)
+ lm${g} %r6,%r8,6*$SIZE_T($sp)
  br $rp
 .size RC4_set_key,.-RC4_set_key
 