summaryrefslogtreecommitdiffstats
path: root/arch/arm/crypto/sha1-armv4-large.S
diff options
context:
space:
mode:
authorDave Martin <dave.martin@linaro.org>2013-01-10 12:20:15 +0100
committerRussell King <rmk+kernel@arm.linux.org.uk>2013-01-13 13:41:22 +0100
commit638591cd7b601d403ed703d55062b48c32ea8cfb (patch)
treeeb7f735b59d7609976b768fc7deb020b6f3d955a /arch/arm/crypto/sha1-armv4-large.S
parentLinux 3.8-rc3 (diff)
downloadlinux-638591cd7b601d403ed703d55062b48c32ea8cfb.tar.xz
linux-638591cd7b601d403ed703d55062b48c32ea8cfb.zip
ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work natively in Thumb. This allows ARM/Thumb interworking workarounds to be removed. I also take the opportunity to convert some explicit assembler directives for exported functions to the standard ENTRY()/ENDPROC(). For the code itself: * In sha1_block_data_order, use of TEQ with sp is deprecated in ARMv7 and not supported in Thumb. For the branches back to .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the assumption that clobbering the C flag here will not cause incorrect behaviour. For the first branch back to .L_20_39_or_60_79 the C flag is important, so sp is moved temporarily into another register so that TEQ can be used for the comparison. * In the AES code, most forms of register-indexed addressing with shifts and rotates are not permitted for loads and stores in Thumb, so the address calculation is done using a separate instruction for the Thumb case. The resulting code is unlikely to be optimally scheduled, but it should not have a large impact given the overall size of the code. I haven't run any benchmarks. Signed-off-by: Dave Martin <dave.martin@linaro.org> Tested-by: David McCullough <ucdevel@gmail.com> (ARM only) Acked-by: David McCullough <ucdevel@gmail.com> Acked-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/sha1-armv4-large.S')
-rw-r--r--arch/arm/crypto/sha1-armv4-large.S24
1 files changed, 9 insertions, 15 deletions
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 7050ab133b9d..92c6eed7aac9 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -51,13 +51,12 @@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.
-.text
+#include <linux/linkage.h>
-.global sha1_block_data_order
-.type sha1_block_data_order,%function
+.text
.align 2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
stmdb sp!,{r4-r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@ sha1_block_data_order:
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r3,r3,r10 @ E+=F_00_19(B,C,D)
- teq r14,sp
+ cmp r14,sp
bne .L_00_15 @ [((11+4)*5+2)*3]
#if __ARM_ARCH__<7
ldrb r10,[r1,#2]
@@ -374,7 +373,9 @@ sha1_block_data_order:
@ F_xx_xx
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_20_39(B,C,D)
- teq r14,sp @ preserve carry
+ ARM( teq r14,sp ) @ preserve carry
+ THUMB( mov r11,sp )
+ THUMB( teq r14,r11 ) @ preserve carry
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
@@ -466,7 +467,7 @@ sha1_block_data_order:
add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_40_59(B,C,D)
add r3,r3,r11,ror#2
- teq r14,sp
+ cmp r14,sp
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr r8,.LK_60_79
@@ -485,19 +486,12 @@ sha1_block_data_order:
teq r1,r2
bne .Lloop @ [+18], total 1307
-#if __ARM_ARCH__>=5
ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
.align 2
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
-.size sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2