diff options
author | Dave Martin <dave.martin@linaro.org> | 2013-01-10 12:20:15 +0100 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2013-01-13 13:41:22 +0100 |
commit | 638591cd7b601d403ed703d55062b48c32ea8cfb (patch) | |
tree | eb7f735b59d7609976b768fc7deb020b6f3d955a /arch/arm/crypto/sha1-armv4-large.S | |
parent | Linux 3.8-rc3 (diff) | |
download | linux-638591cd7b601d403ed703d55062b48c32ea8cfb.tar.xz linux-638591cd7b601d403ed703d55062b48c32ea8cfb.zip |
ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb. This allows ARM/Thumb interworking workarounds
to be removed.
I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard
ENTRY()/ENDPROC().
For the code itself:
  * In sha1_block_data_order, use of TEQ with sp is deprecated in
    ARMv7 and not supported in Thumb. For the branches back to
    .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
    assumption that clobbering the C flag here will not cause
    incorrect behaviour.

    For the first branch back to .L_20_39_or_60_79 the C flag is
    important, so sp is moved temporarily into another register so
    that TEQ can be used for the comparison.

  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in
    Thumb, so the address calculation is done using a separate
    instruction for the Thumb case.
The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/sha1-armv4-large.S')
-rw-r--r-- | arch/arm/crypto/sha1-armv4-large.S | 24 |
1 file changed, 9 insertions, 15 deletions
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S index 7050ab133b9d..92c6eed7aac9 100644 --- a/arch/arm/crypto/sha1-armv4-large.S +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -51,13 +51,12 @@ @ Profiler-assisted and platform-specific optimization resulted in 10% @ improvement on Cortex A8 core and 12.2 cycles per byte. -.text +#include <linux/linkage.h> -.global sha1_block_data_order -.type sha1_block_data_order,%function +.text .align 2 -sha1_block_data_order: +ENTRY(sha1_block_data_order) stmdb sp!,{r4-r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} @@ -194,7 +193,7 @@ sha1_block_data_order: eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r3,r3,r10 @ E+=F_00_19(B,C,D) - teq r14,sp + cmp r14,sp bne .L_00_15 @ [((11+4)*5+2)*3] #if __ARM_ARCH__<7 ldrb r10,[r1,#2] @@ -374,7 +373,9 @@ sha1_block_data_order: @ F_xx_xx add r3,r3,r9 @ E+=X[i] add r3,r3,r10 @ E+=F_20_39(B,C,D) - teq r14,sp @ preserve carry + ARM( teq r14,sp ) @ preserve carry + THUMB( mov r11,sp ) + THUMB( teq r14,r11 ) @ preserve carry bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes @@ -466,7 +467,7 @@ sha1_block_data_order: add r3,r3,r9 @ E+=X[i] add r3,r3,r10 @ E+=F_40_59(B,C,D) add r3,r3,r11,ror#2 - teq r14,sp + cmp r14,sp bne .L_40_59 @ [+((12+5)*5+2)*4] ldr r8,.LK_60_79 @@ -485,19 +486,12 @@ sha1_block_data_order: teq r1,r2 bne .Lloop @ [+18], total 1307 -#if __ARM_ARCH__>=5 ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif .align 2 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order +ENDPROC(sha1_block_data_order) .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" .align 2 |