summaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/aes-modes.S
diff options
context:
space:
mode:
authorArd Biesheuvel <ardb@kernel.org>2022-01-27 10:52:11 +0100
committerHerbert Xu <herbert@gondor.apana.org.au>2022-02-05 05:10:51 +0100
commit8daa399edeed4cfa792ccea12beda50d445ab6a0 (patch)
tree744b6b42a0a24a6351898cbe3dbaa1e62ce03343 /arch/arm64/crypto/aes-modes.S
parentcrypto: octeontx2 - increase CPT HW instruction queue length (diff)
downloadlinux-8daa399edeed4cfa792ccea12beda50d445ab6a0.tar.xz
linux-8daa399edeed4cfa792ccea12beda50d445ab6a0.zip
crypto: arm64/aes-neon-ctr - improve handling of single tail block
Instead of falling back to C code to do a memcpy of the output of the last block, handle this in the asm code directly if possible, which is the case if the entire input is longer than 16 bytes. Cc: Nathan Huckleberry <nhuck@google.com> Cc: Eric Biggers <ebiggers@google.com> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64/crypto/aes-modes.S')
-rw-r--r--arch/arm64/crypto/aes-modes.S18
1 files changed, 13 insertions, 5 deletions
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index ff01f0167ba2..dc35eb0245c5 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int bytes, u8 ctr[], u8 finalbuf[])
+ * int bytes, u8 ctr[])
*/
AES_FUNC_START(aes_ctr_encrypt)
@@ -414,8 +414,8 @@ ST5( st1 {v4.16b}, [x0], #16 )
.Lctrtail:
/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
mov x16, #16
- ands x13, x4, #0xf
- csel x13, x13, x16, ne
+ ands x6, x4, #0xf
+ csel x13, x6, x16, ne
ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
ST5( csel x14, x16, xzr, gt )
@@ -424,10 +424,10 @@ ST5( csel x14, x16, xzr, gt )
cmp w4, #32 - (MAX_STRIDE << 4)
csel x16, x16, xzr, gt
cmp w4, #16 - (MAX_STRIDE << 4)
- ble .Lctrtail1x
adr_l x12, .Lcts_permute_table
add x12, x12, x13
+ ble .Lctrtail1x
ST5( ld1 {v5.16b}, [x1], x14 )
ld1 {v6.16b}, [x1], x15
@@ -462,11 +462,19 @@ ST5( st1 {v5.16b}, [x0], x14 )
b .Lctrout
.Lctrtail1x:
- csel x0, x0, x6, eq // use finalbuf if less than a full block
+ sub x7, x6, #16
+ csel x6, x6, x7, eq
+ add x1, x1, x6
+ add x0, x0, x6
ld1 {v5.16b}, [x1]
+ ld1 {v6.16b}, [x0]
ST5( mov v3.16b, v4.16b )
encrypt_block v3, w3, x2, x8, w7
+ ld1 {v10.16b-v11.16b}, [x12]
+ tbl v3.16b, {v3.16b}, v10.16b
+ sshr v11.16b, v11.16b, #7
eor v5.16b, v5.16b, v3.16b
+ bif v5.16b, v6.16b, v11.16b
st1 {v5.16b}, [x0]
b .Lctrout
AES_FUNC_END(aes_ctr_encrypt)