diff options
Diffstat (limited to 'arch/x86/crypto/camellia-aesni-avx2-asm_64.S')
-rw-r--r-- | arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 207 |
1 files changed, 0 insertions, 207 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0907243c501c..9561dee52de0 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -7,7 +7,6 @@ #include <linux/linkage.h> #include <asm/frame.h> -#include <asm/nospec-branch.h> #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -629,12 +628,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -/* For XTS mode */ -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - /* * pre-SubByte transform * @@ -1201,203 +1194,3 @@ SYM_FUNC_START(camellia_ctr_32way) FRAME_END ret; SYM_FUNC_END(camellia_ctr_32way) - -#define gf128mul_x_ble(iv, mask, tmp) \ - vpsrad $31, iv, tmp; \ - vpaddq iv, iv, iv; \ - vpshufd $0x13, tmp, tmp; \ - vpand mask, tmp, tmp; \ - vpxor tmp, iv, iv; - -#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ - vpsrad $31, iv, tmp0; \ - vpaddq iv, iv, tmp1; \ - vpsllq $2, iv, iv; \ - vpshufd $0x13, tmp0, tmp0; \ - vpsrad $31, tmp1, tmp1; \ - vpand mask2, tmp0, tmp0; \ - vpshufd $0x13, tmp1, tmp1; \ - vpxor tmp0, iv, iv; \ - vpand mask1, tmp1, tmp1; \ - vpxor tmp1, iv, iv; - -.align 8 -SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: index for input whitening key - * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32 - */ - FRAME_BEGIN - - vzeroupper; - - subq $(16 * 32), %rsp; - movq %rsp, %rax; - - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12; - - /* load IV and construct second IV */ - vmovdqu (%rcx), %xmm0; - vmovdqa %xmm0, %xmm15; - gf128mul_x_ble(%xmm0, %xmm12, %xmm13); - vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13; - vinserti128 $1, %xmm0, %ymm15, %ymm0; - vpxor 0 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 15 * 32(%rax); - vmovdqu %ymm0, 0 * 32(%rsi); - - /* construct IVs */ - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 1 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 14 * 32(%rax); - vmovdqu %ymm0, 1 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 2 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 13 * 32(%rax); - vmovdqu %ymm0, 2 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 3 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 12 * 32(%rax); - vmovdqu %ymm0, 3 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 4 * 32(%rdx), %ymm0, %ymm11; - vmovdqu %ymm0, 4 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 5 * 32(%rdx), %ymm0, %ymm10; - vmovdqu %ymm0, 5 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 6 * 32(%rdx), %ymm0, %ymm9; - vmovdqu %ymm0, 6 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 7 * 32(%rdx), %ymm0, %ymm8; - vmovdqu %ymm0, 7 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 8 * 32(%rdx), %ymm0, %ymm7; - vmovdqu %ymm0, 8 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 9 * 32(%rdx), %ymm0, %ymm6; - vmovdqu %ymm0, 9 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 10 * 32(%rdx), %ymm0, %ymm5; - vmovdqu %ymm0, 10 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 11 * 32(%rdx), %ymm0, %ymm4; - vmovdqu %ymm0, 11 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 12 * 32(%rdx), %ymm0, %ymm3; - vmovdqu %ymm0, 12 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 13 * 32(%rdx), %ymm0, %ymm2; - vmovdqu %ymm0, 13 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 14 * 32(%rdx), %ymm0, %ymm1; - vmovdqu %ymm0, 14 * 32(%rsi); - - gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15); - vpxor 15 * 32(%rdx), %ymm0, %ymm15; - vmovdqu %ymm15, 0 * 32(%rax); - vmovdqu %ymm0, 15 * 32(%rsi); - - vextracti128 $1, %ymm0, %xmm0; - gf128mul_x_ble(%xmm0, %xmm12, %xmm15); - vmovdqu %xmm0, (%rcx); - - /* inpack32_pre: */ - vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; - vpshufb .Lpack_bswap, %ymm15, %ymm15; - vpxor 0 * 32(%rax), %ymm15, %ymm0; - vpxor %ymm1, %ymm15, %ymm1; - vpxor %ymm2, %ymm15, %ymm2; - vpxor %ymm3, %ymm15, %ymm3; - vpxor %ymm4, %ymm15, %ymm4; - vpxor %ymm5, %ymm15, %ymm5; - vpxor %ymm6, %ymm15, %ymm6; - vpxor %ymm7, %ymm15, %ymm7; - vpxor %ymm8, %ymm15, %ymm8; - vpxor %ymm9, %ymm15, %ymm9; - vpxor %ymm10, %ymm15, %ymm10; - vpxor %ymm11, %ymm15, %ymm11; - vpxor 12 * 32(%rax), %ymm15, %ymm12; - vpxor 13 * 32(%rax), %ymm15, %ymm13; - vpxor 14 * 32(%rax), %ymm15, %ymm14; - vpxor 15 * 32(%rax), %ymm15, %ymm15; - - CALL_NOSPEC r9; - - addq $(16 * 32), %rsp; - - vpxor 0 * 32(%rsi), %ymm7, %ymm7; - vpxor 1 * 32(%rsi), %ymm6, %ymm6; - vpxor 2 * 32(%rsi), %ymm5, %ymm5; - vpxor 3 * 32(%rsi), %ymm4, %ymm4; - vpxor 4 * 32(%rsi), %ymm3, %ymm3; - vpxor 5 * 32(%rsi), %ymm2, %ymm2; - vpxor 6 * 32(%rsi), %ymm1, %ymm1; - vpxor 7 * 32(%rsi), %ymm0, %ymm0; - vpxor 8 * 32(%rsi), %ymm15, %ymm15; - vpxor 9 * 32(%rsi), %ymm14, %ymm14; - vpxor 10 * 32(%rsi), %ymm13, %ymm13; - vpxor 11 * 32(%rsi), %ymm12, %ymm12; - vpxor 12 * 32(%rsi), %ymm11, %ymm11; - vpxor 13 * 32(%rsi), %ymm10, %ymm10; - vpxor 14 * 32(%rsi), %ymm9, %ymm9; - vpxor 15 * 32(%rsi), %ymm8, %ymm8; - write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, - %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, - %ymm8, %rsi); - - vzeroupper; - - FRAME_END - ret; -SYM_FUNC_END(camellia_xts_crypt_32way) - -SYM_FUNC_START(camellia_xts_enc_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - xorl %r8d, %r8d; /* input whitening key, 0 for enc */ - - leaq __camellia_enc_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_enc_32way) - -SYM_FUNC_START(camellia_xts_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (32 blocks) - * %rdx: src (32 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - - cmpl $16, key_length(CTX); - movl $32, %r8d; - movl $24, %eax; - cmovel %eax, %r8d; /* input whitening key, last for dec */ - - leaq __camellia_dec_blk32, %r9; - - jmp camellia_xts_crypt_32way; -SYM_FUNC_END(camellia_xts_dec_32way) |