diff options
Diffstat (limited to 'arch/arm')
48 files changed, 592 insertions, 1361 deletions
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index f6fcb8a79889..a810fa8ba404 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -45,35 +45,42 @@ config DEBUG_WX If in doubt, say "Y". -# RMK wants arm kernels compiled with frame pointers or stack unwinding. -# If you know what you are doing and are willing to live without stack -# traces, you can get a slightly smaller kernel by setting this option to -# n, but then RMK will have to kill you ;). -config FRAME_POINTER - bool - depends on !THUMB2_KERNEL - default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER +choice + prompt "Choose kernel unwinder" + default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER + default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER help - If you say N here, the resulting kernel will be slightly smaller and - faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, - when a problem occurs with the kernel, the information that is - reported is severely limited. + This determines which method will be used for unwinding kernel stack + traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, + livepatch, lockdep, and more. + +config UNWINDER_FRAME_POINTER + bool "Frame pointer unwinder" + depends on !THUMB2_KERNEL && !CC_IS_CLANG + select ARCH_WANT_FRAME_POINTERS + select FRAME_POINTER + help + This option enables the frame pointer unwinder for unwinding + kernel stack traces. -config ARM_UNWIND - bool "Enable stack unwinding support (EXPERIMENTAL)" +config UNWINDER_ARM + bool "ARM EABI stack unwinder" depends on AEABI - default y + select ARM_UNWIND help This option enables stack unwinding support in the kernel using the information automatically generated by the compiler. The resulting kernel image is slightly bigger but the performance is not affected. Currently, this feature - only works with EABI compilers. If unsure say Y. + only works with EABI compilers. -config OLD_MCOUNT +endchoice + +config ARM_UNWIND + bool + +config FRAME_POINTER bool - depends on FUNCTION_TRACER && FRAME_POINTER - default y config DEBUG_USER bool "Verbose user fault messages" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index d1516f85f25d..05a91d8b89f3 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -74,7 +74,7 @@ endif arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 -arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3 +arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m # Evaluate arch cc-option calls now arch-y := $(arch-y) @@ -264,13 +264,9 @@ platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y))) ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y) ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y) -ifeq ($(KBUILD_SRC),) -KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs)) -else KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs)) endif endif -endif export TEXT_OFFSET GZFLAGS MMUEXT @@ -307,12 +303,7 @@ else KBUILD_IMAGE := $(boot)/zImage endif -# Build the DT binary blobs if we have OF configured -ifeq ($(CONFIG_USE_OF),y) -KBUILD_DTBS := dtbs -endif - -all: $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS) +all: $(notdir $(KBUILD_IMAGE)) archheaders: @@ -339,17 +330,6 @@ $(BOOT_TARGETS): vmlinux $(INSTALL_TARGETS): $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ -%.dtb: | scripts - $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@ - -PHONY += dtbs dtbs_install - -dtbs: prepare scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - -dtbs_install: - $(Q)$(MAKE) $(dtbinst)=$(boot)/dts - PHONY += vdso_install vdso_install: ifeq ($(CONFIG_VDSO),y) @@ -371,8 +351,6 @@ define archhelp echo ' uImage - U-Boot wrapped zImage' echo ' bootpImage - Combined zImage and initial RAM disk' echo ' (supply initrd image via make variable INITRD=<path>)' - echo '* dtbs - Build device tree blobs for enabled boards' - echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' uinstall - Install U-Boot wrapped compressed kernel' diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 517e0e18f0b8..6c7ccb428c07 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -114,6 +114,35 @@ #endif .endm + /* + * Debug kernel copy by printing the memory addresses involved + */ + .macro dbgkc, begin, end, cbegin, cend +#ifdef DEBUG + kputc #'\n' + kputc #'C' + kputc #':' + kputc #'0' + kputc #'x' + kphex \begin, 8 /* Start of compressed kernel */ + kputc #'-' + kputc #'0' + kputc #'x' + kphex \end, 8 /* End of compressed kernel */ + kputc #'-' + kputc #'>' + kputc #'0' + kputc #'x' + kphex \cbegin, 8 /* Start of kernel copy */ + kputc #'-' + kputc #'0' + kputc #'x' + kphex \cend, 8 /* End of kernel copy */ + kputc #'\n' + kputc #'\r' +#endif + .endm + .section ".start", #alloc, #execinstr /* * sort out different calling conventions @@ -450,6 +479,20 @@ dtb_check_done: add r6, r9, r5 add r9, r9, r10 +#ifdef DEBUG + sub r10, r6, r5 + sub r10, r9, r10 + /* + * We are about to copy the kernel to a new memory area. + * The boundaries of the new memory area can be found in + * r10 and r9, whilst r5 and r6 contain the boundaries + * of the memory we are going to copy. + * Calling dbgkc will help with the printing of this + * information. + */ + dbgkc r5, r6, r10, r9 +#endif + 1: ldmdb r6!, {r0 - r3, r10 - r12, lr} cmp r6, r5 stmdb r9!, {r0 - r3, r10 - r12, lr} diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index 07437816e098..b36c0289a308 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -6,6 +6,8 @@ #include <linux/string.h> #include <asm/byteorder.h> +#define INT_MAX ((int)(~0U>>1)) + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 7234e8330a57..efbdeaaa8dcd 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -146,8 +146,9 @@ fsl,max-link-speed = <2>; power-domains = <&pgc_pcie_phy>; resets = <&src IMX7_RESET_PCIEPHY>, - <&src IMX7_RESET_PCIE_CTRL_APPS_EN>; - reset-names = "pciephy", "apps"; + <&src IMX7_RESET_PCIE_CTRL_APPS_EN>, + <&src IMX7_RESET_PCIE_CTRL_APPS_TURNOFF>; + reset-names = "pciephy", "apps", "turnoff"; status = "disabled"; }; }; diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 925d1364727a..ef0c7feea6e2 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_CRYPTD + select CRYPTO_GF128MUL help Use an implementation of GHASH (used by the GCM AEAD chaining mode) that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) @@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 -config CRYPTO_SPECK_NEON - tristate "NEON accelerated Speck cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_SPECK - endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8de542c48ade..bd5bceef0605 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o -obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -54,7 +53,6 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o -speck-neon-y := speck-neon-core.o speck-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S index 451a849ad518..50e7b9896818 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -18,6 +18,34 @@ * (at your option) any later version. */ + /* + * NEON doesn't have a rotate instruction. The alternatives are, more or less: + * + * (a) vshl.u32 + vsri.u32 (needs temporary register) + * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register) + * (c) vrev32.16 (16-bit rotations only) + * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, + * needs index vector) + * + * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit + * rotations, the only choices are (a) and (b). We use (a) since it takes + * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53. + * + * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest + * and doesn't need a temporary register. + * + * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence + * is twice as fast as (a), even when doing (a) on multiple registers + * simultaneously to eliminate the stall between vshl and vsri. Also, it + * parallelizes better when temporary registers are scarce. + * + * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as + * (a), so the need to load the rotation table actually makes the vtbl method + * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it + * seems to be a good compromise to get a more significant speed boost on some + * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7. + */ + #include <linux/linkage.h> .text @@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon) vmov q10, q2 vmov q11, q3 + adr ip, .Lrol8_table mov r3, #10 + vld1.8 {d10}, [ip, :64] .Ldoubleround: // x0 += x1, x3 = rotl32(x3 ^ x0, 16) @@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 8) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 // x2 += x3, x1 = rotl32(x1 ^ x2, 7) vadd.i32 q2, q2, q3 @@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 8) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 // x2 += x3, x1 = rotl32(x1 ^ x2, 7) vadd.i32 q2, q2, q3 @@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon) bx lr ENDPROC(chacha20_block_xor_neon) + .align 4 +.Lctrinc: .word 0, 1, 2, 3 +.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 + .align 5 ENTRY(chacha20_4block_xor_neon) - push {r4-r6, lr} - mov ip, sp // preserve the stack pointer - sub r3, sp, #0x20 // allocate a 32 byte buffer - bic r3, r3, #0x1f // aligned to 32 bytes - mov sp, r3 + push {r4-r5} + mov r4, sp // preserve the stack pointer + sub ip, sp, #0x20 // allocate a 32 byte buffer + bic ip, ip, #0x1f // aligned to 32 bytes + mov sp, ip // r0: Input state matrix, s // r1: 4 data blocks output, o @@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon) // This function encrypts four consecutive ChaCha20 blocks by loading // the state matrix in NEON registers four times. The algorithm performs // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. + // requires no word shuffling. The words are re-interleaved before the + // final addition of the original state and the XORing step. // - // x0..15[0-3] = s0..3[0..3] - add r3, r0, #0x20 + // x0..15[0-3] = s0..15[0-3] + add ip, r0, #0x20 vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [r3] + vld1.32 {q2-q3}, [ip] - adr r3, CTRINC + adr r5, .Lctrinc vdup.32 q15, d7[1] vdup.32 q14, d7[0] - vld1.32 {q11}, [r3, :128] + vld1.32 {q4}, [r5, :128] vdup.32 q13, d6[1] vdup.32 q12, d6[0] - vadd.i32 q12, q12, q11 // x12 += counter values 0-3 vdup.32 q11, d5[1] vdup.32 q10, d5[0] + vadd.u32 q12, q12, q4 // x12 += counter values 0-3 vdup.32 q9, d4[1] vdup.32 q8, d4[0] vdup.32 q7, d3[1] @@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon) vdup.32 q1, d0[1] vdup.32 q0, d0[0] + adr ip, .Lrol8_table mov r3, #10 + b 1f .Ldoubleround4: + vld1.32 {q8-q9}, [sp, :256] +1: // x0 += x4, x12 = rotl32(x12 ^ x0, 16) // x1 += x5, x13 = rotl32(x13 ^ x1, 16) // x2 += x6, x14 = rotl32(x14 ^ x2, 16) @@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon) // x1 += x5, x13 = rotl32(x13 ^ x1, 8) // x2 += x6, x14 = rotl32(x14 ^ x2, 8) // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vld1.8 {d16}, [ip, :64] vadd.i32 q0, q0, q4 vadd.i32 q1, q1, q5 vadd.i32 q2, q2, q6 vadd.i32 q3, q3, q7 - veor q8, q12, q0 - veor q9, q13, q1 - vshl.u32 q12, q8, #8 - vshl.u32 q13, q9, #8 - vsri.u32 q12, q8, #24 - vsri.u32 q13, q9, #24 + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 - veor q8, q14, q2 - veor q9, q15, q3 - vshl.u32 q14, q8, #8 - vshl.u32 q15, q9, #8 - vsri.u32 q14, q8, #24 - vsri.u32 q15, q9, #24 + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 vld1.32 {q8-q9}, [sp, :256] @@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon) // x1 += x6, x12 = rotl32(x12 ^ x1, 8) // x2 += x7, x13 = rotl32(x13 ^ x2, 8) // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vld1.8 {d16}, [ip, :64] vadd.i32 q0, q0, q5 vadd.i32 q1, q1, q6 vadd.i32 q2, q2, q7 vadd.i32 q3, q3, q4 - veor q8, q15, q0 - veor q9, q12, q1 - vshl.u32 q15, q8, #8 - vshl.u32 q12, q9, #8 - vsri.u32 q15, q8, #24 - vsri.u32 q12, q9, #24 + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 - veor q8, q13, q2 - veor q9, q14, q3 - vshl.u32 q13, q8, #8 - vshl.u32 q14, q9, #8 - vsri.u32 q13, q8, #24 - vsri.u32 q14, q9, #24 + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 vld1.32 {q8-q9}, [sp, :256] @@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon) vsri.u32 q6, q9, #25 subs r3, r3, #1 - beq 0f - - vld1.32 {q8-q9}, [sp, :256] - b .Ldoubleround4 - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] -0: ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q0, q0, q8 - vadd.i32 q1, q1, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q2, q2, q8 - vadd.i32 q3, q3, q9 - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q4, q4, q8 - vadd.i32 q5, q5, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q6, q6, q8 - vadd.i32 q7, q7, q9 - - // interleave 32-bit words in state n, n+1 - vzip.32 q0, q1 - vzip.32 q2, q3 - vzip.32 q4, q5 - vzip.32 q6, q7 - - // interleave 64-bit words in state n, n+2 + bne .Ldoubleround4 + + // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. + // x8..9[0-3] are on the stack. + + // Re-interleave the words in the first two rows of each block (x0..7). + // Also add the counter values 0-3 to x12[0-3]. + vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) + vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) + vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) + vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7) + vadd.u32 q12, q8 // x12 += counter values 0-3 vswp d1, d4 vswp d3, d6 + vld1.32 {q8-q9}, [r0]! // load s0..7 vswp d9, d12 vswp d11, d14 - // xor with corresponding input, write to output + // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1) + // after XORing the first 32 bytes. + vswp q1, q4 + + // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7) + + // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block) + vadd.u32 q0, q0, q8 + vadd.u32 q2, q2, q8 + vadd.u32 q4, q4, q8 + vadd.u32 q3, q3, q8 + + // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block) + vadd.u32 q1, q1, q9 + vadd.u32 q6, q6, q9 + vadd.u32 q5, q5, q9 + vadd.u32 q7, q7, q9 + + // XOR first 32 bytes using keystream from first two rows of first block vld1.8 {q8-q9}, [r2]! veor q8, q8, q0 - veor q9, q9, q4 + veor q9, q9, q1 vst1.8 {q8-q9}, [r1]! + // Re-interleave the words in the last two rows of each block (x8..15). vld1.32 {q8-q9}, [sp, :256] - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - vadd.i32 q8, q8, q0 - vadd.i32 q9, q9, q4 - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q10, q10, q0 - vadd.i32 q11, q11, q4 - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - adr r3, CTRINC - vadd.i32 q12, q12, q0 - vld1.32 {q0}, [r3, :128] - vadd.i32 q13, q13, q4 - vadd.i32 q12, q12, q0 // x12 += counter values 0-3 - - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q14, q14, q0 - vadd.i32 q15, q15, q4 - - // interleave 32-bit words in state n, n+1 - vzip.32 q8, q9 - vzip.32 q10, q11 - vzip.32 q12, q13 - vzip.32 q14, q15 - - // interleave 64-bit words in state n, n+2 - vswp d17, d20 - vswp d19, d22 + vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) + vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) + vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) + vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11) + vld1.32 {q0-q1}, [r0] // load s8..15 vswp d25, d28 vswp d27, d30 + vswp d17, d20 + vswp d19, d22 + + // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15) + + // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block) + vadd.u32 q8, q8, q0 + vadd.u32 q10, q10, q0 + vadd.u32 q9, q9, q0 + vadd.u32 q11, q11, q0 + + // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block) + vadd.u32 q12, q12, q1 + vadd.u32 q14, q14, q1 + vadd.u32 q13, q13, q1 + vadd.u32 q15, q15, q1 - vmov q4, q1 + // XOR the rest of the data with the keystream vld1.8 {q0-q1}, [r2]! veor q0, q0, q8 @@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon) vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2] + mov sp, r4 // restore original stack pointer veor q0, q0, q11 veor q1, q1, q15 vst1.8 {q0-q1}, [r1] - mov sp, ip - pop {r4-r6, pc} + pop {r4-r5} + bx lr ENDPROC(chacha20_4block_xor_neon) - - .align 4 -CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 96e62ec105d0..cd9e93b46c2d 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void) ARRAY_SIZE(crc32_pmull_algs)); } -static const struct cpu_feature crc32_cpu_feature[] = { +static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = { { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } }; MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index 2f78c10b1881..406009afa9cf 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -63,6 +63,33 @@ k48 .req d31 SHASH2_p64 .req d31 + HH .req q10 + HH3 .req q11 + HH4 .req q12 + HH34 .req q13 + + HH_L .req d20 + HH_H .req d21 + HH3_L .req d22 + HH3_H .req d23 + HH4_L .req d24 + HH4_H .req d25 + HH34_L .req d26 + HH34_H .req d27 + SHASH2_H .req d29 + + XL2 .req q5 + XM2 .req q6 + XH2 .req q7 + T3 .req q8 + + XL2_L .req d10 + XL2_H .req d11 + XM2_L .req d12 + XM2_H .req d13 + T3_L .req d16 + T3_H .req d17 + .text .fpu crypto-neon-fp-armv8 @@ -175,12 +202,77 @@ beq 0f vld1.64 {T1}, [ip] teq r0, #0 - b 1f + b 3f + +0: .ifc \pn, p64 + tst r0, #3 // skip until #blocks is a + bne 2f // round multiple of 4 + + vld1.8 {XL2-XM2}, [r2]! +1: vld1.8 {T3-T2}, [r2]! + vrev64.8 XL2, XL2 + vrev64.8 XM2, XM2 + + subs r0, r0, #4 + + vext.8 T1, XL2, XL2, #8 + veor XL2_H, XL2_H, XL_L + veor XL, XL, T1 + + vrev64.8 T3, T3 + vrev64.8 T1, T2 + + vmull.p64 XH, HH4_H, XL_H // a1 * b1 + veor XL2_H, XL2_H, XL_H + vmull.p64 XL, HH4_L, XL_L // a0 * b0 + vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0) + + vmull.p64 XH2, HH3_H, XM2_L // a1 * b1 + veor XM2_L, XM2_L, XM2_H + vmull.p64 XL2, HH3_L, XM2_H // a0 * b0 + vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0) + + veor XH, XH, XH2 + veor XL, XL, XL2 + veor XM, XM, XM2 + + vmull.p64 XH2, HH_H, T3_L // a1 * b1 + veor T3_L, T3_L, T3_H + vmull.p64 XL2, HH_L, T3_H // a0 * b0 + vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0) + + veor XH, XH, XH2 + veor XL, XL, XL2 + veor XM, XM, XM2 + + vmull.p64 XH2, SHASH_H, T1_L // a1 * b1 + veor T1_L, T1_L, T1_H + vmull.p64 XL2, SHASH_L, T1_H // a0 * b0 + vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0) + + veor XH, XH, XH2 + veor XL, XL, XL2 + veor XM, XM, XM2 -0: vld1.64 {T1}, [r2]! + beq 4f + + vld1.8 {XL2-XM2}, [r2]! + + veor T1, XL, XH + veor XM, XM, T1 + + __pmull_reduce_p64 + + veor T1, T1, XH + veor XL, XL, T1 + + b 1b + .endif + +2: vld1.64 {T1}, [r2]! subs r0, r0, #1 -1: /* multiply XL by SHASH in GF(2^128) */ +3: /* multiply XL by SHASH in GF(2^128) */ #ifndef CONFIG_CPU_BIG_ENDIAN vrev64.8 T1, T1 #endif @@ -193,7 +285,7 @@ __pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0 __pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0) - veor T1, XL, XH +4: veor T1, XL, XH veor XM, XM, T1 __pmull_reduce_\pn @@ -212,8 +304,14 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update_p64) - vld1.64 {SHASH}, [r3] + vld1.64 {SHASH}, [r3]! + vld1.64 {HH}, [r3]! + vld1.64 {HH3-HH4}, [r3] + veor SHASH2_p64, SHASH_L, SHASH_H + veor SHASH2_H, HH_L, HH_H + veor HH34_L, HH3_L, HH3_H + veor HH34_H, HH4_L, HH4_H vmov.i8 MASK, #0xe1 vshl.u64 MASK, MASK, #57 diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 8930fc4e7c22..b7d30b6cf49c 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. * - * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash"); #define GHASH_DIGEST_SIZE 16 struct ghash_key { - u64 a; - u64 b; + u64 h[2]; + u64 h2[2]; + u64 h3[2]; + u64 h4[2]; }; struct ghash_desc_ctx { @@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) return 0; } +static void ghash_reflect(u64 h[], const be128 *k) +{ + u64 carry = be64_to_cpu(k->a) >> 63; + + h[0] = (be64_to_cpu(k->b) << 1) | carry; + h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63); + + if (carry) + h[1] ^= 0xc200000000000000UL; +} + static int ghash_setkey(struct crypto_shash *tfm, const u8 *inkey, unsigned int keylen) { struct ghash_key *key = crypto_shash_ctx(tfm); - u64 a, b; + be128 h, k; if (keylen != GHASH_BLOCK_SIZE) { crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } - /* perform multiplication by 'x' in GF(2^128) */ - b = get_unaligned_be64(inkey); - a = get_unaligned_be64(inkey + 8); + memcpy(&k, inkey, GHASH_BLOCK_SIZE); + ghash_reflect(key->h, &k); + + h = k; + gf128mul_lle(&h, &k); + ghash_reflect(key->h2, &h); - key->a = (a << 1) | (b >> 63); - key->b = (b << 1) | (a >> 63); + gf128mul_lle(&h, &k); + ghash_reflect(key->h3, &h); - if (b >> 63) - key->b ^= 0xc200000000000000UL; + gf128mul_lle(&h, &k); + ghash_reflect(key->h4, &h); return 0; } diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S deleted file mode 100644 index 57caa742016e..000000000000 --- a/arch/arm/crypto/speck-neon-core.S +++ /dev/null @@ -1,434 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Author: Eric Biggers <ebiggers@google.com> - */ - -#include <linux/linkage.h> - - .text - .fpu neon - - // arguments - ROUND_KEYS .req r0 // const {u64,u32} *round_keys - NROUNDS .req r1 // int nrounds - DST .req r2 // void *dst - SRC .req r3 // const void *src - NBYTES .req r4 // unsigned int nbytes - TWEAK .req r5 // void *tweak - - // registers which hold the data being encrypted/decrypted - X0 .req q0 - X0_L .req d0 - X0_H .req d1 - Y0 .req q1 - Y0_H .req d3 - X1 .req q2 - X1_L .req d4 - X1_H .req d5 - Y1 .req q3 - Y1_H .req d7 - X2 .req q4 - X2_L .req d8 - X2_H .req d9 - Y2 .req q5 - Y2_H .req d11 - X3 .req q6 - X3_L .req d12 - X3_H .req d13 - Y3 .req q7 - Y3_H .req d15 - - // the round key, duplicated in all lanes - ROUND_KEY .req q8 - ROUND_KEY_L .req d16 - ROUND_KEY_H .req d17 - - // index vector for vtbl-based 8-bit rotates - ROTATE_TABLE .req d18 - - // multiplication table for updating XTS tweaks - GF128MUL_TABLE .req d19 - GF64MUL_TABLE .req d19 - - // current XTS tweak value(s) - TWEAKV .req q10 - TWEAKV_L .req d20 - TWEAKV_H .req d21 - - TMP0 .req q12 - TMP0_L .req d24 - TMP0_H .req d25 - TMP1 .req q13 - TMP2 .req q14 - TMP3 .req q15 - - .align 4 -.Lror64_8_table: - .byte 1, 2, 3, 4, 5, 6, 7, 0 -.Lror32_8_table: - .byte 1, 2, 3, 0, 5, 6, 7, 4 -.Lrol64_8_table: - .byte 7, 0, 1, 2, 3, 4, 5, 6 -.Lrol32_8_table: - .byte 3, 0, 1, 2, 7, 4, 5, 6 -.Lgf128mul_table: - .byte 0, 0x87 - .fill 14 -.Lgf64mul_table: - .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b - .fill 12 - -/* - * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time - * - * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for - * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes - * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. - * - * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because - * the vtbl approach is faster on some processors and the same speed on others. - */ -.macro _speck_round_128bytes n - - // x = ror(x, 8) - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE - - // x += y - vadd.u\n X0, Y0 - vadd.u\n X1, Y1 - vadd.u\n X2, Y2 - vadd.u\n X3, Y3 - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // y = rol(y, 3) - vshl.u\n TMP0, Y0, #3 - vshl.u\n TMP1, Y1, #3 - vshl.u\n TMP2, Y2, #3 - vshl.u\n TMP3, Y3, #3 - vsri.u\n TMP0, Y0, #(\n - 3) - vsri.u\n TMP1, Y1, #(\n - 3) - vsri.u\n TMP2, Y2, #(\n - 3) - vsri.u\n TMP3, Y3, #(\n - 3) - - // y ^= x - veor Y0, TMP0, X0 - veor Y1, TMP1, X1 - veor Y2, TMP2, X2 - veor Y3, TMP3, X3 -.endm - -/* - * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time - * - * This is the inverse of _speck_round_128bytes(). - */ -.macro _speck_unround_128bytes n - - // y ^= x - veor TMP0, Y0, X0 - veor TMP1, Y1, X1 - veor TMP2, Y2, X2 - veor TMP3, Y3, X3 - - // y = ror(y, 3) - vshr.u\n Y0, TMP0, #3 - vshr.u\n Y1, TMP1, #3 - vshr.u\n Y2, TMP2, #3 - vshr.u\n Y3, TMP3, #3 - vsli.u\n Y0, TMP0, #(\n - 3) - vsli.u\n Y1, TMP1, #(\n - 3) - vsli.u\n Y2, TMP2, #(\n - 3) - vsli.u\n Y3, TMP3, #(\n - 3) - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // x -= y - vsub.u\n X0, Y0 - vsub.u\n X1, Y1 - vsub.u\n X2, Y2 - vsub.u\n X3, Y3 - - // x = rol(x, 8); - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE -.endm - -.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp - - // Load the next source block - vld1.8 {\dst_reg}, [SRC]! - - // Save the current tweak in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next source block with the current tweak - veor \dst_reg, TWEAKV - - /* - * Calculate the next tweak by multiplying the current one by x, - * modulo p(x) = x^128 + x^7 + x^2 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #63 - vshl.u64 TWEAKV, #1 - veor TWEAKV_H, \tmp\()_L - vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H - veor TWEAKV_L, \tmp\()_H -.endm - -.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp - - // Load the next two source blocks - vld1.8 {\dst_reg}, [SRC]! - - // Save the current two tweaks in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next two source blocks with the current two tweaks - veor \dst_reg, TWEAKV - - /* - * Calculate the next two tweaks by multiplying the current ones by x^2, - * modulo p(x) = x^64 + x^4 + x^3 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #62 - vshl.u64 TWEAKV, #2 - vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L - vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H - veor TWEAKV, \tmp -.endm - -/* - * _speck_xts_crypt() - Speck-XTS encryption/decryption - * - * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer - * using Speck-XTS, specifically the variant with a block size of '2n' and round - * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and - * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a - * nonzero multiple of 128. - */ -.macro _speck_xts_crypt n, decrypting - push {r4-r7} - mov r7, sp - - /* - * The first four parameters were passed in registers r0-r3. Load the - * additional parameters, which were passed on the stack. - */ - ldr NBYTES, [sp, #16] - ldr TWEAK, [sp, #20] - - /* - * If decrypting, modify the ROUND_KEYS parameter to point to the last - * round key rather than the first, since for decryption the round keys - * are used in reverse order. - */ -.if \decrypting -.if \n == 64 - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3 - sub ROUND_KEYS, #8 -.else - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2 - sub ROUND_KEYS, #4 -.endif -.endif - - // Load the index vector for vtbl-based 8-bit rotates -.if \decrypting - ldr r12, =.Lrol\n\()_8_table -.else - ldr r12, =.Lror\n\()_8_table -.endif - vld1.8 {ROTATE_TABLE}, [r12:64] - - // One-time XTS preparation - - /* - * Allocate stack space to store 128 bytes worth of tweaks. For - * performance, this space is aligned to a 16-byte boundary so that we - * can use the load/store instructions that declare 16-byte alignment. - * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'. - */ - sub r12, sp, #128 - bic r12, #0xf - mov sp, r12 - -.if \n == 64 - // Load first tweak - vld1.8 {TWEAKV}, [TWEAK] - - // Load GF(2^128) multiplication table - ldr r12, =.Lgf128mul_table - vld1.8 {GF128MUL_TABLE}, [r12:64] -.else - // Load first tweak - vld1.8 {TWEAKV_L}, [TWEAK] - - // Load GF(2^64) multiplication table - ldr r12, =.Lgf64mul_table - vld1.8 {GF64MUL_TABLE}, [r12:64] - - // Calculate second tweak, packing it together with the first - vshr.u64 TMP0_L, TWEAKV_L, #63 - vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L - vshl.u64 TWEAKV_H, TWEAKV_L, #1 - veor TWEAKV_H, TMP0_L -.endif - -.Lnext_128bytes_\@: - - /* - * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak - * values, and save the tweaks on the stack for later. Then - * de-interleave the 'x' and 'y' elements of each block, i.e. make it so - * that the X[0-3] registers contain only the second halves of blocks, - * and the Y[0-3] registers contain only the first halves of blocks. - * (Speck uses the order (y, x) rather than the more intuitive (x, y).) - */ - mov r12, sp -.if \n == 64 - _xts128_precrypt_one X0, r12, TMP0 - _xts128_precrypt_one Y0, r12, TMP0 - _xts128_precrypt_one X1, r12, TMP0 - _xts128_precrypt_one Y1, r12, TMP0 - _xts128_precrypt_one X2, r12, TMP0 - _xts128_precrypt_one Y2, r12, TMP0 - _xts128_precrypt_one X3, r12, TMP0 - _xts128_precrypt_one Y3, r12, TMP0 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - _xts64_precrypt_two X0, r12, TMP0 - _xts64_precrypt_two Y0, r12, TMP0 - _xts64_precrypt_two X1, r12, TMP0 - _xts64_precrypt_two Y1, r12, TMP0 - _xts64_precrypt_two X2, r12, TMP0 - _xts64_precrypt_two Y2, r12, TMP0 - _xts64_precrypt_two X3, r12, TMP0 - _xts64_precrypt_two Y3, r12, TMP0 - vuzp.32 Y0, X0 - vuzp.32 Y1, X1 - vuzp.32 Y2, X2 - vuzp.32 Y3, X3 -.endif - - // Do the cipher rounds - - mov r12, ROUND_KEYS - mov r6, NROUNDS - -.Lnext_round_\@: -.if \decrypting -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12] - sub r12, #8 - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12] - sub r12, #4 -.endif - _speck_unround_128bytes \n -.else -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12]! - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]! -.endif - _speck_round_128bytes \n -.endif - subs r6, r6, #1 - bne .Lnext_round_\@ - - // Re-interleave the 'x' and 'y' elements of each block -.if \n == 64 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - vzip.32 Y0, X0 - vzip.32 Y1, X1 - vzip.32 Y2, X2 - vzip.32 Y3, X3 -.endif - - // XOR the encrypted/decrypted blocks with the tweaks we saved earlier - mov r12, sp - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X0, TMP0 - veor Y0, TMP1 - veor X1, TMP2 - veor Y1, TMP3 - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X2, TMP0 - veor Y2, TMP1 - veor X3, TMP2 - veor Y3, TMP3 - - // Store the ciphertext in the destination buffer - vst1.8 {X0, Y0}, [DST]! - vst1.8 {X1, Y1}, [DST]! - vst1.8 {X2, Y2}, [DST]! - vst1.8 {X3, Y3}, [DST]! - - // Continue if there are more 128-byte chunks remaining, else return - subs NBYTES, #128 - bne .Lnext_128bytes_\@ - - // Store the next tweak -.if \n == 64 - vst1.8 {TWEAKV}, [TWEAK] -.else - vst1.8 {TWEAKV_L}, [TWEAK] -.endif - - mov sp, r7 - pop {r4-r7} - bx lr -.endm - -ENTRY(speck128_xts_encrypt_neon) - _speck_xts_crypt n=64, decrypting=0 -ENDPROC(speck128_xts_encrypt_neon) - -ENTRY(speck128_xts_decrypt_neon) - _speck_xts_crypt n=64, decrypting=1 -ENDPROC(speck128_xts_decrypt_neon) - -ENTRY(speck64_xts_encrypt_neon) - _speck_xts_crypt n=32, decrypting=0 -ENDPROC(speck64_xts_encrypt_neon) - -ENTRY(speck64_xts_decrypt_neon) - _speck_xts_crypt n=32, decrypting=1 -ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c deleted file mode 100644 index f012c3ea998f..000000000000 --- a/arch/arm/crypto/speck-neon-glue.c +++ /dev/null @@ -1,288 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Note: the NIST recommendation for XTS only specifies a 128-bit block size, - * but a 64-bit version (needed for Speck64) is fairly straightforward; the math - * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial - * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004: - * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes - * OCB and PMAC"), represented as 0x1B. - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/algapi.h> -#include <crypto/gf128mul.h> -#include <crypto/internal/skcipher.h> -#include <crypto/speck.h> -#include <crypto/xts.h> -#include <linux/kernel.h> -#include <linux/module.h> - -/* The assembly functions only handle multiples of 128 bytes */ -#define SPECK_NEON_CHUNK_SIZE 128 - -/* Speck128 */ - -struct speck128_xts_tfm_ctx { - struct speck128_tfm_ctx main_key; - struct speck128_tfm_ctx tweak_key; -}; - -asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck128_xts_crypt(struct skcipher_request *req, - speck128_crypt_one_t crypt_one, - speck128_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - le128 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - le128_xor((le128 *)dst, (const le128 *)src, &tweak); - (*crypt_one)(&ctx->main_key, dst, dst); - le128_xor((le128 *)dst, (const le128 *)dst, &tweak); - gf128mul_x_ble(&tweak, &tweak); - - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck128_xts_encrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_encrypt, - speck128_xts_encrypt_neon); -} - -static int speck128_xts_decrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_decrypt, - speck128_xts_decrypt_neon); -} - -static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck128_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -/* Speck64 */ - -struct speck64_xts_tfm_ctx { - struct speck64_tfm_ctx main_key; - struct speck64_tfm_ctx tweak_key; -}; - -asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one, - speck64_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - __le64 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - *(__le64 *)dst = *(__le64 *)src ^ tweak; - (*crypt_one)(&ctx->main_key, dst, dst); - *(__le64 *)dst ^= tweak; - tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ - ((tweak & cpu_to_le64(1ULL << 63)) ? - 0x1B : 0)); - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck64_xts_encrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_encrypt, - speck64_xts_encrypt_neon); -} - -static int speck64_xts_decrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_decrypt, - speck64_xts_decrypt_neon); -} - -static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck64_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -static struct skcipher_alg speck_algs[] = { - { - .base.cra_name = "xts(speck128)", - .base.cra_driver_name = "xts-speck128-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK128_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK128_128_KEY_SIZE, - .max_keysize = 2 * SPECK128_256_KEY_SIZE, - .ivsize = SPECK128_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck128_xts_setkey, - .encrypt = speck128_xts_encrypt, - .decrypt = speck128_xts_decrypt, - }, { - .base.cra_name = "xts(speck64)", - .base.cra_driver_name = "xts-speck64-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK64_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK64_96_KEY_SIZE, - .max_keysize = 2 * SPECK64_128_KEY_SIZE, - .ivsize = SPECK64_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck64_xts_setkey, - .encrypt = speck64_xts_encrypt, - .decrypt = speck64_xts_decrypt, - } -}; - -static int __init speck_neon_module_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_neon_module_exit(void) -{ - crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_neon_module_init); -module_exit(speck_neon_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); -MODULE_ALIAS_CRYPTO("xts(speck128)"); -MODULE_ALIAS_CRYPTO("xts-speck128-neon"); -MODULE_ALIAS_CRYPTO("xts(speck64)"); -MODULE_ALIAS_CRYPTO("xts-speck64-neon"); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index b17ee03d280b..88286dd483ff 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -467,6 +467,17 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req +#ifdef CONFIG_CPU_SPECTRE + sub \tmp, \limit, #1 + subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr + addhs \tmp, \tmp, #1 @ if (tmp >= 0) { + subhss \tmp, \tmp, \size @ tmp = limit - (addr + size) } + movlo \addr, #0 @ if (tmp < 0) addr = NULL + csdb +#endif + .endm + .macro uaccess_disable, tmp, isb=1 #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 237aa52d8733..36c951dd23b8 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -62,8 +62,8 @@ do { \ struct pt_regs; void die(const char *msg, struct pt_regs *regs, int err); -struct siginfo; -void arm_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, +void arm_notify_die(const char *str, struct pt_regs *regs, + int signo, int si_code, void __user *addr, unsigned long err, unsigned long trap); #ifdef CONFIG_ARM_LPAE diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 9e842ff41768..18b0197f2384 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -16,9 +16,6 @@ extern void __gnu_mcount_nc(void); #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { -#ifdef CONFIG_OLD_MCOUNT - bool old_mcount; -#endif }; static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h index d4014fbe5ea3..0d9f3918fa7e 100644 --- a/arch/arm/include/asm/hugetlb-3level.h +++ b/arch/arm/include/asm/hugetlb-3level.h @@ -29,6 +29,7 @@ * ptes. * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes). */ +#define __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(pte_t *ptep) { pte_t retval = *ptep; @@ -37,35 +38,4 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return retval; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} - #endif /* _ASM_ARM_HUGETLB_3LEVEL_H */ diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 7d26f6c4f0f5..b67256c22b08 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -23,18 +23,8 @@ #define _ASM_ARM_HUGETLB_H #include <asm/page.h> -#include <asm-generic/hugetlb.h> - #include <asm/hugetlb-3level.h> - -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, - unsigned long ceiling) -{ - free_pgd_range(tlb, addr, end, floor, ceiling); -} - +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) @@ -42,27 +32,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - struct hstate *h = hstate_file(file); - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; -} - -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -static inline pte_t huge_pte_wrprotect(pte_t pte) -{ - return pte_wrprotect(pte); -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 2d43dca29c72..b95f8d0d9f17 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -133,8 +133,7 @@ * space. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) + #define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3ad482d2f1eb..5ca5d9af0c26 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) +static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; } @@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) +{ + /* + * On 32bit ARM, VMs get a static 40bit IPA stage2 setup, + * so any non-zero value used as type is illegal. + */ + if (type) + return -EINVAL; + return 0; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 847f01fa429d..1098ffc3d54b 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -35,16 +35,12 @@ addr; \ }) -/* - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. - */ -#define KVM_MMU_CACHE_MIN_PAGES 2 - #ifndef __ASSEMBLY__ #include <linux/highmem.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> #include <asm/pgalloc.h> #include <asm/stage2_pgtable.h> @@ -52,6 +48,13 @@ /* Ensure compatibility with arm64 */ #define VA_BITS 32 +#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT +#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm)) +#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL) +#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK + +#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t)) + int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, @@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline void kvm_set_ipa_limit(void) {} + static inline bool kvm_cpu_has_cnp(void) { return false; diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index 460d616bb2d6..f6a7ea805232 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -19,43 +19,53 @@ #ifndef __ARM_S2_PGTABLE_H_ #define __ARM_S2_PGTABLE_H_ -#define stage2_pgd_none(pgd) pgd_none(pgd) -#define stage2_pgd_clear(pgd) pgd_clear(pgd) -#define stage2_pgd_present(pgd) pgd_present(pgd) -#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) -#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) -#define stage2_pud_free(pud) pud_free(NULL, pud) - -#define stage2_pud_none(pud) pud_none(pud) -#define stage2_pud_clear(pud) pud_clear(pud) -#define stage2_pud_present(pud) pud_present(pud) -#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) -#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) -#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) - -#define stage2_pud_huge(pud) pud_huge(pud) +/* + * kvm_mmu_cache_min_pages() is the number of pages required + * to install a stage-2 translation. We pre-allocate the entry + * level table at VM creation. Since we have a 3 level page-table, + * we need only two pages to add a new mapping. + */ +#define kvm_mmu_cache_min_pages(kvm) 2 + +#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) +#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) +#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) +#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud) +#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address) +#define stage2_pud_free(kvm, pud) pud_free(NULL, pud) + +#define stage2_pud_none(kvm, pud) pud_none(pud) +#define stage2_pud_clear(kvm, pud) pud_clear(pud) +#define stage2_pud_present(kvm, pud) pud_present(pud) +#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd) +#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address) +#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd) + +#define stage2_pud_huge(kvm, pud) pud_huge(pud) /* Open coded p*d_addr_end that can deal with 64bit addresses */ -static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; return (boundary - 1 < end - 1) ? boundary : end; } -#define stage2_pud_addr_end(addr, end) (end) +#define stage2_pud_addr_end(kvm, addr, end) (end) -static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; return (boundary - 1 < end - 1) ? boundary : end; } -#define stage2_pgd_index(addr) pgd_index(addr) +#define stage2_pgd_index(kvm, addr) pgd_index(addr) -#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) -#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) -#define stage2_pud_table_empty(pudp) false +#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) +#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) +#define stage2_pud_table_empty(kvm, pudp) false #endif /* __ARM_S2_PGTABLE_H_ */ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 9b37b6ab27fe..8f55dc520a3e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -121,8 +121,8 @@ extern void vfp_flush_hwstate(struct thread_info *); struct user_vfp; struct user_vfp_exc; -extern int vfp_preserve_user_clear_hwstate(struct user_vfp __user *, - struct user_vfp_exc __user *); +extern int vfp_preserve_user_clear_hwstate(struct user_vfp *, + struct user_vfp_exc *); extern int vfp_restore_user_hwstate(struct user_vfp *, struct user_vfp_exc *); #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 5451e1f05a19..c136eef8f690 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -69,6 +69,14 @@ extern int __put_user_bad(void); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Prevent a mispredicted conditional call to set_fs from forwarding + * the wrong address limit to access_ok under speculation. + */ + dsb(nsh); + isb(); + modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } @@ -92,6 +100,32 @@ static inline void set_fs(mm_segment_t fs) __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) /* + * Sanitise a uaccess pointer such that it becomes NULL if addr+size + * is above the current addr_limit. + */ +#define uaccess_mask_range_ptr(ptr, size) \ + ((__typeof__(ptr))__uaccess_mask_range_ptr(ptr, size)) +static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr, + size_t size) +{ + void __user *safe_ptr = (void __user *)ptr; + unsigned long tmp; + + asm volatile( + " sub %1, %3, #1\n" + " subs %1, %1, %0\n" + " addhs %1, %1, #1\n" + " subhss %1, %1, %2\n" + " movlo %0, #0\n" + : "+r" (safe_ptr), "=&r" (tmp) + : "r" (size), "r" (current_thread_info()->addr_limit) + : "cc"); + + csdb(); + return safe_ptr; +} + +/* * Single-value transfer routines. They automatically use the right * size if we just have the right pointer type. Note that the functions * which read from user space (*get_*) need to take care not to leak @@ -362,6 +396,14 @@ do { \ __pu_err; \ }) +#ifdef CONFIG_CPU_SPECTRE +/* + * When mitigating Spectre variant 1.1, all accessors need to include + * verification of the address space. + */ +#define __put_user(x, ptr) put_user(x, ptr) + +#else #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ @@ -369,12 +411,6 @@ do { \ __pu_err; \ }) -#define __put_user_error(x, ptr, err) \ -({ \ - __put_user_switch((x), (ptr), (err), __put_user_nocheck); \ - (void) 0; \ -}) - #define __put_user_nocheck(x, __pu_ptr, __err, __size) \ do { \ unsigned long __pu_addr = (unsigned long)__pu_ptr; \ @@ -454,6 +490,7 @@ do { \ : "r" (x), "i" (-EFAULT) \ : "cc") +#endif /* !CONFIG_CPU_SPECTRE */ #ifdef CONFIG_MMU extern unsigned long __must_check diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 076090d2dbf5..88ef2ce1f69a 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -16,23 +16,23 @@ #include <uapi/asm/unistd.h> #include <asm/unistd-nr.h> +#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_GETPGRP -#define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT +#define __ARCH_WANT_SYS_UTIME #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_ALARM -#define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_SOCKETCALL diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 783fbb4de5f9..8fa2dc21d332 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -167,9 +167,6 @@ EXPORT_SYMBOL(_find_next_bit_be); #endif #ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_OLD_MCOUNT -EXPORT_SYMBOL(mcount); -#endif EXPORT_SYMBOL(__gnu_mcount_nc); #endif diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index ecaa68dd1af5..13bcd3b867cb 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -87,14 +87,11 @@ void __init arm_dt_init_cpu_maps(void) if (!cpus) return; - for_each_child_of_node(cpus, cpu) { + for_each_of_cpu_node(cpu) { const __be32 *cell; int prop_bytes; u32 hwid; - if (of_node_cmp(cpu->type, "cpu")) - continue; - pr_debug(" * %pOF...\n", cpu); /* * A device tree containing CPU nodes with missing "reg" diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 746565a876dc..0465d65d23de 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -296,16 +296,15 @@ __sys_trace: cmp scno, #-1 @ skip the syscall? bne 2b add sp, sp, #S_OFF @ restore stack - b ret_slow_syscall -__sys_trace_return: - str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 +__sys_trace_return_nosave: + enable_irq_notrace mov r0, sp bl syscall_trace_exit b ret_slow_syscall -__sys_trace_return_nosave: - enable_irq_notrace +__sys_trace_return: + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 mov r0, sp bl syscall_trace_exit b ret_slow_syscall diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index efcd9f25a14b..0be69e551a64 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -15,23 +15,8 @@ * start of every function. In mcount, apart from the function's address (in * lr), we need to get hold of the function's caller's address. * - * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this: - * - * bl mcount - * - * These versions have the limitation that in order for the mcount routine to - * be able to determine the function's caller's address, an APCS-style frame - * pointer (which is set up with something like the code below) is required. - * - * mov ip, sp - * push {fp, ip, lr, pc} - * sub fp, ip, #4 - * - * With EABI, these frame pointers are not available unless -mapcs-frame is - * specified, and if building as Thumb-2, not even then. - * - * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount, - * with call sites like: + * Newer GCCs (4.4+) solve this problem by using a version of mcount with call + * sites like: * * push {lr} * bl __gnu_mcount_nc @@ -46,17 +31,10 @@ * allows it to be clobbered in subroutines and doesn't use it to hold * parameters.) * - * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0" - * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see - * arch/arm/kernel/ftrace.c). + * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" + * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). */ -#ifndef CONFIG_OLD_MCOUNT -#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) -#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0. -#endif -#endif - .macro mcount_adjust_addr rd, rn bic \rd, \rn, #1 @ clear the Thumb bit if present sub \rd, \rd, #MCOUNT_INSN_SIZE @@ -209,51 +187,6 @@ ftrace_graph_call\suffix: mcount_exit .endm -#ifdef CONFIG_OLD_MCOUNT -/* - * mcount - */ - -.macro mcount_enter - stmdb sp!, {r0-r3, lr} -.endm - -.macro mcount_get_lr reg - ldr \reg, [fp, #-4] -.endm - -.macro mcount_exit - ldr lr, [fp, #-4] - ldmia sp!, {r0-r3, pc} -.endm - -ENTRY(mcount) -#ifdef CONFIG_DYNAMIC_FTRACE - stmdb sp!, {lr} - ldr lr, [fp, #-4] - ldmia sp!, {pc} -#else - __mcount _old -#endif -ENDPROC(mcount) - -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(ftrace_caller_old) - __ftrace_caller _old -ENDPROC(ftrace_caller_old) -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller_old) - __ftrace_graph_caller -ENDPROC(ftrace_graph_caller_old) -#endif - -.purgem mcount_enter -.purgem mcount_get_lr -.purgem mcount_exit -#endif - /* * __gnu_mcount_nc */ diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 5617932a83df..0142fcfcc3d3 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -47,30 +47,6 @@ void arch_ftrace_update_code(int command) stop_machine(__ftrace_modify_code, &command, NULL); } -#ifdef CONFIG_OLD_MCOUNT -#define OLD_MCOUNT_ADDR ((unsigned long) mcount) -#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) - -#define OLD_NOP 0xe1a00000 /* mov r0, r0 */ - -static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) -{ - return rec->arch.old_mcount ? OLD_NOP : NOP; -} - -static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) -{ - if (!rec->arch.old_mcount) - return addr; - - if (addr == MCOUNT_ADDR) - addr = OLD_MCOUNT_ADDR; - else if (addr == FTRACE_ADDR) - addr = OLD_FTRACE_ADDR; - - return addr; -} -#else static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) { return NOP; @@ -80,7 +56,6 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) { return addr; } -#endif int ftrace_arch_code_modify_prepare(void) { @@ -150,15 +125,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) } #endif -#ifdef CONFIG_OLD_MCOUNT - if (!ret) { - pc = (unsigned long)&ftrace_call_old; - new = ftrace_call_replace(pc, (unsigned long)func); - - ret = ftrace_modify_code(pc, 0, new, false); - } -#endif - return ret; } @@ -203,16 +169,6 @@ int ftrace_make_nop(struct module *mod, new = ftrace_nop_replace(rec); ret = ftrace_modify_code(ip, old, new, true); -#ifdef CONFIG_OLD_MCOUNT - if (ret == -EINVAL && addr == MCOUNT_ADDR) { - rec->arch.old_mcount = true; - - old = ftrace_call_replace(ip, adjust_address(rec, addr)); - new = ftrace_nop_replace(rec); - ret = ftrace_modify_code(ip, old, new, true); - } -#endif - return ret; } @@ -290,13 +246,6 @@ static int ftrace_modify_graph_caller(bool enable) #endif -#ifdef CONFIG_OLD_MCOUNT - if (!ret) - ret = __ftrace_modify_caller(&ftrace_graph_call_old, - ftrace_graph_caller_old, - enable); -#endif - return ret; } diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 36718a424358..6fa5b6387556 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -203,15 +203,8 @@ void ptrace_disable(struct task_struct *child) */ void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) { - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *)instruction_pointer(regs); - - force_sig_info(SIGTRAP, &info, tsk); + force_sig_fault(SIGTRAP, TRAP_BRKPT, + (void __user *)instruction_pointer(regs), tsk); } static int break_trap(struct pt_regs *regs, unsigned int instr) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index b8f766cf3a90..b908382b69ff 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -77,8 +77,6 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) kframe->magic = IWMMXT_MAGIC; kframe->size = IWMMXT_STORAGE_SIZE; iwmmxt_task_copy(current_thread_info(), &kframe->storage); - - err = __copy_to_user(frame, kframe, sizeof(*frame)); } else { /* * For bug-compatibility with older kernels, some space @@ -86,10 +84,14 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) * Set the magic and size appropriately so that properly * written userspace can skip it reliably: */ - __put_user_error(DUMMY_MAGIC, &frame->magic, err); - __put_user_error(IWMMXT_STORAGE_SIZE, &frame->size, err); + *kframe = (struct iwmmxt_sigframe) { + .magic = DUMMY_MAGIC, + .size = IWMMXT_STORAGE_SIZE, + }; } + err = __copy_to_user(frame, kframe, sizeof(*kframe)); + return err; } @@ -135,17 +137,18 @@ static int restore_iwmmxt_context(char __user **auxp) static int preserve_vfp_context(struct vfp_sigframe __user *frame) { - const unsigned long magic = VFP_MAGIC; - const unsigned long size = VFP_STORAGE_SIZE; + struct vfp_sigframe kframe; int err = 0; - __put_user_error(magic, &frame->magic, err); - __put_user_error(size, &frame->size, err); + memset(&kframe, 0, sizeof(kframe)); + kframe.magic = VFP_MAGIC; + kframe.size = VFP_STORAGE_SIZE; + err = vfp_preserve_user_clear_hwstate(&kframe.ufp, &kframe.ufp_exc); if (err) - return -EFAULT; + return err; - return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); + return __copy_to_user(frame, &kframe, sizeof(kframe)); } static int restore_vfp_context(char __user **auxp) @@ -288,30 +291,35 @@ static int setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct aux_sigframe __user *aux; + struct sigcontext context; int err = 0; - __put_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err); - __put_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err); - __put_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err); - __put_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err); - __put_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err); - __put_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err); - __put_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err); - __put_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err); - __put_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err); - __put_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err); - __put_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err); - __put_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err); - __put_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err); - __put_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err); - __put_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err); - __put_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err); - - __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no, err); - __put_user_error(current->thread.error_code, &sf->uc.uc_mcontext.error_code, err); - __put_user_error(current->thread.address, &sf->uc.uc_mcontext.fault_address, err); - __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); + context = (struct sigcontext) { + .arm_r0 = regs->ARM_r0, + .arm_r1 = regs->ARM_r1, + .arm_r2 = regs->ARM_r2, + .arm_r3 = regs->ARM_r3, + .arm_r4 = regs->ARM_r4, + .arm_r5 = regs->ARM_r5, + .arm_r6 = regs->ARM_r6, + .arm_r7 = regs->ARM_r7, + .arm_r8 = regs->ARM_r8, + .arm_r9 = regs->ARM_r9, + .arm_r10 = regs->ARM_r10, + .arm_fp = regs->ARM_fp, + .arm_ip = regs->ARM_ip, + .arm_sp = regs->ARM_sp, + .arm_lr = regs->ARM_lr, + .arm_pc = regs->ARM_pc, + .arm_cpsr = regs->ARM_cpsr, + + .trap_no = current->thread.trap_no, + .error_code = current->thread.error_code, + .fault_address = current->thread.address, + .oldmask = set->sig[0], + }; + + err |= __copy_to_user(&sf->uc.uc_mcontext, &context, sizeof(context)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); @@ -328,7 +336,7 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) if (err == 0) err |= preserve_vfp_context(&aux->vfp); #endif - __put_user_error(0, &aux->end_magic, err); + err |= __put_user(0, &aux->end_magic); return err; } @@ -491,7 +499,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) /* * Set uc.uc_flags to a value which sc.trap_no would never have. */ - __put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err); + err = __put_user(0x5ac3c35a, &frame->uc.uc_flags); err |= setup_sigframe(frame, regs, set); if (err == 0) @@ -511,8 +519,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) err |= copy_siginfo_to_user(&frame->info, &ksig->info); - __put_user_error(0, &frame->sig.uc.uc_flags, err); - __put_user_error(NULL, &frame->sig.uc.uc_link, err); + err |= __put_user(0, &frame->sig.uc.uc_flags); + err |= __put_user(NULL, &frame->sig.uc.uc_link); err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp); err |= setup_sigframe(&frame->sig, regs, set); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 80517f293eb9..a188d5e8ab7f 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -98,22 +98,20 @@ static int proc_status_show(struct seq_file *m, void *v) */ static void set_segfault(struct pt_regs *regs, unsigned long addr) { - siginfo_t info; + int si_code; - clear_siginfo(&info); down_read(¤t->mm->mmap_sem); if (find_vma(current->mm, addr) == NULL) - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; else - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; up_read(¤t->mm->mmap_sem); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) instruction_pointer(regs); - pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm_notify_die("Illegal memory access", regs, &info, 0, 0); + arm_notify_die("Illegal memory access", regs, + SIGSEGV, si_code, + (void __user *)instruction_pointer(regs), + 0, 0); abtcounter++; } diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index f0dd4b6ebb63..40da0872170f 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -277,6 +277,7 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, int maxevents, int timeout) { struct epoll_event *kbuf; + struct oabi_epoll_event e; mm_segment_t fs; long ret, err, i; @@ -295,8 +296,11 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, set_fs(fs); err = 0; for (i = 0; i < ret; i++) { - __put_user_error(kbuf[i].events, &events->events, err); - __put_user_error(kbuf[i].data, &events->data, err); + e.events = kbuf[i].events; + e.data = kbuf[i].data; + err = __copy_to_user(events, &e, sizeof(e)); + if (err) + break; events++; } kfree(kbuf); diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 24ac3cab411d..60e375ce1ab2 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -94,12 +94,6 @@ static void __init parse_dt_topology(void) __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), GFP_NOWAIT); - cn = of_find_node_by_path("/cpus"); - if (!cn) { - pr_err("No CPU information found in DT\n"); - return; - } - for_each_possible_cpu(cpu) { const u32 *rate; int len; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index badf02ca3693..2d668cff8ef4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -365,13 +365,14 @@ void die(const char *str, struct pt_regs *regs, int err) } void arm_notify_die(const char *str, struct pt_regs *regs, - struct siginfo *info, unsigned long err, unsigned long trap) + int signo, int si_code, void __user *addr, + unsigned long err, unsigned long trap) { if (user_mode(regs)) { current->thread.error_code = err; current->thread.trap_no = trap; - force_sig_info(info->si_signo, info, current); + force_sig_fault(signo, si_code, addr, current); } else { die(str, regs, err); } @@ -438,10 +439,8 @@ int call_undef_hook(struct pt_regs *regs, unsigned int instr) asmlinkage void do_undefinstr(struct pt_regs *regs) { unsigned int instr; - siginfo_t info; void __user *pc; - clear_siginfo(&info); pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { @@ -485,13 +484,8 @@ die_sig: dump_instr(KERN_INFO, regs); } #endif - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = pc; - - arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); + arm_notify_die("Oops - undefined instruction", regs, + SIGILL, ILL_ILLOPC, pc, 0, 6); } NOKPROBE_SYMBOL(do_undefinstr) @@ -539,9 +533,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason) static int bad_syscall(int n, struct pt_regs *regs) { - siginfo_t info; - - clear_siginfo(&info); if ((current->personality & PER_MASK) != PER_LINUX) { send_sig(SIGSEGV, current, 1); return regs->ARM_r0; @@ -555,13 +546,10 @@ static int bad_syscall(int n, struct pt_regs *regs) } #endif - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLTRP; - info.si_addr = (void __user *)instruction_pointer(regs) - - (thumb_mode(regs) ? 2 : 4); - - arm_notify_die("Oops - bad syscall", regs, &info, n, 0); + arm_notify_die("Oops - bad syscall", regs, SIGILL, ILL_ILLTRP, + (void __user *)instruction_pointer(regs) - + (thumb_mode(regs) ? 2 : 4), + n, 0); return regs->ARM_r0; } @@ -607,20 +595,13 @@ do_cache_op(unsigned long start, unsigned long end, int flags) #define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) asmlinkage int arm_syscall(int no, struct pt_regs *regs) { - siginfo_t info; - - clear_siginfo(&info); if ((no >> 16) != (__ARM_NR_BASE>> 16)) return bad_syscall(no, regs); switch (no & 0xffff) { case 0: /* branch through 0 */ - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = NULL; - - arm_notify_die("branch through zero", regs, &info, 0, 0); + arm_notify_die("branch through zero", regs, + SIGSEGV, SEGV_MAPERR, NULL, 0, 0); return 0; case NR(breakpoint): /* SWI BREAK_POINT */ @@ -688,13 +669,10 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) } } #endif - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLTRP; - info.si_addr = (void __user *)instruction_pointer(regs) - - (thumb_mode(regs) ? 2 : 4); - - arm_notify_die("Oops - bad syscall(2)", regs, &info, no, 0); + arm_notify_die("Oops - bad syscall(2)", regs, SIGILL, ILL_ILLTRP, + (void __user *)instruction_pointer(regs) - + (thumb_mode(regs) ? 2 : 4), + no, 0); return 0; } @@ -744,9 +722,6 @@ asmlinkage void baddataabort(int code, unsigned long instr, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - siginfo_t info; - - clear_siginfo(&info); #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_BADABORT) { @@ -757,12 +732,8 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) } #endif - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *)addr; - - arm_notify_die("unknown data abort code", regs, &info, instr, 0); + arm_notify_die("unknown data abort code", regs, + SIGILL, ILL_ILLOPC, (void __user *)addr, instr, 0); } void __readwrite_bug(const char *fn) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 3593d5c1acd2..8c74037ade22 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -96,7 +96,6 @@ SECTIONS INIT_SETUP(16) INIT_CALLS CON_INITCALL - SECURITY_INITCALL INIT_RAM_FS } diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index a826df3d3814..6709a8d33963 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -93,11 +93,7 @@ ENTRY(arm_copy_from_user) #ifdef CONFIG_CPU_SPECTRE get_thread_info r3 ldr r3, [r3, #TI_ADDR_LIMIT] - adds ip, r1, r2 @ ip=addr+size - sub r3, r3, #1 @ addr_limit - 1 - cmpcc ip, r3 @ if (addr+size > addr_limit - 1) - movcs r1, #0 @ addr = NULL - csdb + uaccess_mask_range_ptr r1, r2, r3, ip #endif #include "copy_template.S" diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index caf5019d8161..970abe521197 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -94,6 +94,11 @@ ENTRY(__copy_to_user_std) WEAK(arm_copy_to_user) +#ifdef CONFIG_CPU_SPECTRE + get_thread_info r3 + ldr r3, [r3, #TI_ADDR_LIMIT] + uaccess_mask_range_ptr r0, r2, r3, ip +#endif #include "copy_template.S" @@ -108,4 +113,3 @@ ENDPROC(__copy_to_user_std) rsb r0, r0, r2 copy_abort_end .popsection - diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 9b4ed1728616..73dc7360cbdd 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -152,7 +152,8 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n) n = __copy_to_user_std(to, from, n); uaccess_restore(ua_flags); } else { - n = __copy_to_user_memcpy(to, from, n); + n = __copy_to_user_memcpy(uaccess_mask_range_ptr(to, n), + from, n); } return n; } diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index a7c6ae13c945..bfe1c4d06901 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -149,6 +149,14 @@ exit_suspend: ENDPROC(at91_pm_suspend_in_sram) ENTRY(at91_backup_mode) + /* Switch the master clock source to slow clock. */ + ldr pmc, .pmc_base + ldr tmp1, [pmc, #AT91_PMC_MCKR] + bic tmp1, tmp1, #AT91_PMC_CSS + str tmp1, [pmc, #AT91_PMC_MCKR] + + wait_mckrdy + /*BUMEN*/ ldr r0, .sfr mov tmp1, #0x1 diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 671c7a09ab3d..0fca63c80e1a 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -277,21 +277,12 @@ struct platform_device pxa168_device_u2o = { #if IS_ENABLED(CONFIG_USB_EHCI_MV_U2O) struct resource pxa168_u2oehci_resources[] = { - /* regbase */ [0] = { - .start = PXA168_U2O_REGBASE + U2x_CAPREGS_OFFSET, + .start = PXA168_U2O_REGBASE, .end = PXA168_U2O_REGBASE + USB_REG_RANGE, .flags = IORESOURCE_MEM, - .name = "capregs", }, - /* phybase */ [1] = { - .start = PXA168_U2O_PHYBASE, - .end = PXA168_U2O_PHYBASE + USB_PHY_RANGE, - .flags = IORESOURCE_MEM, - .name = "phyregs", - }, - [2] = { .start = IRQ_PXA168_USB1, .end = IRQ_PXA168_USB1, .flags = IORESOURCE_IRQ, diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c index 345af3ebcc3a..7efe95bd584f 100644 --- a/arch/arm/mach-shmobile/pm-rcar-gen2.c +++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c @@ -50,7 +50,7 @@ void __init rcar_gen2_pm_init(void) void __iomem *p; u32 bar; static int once; - struct device_node *np, *cpus; + struct device_node *np; bool has_a7 = false; bool has_a15 = false; struct resource res; @@ -59,11 +59,7 @@ void __init rcar_gen2_pm_init(void) if (once++) return; - cpus = of_find_node_by_path("/cpus"); - if (!cpus) - return; - - for_each_child_of_node(cpus, np) { + for_each_of_cpu_node(np) { if (of_device_is_compatible(np, "arm,cortex-a15")) has_a15 = true; else if (of_device_is_compatible(np, "arm,cortex-a7")) diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c index e348bcfe389d..94fdeef11b81 100644 --- a/arch/arm/mach-shmobile/pm-rmobile.c +++ b/arch/arm/mach-shmobile/pm-rmobile.c @@ -202,7 +202,7 @@ static void __init get_special_pds(void) const struct of_device_id *id; /* PM domains containing CPUs */ - for_each_node_by_type(np, "cpu") + for_each_of_cpu_node(np) add_special_pd(np, PD_CPU); /* PM domain containing console */ diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c index 828e8aea037e..e48b0939693f 100644 --- a/arch/arm/mach-shmobile/timer.c +++ b/arch/arm/mach-shmobile/timer.c @@ -22,22 +22,16 @@ void __init shmobile_init_delay(void) { - struct device_node *np, *cpus; + struct device_node *np; u32 max_freq = 0; - cpus = of_find_node_by_path("/cpus"); - if (!cpus) - return; - - for_each_child_of_node(cpus, np) { + for_each_of_cpu_node(np) { u32 freq; if (!of_property_read_u32(np, "clock-frequency", &freq)) max_freq = max(max_freq, freq); } - of_node_put(cpus); - if (!max_freq) return; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index bd2c739d8083..b54f8f8def36 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -948,15 +948,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto fixup; if (ai_usermode & UM_SIGNAL) { - siginfo_t si; - - clear_siginfo(&si); - si.si_signo = SIGBUS; - si.si_errno = 0; - si.si_code = BUS_ADRALN; - si.si_addr = (void __user *)addr; - - force_sig_info(si.si_signo, &si, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current); } else { /* * We're about to disable the alignment trap and return to diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 3232afb6fdc0..f4ea4c62c613 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -161,13 +161,9 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int fsr, unsigned int sig, int code, struct pt_regs *regs) { - struct siginfo si; - if (addr > TASK_SIZE) harden_branch_predictor(); - clear_siginfo(&si); - #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { @@ -181,11 +177,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - si.si_signo = sig; - si.si_errno = 0; - si.si_code = code; - si.si_addr = (void __user *)addr; - force_sig_info(sig, &si, tsk); + force_sig_fault(sig, code, (void __user *)addr, tsk); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -554,7 +546,6 @@ asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); - struct siginfo info; if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; @@ -563,12 +554,8 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) inf->name, fsr, addr); show_pte(current->mm, addr); - clear_siginfo(&info); - info.si_signo = inf->sig; - info.si_errno = 0; - info.si_code = inf->code; - info.si_addr = (void __user *)addr; - arm_notify_die("", regs, &info, fsr, 0); + arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, + fsr, 0); } void __init @@ -588,7 +575,6 @@ asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) { const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); - struct siginfo info; if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; @@ -596,12 +582,8 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); - clear_siginfo(&info); - info.si_signo = inf->sig; - info.si_errno = 0; - info.si_code = inf->code; - info.si_addr = (void __user *)addr; - arm_notify_die("", regs, &info, ifsr, 0); + arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, + ifsr, 0); } /* diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index dc7e6b50ef67..aff6e6eadc70 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -216,13 +216,6 @@ static struct notifier_block vfp_notifier_block = { */ static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) { - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGFPE; - info.si_code = sicode; - info.si_addr = (void __user *)(instruction_pointer(regs) - 4); - /* * This is the same as NWFPE, because it's not clear what * this is used for @@ -230,7 +223,9 @@ static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) current->thread.error_code = 0; current->thread.trap_no = 6; - send_sig_info(SIGFPE, &info, current); + send_sig_fault(SIGFPE, sicode, + (void __user *)(instruction_pointer(regs) - 4), + current); } static void vfp_panic(char *reason, u32 inst) @@ -553,12 +548,11 @@ void vfp_flush_hwstate(struct thread_info *thread) * Save the current VFP state into the provided structures and prepare * for entry into a new function (signal handler). */ -int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, - struct user_vfp_exc __user *ufp_exc) +int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, + struct user_vfp_exc *ufp_exc) { struct thread_info *thread = current_thread_info(); struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; - int err = 0; /* Ensure that the saved hwstate is up-to-date. */ vfp_sync_hwstate(thread); @@ -567,22 +561,19 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, * Copy the floating point registers. There can be unused * registers see asm/hwcap.h for details. */ - err |= __copy_to_user(&ufp->fpregs, &hwstate->fpregs, - sizeof(hwstate->fpregs)); + memcpy(&ufp->fpregs, &hwstate->fpregs, sizeof(hwstate->fpregs)); + /* * Copy the status and control register. */ - __put_user_error(hwstate->fpscr, &ufp->fpscr, err); + ufp->fpscr = hwstate->fpscr; /* * Copy the exception registers. */ - __put_user_error(hwstate->fpexc, &ufp_exc->fpexc, err); - __put_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); - __put_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); - - if (err) - return -EFAULT; + ufp_exc->fpexc = hwstate->fpexc; + ufp_exc->fpinst = hwstate->fpinst; + ufp_exc->fpinst2 = ufp_exc->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); |