Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 16
-rw-r--r--  arch/alpha/kernel/traps.c | 2
-rw-r--r--  arch/arc/include/asm/io.h | 2
-rw-r--r--  arch/arc/include/asm/mmu.h | 1
-rw-r--r--  arch/arc/include/asm/unaligned.h | 27
-rw-r--r--  arch/arc/kernel/traps.c | 3
-rw-r--r--  arch/arc/kernel/unaligned.c | 1
-rw-r--r--  arch/arc/kernel/unaligned.h | 16
-rw-r--r--  arch/arc/kernel/unwind.c | 2
-rw-r--r--  arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts | 2
-rw-r--r--  arch/arm/crypto/aes-ce-glue.c | 2
-rw-r--r--  arch/arm/crypto/crc32-ce-glue.c | 2
-rw-r--r--  arch/arm/crypto/ghash-ce-glue.c | 2
-rw-r--r--  arch/arm/crypto/poly1305-glue.c | 2
-rw-r--r--  arch/arm/crypto/sha2-ce-glue.c | 2
-rw-r--r--  arch/arm/include/asm/uaccess.h | 2
-rw-r--r--  arch/arm/mm/alignment.c | 2
-rw-r--r--  arch/arm64/Kconfig | 7
-rw-r--r--  arch/arm64/Makefile | 2
-rw-r--r--  arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi | 2
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c | 2
-rw-r--r--  arch/arm64/crypto/aes-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/poly1305-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sha1-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sha2-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sha3-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sha512-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sm3-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/sm3-neon-glue.c | 2
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 32
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 4
-rw-r--r--  arch/arm64/include/asm/uprobes.h | 8
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 3
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.c | 16
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.c | 18
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 4
-rw-r--r--  arch/arm64/kernel/process.c | 3
-rw-r--r--  arch/arm64/kernel/signal.c | 92
-rw-r--r--  arch/arm64/kvm/arm.c | 5
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-init.S | 52
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 12
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 6
-rw-r--r--  arch/arm64/kvm/hypercalls.c | 12
-rw-r--r--  arch/arm64/kvm/mmu.c | 15
-rw-r--r--  arch/arm64/kvm/nested.c | 53
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 77
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c | 41
-rw-r--r--  arch/arm64/kvm/vgic/vgic-kvm-device.c | 7
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 12
-rw-r--r--  arch/loongarch/crypto/crc32-loongarch.c | 2
-rw-r--r--  arch/loongarch/include/asm/bootinfo.h | 4
-rw-r--r--  arch/loongarch/include/asm/kasan.h | 2
-rw-r--r--  arch/loongarch/include/asm/loongarch.h | 2
-rw-r--r--  arch/loongarch/include/asm/pgalloc.h | 11
-rw-r--r--  arch/loongarch/include/asm/pgtable.h | 35
-rw-r--r--  arch/loongarch/kernel/process.c | 16
-rw-r--r--  arch/loongarch/kernel/setup.c | 3
-rw-r--r--  arch/loongarch/kernel/traps.c | 5
-rw-r--r--  arch/loongarch/kernel/vdso.c | 8
-rw-r--r--  arch/loongarch/kvm/timer.c | 7
-rw-r--r--  arch/loongarch/kvm/vcpu.c | 2
-rw-r--r--  arch/loongarch/mm/init.c | 2
-rw-r--r--  arch/loongarch/mm/pgtable.c | 20
-rw-r--r--  arch/microblaze/include/asm/flat.h | 2
-rw-r--r--  arch/mips/boot/compressed/decompress.c | 2
-rw-r--r--  arch/mips/crypto/crc32-mips.c | 2
-rw-r--r--  arch/mips/crypto/poly1305-glue.c | 2
-rw-r--r--  arch/mips/kernel/cmpxchg.c | 1
-rw-r--r--  arch/nios2/kernel/misaligned.c | 2
-rw-r--r--  arch/parisc/boot/compressed/misc.c | 2
-rw-r--r--  arch/parisc/include/asm/unaligned.h | 11
-rw-r--r--  arch/parisc/kernel/traps.c | 4
-rw-r--r--  arch/parisc/kernel/unaligned.c | 3
-rw-r--r--  arch/parisc/kernel/unaligned.h | 3
-rw-r--r--  arch/powerpc/crypto/aes-gcm-p10-glue.c | 2
-rw-r--r--  arch/powerpc/crypto/poly1305-p10-glue.c | 2
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 1
-rw-r--r--  arch/powerpc/kernel/vdso/Makefile | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-irqchip.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c | 2
-rw-r--r--  arch/riscv/kvm/aia_imsic.c | 8
-rw-r--r--  arch/riscv/net/bpf_jit_comp64.c | 8
-rw-r--r--  arch/s390/configs/debug_defconfig | 13
-rw-r--r--  arch/s390/configs/defconfig | 14
-rw-r--r--  arch/s390/configs/zfcpdump_defconfig | 1
-rw-r--r--  arch/s390/include/asm/io.h | 2
-rw-r--r--  arch/s390/include/asm/perf_event.h | 1
-rw-r--r--  arch/s390/kvm/diag.c | 2
-rw-r--r--  arch/s390/kvm/gaccess.c | 4
-rw-r--r--  arch/s390/kvm/gaccess.h | 14
-rw-r--r--  arch/s390/pci/pci_event.c | 17
-rw-r--r--  arch/sh/include/asm/flat.h | 2
-rw-r--r--  arch/sh/kernel/dwarf.c | 2
-rw-r--r--  arch/sh/kernel/module.c | 2
-rw-r--r--  arch/sparc/crypto/crc32c_glue.c | 2
-rw-r--r--  arch/um/drivers/virt-pci.c | 2
-rw-r--r--  arch/um/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/crypto/camellia_glue.c | 2
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c | 2
-rw-r--r--  arch/x86/entry/entry.S | 5
-rw-r--r--  arch/x86/entry/entry_32.S | 6
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 4
-rw-r--r--  arch/x86/include/asm/ftrace.h | 2
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 11
-rw-r--r--  arch/x86/include/asm/reboot.h | 4
-rw-r--r--  arch/x86/include/asm/runtime-const.h | 4
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 43
-rw-r--r--  arch/x86/kernel/amd_nb.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic.c | 14
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 3
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 32
-rw-r--r--  arch/x86/kernel/cpu/common.c | 13
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 51
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c | 4
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 23
-rw-r--r--  arch/x86/kernel/kvm.c | 4
-rw-r--r--  arch/x86/kernel/reboot.c | 4
-rw-r--r--  arch/x86/kernel/traps.c | 12
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/x86/kvm/Kconfig | 9
-rw-r--r--  arch/x86/kvm/Makefile | 2
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 63
-rw-r--r--  arch/x86/kvm/svm/nested.c | 6
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 6
-rw-r--r--  arch/x86/lib/getuser.S | 9
-rw-r--r--  arch/x86/lib/insn.c | 2
-rw-r--r--  arch/x86/virt/svm/sev.c | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 4
-rw-r--r--  arch/xtensa/include/asm/flat.h | 2
136 files changed, 844 insertions(+), 365 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 98157b38f5cf..00163e4a237c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -838,7 +838,7 @@ config CFI_CLANG
config CFI_ICALL_NORMALIZE_INTEGERS
bool "Normalize CFI tags for integers"
depends on CFI_CLANG
- depends on $(cc-option,-fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers)
+ depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG
help
This option normalizes the CFI tags for integer types so that all
integer types of the same size and signedness receive the same CFI
@@ -851,6 +851,20 @@ config CFI_ICALL_NORMALIZE_INTEGERS
This option is necessary for using CFI with Rust. If unsure, say N.
+config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG
+ def_bool y
+ depends on $(cc-option,-fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers)
+ # With GCOV/KASAN we need this fix: https://github.com/llvm/llvm-project/pull/104826
+ depends on CLANG_VERSION >= 190000 || (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS)
+
+config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
+ def_bool y
+ depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG
+ depends on RUSTC_VERSION >= 107900
+ # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373
+ depends on (RUSTC_LLVM_VERSION >= 190000 && RUSTC_VERSION >= 108200) || \
+ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS)
+
config CFI_PERMISSIVE
bool "Use CFI in permissive mode"
depends on CFI_CLANG
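
For context on the new HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG gate above, a minimal C sketch of what integer normalization means for KCFI (illustration only, not part of the patch; assumes a 64-bit kernel where 'unsigned long' and 'u64' are both 64-bit and unsigned):

#include <linux/types.h>

/* 'u64' (unsigned long long) and 'unsigned long' are distinct C types
 * with identical size and signedness on arm64/x86_64. Without
 * normalization they receive different KCFI tags; with
 * -fsanitize-cfi-icall-experimental-normalize-integers they share one
 * tag, which is what allows Rust (whose u64 lowers to unsigned long
 * long) to call C functions declared with unsigned long. */
static int handler_ull(u64 v)          { return v & 1; }
static int handler_ul(unsigned long v) { return v & 1; }

static int dispatch(int (*fn)(unsigned long), unsigned long v)
{
	return fn(v);	/* KCFI tag check happens at this indirect call */
}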
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 6afae65e9a8b..a9a38c80c4a7 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -22,7 +22,7 @@
#include <asm/gentrap.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/sysinfo.h>
#include <asm/hwrpb.h>
#include <asm/mmu_context.h>
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 4fdb7350636c..f57cb5a6b624 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -9,7 +9,7 @@
#include <linux/types.h>
#include <asm/byteorder.h>
#include <asm/page.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#ifdef CONFIG_ISA_ARCV2
#include <asm/barrier.h>
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 9febf5bc3de6..4ae2db59d494 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -14,6 +14,7 @@ typedef struct {
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
+struct pt_regs;
extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
#endif
diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
deleted file mode 100644
index cf5a02382e0e..000000000000
--- a/arch/arc/include/asm/unaligned.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_ARC_UNALIGNED_H
-#define _ASM_ARC_UNALIGNED_H
-
-/* ARC700 can't handle unaligned Data accesses. */
-
-#include <asm-generic/unaligned.h>
-#include <asm/ptrace.h>
-
-#ifdef CONFIG_ARC_EMUL_UNALIGNED
-int misaligned_fixup(unsigned long address, struct pt_regs *regs,
- struct callee_regs *cregs);
-#else
-static inline int
-misaligned_fixup(unsigned long address, struct pt_regs *regs,
- struct callee_regs *cregs)
-{
- /* Not fixed */
- return 1;
-}
-#endif
-
-#endif /* _ASM_ARC_UNALIGNED_H */
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index a19751e824fb..8d2ea2cbd98b 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -18,8 +18,9 @@
#include <linux/kgdb.h>
#include <asm/entry.h>
#include <asm/setup.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/kprobes.h>
+#include "unaligned.h"
void die(const char *str, struct pt_regs *regs, unsigned long address)
{
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index 99a9b92ed98d..d2f5ceaaed1b 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -12,6 +12,7 @@
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <asm/disasm.h>
+#include "unaligned.h"
#ifdef CONFIG_CPU_BIG_ENDIAN
#define BE 1
diff --git a/arch/arc/kernel/unaligned.h b/arch/arc/kernel/unaligned.h
new file mode 100644
index 000000000000..5244453bb85f
--- /dev/null
+++ b/arch/arc/kernel/unaligned.h
@@ -0,0 +1,16 @@
+struct pt_regs;
+struct callee_regs;
+
+#ifdef CONFIG_ARC_EMUL_UNALIGNED
+int misaligned_fixup(unsigned long address, struct pt_regs *regs,
+ struct callee_regs *cregs);
+#else
+static inline int
+misaligned_fixup(unsigned long address, struct pt_regs *regs,
+ struct callee_regs *cregs)
+{
+ /* Not fixed */
+ return 1;
+}
+#endif
+
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 9270d0a713c3..d8969dab12d4 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -19,7 +19,7 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <asm/sections.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/unwind.h>
extern char __start_unwind[], __end_unwind[];
diff --git a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts
index 72d26d130efa..85f54fa595aa 100644
--- a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts
+++ b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts
@@ -77,7 +77,7 @@
};
&hdmi {
- hpd-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
+ hpd-gpios = <&expgpio 0 GPIO_ACTIVE_LOW>;
power-domains = <&power RPI_POWER_DOMAIN_HDMI>;
status = "okay";
};
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index f5b66f4cf45d..21df5e7f51f9 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -8,7 +8,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 4ff18044af07..20b4dff13e3a 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -18,7 +18,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#define PMULL_MIN_LEN 64L /* minimum size of buffer
* for crc32_pmull_le_16 */
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 3ddf05b4234d..3af997082534 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -9,7 +9,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/b128ops.h>
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index 8482e302c45a..4464ffbf8fd1 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -8,7 +8,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index c62ce89dd3e0..aeac45bfbf9f 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -16,7 +16,7 @@
#include <asm/hwcap.h>
#include <asm/simd.h>
#include <asm/neon.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "sha256_glue.h"
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 6c9c16d767cf..f90be312418e 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -12,7 +12,7 @@
#include <linux/string.h>
#include <asm/page.h>
#include <asm/domain.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/unified.h>
#include <asm/pgtable.h>
#include <asm/proc-fns.h>
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index f8dd0b3cc8e0..3c6ddb1afdc4 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -22,7 +22,7 @@
#include <asm/cp15.h>
#include <asm/system_info.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/opcodes.h>
#include "fault.h"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3e29b44d2d7b..fd9df6dcc593 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -200,7 +200,8 @@ config ARM64
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
- if $(cc-option,-fpatchable-function-entry=2)
+ if (GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS || \
+ CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \
if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS
select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
@@ -286,12 +287,10 @@ config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS
def_bool CC_IS_CLANG
# https://github.com/ClangBuiltLinux/linux/issues/1507
depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS
config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS
def_bool CC_IS_GCC
depends on $(cc-option,-fpatchable-function-entry=2)
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS
config 64BIT
def_bool y
@@ -1097,6 +1096,7 @@ config ARM64_ERRATUM_3194386
* ARM Cortex-A78C erratum 3324346
* ARM Cortex-A78C erratum 3324347
* ARM Cortex-A710 erratum 3324338
+ * ARM Cortex-A715 erratum 3456084
* ARM Cortex-A720 erratum 3456091
* ARM Cortex-A725 erratum 3456106
* ARM Cortex-X1 erratum 3324344
@@ -1107,6 +1107,7 @@ config ARM64_ERRATUM_3194386
* ARM Cortex-X925 erratum 3324334
* ARM Neoverse-N1 erratum 3324349
* ARM Neoverse N2 erratum 3324339
+ * ARM Neoverse-N3 erratum 3456111
* ARM Neoverse-V1 erratum 3324341
* ARM Neoverse V2 erratum 3324336
* ARM Neoverse-V3 erratum 3312417
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b058c4803efb..9efd3f37c2fd 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,7 +10,7 @@
#
# Copyright (C) 1995-2001 by Russell King
-LDFLAGS_vmlinux :=--no-undefined -X
+LDFLAGS_vmlinux :=--no-undefined -X --pic-veneer
ifeq ($(CONFIG_RELOCATABLE), y)
# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
diff --git a/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi b/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi
index 4676e3488f54..cb8d54895a77 100644
--- a/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi
+++ b/arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi
@@ -136,7 +136,7 @@
};
cp0_mdio_pins: cp0-mdio-pins {
- marvell,pins = "mpp40", "mpp41";
+ marvell,pins = "mpp0", "mpp1";
marvell,function = "ge";
};
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index ce9b28e3c7d6..a523b519700f 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -9,7 +9,7 @@
*/
#include <asm/neon.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index e921823ca103..00b8749013c5 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 97331b454ea8..da7b7ec1a664 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/algapi.h>
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index 9c4bfd62e789..18883ea438f3 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -8,7 +8,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 1dd93e1fcb39..cbd14f208f83 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha1.h>
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 0a44d2e7ee1f..6b4866a88ded 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha2.h>
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 250e1377c481..5662c3ac49e9 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -12,7 +12,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha3.h>
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index f3431fc62315..071f64293227 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -11,7 +11,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha2.h>
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index 54bf6ebcfffb..1a71788c4cda 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
index 7182ee683f14..8dd71ce79b69 100644
--- a/arch/arm64/crypto/sm3-neon-glue.c
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -7,7 +7,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5a7dfeb8e8eb..488f8e751349 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -94,6 +94,7 @@
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
#define ARM_CPU_PART_CORTEX_X925 0xD85
#define ARM_CPU_PART_CORTEX_A725 0xD87
+#define ARM_CPU_PART_NEOVERSE_N3 0xD8E
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
@@ -176,6 +177,7 @@
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
+#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b36a3b6cc011..67afac659231 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -178,6 +178,7 @@ struct kvm_nvhe_init_params {
unsigned long hcr_el2;
unsigned long vttbr;
unsigned long vtcr;
+ unsigned long tmp;
};
/*
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 329619c6fa96..bf64fed9820e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -51,6 +51,7 @@
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -212,6 +213,12 @@ struct kvm_s2_mmu {
bool nested_stage2_enabled;
/*
+ * true when this MMU needs to be unmapped before being used for a new
+ * purpose.
+ */
+ bool pending_unmap;
+
+ /*
* 0: Nobody is currently using this, check vttbr for validity
* >0: Somebody is actively using this.
*/
@@ -1441,11 +1448,6 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
sign_extend64(__val, id##_##fld##_WIDTH - 1); \
})
-#define expand_field_sign(id, fld, val) \
- (id##_##fld##_SIGNED ? \
- __expand_field_sign_signed(id, fld, val) : \
- __expand_field_sign_unsigned(id, fld, val))
-
#define get_idreg_field_unsigned(kvm, id, fld) \
({ \
u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
@@ -1461,20 +1463,26 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define get_idreg_field_enum(kvm, id, fld) \
get_idreg_field_unsigned(kvm, id, fld)
-#define get_idreg_field(kvm, id, fld) \
+#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \
+ (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit))
+
+#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \
+ (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit))
+
+#define kvm_cmp_feat(kvm, id, fld, op, limit) \
(id##_##fld##_SIGNED ? \
- get_idreg_field_signed(kvm, id, fld) : \
- get_idreg_field_unsigned(kvm, id, fld))
+ kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
+ kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
#define kvm_has_feat(kvm, id, fld, limit) \
- (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+ kvm_cmp_feat(kvm, id, fld, >=, limit)
#define kvm_has_feat_enum(kvm, id, fld, val) \
- (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+ kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
#define kvm_has_feat_range(kvm, id, fld, min, max) \
- (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
- get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+ (kvm_cmp_feat(kvm, id, fld, >=, min) && \
+ kvm_cmp_feat(kvm, id, fld, <=, max))
/* Check for a given level of PAuth support */
#define kvm_has_pauth(k, l) \
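
A usage sketch for the reworked comparison helpers above (hypothetical caller, not from the patch): kvm_has_feat() now expands to kvm_cmp_feat(..., >=, ...), which picks the signed or unsigned compare at compile time from the field's declared signedness.

/* Illustration only: query the VM's sanitised ID register view. */
static bool vm_has_sve(struct kvm *kvm)
{
	/* expands to an unsigned '>=' compare of ID_AA64PFR0_EL1.SVE
	 * against the IMP level, since the SVE field is unsigned */
	return kvm_has_feat(kvm, ID_AA64PFR0_EL1, SVE, IMP);
}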
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index cd4087fbda9a..66d93e320ec8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -166,7 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);
-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+ u64 size, bool may_block);
void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index e8bc6d67aba2..233e65522716 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+
struct kvm_s2_trans {
phys_addr_t output;
unsigned long block_size;
@@ -124,7 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
struct kvm_s2_trans *trans);
extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
extern void kvm_nested_s2_wp(struct kvm *kvm);
-extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
extern void kvm_nested_s2_flush(struct kvm *kvm);
unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index 2b09495499c6..014b02897f8e 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -10,11 +10,9 @@
#include <asm/insn.h>
#include <asm/probes.h>
-#define MAX_UINSN_BYTES AARCH64_INSN_SIZE
-
#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES)
#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
-#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE
typedef __le32 uprobe_opcode_t;
@@ -23,8 +21,8 @@ struct arch_uprobe_task {
struct arch_uprobe {
union {
- u8 insn[MAX_UINSN_BYTES];
- u8 ixol[MAX_UINSN_BYTES];
+ __le32 insn;
+ __le32 ixol;
};
struct arch_probe_insn api;
bool simulate;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 27de1dddb0ab..b21dd24b8efc 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -146,6 +146,7 @@ int main(void)
DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
+ DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index dfefbdf4073a..a78f247029ae 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -439,6 +439,7 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
@@ -447,8 +448,10 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 968d5fffe233..3496d6169e59 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -99,10 +99,6 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
api->handler = simulate_br_blr_ret;
- } else if (aarch64_insn_is_ldr_lit(insn)) {
- api->handler = simulate_ldr_literal;
- } else if (aarch64_insn_is_ldrsw_lit(insn)) {
- api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -140,6 +136,17 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
probe_opcode_t insn = le32_to_cpu(*addr);
probe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
+ struct arch_probe_insn *api = &asi->api;
+
+ if (aarch64_insn_is_ldr_lit(insn)) {
+ api->handler = simulate_ldr_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ api->handler = simulate_ldrsw_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else {
+ decoded = arm_probe_decode_insn(insn, &asi->api);
+ }
/*
* If there's a symbol defined in front of and near enough to
@@ -157,7 +164,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 22d0b3252476..b65334ab79d2 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -171,17 +171,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- u64 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (u64 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
if (opcode & (1 << 30)) /* x0-x30 */
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr));
else /* w0-w30 */
- set_w_reg(regs, xn, *load_addr);
+ set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
@@ -189,14 +187,12 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- s32 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (s32 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
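
The ldr_displacement() helper used above is not shown in this hunk; what follows is a hedged stand-alone sketch of the decode it performs, reconstructed from the A64 LDR (literal) encoding (bits [23:5] hold a signed imm19 scaled by 4), not copied from the kernel source:

/* Hypothetical equivalent of ldr_displacement(): extract imm19 from
 * opcode bits [23:5], sign-extend it, and scale to a byte offset. */
static long ldr_lit_displacement(u32 opcode)
{
	long imm19 = (opcode >> 5) & 0x7ffff;	/* bits [23:5] */

	if (imm19 & (1L << 18))			/* sign bit of imm19 */
		imm19 -= 1L << 19;		/* sign-extend */

	return imm19 * 4;			/* word-scaled offset */
}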
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index d49aef2657cd..a2f137a595fc 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
switch (arm_probe_decode_insn(insn, &auprobe->api)) {
case INSN_REJECTED:
@@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (!auprobe->simulate)
return false;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
addr = instruction_pointer(regs);
if (auprobe->api.handler)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0540653fbf38..3e7c8c8195c3 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -412,6 +412,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.cpu_context.x19 = (unsigned long)args->fn;
p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
+
+ if (system_supports_poe())
+ p->thread.por_el0 = POR_EL0_INIT;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 561986947530..c7d311d8b92a 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/ratelimit.h>
#include <linux/rseq.h>
#include <linux/syscalls.h>
+#include <linux/pkeys.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
@@ -66,10 +67,63 @@ struct rt_sigframe_user_layout {
unsigned long end_offset;
};
+/*
+ * Holds any EL0-controlled state that influences unprivileged memory accesses.
+ * This includes both accesses done in userspace and uaccess done in the kernel.
+ *
+ * This state needs to be carefully managed to ensure that it doesn't cause
+ * uaccess to fail when setting up the signal frame, and the signal handler
+ * itself also expects a well-defined state when entered.
+ */
+struct user_access_state {
+ u64 por_el0;
+};
+
#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+/*
+ * Save the user access state into ua_state and reset it to disable any
+ * restrictions.
+ */
+static void save_reset_user_access_state(struct user_access_state *ua_state)
+{
+ if (system_supports_poe()) {
+ u64 por_enable_all = 0;
+
+ for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
+ por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY);
+
+ ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ write_sysreg_s(por_enable_all, SYS_POR_EL0);
+ /* Ensure that any subsequent uaccess observes the updated value */
+ isb();
+ }
+}
+
+/*
+ * Set the user access state for invoking the signal handler.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void set_handler_user_access_state(void)
+{
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+/*
+ * Restore the user access state to the values saved in ua_state.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void restore_user_access_state(const struct user_access_state *ua_state)
+{
+ if (system_supports_poe())
+ write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
+}
+
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
const size_t reserved_size =
@@ -261,18 +315,20 @@ static int restore_fpmr_context(struct user_ctxs *user)
return err;
}
-static int preserve_poe_context(struct poe_context __user *ctx)
+static int preserve_poe_context(struct poe_context __user *ctx,
+ const struct user_access_state *ua_state)
{
int err = 0;
__put_user_error(POE_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
- __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
+ __put_user_error(ua_state->por_el0, &ctx->por_el0, err);
return err;
}
-static int restore_poe_context(struct user_ctxs *user)
+static int restore_poe_context(struct user_ctxs *user,
+ struct user_access_state *ua_state)
{
u64 por_el0;
int err = 0;
@@ -282,7 +338,7 @@ static int restore_poe_context(struct user_ctxs *user)
__get_user_error(por_el0, &(user->poe->por_el0), err);
if (!err)
- write_sysreg_s(por_el0, SYS_POR_EL0);
+ ua_state->por_el0 = por_el0;
return err;
}
@@ -850,7 +906,8 @@ invalid:
}
static int restore_sigframe(struct pt_regs *regs,
- struct rt_sigframe __user *sf)
+ struct rt_sigframe __user *sf,
+ struct user_access_state *ua_state)
{
sigset_t set;
int i, err;
@@ -899,7 +956,7 @@ static int restore_sigframe(struct pt_regs *regs,
err = restore_zt_context(&user);
if (err == 0 && system_supports_poe() && user.poe)
- err = restore_poe_context(&user);
+ err = restore_poe_context(&user, ua_state);
return err;
}
@@ -908,6 +965,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -924,12 +982,14 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
- if (restore_sigframe(regs, frame))
+ if (restore_sigframe(regs, frame, &ua_state))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ restore_user_access_state(&ua_state);
+
return regs->regs[0];
badframe:
@@ -1035,7 +1095,8 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}
static int setup_sigframe(struct rt_sigframe_user_layout *user,
- struct pt_regs *regs, sigset_t *set)
+ struct pt_regs *regs, sigset_t *set,
+ const struct user_access_state *ua_state)
{
int i, err = 0;
struct rt_sigframe __user *sf = user->sigframe;
@@ -1097,10 +1158,9 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct poe_context __user *poe_ctx =
apply_user_offset(user, user->poe_offset);
- err |= preserve_poe_context(poe_ctx);
+ err |= preserve_poe_context(poe_ctx, ua_state);
}
-
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1237,9 +1297,6 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
- if (system_supports_poe())
- write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
-
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
@@ -1253,6 +1310,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
{
struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
int err = 0;
fpsimd_signal_preserve_current_state();
@@ -1260,13 +1318,14 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
if (get_sigframe(&user, ksig, regs))
return 1;
+ save_reset_user_access_state(&ua_state);
frame = user.sigframe;
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(&user, regs, set);
+ err |= setup_sigframe(&user, regs, set, &ua_state);
if (err == 0) {
setup_return(regs, &ksig->ka, &user, usig);
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
@@ -1276,6 +1335,11 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
}
}
+ if (err == 0)
+ set_handler_user_access_state();
+ else
+ restore_user_access_state(&ua_state);
+
return err;
}
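
A worked example of the por_enable_all computation in save_reset_user_access_state() above (constants assumed from the arm64 POE headers rather than this patch: POE_RXW == 0x7, POR_BITS_PER_PKEY == 4, and arch_max_pkey() == 8 when POE is present):

/* Hypothetical recreation of the loop above: one RXW nibble per pkey.
 * por_all_rxw(8) == 0x77777777, i.e. no POE restriction on any key,
 * which is exactly the state wanted while writing the sigframe. */
static u64 por_all_rxw(int max_pkey)
{
	u64 por = 0;

	for (int pkey = 0; pkey < max_pkey; pkey++)
		por |= 0x7ULL << (pkey * 4);

	return por;
}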
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a0d01c46e408..48cafb65d6ac 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -997,6 +997,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+ return -EIO;
+
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
kvm_vcpu_sleep(vcpu);
@@ -1031,6 +1034,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_dirty_ring_check_request(vcpu))
return 0;
+
+ check_nested_vcpu_requests(vcpu);
}
return 1;
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 46d52e8a3df3..5310fe1da616 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -338,7 +338,7 @@ static inline void __hyp_sve_save_host(void)
struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
&sve_state->fpsr,
true);
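
For the ZCR_EL2 writes above and in the later hyp-main.c hunks: sve_vq_from_vl() converts a vector length in bytes to quadwords, and ZCR_ELx.LEN encodes that count minus one. A small sketch (assuming SVE_VQ_BYTES == 16 as in the arm64 uapi headers; illustration, not patch code):

/* Hypothetical: compute the ZCR_ELx.LEN value that pins the effective
 * vector length to vl_bytes, as done with kvm_host_sve_max_vl above. */
static u64 zcr_len_for_vl(unsigned int vl_bytes)
{
	unsigned int vq = vl_bytes / 16;	/* sve_vq_from_vl(vl) */

	return vq - 1;				/* e.g. VL 256B -> LEN 15 */
}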
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 401af1835be6..fc1866226067 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -24,28 +24,25 @@
.align 11
SYM_CODE_START(__kvm_hyp_init)
- ventry __invalid // Synchronous EL2t
- ventry __invalid // IRQ EL2t
- ventry __invalid // FIQ EL2t
- ventry __invalid // Error EL2t
+ ventry . // Synchronous EL2t
+ ventry . // IRQ EL2t
+ ventry . // FIQ EL2t
+ ventry . // Error EL2t
- ventry __invalid // Synchronous EL2h
- ventry __invalid // IRQ EL2h
- ventry __invalid // FIQ EL2h
- ventry __invalid // Error EL2h
+ ventry . // Synchronous EL2h
+ ventry . // IRQ EL2h
+ ventry . // FIQ EL2h
+ ventry . // Error EL2h
ventry __do_hyp_init // Synchronous 64-bit EL1
- ventry __invalid // IRQ 64-bit EL1
- ventry __invalid // FIQ 64-bit EL1
- ventry __invalid // Error 64-bit EL1
+ ventry . // IRQ 64-bit EL1
+ ventry . // FIQ 64-bit EL1
+ ventry . // Error 64-bit EL1
- ventry __invalid // Synchronous 32-bit EL1
- ventry __invalid // IRQ 32-bit EL1
- ventry __invalid // FIQ 32-bit EL1
- ventry __invalid // Error 32-bit EL1
-
-__invalid:
- b .
+ ventry . // Synchronous 32-bit EL1
+ ventry . // IRQ 32-bit EL1
+ ventry . // FIQ 32-bit EL1
+ ventry . // Error 32-bit EL1
/*
* Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.
@@ -76,6 +73,13 @@ __do_hyp_init:
eret
SYM_CODE_END(__kvm_hyp_init)
+SYM_CODE_START_LOCAL(__kvm_init_el2_state)
+ /* Initialize EL2 CPU state to sane values. */
+ init_el2_state // Clobbers x0..x2
+ finalise_el2_state
+ ret
+SYM_CODE_END(__kvm_init_el2_state)
+
/*
* Initialize the hypervisor in EL2.
*
@@ -102,9 +106,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
// TPIDR_EL2 is used to preserve x0 across the macro maze...
isb
msr tpidr_el2, x0
- init_el2_state
- finalise_el2_state
+ str lr, [x0, #NVHE_INIT_TMP]
+
+ bl __kvm_init_el2_state
+
mrs x0, tpidr_el2
+ ldr lr, [x0, #NVHE_INIT_TMP]
1:
ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
@@ -199,9 +206,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
2: msr SPsel, #1 // We want to use SP_EL{1,2}
- /* Initialize EL2 CPU state to sane values. */
- init_el2_state // Clobbers x0..x2
- finalise_el2_state
+ bl __kvm_init_el2_state
+
__init_el2_nvhe_prepare_eret
/* Enable MMU, set vectors and stack. */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 87692b566d90..fefc89209f9e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -33,7 +33,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
}
static void __hyp_sve_restore_host(void)
@@ -45,10 +45,11 @@ static void __hyp_sve_restore_host(void)
* the host. The layout of the data when saving the sve state depends
* on the VL, so use a consistent (i.e., the maximum) host VL.
*
- * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
- * supported by the system (or limited at EL3).
+ * Note that this constrains the PE to the maximum shared VL
+ * that was discovered, if we wish to use larger VLs this will
+ * need to be revisited.
*/
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
&sve_state->fpsr,
true);
@@ -488,7 +489,8 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SVE:
cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
- sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+ SYS_ZCR_EL2);
break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 187a5f4d56c0..077d4098548d 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -574,12 +574,14 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unlock:
hyp_spin_unlock(&vm_table_lock);
- if (ret)
+ if (ret) {
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ return ret;
+ }
hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
- return ret;
+ return 0;
}
static void
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 5763d979d8ca..ee6573befb81 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -317,7 +317,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
* to the guest, and hide SSBS so that the
* guest stays protected.
*/
- if (cpus_have_final_cap(ARM64_SSBS))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
break;
fallthrough;
case SPECTRE_UNAFFECTED:
@@ -428,7 +428,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
* Convert the workaround level into an easy-to-compare number, where higher
* values mean better protection.
*/
-static int get_kernel_wa_level(u64 regid)
+static int get_kernel_wa_level(struct kvm_vcpu *vcpu, u64 regid)
{
switch (regid) {
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
@@ -449,7 +449,7 @@ static int get_kernel_wa_level(u64 regid)
* don't have any FW mitigation if SSBS is there at
* all times.
*/
- if (cpus_have_final_cap(ARM64_SSBS))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
fallthrough;
case SPECTRE_UNAFFECTED:
@@ -486,7 +486,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
- val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ val = get_kernel_wa_level(vcpu, reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
break;
case KVM_REG_ARM_STD_BMAP:
val = READ_ONCE(smccc_feat->std_bmap);
@@ -588,7 +588,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
return -EINVAL;
- if (get_kernel_wa_level(reg->id) < val)
+ if (get_kernel_wa_level(vcpu, reg->id) < val)
return -EINVAL;
return 0;
@@ -624,7 +624,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
* We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
* other way around.
*/
- if (get_kernel_wa_level(reg->id) < wa_level)
+ if (get_kernel_wa_level(vcpu, reg->id) < wa_level)
return -EINVAL;
return 0;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index a509b63bd4dd..0f7658aefa1a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -328,9 +328,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
may_block));
}
-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+ u64 size, bool may_block)
{
- __unmap_stage2_range(mmu, start, size, true);
+ __unmap_stage2_range(mmu, start, size, may_block);
}
void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
@@ -1015,7 +1016,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
- kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
+ kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start, true);
}
hva = vm_end;
} while (hva < reg_end);
@@ -1042,7 +1043,7 @@ void stage2_unmap_vm(struct kvm *kvm)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap(kvm, true);
write_unlock(&kvm->mmu_lock);
mmap_read_unlock(current->mm);
@@ -1912,7 +1913,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
(range->end - range->start) << PAGE_SHIFT,
range->may_block);
- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap(kvm, range->may_block);
return false;
}
@@ -2179,8 +2180,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
write_lock(&kvm->mmu_lock);
- kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
- kvm_nested_s2_unmap(kvm);
+ kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true);
+ kvm_nested_s2_unmap(kvm, true);
write_unlock(&kvm->mmu_lock);
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f9e30dd34c7a..c4b17d90fc49 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
/* Set the scene for the next search */
kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
- /* Clear the old state */
+ /* Make sure we don't forget to do the laundry */
if (kvm_s2_mmu_valid(s2_mmu))
- kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
+ s2_mmu->pending_unmap = true;
/*
* The virtual VMID (modulo CnP) will be used as a key when matching
@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
out:
atomic_inc(&s2_mmu->refcnt);
+
+ /*
+ * Set the vCPU request to perform an unmap, even if the pending unmap
+ * originates from another vCPU. This guarantees that the MMU has been
+ * completely unmapped before any vCPU actually uses it, and allows
+ * multiple vCPUs to lend a hand with completing the unmap.
+ */
+ if (s2_mmu->pending_unmap)
+ kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);
+
return s2_mmu;
}
@@ -663,6 +673,13 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
{
+ /*
+ * The vCPU kept its reference on the MMU after the last put, keep
+ * rolling with it.
+ */
+ if (vcpu->arch.hw_mmu)
+ return;
+
if (is_hyp_ctxt(vcpu)) {
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
} else {
@@ -674,10 +691,18 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
- if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
+ /*
+ * Keep a reference on the associated stage-2 MMU if the vCPU is
+ * scheduling out and not in WFI emulation, suggesting it is likely to
+ * reuse the MMU sometime soon.
+ */
+ if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI))
+ return;
+
+ if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu))
atomic_dec(&vcpu->arch.hw_mmu->refcnt);
- vcpu->arch.hw_mmu = NULL;
- }
+
+ vcpu->arch.hw_mmu = NULL;
}
/*
@@ -730,7 +755,7 @@ void kvm_nested_s2_wp(struct kvm *kvm)
}
}
-void kvm_nested_s2_unmap(struct kvm *kvm)
+void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
{
int i;
@@ -740,7 +765,7 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
if (kvm_s2_mmu_valid(mmu))
- kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
+ kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
}
}
@@ -1184,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
return 0;
}
+
+void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) {
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ if (mmu->pending_unmap) {
+ kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true);
+ mmu->pending_unmap = false;
+ }
+ write_unlock(&vcpu->kvm->mmu_lock);
+ }
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index dad88e31f953..ff8c4e1b847e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1527,6 +1527,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
break;
case SYS_ID_AA64PFR2_EL1:
/* We only expose FPMR */
@@ -1550,7 +1558,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
break;
case SYS_ID_AA64MMFR3_EL1:
- val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+ val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
+ ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -1985,7 +1994,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
* one cache line.
*/
if (kvm_has_mte(vcpu->kvm))
- clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
+ clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc);
__vcpu_sys_reg(vcpu, r->reg) = clidr;
@@ -2376,7 +2385,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP), },
- ID_SANITISED(ID_AA64PFR1_EL1),
+ ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR |
+ ID_AA64PFR1_EL1_DF2 |
+ ID_AA64PFR1_EL1_MTEX |
+ ID_AA64PFR1_EL1_THE |
+ ID_AA64PFR1_EL1_GCS |
+ ID_AA64PFR1_EL1_MTE_frac |
+ ID_AA64PFR1_EL1_NMI |
+ ID_AA64PFR1_EL1_RNDR_trap |
+ ID_AA64PFR1_EL1_SME |
+ ID_AA64PFR1_EL1_RES0 |
+ ID_AA64PFR1_EL1_MPAM_frac |
+ ID_AA64PFR1_EL1_RAS_frac |
+ ID_AA64PFR1_EL1_MTE)),
ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
@@ -2390,7 +2411,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.get_user = get_id_reg,
.set_user = set_id_aa64dfr0_el1,
.reset = read_sanitised_id_aa64dfr0_el1,
- .val = ID_AA64DFR0_EL1_PMUVer_MASK |
+ /*
+ * Prior to FEAT_Debugv8.9, the architecture defines context-aware
+ * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs).
+ * KVM does not trap + emulate the breakpoint registers, and as such
+ * cannot support a layout that misaligns with the underlying hardware.
+ * While it may be possible to describe a subset that aligns with
+ * hardware, just prevent changes to BRPs and CTX_CMPs altogether for
+ * simplicity.
+ *
+ * See DDI0487K.a, section D2.8.3 Breakpoint types and linking
+ * of breakpoints for more details.
+ */
+ .val = ID_AA64DFR0_EL1_DoubleLock_MASK |
+ ID_AA64DFR0_EL1_WRPs_MASK |
+ ID_AA64DFR0_EL1_PMUVer_MASK |
ID_AA64DFR0_EL1_DebugVer_MASK, },
ID_SANITISED(ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
@@ -2433,6 +2468,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
ID_SANITISED(ID_AA64MMFR4_EL1),
ID_UNALLOCATED(7,5),
@@ -2903,7 +2939,7 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the
* corresponding VMIDs.
*/
- kvm_nested_s2_unmap(vcpu->kvm);
+ kvm_nested_s2_unmap(vcpu->kvm, true);
write_unlock(&vcpu->kvm->mmu_lock);
@@ -2955,7 +2991,30 @@ union tlbi_info {
static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu,
const union tlbi_info *info)
{
- kvm_stage2_unmap_range(mmu, info->range.start, info->range.size);
+ /*
+ * The unmap operation is allowed to drop the MMU lock and block, which
+ * means that @mmu could be used for a different context than the one
+ * currently being invalidated.
+ *
+ * This behavior is still safe, as:
+ *
+ * 1) The vCPU(s) that recycled the MMU are responsible for invalidating
+ * the entire MMU before reusing it, which still honors the intent
+ * of a TLBI.
+ *
+ * 2) Until the guest TLBI instruction is 'retired' (i.e. increment PC
+ * and ERET to the guest), other vCPUs are allowed to use stale
+ * translations.
+ *
+ * 3) Accidentally unmapping an unrelated MMU context is nonfatal, and
+ * at worst may cause more aborts for shadow stage-2 fills.
+ *
+ * Dropping the MMU lock also implies that shadow stage-2 fills could
+ * happen behind the back of the TLBI. This is still safe, though, as
+ * the L1 needs to put its stage-2 in a consistent state before doing
+ * the TLBI.
+ */
+ kvm_stage2_unmap_range(mmu, info->range.start, info->range.size, true);
}
static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -3050,7 +3109,11 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu,
max_size = compute_tlb_inval_range(mmu, info->ipa.addr);
base_addr &= ~(max_size - 1);
- kvm_stage2_unmap_range(mmu, base_addr, max_size);
+ /*
+ * See comment in s2_mmu_unmap_range() for why this is allowed to
+ * reschedule.
+ */
+ kvm_stage2_unmap_range(mmu, base_addr, max_size, true);
}
static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e7c53e8af3d1..48c952563e85 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -417,8 +417,28 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ /*
+ * If this vCPU is being destroyed because of a failed creation
+ * then unregister the redistributor to avoid leaving behind a
+ * dangling pointer to the vCPU struct.
+ *
+ * vCPUs that have been successfully created (i.e. added to
+ * kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
+ * this function gets called while holding kvm->arch.config_lock
+ * in the VM teardown path and would otherwise introduce a lock
+ * inversion w.r.t. kvm->srcu.
+ *
+ * vCPUs that failed creation are torn down outside of the
+ * kvm->arch.config_lock and do not get unregistered in
+ * kvm_vgic_destroy(), meaning it is both safe and necessary to
+ * do so here.
+ */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
+ vgic_unregister_redist_iodev(vcpu);
+
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ }
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -524,22 +544,31 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (ret)
goto out;
- dist->ready = true;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
ret = vgic_register_dist_iodev(kvm, dist_base, type);
- if (ret)
+ if (ret) {
kvm_err("Unable to register VGIC dist MMIO regions\n");
+ goto out_slots;
+ }
+ /*
+ * kvm_io_bus_register_dev() guarantees all readers see the new MMIO
+ * registration before returning through synchronize_srcu(), which also
+ * implies a full memory barrier. As such, marking the distributor as
+ * 'ready' here is guaranteed to be ordered after all vCPUs have seen
+ * a completely configured distributor.
+ */
+ dist->ready = true;
goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);
out_slots:
- mutex_unlock(&kvm->slots_lock);
-
if (ret)
- kvm_vgic_destroy(kvm);
+ kvm_vm_dead(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 1d26bb5b02f4..5f4f57aaa23e 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -236,7 +236,12 @@ static int vgic_set_common_attr(struct kvm_device *dev,
mutex_lock(&dev->kvm->arch.config_lock);
- if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
+ /*
+ * Either userspace has already configured NR_IRQS or
+ * the vgic has already been initialized and vgic_init()
+ * supplied a default number of SPIs.
+ */
+ if (dev->kvm->arch.vgic.nr_spis)
ret = -EBUSY;
else
dev->kvm->arch.vgic.nr_spis =
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8bbd0b20136a..5db82bfc9dc1 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2220,7 +2220,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+ /* for the first pass, assume the worst case */
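+ /* (emit_a64_mov_i64() emits at most 4 insns: one movz plus three movk) */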
+ if (!ctx->image)
+ ctx->idx += 4;
+ else
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_enter, ctx);
}
@@ -2264,7 +2268,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->ro_image + ctx->idx;
- emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+ /* for the first pass, assume the worst case */
+ if (!ctx->image)
+ ctx->idx += 4;
+ else
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
index 3eebea3a7b47..b7d9782827f5 100644
--- a/arch/loongarch/crypto/crc32-loongarch.c
+++ b/arch/loongarch/crypto/crc32-loongarch.c
@@ -13,7 +13,7 @@
#include <crypto/internal/hash.h>
#include <asm/cpu-features.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#define _CRC32(crc, value, size, type) \
do { \
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index 6d5846dd075c..7657e016233f 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -26,6 +26,10 @@ struct loongson_board_info {
#define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG)
+/*
+ * The "core" of cores_per_node and cores_per_package stands for a
+ * logical core, which means in a SMT system it stands for a thread.
+ */
struct loongson_system_configuration {
int nr_cpus;
int nr_nodes;
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
index cd6084f4e153..c6bce5fbff57 100644
--- a/arch/loongarch/include/asm/kasan.h
+++ b/arch/loongarch/include/asm/kasan.h
@@ -16,7 +16,7 @@
#define XRANGE_SHIFT (48)
/* Valid address length */
-#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define XRANGE_SHADOW_SHIFT min(cpu_vabits, VA_BITS)
/* Used for taking out the valid address */
#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
/* One segment whole address space size */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 26542413a5b0..64ad277e096e 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -250,7 +250,7 @@
#define CSR_ESTAT_IS_WIDTH 15
#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
-#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_ERA 0x6 /* Exception return address */
#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 4e2d6b7ca2ee..a7b9c9e73593 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -10,6 +10,7 @@
#define __HAVE_ARCH_PMD_ALLOC_ONE
#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -44,6 +45,16 @@ extern void pagetable_init(void);
extern pgd_t *pgd_alloc(struct mm_struct *mm);
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ pte_t *pte = __pte_alloc_one_kernel(mm);
+
+ if (pte)
+ kernel_pte_init(pte);
+
+ return pte;
+}
+
#define __pte_free_tlb(tlb, pte, address) \
do { \
pagetable_pte_dtor(page_ptdesc(pte)); \
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 9965f52ef65b..20714b73f14c 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -269,6 +269,7 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
extern void pgd_init(void *addr);
extern void pud_init(void *addr);
extern void pmd_init(void *addr);
+extern void kernel_pte_init(void *addr);
/*
* Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
@@ -325,39 +326,17 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
{
WRITE_ONCE(*ptep, pteval);
- if (pte_val(pteval) & _PAGE_GLOBAL) {
- pte_t *buddy = ptep_buddy(ptep);
- /*
- * Make sure the buddy is global too (if it's !none,
- * it better already be global)
- */
- if (pte_none(ptep_get(buddy))) {
#ifdef CONFIG_SMP
- /*
- * For SMP, multiple CPUs can race, so we need
- * to do this atomically.
- */
- __asm__ __volatile__(
- __AMOR "$zero, %[global], %[buddy] \n"
- : [buddy] "+ZB" (buddy->pte)
- : [global] "r" (_PAGE_GLOBAL)
- : "memory");
-
- DBAR(0b11000); /* o_wrw = 0b11000 */
-#else /* !CONFIG_SMP */
- WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
-#endif /* CONFIG_SMP */
- }
- }
+ if (pte_val(pteval) & _PAGE_GLOBAL)
+ DBAR(0b11000); /* o_wrw = 0b11000 */
+#endif
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- /* Preserve global status for the pair */
- if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL)
- set_pte(ptep, __pte(_PAGE_GLOBAL));
- else
- set_pte(ptep, __pte(0));
+ pte_t pte = ptep_get(ptep);
+ pte_val(pte) &= _PAGE_GLOBAL;
+ set_pte(ptep, pte);
}
#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index f2ff8b5d591e..6e58f65455c7 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -293,13 +293,15 @@ unsigned long stack_top(void)
{
unsigned long top = TASK_SIZE & PAGE_MASK;
- /* Space for the VDSO & data page */
- top -= PAGE_ALIGN(current->thread.vdso->size);
- top -= VVAR_SIZE;
-
- /* Space to randomize the VDSO base */
- if (current->flags & PF_RANDOMIZE)
- top -= VDSO_RANDOMIZE_SIZE;
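+ /* Not every task has a vDSO mapping, so thread.vdso may be NULL */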
+ if (current->thread.vdso) {
+ /* Space for the VDSO & data page */
+ top -= PAGE_ALIGN(current->thread.vdso->size);
+ top -= VVAR_SIZE;
+
+ /* Space to randomize the VDSO base */
+ if (current->flags & PF_RANDOMIZE)
+ top -= VDSO_RANDOMIZE_SIZE;
+ }
return top;
}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 00e307203ddb..cbd3c09a93c1 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -55,6 +55,7 @@
#define SMBIOS_FREQHIGH_OFFSET 0x17
#define SMBIOS_FREQLOW_MASK 0xFF
#define SMBIOS_CORE_PACKAGE_OFFSET 0x23
+#define SMBIOS_THREAD_PACKAGE_OFFSET 0x25
#define LOONGSON_EFI_ENABLE (1 << 3)
unsigned long fw_arg0, fw_arg1, fw_arg2;
@@ -125,7 +126,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm)
cpu_clock_freq = freq_temp * 1000000;
loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]);
- loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET);
+ loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET);
pr_info("CpuClock = %llu\n", cpu_clock_freq);
}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index f9f4eb00c92e..c57b4134f3e8 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -555,6 +555,9 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
#else
unsigned int *pc;
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_enable();
+
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
/*
@@ -579,6 +582,8 @@ sigbus:
die_if_kernel("Kernel ale access", regs);
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
out:
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_disable();
#endif
irqentry_exit(regs, state);
}
diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
index f6fcc52aefae..2c0d852ca536 100644
--- a/arch/loongarch/kernel/vdso.c
+++ b/arch/loongarch/kernel/vdso.c
@@ -34,7 +34,6 @@ static union {
struct loongarch_vdso_data vdata;
} loongarch_vdso_data __page_aligned_data;
-static struct page *vdso_pages[] = { NULL };
struct vdso_data *vdso_data = generic_vdso_data.data;
struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata;
struct vdso_rng_data *vdso_rng_data = &loongarch_vdso_data.vdata.rng_data;
@@ -85,10 +84,8 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct loongarch_vdso_info vdso_info = {
.vdso = vdso_start,
- .size = PAGE_SIZE,
.code_mapping = {
.name = "[vdso]",
- .pages = vdso_pages,
.mremap = vdso_mremap,
},
.data_mapping = {
@@ -103,11 +100,14 @@ static int __init init_vdso(void)
unsigned long i, cpu, pfn;
BUG_ON(!PAGE_ALIGNED(vdso_info.vdso));
- BUG_ON(!PAGE_ALIGNED(vdso_info.size));
for_each_possible_cpu(cpu)
vdso_pdata[cpu].node = cpu_to_node(cpu);
+ vdso_info.size = PAGE_ALIGN(vdso_end - vdso_start);
+ vdso_info.code_mapping.pages =
+ kcalloc(vdso_info.size / PAGE_SIZE, sizeof(struct page *), GFP_KERNEL);
+
pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso));
for (i = 0; i < vdso_info.size / PAGE_SIZE; i++)
vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i);
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index 74a4b5c272d6..32dc213374be 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -161,10 +161,11 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
if (kvm_vcpu_is_blocking(vcpu)) {
/*
- * HRTIMER_MODE_PINNED is suggested since vcpu may run in
- * the same physical cpu in next time
+ * HRTIMER_MODE_PINNED_HARD is suggested since the vcpu may run on
+ * the same physical cpu next time, and the timer should run in
+ * hardirq context even in the PREEMPT_RT case.
*/
- hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED_HARD);
}
}
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 0697b1064251..174734a23d0a 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -1457,7 +1457,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.vpid = 0;
vcpu->arch.flush_gpa = INVALID_GPA;
- hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
vcpu->arch.swtimer.function = kvm_swtimer_wakeup;
vcpu->arch.handle_exit = kvm_handle_exit;
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 8a87a482c8f4..188b52bbb254 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -201,7 +201,9 @@ pte_t * __init populate_kernel_pte(unsigned long addr)
pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
panic("%s: Failed to allocate memory\n", __func__);
+
pmd_populate_kernel(&init_mm, pmd, pte);
+ kernel_pte_init(pte);
}
return pte_offset_kernel(pmd, addr);
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index eb6a29b491a7..3fa69b23ff84 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -116,6 +116,26 @@ void pud_init(void *addr)
EXPORT_SYMBOL_GPL(pud_init);
#endif
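+/*
+ * Kernel page tables are pre-initialized with _PAGE_GLOBAL in every
+ * entry, so set_pte() no longer has to propagate the global bit to a
+ * buddy entry at runtime (see the simplified set_pte() above).
+ */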
+void kernel_pte_init(void *addr)
+{
+ unsigned long *p, *end;
+
+ p = (unsigned long *)addr;
+ end = p + PTRS_PER_PTE;
+
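+ /* Unrolled: eight entries per iteration (PTRS_PER_PTE is a multiple of 8) */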
+ do {
+ p[0] = _PAGE_GLOBAL;
+ p[1] = _PAGE_GLOBAL;
+ p[2] = _PAGE_GLOBAL;
+ p[3] = _PAGE_GLOBAL;
+ p[4] = _PAGE_GLOBAL;
+ p += 8;
+ p[-3] = _PAGE_GLOBAL;
+ p[-2] = _PAGE_GLOBAL;
+ p[-1] = _PAGE_GLOBAL;
+ } while (p != end);
+}
+
pmd_t mk_pmd(struct page *page, pgprot_t prot)
{
pmd_t pmd;
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index 79a749f4ad04..edff4306fa70 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -8,7 +8,7 @@
#ifndef _ASM_MICROBLAZE_FLAT_H
#define _ASM_MICROBLAZE_FLAT_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/*
* Microblaze works a little differently from other arches, because
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index adb6d5b0e6eb..90021c6a8cab 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -16,7 +16,7 @@
#include <linux/libfdt.h>
#include <asm/addrspace.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm-generic/vmlinux.lds.h>
#include "decompress.h"
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
index 2a59b85f88aa..a7a1d43a1b2c 100644
--- a/arch/mips/crypto/crc32-mips.c
+++ b/arch/mips/crypto/crc32-mips.c
@@ -14,7 +14,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <asm/mipsregs.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/internal/hash.h>
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
index 867728ee535a..c03ad0bbe69c 100644
--- a/arch/mips/crypto/poly1305-glue.c
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -5,7 +5,7 @@
* Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
index e974a4954df8..c371def2302d 100644
--- a/arch/mips/kernel/cmpxchg.c
+++ b/arch/mips/kernel/cmpxchg.c
@@ -102,3 +102,4 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
return old;
}
}
+EXPORT_SYMBOL(__cmpxchg_small);
diff --git a/arch/nios2/kernel/misaligned.c b/arch/nios2/kernel/misaligned.c
index 23e0544e117c..2f2862eab3c6 100644
--- a/arch/nios2/kernel/misaligned.c
+++ b/arch/nios2/kernel/misaligned.c
@@ -23,7 +23,7 @@
#include <linux/seq_file.h>
#include <asm/traps.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* instructions we emulate */
#define INST_LDHU 0x0b
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index d389359e22ac..9c83bd06ef15 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -6,7 +6,7 @@
#include <linux/uaccess.h>
#include <linux/elf.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/page.h>
#include "sizes.h"
diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h
deleted file mode 100644
index c0621295100d..000000000000
--- a/arch/parisc/include/asm/unaligned.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_PARISC_UNALIGNED_H
-#define _ASM_PARISC_UNALIGNED_H
-
-#include <asm-generic/unaligned.h>
-
-struct pt_regs;
-void handle_unaligned(struct pt_regs *regs);
-int check_unaligned(struct pt_regs *regs);
-
-#endif /* _ASM_PARISC_UNALIGNED_H */
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 294b0e026c9a..b9b3d527bc90 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -36,7 +36,7 @@
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/traps.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/atomic.h>
#include <asm/smp.h>
#include <asm/pdc.h>
@@ -47,6 +47,8 @@
#include <linux/kgdb.h>
#include <linux/kprobes.h>
+#include "unaligned.h"
+
#if defined(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK)
#include <asm/spinlock.h>
#endif
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 3e79e40e361d..f4626943633a 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -12,9 +12,10 @@
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/hardirq.h>
#include <asm/traps.h>
+#include "unaligned.h"
/* #define DEBUG_UNALIGNED 1 */
diff --git a/arch/parisc/kernel/unaligned.h b/arch/parisc/kernel/unaligned.h
new file mode 100644
index 000000000000..c1aa4b12e284
--- /dev/null
+++ b/arch/parisc/kernel/unaligned.h
@@ -0,0 +1,3 @@
+struct pt_regs;
+void handle_unaligned(struct pt_regs *regs);
+int check_unaligned(struct pt_regs *regs);
diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c
index f62ee54076c0..f66ad56e765f 100644
--- a/arch/powerpc/crypto/aes-gcm-p10-glue.c
+++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c
@@ -5,7 +5,7 @@
* Copyright 2022- IBM Inc. All rights reserved
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
index 95dd708573ee..369686e9370b 100644
--- a/arch/powerpc/crypto/poly1305-p10-glue.c
+++ b/arch/powerpc/crypto/poly1305-p10-glue.c
@@ -14,7 +14,7 @@
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 811a7130505c..56c5ebe21b99 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
bctr /* jump into table */
152:
mfdar r11
+ mtdar r10
mtctr r11 /* restore ctr reg from DAR */
mfspr r11, SPRN_SPRG_THREAD
stw r10, DAR(r11)
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 56fb1633529a..31ca5a547004 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -22,7 +22,7 @@ endif
ifneq ($(c-getrandom-y),)
CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
- CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
+ CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y)
endif
# Build rules
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 56a1f7ce78d2..d92759c21fae 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -282,6 +282,7 @@ int __init opal_event_init(void)
name, NULL);
if (rc) {
pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start);
+ kfree(name);
continue;
}
}
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index d95e03b3d3e3..9e297f88adc5 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -19,7 +19,7 @@
#include <uapi/linux/papr_pdsm.h>
#include <linux/papr_scm.h>
#include <asm/mce.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/perf_event.h>
#define BIND_ANY_ADDR (~0ul)
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 0a1e859323b4..a8085cd8215e 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -55,7 +55,7 @@ struct imsic {
/* IMSIC SW-file */
struct imsic_mrif *swfile;
phys_addr_t swfile_pa;
- spinlock_t swfile_extirq_lock;
+ raw_spinlock_t swfile_extirq_lock;
};
#define imsic_vs_csr_read(__c) \
@@ -622,7 +622,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
* interruptions between reading topei and updating pending status.
*/
- spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
+ raw_spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
@@ -630,7 +630,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
else
kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
- spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
+ raw_spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
}
static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
@@ -1051,7 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
}
imsic->swfile = page_to_virt(swfile_page);
imsic->swfile_pa = page_to_phys(swfile_page);
- spin_lock_init(&imsic->swfile_extirq_lock);
+ raw_spin_lock_init(&imsic->swfile_extirq_lock);
/* Setup IO device */
kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 99f34409fb60..4cc631fa7039 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -18,6 +18,7 @@
#define RV_MAX_REG_ARGS 8
#define RV_FENTRY_NINSNS 2
#define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4)
+#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI_CLANG) ? 1 : 0)
/* imm that allows emit_imm to emit max count insns */
#define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF
@@ -271,7 +272,8 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
if (!is_tail_call)
emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
+ /* kcfi, fentry and TCC init insns will be skipped on tailcall */
+ is_tail_call ? (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 : 0,
ctx);
}
@@ -548,8 +550,8 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
rv_lr_w(r0, 0, rd, 0, 0), ctx);
jmp_offset = ninsns_rvoff(8);
emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
- emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) :
- rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx);
+ emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
+ rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
jmp_offset = ninsns_rvoff(-6);
emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
emit(rv_fence(0x3, 0x3), ctx);
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 9b57add02cd5..fb0e9a1d9be2 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -50,7 +50,6 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
-# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
@@ -95,6 +94,7 @@ CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
CONFIG_SLUB_STATS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
@@ -426,6 +426,13 @@ CONFIG_DEVTMPFS_SAFE=y
# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -486,6 +493,7 @@ CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
CONFIG_DM_VDO=m
@@ -535,6 +543,7 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_META is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -695,6 +704,7 @@ CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
CONFIG_CIFS=m
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
@@ -740,7 +750,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index df4addd1834a..88be0a734b60 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -48,7 +48,6 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
-# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
@@ -89,6 +88,7 @@ CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -416,6 +416,13 @@ CONFIG_DEVTMPFS_SAFE=y
# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -476,6 +483,7 @@ CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
CONFIG_DM_VDO=m
@@ -525,6 +533,7 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_META is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -682,6 +691,7 @@ CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
CONFIG_CIFS=m
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
@@ -726,7 +736,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
@@ -767,6 +776,7 @@ CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ZSTD=m
CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 8c2b61363bab..bcbaa069de96 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -49,6 +49,7 @@ CONFIG_ZFCP=y
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
+# CONFIG_S390_UV_UAPI is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 0fbc992d7a5e..fc9933a743d6 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -16,8 +16,10 @@
#include <asm/pci_io.h>
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
void *xlate_dev_mem_ptr(phys_addr_t phys);
#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 66200d4a2134..29ee289108c5 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -49,6 +49,7 @@ struct perf_sf_sde_regs {
};
#define perf_arch_fetch_caller_regs(regs, __ip) do { \
+ (regs)->psw.mask = 0; \
(regs)->psw.addr = (__ip); \
(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \
offsetof(struct stack_frame, back_chain); \
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 2a32438e09ce..74f73141f9b9 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_diagnose_258++;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
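+ /* gprs[rx] holds a guest real address, hence read_guest_real() */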
+ rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index e65f597e3044..a688351f4ab5 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
const gfn_t gfn = gpa_to_gfn(gpa);
int rc;
+ if (!gfn_to_memslot(kvm, gfn))
+ return PGM_ADDRESSING;
if (mode == GACC_STORE)
rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
else
@@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
gra += fragment_len;
data += fragment_len;
}
+ if (rc > 0)
+ vcpu->arch.pgm.code = rc;
return rc;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index b320d12aa049..3fde45a151f2 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
* @len: number of bytes to copy
*
* Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
* Guest low address and key protection are not checked.
*
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
*
* If an error occurs data may have been copied partially to guest memory.
*/
@@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
* @len: number of bytes to copy
*
* Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
* Guest key protection is not checked.
*
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
*
* If an error occurs data may have been copied partially to kernel space.
*/
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index dbe95ec5917e..d4f19d33914c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -280,18 +280,19 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
goto no_pdev;
switch (ccdf->pec) {
- case 0x003a: /* Service Action or Error Recovery Successful */
+ case 0x002a: /* Error event concerns FMB */
+ case 0x002b:
+ case 0x002c:
+ break;
+ case 0x0040: /* Service Action or Error Recovery Failed */
+ case 0x003b:
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default: /* PCI function left in the error state, attempt to recover */
ers_res = zpci_event_attempt_error_recovery(pdev);
if (ers_res != PCI_ERS_RESULT_RECOVERED)
zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
break;
- default:
- /*
- * Mark as frozen not permanently failed because the device
- * could be subsequently recovered by the platform.
- */
- zpci_event_io_failure(pdev, pci_channel_io_frozen);
- break;
}
pci_dev_put(pdev);
no_pdev:
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index fee4f25555cb..70752c7bc55f 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -9,7 +9,7 @@
#ifndef __ASM_SH_FLAT_H
#define __ASM_SH_FLAT_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
u32 *addr)
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 45c8ae20d109..a1b54bedc929 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -24,7 +24,7 @@
#include <asm/dwarf.h>
#include <asm/unwinder.h>
#include <asm/sections.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/stacktrace.h>
/* Reserve enough memory for two stack frames */
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index b9cee98a754e..a469a80840d3 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -18,7 +18,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/dwarf.h>
int apply_relocate_add(Elf32_Shdr *sechdrs,
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index 688db0dcb97d..913b9a09e885 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -20,7 +20,7 @@
#include <asm/pstate.h>
#include <asm/elf.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "opcodes.h"
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index 6100819681b5..744e7f31e8ef 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -14,7 +14,7 @@
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <irq_kern.h>
#define MAX_DEVICES 8
diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
index 7d9d60e41e4e..1d4b6bbc1b65 100644
--- a/arch/um/include/asm/uaccess.h
+++ b/arch/um/include/asm/uaccess.h
@@ -8,7 +8,7 @@
#define __UM_UACCESS_H
#include <asm/elf.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#define __under_task_size(addr, size) \
(((unsigned long) (addr) < TASK_SIZE) && \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2852fcd82cbd..16354dfa6d96 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2257,6 +2257,7 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
config ADDRESS_MASKING
bool "Linear Address Masking support"
depends on X86_64
+ depends on COMPILE_TEST || !CPU_MITIGATIONS # wait for LASS
help
Linear Address Masking (LAM) modifies the checking that is applied
to 64-bit linear addresses, allowing software to use the
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index d45e9c0c42ac..f110708c8038 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -8,7 +8,7 @@
* Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 700ecaee9a08..41bc02e48916 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,7 +19,7 @@
#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index d9feadffa972..324686bca368 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -9,6 +9,8 @@
#include <asm/unwind_hints.h>
#include <asm/segment.h>
#include <asm/cache.h>
+#include <asm/cpufeatures.h>
+#include <asm/nospec-branch.h>
#include "calling.h"
@@ -19,6 +21,9 @@ SYM_FUNC_START(entry_ibpb)
movl $PRED_CMD_IBPB, %eax
xorl %edx, %edx
wrmsr
+
+ /* Make sure IBPB clears return stack predictions too. */
+ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET
RET
SYM_FUNC_END(entry_ibpb)
/* For KVM */
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d3a814efbff6..20be5758c2d2 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -871,6 +871,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
/* Now ready to switch the cr3 */
SWITCH_TO_USER_CR3 scratch_reg=%eax
+ /* Clobbers ZF */
+ CLEAR_CPU_BUFFERS
/*
* Restore all flags except IF. (We restore IF separately because
@@ -881,7 +883,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
- CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -1144,7 +1145,6 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
- CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
@@ -1165,6 +1165,7 @@ SYM_CODE_START(asm_exc_nmi)
CHECK_AND_APPLY_ESPFIX
RESTORE_ALL_NMI cr3_reg=%edi pop=4
+ CLEAR_CPU_BUFFERS
jmp .Lirq_return
#ifdef CONFIG_X86_ESPFIX32
@@ -1206,6 +1207,7 @@ SYM_CODE_START(asm_exc_nmi)
* 1 - orig_ax
*/
lss (1+5+6)*4(%esp), %esp # back to espfix stack
+ CLEAR_CPU_BUFFERS
jmp .Lirq_return
#endif
SYM_CODE_END(asm_exc_nmi)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dd4682857c12..913fd3a7bac6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -215,7 +215,7 @@
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
@@ -348,6 +348,7 @@
#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */
#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -523,4 +524,5 @@
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 0152a81d9b4a..b4d719de2c84 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
+#include <asm/ptrace.h>
+
#ifdef CONFIG_FUNCTION_TRACER
#ifndef CC_USING_FENTRY
# error Compiler does not support fentry?
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ff5f1ecc7d1e..96b410b1d4e8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -323,7 +323,16 @@
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
.macro CLEAR_CPU_BUFFERS
- ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+#ifdef CONFIG_X86_64
+ ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+#else
+ /*
+ * In 32bit mode, the memory operand must be a %cs reference. The data
+ * segments may not be usable (vm86 mode), and the stack segment may not
+ * be flat (ESPFIX32).
+ */
+ ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+#endif
.endm
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index c02183d3cdd7..ecd58ea9a837 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -26,7 +26,7 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_APM 1
typedef void (cpu_emergency_virt_cb)(void);
-#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+#if IS_ENABLED(CONFIG_KVM_X86)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
@@ -34,7 +34,7 @@ void cpu_emergency_disable_virtualization(void);
static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_disable_virtualization(void) {}
-#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
+#endif /* CONFIG_KVM_X86 */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 24e3a53ca255..6652ebddfd02 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -6,7 +6,7 @@
typeof(sym) __ret; \
asm_inline("mov %1,%0\n1:\n" \
".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
- ".long 1b - %c2 - .\n\t" \
+ ".long 1b - %c2 - .\n" \
".popsection" \
:"=r" (__ret) \
:"i" ((unsigned long)0x0123456789abcdefull), \
@@ -20,7 +20,7 @@
typeof(0u+(val)) __ret = (val); \
asm_inline("shrl $12,%k0\n1:\n" \
".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
- ".long 1b - 1 - .\n\t" \
+ ".long 1b - 1 - .\n" \
".popsection" \
:"+r" (__ret)); \
__ret; })
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index afce8ee5d7b7..b0a887209400 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,6 +12,13 @@
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -46,19 +53,24 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
-/*
- * The virtual address space space is logically divided into a kernel
- * half and a user half. When cast to a signed type, user pointers
- * are positive and kernel pointers are negative.
- */
-#define valid_user_address(x) ((__force long)(x) >= 0)
+#define valid_user_address(x) \
+ ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
/*
* Masking the user address is an alternative to a conditional
* user_access_begin that can avoid the fencing. This only works
* for dense accesses starting at the address.
*/
-#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+ unsigned long mask;
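+
+ /*
+ * cmp/sbb yields mask = (ptr > USER_PTR_MAX) ? ~0UL : 0UL, so the
+ * OR below forces out-of-range pointers to an all-ones,
+ * non-canonical address that is guaranteed to fault.
+ */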
+ asm("cmp %1,%0\n\t"
+ "sbb %0,%0"
+ :"=r" (mask)
+ :"r" (ptr),
+ "0" (runtime_const_ptr(USER_PTR_MAX)));
+ return (__force void __user *)(mask | (__force unsigned long)ptr);
+}
#define masked_user_access_begin(x) ({ \
__auto_type __masked_ptr = (x); \
__masked_ptr = mask_user_address(__masked_ptr); \
@@ -69,23 +81,16 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
* arbitrary values in those bits rather than masking them off.
*
* Enforce two rules:
- * 1. 'ptr' must be in the user half of the address space
+ * 1. 'ptr' must be in the user part of the address space
* 2. 'ptr+size' must not overflow into kernel addresses
*
- * Note that addresses around the sign change are not valid addresses,
- * and will GP-fault even with LAM enabled if the sign bit is set (see
- * "CR3.LAM_SUP" that can narrow the canonicality check if we ever
- * enable it, but not remove it entirely).
- *
- * So the "overflow into kernel addresses" does not imply some sudden
- * exact boundary at the sign bit, and we can allow a lot of slop on the
- * size check.
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
*
* In fact, we could probably remove the size check entirely, since
* any kernel accesses will be in increasing address order starting
- * at 'ptr', and even if the end might be in kernel space, we'll
- * hit the GP faults for non-canonical accesses before we ever get
- * there.
+ * at 'ptr'.
*
* That's a separate optimization, for now just handle the small
* constant case.
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index dc5d3216af24..9fe9972d2071 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -44,6 +44,7 @@
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4
+#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4 0x16fc
#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c
#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc
#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4
@@ -127,6 +128,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6513c53c9459..c5fb28e6451a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -440,7 +440,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
- apic_write(APIC_TMICT, 0);
+
+ /*
+ * Setting APIC_LVT_MASKED (above) should be enough to tell
+ * the hardware that this timer will never fire. But AMD
+ * erratum 411 and some Intel CPU behavior circa 2024 say
+ * otherwise. Time for belt and suspenders programming: mask
+ * the timer _and_ zero the counter registers:
+ */
+ if (v & APIC_LVT_TIMER_TSCDEADLINE)
+ wrmsrl(MSR_IA32_TSC_DEADLINE, 0);
+ else
+ apic_write(APIC_TMICT, 0);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 015971adadfc..fab5caec0b72 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1202,5 +1202,6 @@ void amd_check_microcode(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return;
- on_each_cpu(zenbleed_check_cpu, NULL, 1);
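+ /* Zenbleed is a Zen2-only erratum, no need to check other generations */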
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2))
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d1915427b4ff..47a01d4028f6 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,8 +1115,25 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+
+ /*
+ * IBPB on entry already obviates the need for
+ * software-based untraining so clear those in case some
+ * other mitigation like SRSO has selected them.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_UNRET);
+ setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
mitigate_smt = true;
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
break;
case RETBLEED_MITIGATION_STUFF:
@@ -2627,6 +2644,14 @@ static void __init srso_select_mitigation(void)
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
srso_mitigation = SRSO_MITIGATION_IBPB;
+
+ /*
+ * IBPB on entry already obviates the need for
+ * software-based untraining so clear those in case some
+ * other mitigation like Retbleed has selected them.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_UNRET);
+ setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2638,6 +2663,13 @@ static void __init srso_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07a34d723505..a5f221ea5688 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -69,6 +69,7 @@
#include <asm/sev.h>
#include <asm/tdx.h>
#include <asm/posted_intr.h>
+#include <asm/runtime-const.h>
#include "cpu.h"
@@ -1443,6 +1444,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
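+ /* An IBPB that does not also flush return predictions needs extra RSB clearing */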
+ if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
+ setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -2386,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) {
+ unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1;
+
+ /*
+ * Enable this when LAM is gated on LASS support
+ if (cpu_feature_enabled(X86_FEATURE_LAM))
+ USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1;
+ */
+ runtime_const_init(ptr, USER_PTR_MAX);
+
/*
* Make sure the first 2MB area is not mapped by huge pages
* There are typically fixed size MTRRs in there and overlapping
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index f63b051f25a0..31a73715d755 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -584,7 +584,7 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size);
static int __init save_microcode_in_initrd(void)
{
@@ -605,7 +605,7 @@ static int __init save_microcode_in_initrd(void)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+ ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret > UCODE_UPDATED)
return -EINVAL;
@@ -613,16 +613,19 @@ static int __init save_microcode_in_initrd(void)
}
early_initcall(save_microcode_in_initrd);
-static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n)
+static inline bool patch_cpus_equivalent(struct ucode_patch *p,
+ struct ucode_patch *n,
+ bool ignore_stepping)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id);
union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id);
- /* Zap stepping */
- p_cid.stepping = 0;
- n_cid.stepping = 0;
+ if (ignore_stepping) {
+ p_cid.stepping = 0;
+ n_cid.stepping = 0;
+ }
return p_cid.full == n_cid.full;
} else {
@@ -644,13 +647,13 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi
WARN_ON_ONCE(!n.patch_id);
list_for_each_entry(p, &microcode_cache, plist)
- if (patch_cpus_equivalent(p, &n))
+ if (patch_cpus_equivalent(p, &n, false))
return p;
return NULL;
}
-static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
+static inline int patch_newer(struct ucode_patch *p, struct ucode_patch *n)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
@@ -659,6 +662,9 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
zp.ucode_rev = p->patch_id;
zn.ucode_rev = n->patch_id;
+ if (zn.stepping != zp.stepping)
+ return -1;
+
return zn.rev > zp.rev;
} else {
return n->patch_id > p->patch_id;
@@ -668,10 +674,14 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
static void update_cache(struct ucode_patch *new_patch)
{
struct ucode_patch *p;
+ int ret;
list_for_each_entry(p, &microcode_cache, plist) {
- if (patch_cpus_equivalent(p, new_patch)) {
- if (!patch_newer(p, new_patch)) {
+ if (patch_cpus_equivalent(p, new_patch, true)) {
+ ret = patch_newer(p, new_patch);
+ if (ret < 0)
+ continue;
+ else if (!ret) {
/* we already have the latest patch */
kfree(new_patch->data);
kfree(new_patch);
@@ -944,21 +954,30 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size)
{
- struct cpuinfo_x86 *c;
- unsigned int nid, cpu;
- struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
- if (ret != UCODE_OK) {
+ if (ret != UCODE_OK)
cleanup();
+
+ return ret;
+}
+
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+ struct cpuinfo_x86 *c;
+ unsigned int nid, cpu;
+ struct ucode_patch *p;
+ enum ucode_state ret;
+
+ ret = _load_microcode_amd(family, data, size);
+ if (ret != UCODE_OK)
return ret;
- }
for_each_node(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
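The stepping-aware logic above relies on Zen-era patch IDs encoding the CPU's family/model/stepping directly. A minimal C sketch of the new cache-update decision, assuming a zen_patch_rev-style bitfield (field layout illustrative, not authoritative):

    union zen_patch_rev {
        struct {
            u32 rev        : 8,
                stepping   : 4,
                model      : 4,
                __reserved : 4,
                ext_model  : 4,
                ext_fam    : 8;
        };
        u32 ucode_rev;
    };

    /* <0: steppings differ, cache both; 0: cached patch wins; >0: new wins */
    static int zen_patch_cmp(u32 cached_id, u32 new_id)
    {
        union zen_patch_rev zp = { .ucode_rev = cached_id };
        union zen_patch_rev zn = { .ucode_rev = new_id };

        if (zn.stepping != zp.stepping)
            return -1;

        return zn.rev > zp.rev;
    }

update_cache() then skips (continue) on -1, so patches for different steppings coexist in the cache instead of one stepping's patch evicting another's.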
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 8591d53c144b..b681c2e07dbf 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -207,7 +207,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
return false;
}
-static bool __get_mem_config_intel(struct rdt_resource *r)
+static __init bool __get_mem_config_intel(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
@@ -241,7 +241,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
return true;
}
-static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
+static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
u32 eax, ebx, ecx, edx, subleaf;
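Tagging these probe helpers __init places them in .init.text, which the kernel frees once boot completes; that is only valid because every caller is itself boot-time code. A hedged sketch of the constraint (names hypothetical):

    /* An __init function must only be reachable from __init callers:
     * after free_initmem() its text is discarded and a late call faults. */
    static __init bool probe_mem_bw(void)
    {
        return true;        /* stand-in for the CPUID probe above */
    }

    static int __init resctrl_like_setup(void)
    {
        if (probe_mem_bw())
            pr_info("MB resource enabled\n");
        return 0;
    }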
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 50fa1fe9a073..200d89a64027 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -29,10 +29,10 @@
* hardware. The allocated bandwidth percentage is rounded to the next
* control step available on the hardware.
*/
-static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
{
- unsigned long bw;
int ret;
+ u32 bw;
/*
	 * Only linear delay values are supported for current Intel SKUs.
@@ -42,16 +42,21 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false;
}
- ret = kstrtoul(buf, 10, &bw);
+ ret = kstrtou32(buf, 10, &bw);
if (ret) {
- rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
+ rdt_last_cmd_printf("Invalid MB value %s\n", buf);
return false;
}
- if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
- !is_mba_sc(r)) {
- rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
- r->membw.min_bw, r->default_ctrl);
+ /* Nothing else to do if software controller is enabled. */
+ if (is_mba_sc(r)) {
+ *data = bw;
+ return true;
+ }
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
+ bw, r->membw.min_bw, r->default_ctrl);
return false;
}
@@ -65,7 +70,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
struct rdt_resource *r = s->res;
- unsigned long bw_val;
+ u32 bw_val;
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
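Parsing into u32 means kstrtou32() itself rejects anything that cannot fit the 32-bit schemata field, and the early return leaves software-controller (mba_sc) values unclamped by the hardware range. A condensed sketch of the resulting flow (helper name and parameters illustrative):

    static bool parse_mb_value(const char *buf, u32 min_bw, u32 max_bw,
                               bool mba_sc, u32 *out)
    {
        u32 bw;

        if (kstrtou32(buf, 10, &bw))
            return false;               /* not a valid decimal u32 */

        if (!mba_sc && (bw < min_bw || bw > max_bw))
            return false;               /* hardware-enforced range only */

        *out = bw;
        return true;
    }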
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 263f8aed4e2c..21e9e4845354 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -37,6 +37,7 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
+#include <asm/mtrr.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
@@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
}
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
+
+ /* Set WB as the default cache mode for SEV-SNP and TDX */
+ mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
#if defined(CONFIG_AMD_MEM_ENCRYPT)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 0e0a4cf6b5eb..615922838c51 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -530,7 +530,7 @@ static inline void kb_wait(void)
static inline void nmi_shootdown_cpus_on_restart(void);
-#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+#if IS_ENABLED(CONFIG_KVM_X86)
/* RCU-protected callback to disable virtualization prior to reboot. */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
@@ -600,7 +600,7 @@ static void emergency_reboot_disable_virtualization(void)
}
#else
static void emergency_reboot_disable_virtualization(void) { }
-#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
+#endif /* CONFIG_KVM_X86 */
void __attribute__((weak)) mach_reboot_fixups(void)
{
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d05392db5d0f..2dbadf347b5f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs)
int ud_type;
u32 imm;
- /*
- * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
- * is a rare case that uses @regs without passing them to
- * irqentry_enter().
- */
- kmsan_unpoison_entry_regs(regs);
ud_type = decode_bug(regs->ip, &imm);
if (ud_type == BUG_NONE)
return handled;
@@ -276,6 +270,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
instrumentation_begin();
/*
+ * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+ * is a rare case that uses @regs without passing them to
+ * irqentry_enter().
+ */
+ kmsan_unpoison_entry_regs(regs);
+ /*
* Since we're emulating a CALL with exceptions, restore the interrupt
* state to what it was at the exception site.
*/
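The move is about noinstr rules, not behavior: kmsan_unpoison_entry_regs() counts as instrumentation, and in a noinstr function such calls must sit inside an instrumentation_begin()/instrumentation_end() pair. A minimal sketch of the required shape (function name hypothetical):

    static noinstr void example_entry_handler(struct pt_regs *regs)
    {
        /* no instrumented calls permitted out here */

        instrumentation_begin();
        kmsan_unpoison_entry_regs(regs);    /* legal: inside the region */
        instrumentation_end();
    }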
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 6726be89b7a6..b8c5741d2fb4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -358,6 +358,7 @@ SECTIONS
#endif
RUNTIME_CONST_VARIABLES
+ RUNTIME_CONST(ptr, USER_PTR_MAX)
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 730c2f34d347..f09f13c01c6b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -17,8 +17,8 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
-config KVM
- tristate "Kernel-based Virtual Machine (KVM) support"
+config KVM_X86
+ def_tristate KVM if KVM_INTEL || KVM_AMD
depends on X86_LOCAL_APIC
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
@@ -44,7 +44,11 @@ config KVM
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_PRE_FAULT_MEMORY
+ select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM
select KVM_WERROR if WERROR
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
help
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
@@ -77,7 +81,6 @@ config KVM_SW_PROTECTED_VM
bool "Enable support for KVM software-protected VMs"
depends on EXPERT
depends on KVM && X86_64
- select KVM_GENERIC_PRIVATE_MEM
help
Enable support for KVM software-protected VMs. Currently, software-
protected VMs are purely a development and testing vehicle for
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 5494669a055a..f9dddb8cb466 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -32,7 +32,7 @@ kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o
kvm-amd-y += svm/svm_onhyperv.o
endif
-obj-$(CONFIG_KVM) += kvm.o
+obj-$(CONFIG_KVM_X86) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e52f990548df..8e853a5fc867 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1556,6 +1556,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool flush = false;
+ /*
+ * To prevent races with vCPUs faulting in a gfn using stale data,
+ * zapping a gfn range must be protected by mmu_invalidate_in_progress
+ * (and mmu_invalidate_seq). The only exception is memslot deletion;
+ * in that case, SRCU synchronization ensures that SPTEs are zapped
+ * after all vCPUs have unlocked SRCU, guaranteeing that vCPUs see the
+ * invalid slot.
+ */
+ lockdep_assert_once(kvm->mmu_invalidate_in_progress ||
+ lockdep_is_held(&kvm->slots_lock));
+
if (kvm_memslots_have_rmaps(kvm))
flush = __kvm_rmap_zap_gfn_range(kvm, range->slot,
range->start, range->end,
@@ -7047,14 +7058,42 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
kvm_mmu_zap_all(kvm);
}
-/*
- * Zapping leaf SPTEs with memslot range when a memslot is moved/deleted.
- *
- * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst
- * case scenario we'll have unused shadow pages lying around until they
- * are recycled due to age or when the VM is destroyed.
- */
-static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot)
+static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ bool flush)
+{
+ LIST_HEAD(invalid_list);
+ unsigned long i;
+
+ if (list_empty(&kvm->arch.active_mmu_pages))
+ goto out_flush;
+
+ /*
+ * Since accounting information is stored in struct kvm_arch_memory_slot,
+ * all MMU pages that are shadowing guest PTEs must be zapped before the
+ * memslot is deleted, as freeing such pages after the memslot is freed
+ * will result in use-after-free, e.g. in unaccount_shadowed().
+ */
+ for (i = 0; i < slot->npages; i++) {
+ struct kvm_mmu_page *sp;
+ gfn_t gfn = slot->base_gfn + i;
+
+ for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn)
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+
+ if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
+ kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+ flush = false;
+ cond_resched_rwlock_write(&kvm->mmu_lock);
+ }
+ }
+
+out_flush:
+ kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+}
+
+static void kvm_mmu_zap_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
{
struct kvm_gfn_range range = {
.slot = slot,
@@ -7062,11 +7101,11 @@ static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *s
.end = slot->base_gfn + slot->npages,
.may_block = true,
};
+ bool flush;
write_lock(&kvm->mmu_lock);
- if (kvm_unmap_gfn_range(kvm, &range))
- kvm_flush_remote_tlbs_memslot(kvm, slot);
-
+ flush = kvm_unmap_gfn_range(kvm, &range);
+ kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush);
write_unlock(&kvm->mmu_lock);
}
@@ -7082,7 +7121,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
if (kvm_memslot_flush_zap_all(kvm))
kvm_mmu_zap_all_fast(kvm);
else
- kvm_mmu_zap_memslot_leafs(kvm, slot);
+ kvm_mmu_zap_memslot(kvm, slot);
}
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
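lockdep_assert_once() documents the zap invariant in executable form: either an MMU invalidation is in progress or slots_lock is held, and it warns at most once when neither holds. A hedged mini-example of the same either-or guard (helper name hypothetical):

    static void assert_zap_is_protected(struct kvm *kvm)
    {
        /* Warns once (under lockdep) if neither protection is in place. */
        lockdep_assert_once(kvm->mmu_invalidate_in_progress ||
                            lockdep_is_held(&kvm->slots_lock));
    }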
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d5314cb7dff4..cf84103ce38b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -63,8 +63,12 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
+ /*
+ * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores
+ * nCR3[4:0] when loading PDPTEs from memory.
+ */
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
- offset_in_page(cr3) + index * 8, 8);
+ (cr3 & GENMASK(11, 5)) + index * 8, 8);
if (ret)
return 0;
return pdpte;
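The masking change encodes the hardware behavior the new comment describes: a PAE PDPTE table is 32 bytes (4 entries of 8 bytes), so the CPU ignores nCR3[4:0], whereas offset_in_page() would have preserved those bits. A hedged helper form of the same computation:

    #include <linux/bits.h>

    /* Byte offset of PDPTE @index within the page holding the table:
     * keep nCR3 bits 11:5 only, since hardware ignores bits 4:0. */
    static unsigned int pdpte_page_offset(u64 ncr3, int index)
    {
        return (ncr3 & GENMASK(11, 5)) + index * 8;
    }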
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1a4438358c5e..81ed596e4454 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4888,9 +4888,6 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
- vmx_segment_cache_clear(vmx);
- kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
-
seg_setup(VCPU_SREG_CS);
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
@@ -4917,6 +4914,9 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+ vmx_segment_cache_clear(vmx);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
+
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
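Relocating the cache clear below the final segment write closes a window: an interrupt arriving between an early clear and the vmcs_write*() calls can refill the segment cache with stale pre-reset values. Clearing after the last write forces subsequent readers back to the VMCS. Illustrative ordering (helper hypothetical):

    static void reset_guest_segments(struct vcpu_vmx *vmx)
    {
        vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
        /* ... remaining segment and descriptor-table writes ... */
        vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);

        /* Invalidate last, so any cache refill that happened during
         * the writes cannot outlive them. */
        vmx_segment_cache_clear(vmx);
    }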
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index d066aecf8aeb..4357ec2a0bfc 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -39,8 +39,13 @@
.macro check_range size:req
.if IS_ENABLED(CONFIG_X86_64)
- mov %rax, %rdx
- sar $63, %rdx
+ movq $0x0123456789abcdef,%rdx
+ 1:
+ .pushsection runtime_ptr_USER_PTR_MAX,"a"
+ .long 1b - 8 - .
+ .popsection
+ cmp %rax, %rdx
+ sbb %rdx, %rdx
or %rdx, %rax
.else
cmp $TASK_SIZE_MAX-\size+1, %eax
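Two mechanisms combine above. The 0x0123456789abcdef immediate is a placeholder: the runtime_ptr_USER_PTR_MAX section records a self-relative pointer to it (label 1: sits just past the movq, so 1b - 8 is the immediate's address), and boot code rewrites it with the real USER_PTR_MAX. Then cmp %rax, %rdx sets the carry flag exactly when the user pointer exceeds the limit, sbb %rdx, %rdx turns that carry into an all-ones mask, and the or forces a bad pointer noncanonical so the access faults. A hedged C sketch of the boot-time fixup (structure assumed from the runtime-const machinery, not a verbatim copy):

    /* Each table entry is a 32-bit offset, relative to its own address,
     * pointing at the 8-byte movq immediate to patch. */
    static void __init patch_user_ptr_max(s32 *start, s32 *end,
                                          unsigned long user_ptr_max)
    {
        s32 *entry;

        for (entry = start; entry < end; entry++) {
            unsigned long *imm = (void *)((unsigned long)entry + *entry);

            *imm = user_ptr_max;    /* replaces the placeholder */
        }
    }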
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5952ab41c60f..6ffb931b9fb1 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,7 +13,7 @@
#endif
#include <asm/inat.h> /*__ignore_sync_check__ */
#include <asm/insn.h> /* __ignore_sync_check__ */
-#include <asm/unaligned.h> /* __ignore_sync_check__ */
+#include <linux/unaligned.h> /* __ignore_sync_check__ */
#include <linux/errno.h>
#include <linux/kconfig.h>
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 0ce17766c0e5..9a6a943d8e41 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -173,6 +173,8 @@ static void __init __snp_fixup_e820_tables(u64 pa)
e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ if (!memblock_is_region_reserved(pa, PMD_SIZE))
+ memblock_reserve(pa, PMD_SIZE);
}
}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 2c12ae42dc8b..d6818c6cafda 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1032,6 +1032,10 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
+ if (smp_processor_id() == 0)
+ val |= MSR_IA32_APICBASE_BSP;
+ else
+ val &= ~MSR_IA32_APICBASE_BSP;
break;
}
return val;
diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h
index ed5870c779f9..4854419dcd86 100644
--- a/arch/xtensa/include/asm/flat.h
+++ b/arch/xtensa/include/asm/flat.h
@@ -2,7 +2,7 @@
#ifndef __ASM_XTENSA_FLAT_H
#define __ASM_XTENSA_FLAT_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
u32 *addr)