Diffstat (limited to 'arch')
41 files changed, 199 insertions, 104 deletions
diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h
new file mode 100644
index 000000000000..7ebb7eb0bd93
--- /dev/null
+++ b/arch/arm/include/asm/xen/xen-ops.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <xen/arm/xen-ops.h>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 82ffac621854..059cce018570 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -33,7 +33,7 @@
 #include <asm/dma-iommu.h>
 #include <asm/mach/map.h>
 #include <asm/system_info.h>
-#include <xen/swiotlb-xen.h>
+#include <asm/xen/xen-ops.h>
 
 #include "dma.h"
 #include "mm.h"
@@ -2287,10 +2287,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 	set_dma_ops(dev, dma_ops);
 
-#ifdef CONFIG_XEN
-	if (xen_initial_domain())
-		dev->dma_ops = &xen_swiotlb_dma_ops;
-#endif
+	xen_setup_dma_ops(dev);
 
 	dev->archdata.dma_ops_setup = true;
 }
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 07eb69f9e7df..1f9c3ba32833 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -443,6 +443,8 @@ static int __init xen_guest_init(void)
 	if (!xen_domain())
 		return 0;
 
+	xen_set_restricted_virtio_memory_access();
+
 	if (!acpi_disabled)
 		xen_acpi_guest_init();
 	else
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 55f998c3dc28..42ff95dba6da 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -843,9 +843,9 @@
 #define ID_AA64SMFR0_F32F32_SHIFT	32
 
 #define ID_AA64SMFR0_FA64		0x1
-#define ID_AA64SMFR0_I16I64		0x4
+#define ID_AA64SMFR0_I16I64		0xf
 #define ID_AA64SMFR0_F64F64		0x1
-#define ID_AA64SMFR0_I8I32		0x4
+#define ID_AA64SMFR0_I8I32		0xf
 #define ID_AA64SMFR0_F16F32		0x1
 #define ID_AA64SMFR0_B16F32		0x1
 #define ID_AA64SMFR0_F32F32		0x1
diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h
new file mode 100644
index 000000000000..7ebb7eb0bd93
--- /dev/null
+++ b/arch/arm64/include/asm/xen/xen-ops.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <xen/arm/xen-ops.h>
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 819979398127..aecf3071efdd 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -331,7 +331,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
  * trapping to the kernel.
  *
  * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in in
+ * corresponding Zn), P0-P15 and FFR are encoded in
  * task->thread.sve_state, formatted appropriately for vector
  * length task->thread.sve_vl or, if SVCR.SM is set,
  * task->thread.sme_vl.
@@ -1916,10 +1916,15 @@ void __efi_fpsimd_begin(void)
 		if (system_supports_sme()) {
 			svcr = read_sysreg_s(SYS_SVCR);
 
-			if (!system_supports_fa64())
-				ffr = svcr & SVCR_SM_MASK;
+			__this_cpu_write(efi_sm_state,
+					 svcr & SVCR_SM_MASK);
 
-			__this_cpu_write(efi_sm_state, ffr);
+			/*
+			 * Unless we have FA64 FFR does not
+			 * exist in streaming mode.
+			 */
+			if (!system_supports_fa64())
+				ffr = !(svcr & SVCR_SM_MASK);
 		}
 
 		sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
@@ -1964,8 +1969,13 @@ void __efi_fpsimd_end(void)
 				sysreg_clear_set_s(SYS_SVCR, 0, SVCR_SM_MASK);
+
+			/*
+			 * Unless we have FA64 FFR does not
+			 * exist in streaming mode.
+			 */
 			if (!system_supports_fa64())
-				ffr = efi_sm_state;
+				ffr = false;
 		}
 	}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 57b30bcf9f21..f6b00743c399 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -244,6 +244,11 @@ static void mte_update_gcr_excl(struct task_struct *task)
 			   SYS_GCR_EL1);
 }
 
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Only called from assembly, silence sparse */
+void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
+				 __le32 *updptr, int nr_inst);
+
 void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
 				 __le32 *updptr, int nr_inst)
 {
@@ -252,6 +257,7 @@ void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
 	if (kasan_hw_tags_enabled())
 		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
 }
+#endif
 
 void mte_thread_init_user(void)
 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6719f9efea09..6099c81b9322 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -9,9 +9,9 @@
 #include <linux/dma-map-ops.h>
 #include <linux/dma-iommu.h>
 #include <xen/xen.h>
-#include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
+#include <asm/xen/xen-ops.h>
 
 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
			      enum dma_data_direction dir)
@@ -52,8 +52,5 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	if (iommu)
 		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 
-#ifdef CONFIG_XEN
-	if (xen_swiotlb_detect())
-		dev->dma_ops = &xen_swiotlb_dma_ops;
-#endif
+	xen_setup_dma_ops(dev);
 }
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8ab4035dea27..42f2e9a8616c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1478,6 +1478,7 @@ skip_init_ctx:
 		bpf_jit_binary_free(header);
 		prog->bpf_func = NULL;
 		prog->jited = 0;
+		prog->jited_len = 0;
 		goto out_off;
 	}
 	bpf_jit_binary_lock_ro(header);
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index 89bfb74e28de..5c55509eb43f 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -253,7 +253,7 @@ END {
 	next
 }
 
-/0b[01]+/ && block = "Enum" {
+/0b[01]+/ && block == "Enum" {
 	expect_fields(2)
 	val = $1
 	name = $2
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index be68c1f02b79..c2ce2e60c8f0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
 	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IOREMAP_PROT
-	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA			if DEFAULT_UIMAGE
@@ -786,7 +785,6 @@ config THREAD_SHIFT
 	range 13 15
 	default "15" if PPC_256K_PAGES
 	default "14" if PPC64
-	default "14" if KASAN
 	default "13"
 	help
 	  Used to define the stack size. The default is almost always what you
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 125328d1b980..af58f1ed3952 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,10 +14,16 @@
 
 #ifdef __KERNEL__
 
-#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
+#ifdef CONFIG_KASAN
+#define MIN_THREAD_SHIFT	(CONFIG_THREAD_SHIFT + 1)
+#else
+#define MIN_THREAD_SHIFT	CONFIG_THREAD_SHIFT
+#endif
+
+#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT
 #define THREAD_SHIFT		PAGE_SHIFT
 #else
-#define THREAD_SHIFT		CONFIG_THREAD_SHIFT
+#define THREAD_SHIFT		MIN_THREAD_SHIFT
 #endif
 
 #define THREAD_SIZE		(1 << THREAD_SHIFT)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2e2a2a9bcf43..f91f0f29a566 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -37,6 +37,8 @@ KASAN_SANITIZE_paca.o := n
 KASAN_SANITIZE_setup_64.o := n
 KASAN_SANITIZE_mce.o := n
 KASAN_SANITIZE_mce_power.o := n
+KASAN_SANITIZE_udbg.o := n
+KASAN_SANITIZE_udbg_16550.o := n
 
 # we have to be particularly careful in ppc64 to exclude code that
 # runs with translations off, as we cannot access the shadow with
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b62046bf3bb8..ee0433809621 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2158,12 +2158,12 @@ static unsigned long ___get_wchan(struct task_struct *p)
 		return 0;
 
 	do {
-		sp = *(unsigned long *)sp;
+		sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
 		if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
 		    task_is_running(p))
 			return 0;
 		if (count > 0) {
-			ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+			ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
 			if (!in_sched_functions(ip))
 				return ip;
 		}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
index 5dca19361316..09c49632bfe5 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-fpu.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
@@ -17,9 +17,13 @@ int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
 #ifdef CONFIG_PPC_FPU_REGS
 	flush_fp_to_thread(child);
 
-	if (fpidx < (PT_FPSCR - PT_FPR0))
-		memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
-	else
+	if (fpidx < (PT_FPSCR - PT_FPR0)) {
+		if (IS_ENABLED(CONFIG_PPC32))
+			// On 32-bit the index we are passed refers to 32-bit words
+			*data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+		else
+			memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+	} else
 		*data = child->thread.fp_state.fpscr;
 #else
 	*data = 0;
@@ -39,9 +43,13 @@ int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
 #ifdef CONFIG_PPC_FPU_REGS
 	flush_fp_to_thread(child);
 
-	if (fpidx < (PT_FPSCR - PT_FPR0))
-		memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
-	else
+	if (fpidx < (PT_FPSCR - PT_FPR0)) {
+		if (IS_ENABLED(CONFIG_PPC32))
+			// On 32-bit the index we are passed refers to 32-bit words
+			((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+		else
+			memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+	} else
 		child->thread.fp_state.fpscr = data;
 #endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 4d2dc22d4a2d..5d7a72b41ae7 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -444,4 +444,7 @@ void __init pt_regs_check(void)
 	 * real registers.
 	 */
 	BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+	// ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
 }
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 9bb43aa53d43..a6fce3106e02 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -993,8 +993,8 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
  *
  * Return: A pointer to the specified errorlog or NULL if not found.
  */
-struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
-					      uint16_t section_id)
+noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+						      uint16_t section_id)
 {
 	struct rtas_ext_event_log_v6 *ext_log =
 		(struct rtas_ext_event_log_v6 *)log->buffer;
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index d85fa9fc6f3c..80f54723cf6d 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -224,7 +224,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 
 /* wait for all the CPUs to hit real mode but timeout if they don't come in */
 #if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
 {
 	unsigned int msecs;
 	int i;
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 1f3f9fedf1bc..0d04f9d5da8d 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -19,7 +19,6 @@
 #include <asm/cacheflush.h>
 #include <asm/kdump.h>
 #include <mm/mmu_decl.h>
-#include <generated/compile.h>
 #include <generated/utsrelease.h>
 
 struct regions {
@@ -37,10 +36,6 @@ struct regions {
 	int reserved_mem_size_cells;
 };
 
-/* Simplified build-specific string for starting entropy. */
-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
-		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-
 struct regions __initdata regions;
 
 static __init void kaslr_get_cmdline(void *fdt)
@@ -71,7 +66,8 @@ static unsigned long __init get_boot_seed(void *fdt)
 {
 	unsigned long hash = 0;
 
-	hash = rotate_xor(hash, build_str, sizeof(build_str));
+	/* build-specific string for starting entropy. */
+	hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
 	hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
 
 	return hash;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6488b3842199..19f0fc5c6f1b 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,6 +4,7 @@
 # in particular, idle code runs a bunch of things in real mode
 KASAN_SANITIZE_idle.o := n
 KASAN_SANITIZE_pci-ioda.o := n
+KASAN_SANITIZE_pci-ioda-tce.o := n
 # pnv_machine_check_early
 KASAN_SANITIZE_setup.o := n
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 181b855b3050..82cae08976bc 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -465,6 +465,9 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
 	u32 available_events;
 	int index, rc = 0;
 
+	if (!p->stat_buffer_len)
+		return -ENOENT;
+
 	available_events = (p->stat_buffer_len  - sizeof(struct papr_scm_perf_stats))
			/ sizeof(struct papr_scm_perf_stat);
 	if (available_events == 0)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b1a88f6cc349..91c0b80a8bf0 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -125,6 +125,7 @@ config S390
 	select CLONE_BACKWARDS2
 	select DMA_OPS if PCI
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
+	select GCC12_NO_ARRAY_BOUNDS
 	select GENERIC_ALLOCATOR
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
@@ -768,7 +769,6 @@ menu "Virtualization"
 config PROTECTED_VIRTUALIZATION_GUEST
 	def_bool n
 	prompt "Protected virtualization guest support"
-	select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
 	help
 	  Select this option, if you want to be able to run this kernel
 	  as a protected virtualization KVM guest.
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index d73611b35164..495c68a4df1e 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -32,15 +32,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
 KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
-
-ifdef CONFIG_CC_IS_GCC
-	ifeq ($(call cc-ifversion, -ge, 1200, y), y)
-		ifeq ($(call cc-ifversion, -lt, 1300, y), y)
-			KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
-			KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
-		endif
-	endif
-endif
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
 
 UTS_MACHINE	:= s390x
 STACK_SIZE	:= $(if $(CONFIG_KASAN),65536,16384)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6fb6bf64326f..6a0ac00d5a42 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -31,6 +31,7 @@
 #include <linux/cma.h>
 #include <linux/gfp.h>
 #include <linux/dma-direct.h>
+#include <linux/platform-feature.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
 #include <asm/pgalloc.h>
@@ -168,22 +169,14 @@ bool force_dma_unencrypted(struct device *dev)
 	return is_prot_virt_guest();
 }
 
-#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
-
-int arch_has_restricted_virtio_memory_access(void)
-{
-	return is_prot_virt_guest();
-}
-EXPORT_SYMBOL(arch_has_restricted_virtio_memory_access);
-
-#endif
-
 /* protected virtualization */
 static void pv_init(void)
 {
 	if (!is_prot_virt_guest())
 		return;
 
+	platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
+
 	/* make sure bounce buffers are shared */
 	swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
 	swiotlb_update_mem_attributes();
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9783ebc4e021..be0b95e51df6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1542,7 +1542,6 @@ config X86_CPA_STATISTICS
 config X86_MEM_ENCRYPT
 	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select DYNAMIC_PHYSICAL_MASK
-	select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
 	def_bool n
 
 config AMD_MEM_ENCRYPT
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 959d66b9be94..3a240a64ac68 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -653,6 +653,7 @@ struct kvm_vcpu_arch {
 	u64 ia32_misc_enable_msr;
 	u64 smbase;
 	u64 smi_count;
+	bool at_instruction_boundary;
 	bool tpr_access_reporting;
 	bool xsaves_enabled;
 	bool xfd_no_write_intercept;
@@ -1300,6 +1301,8 @@ struct kvm_vcpu_stat {
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
+	u64 preemption_reported;
+	u64 preemption_other;
 	u64 guest_mode;
 };
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35f222aa66bf..913e593a3b45 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -439,7 +439,7 @@ do {									\
 			     [ptr] "+m" (*_ptr),			\
 			     [old] "+a" (__old)				\
 			     : [new] ltype (__new)			\
-			     : "memory", "cc");				\
+			     : "memory");				\
 	if (unlikely(__err))						\
 		goto label;						\
 	if (unlikely(!success))						\
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f4653688fa6d..e826ee9138fa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5179,7 +5179,7 @@ static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 		roots_to_free |= KVM_MMU_ROOT_CURRENT;
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
-		if (is_obsolete_root(kvm, mmu->root.hpa))
+		if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
 			roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 	}
 
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index 6d3b3e5a5533..ee4802d7b36c 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -146,6 +146,15 @@ static bool try_step_up(struct tdp_iter *iter)
 }
 
 /*
+ * Step the iterator back up a level in the paging structure. Should only be
+ * used when the iterator is below the root level.
+ */
+void tdp_iter_step_up(struct tdp_iter *iter)
+{
+	WARN_ON(!try_step_up(iter));
+}
+
+/*
  * Step to the next SPTE in a pre-order traversal of the paging structure.
  * To get to the next SPTE, the iterator either steps down towards the goal
  * GFN, if at a present, non-last-level SPTE, or over to a SPTE mapping a
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index f0af385c56e0..adfca0cf94d3 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
 		    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
 void tdp_iter_restart(struct tdp_iter *iter);
+void tdp_iter_step_up(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 841feaa48be5..7b9265d67131 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1742,12 +1742,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 	gfn_t start = slot->base_gfn;
 	gfn_t end = start + slot->npages;
 	struct tdp_iter iter;
+	int max_mapping_level;
 	kvm_pfn_t pfn;
 
 	rcu_read_lock();
 
 	tdp_root_for_each_pte(iter, root, start, end) {
-retry:
 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
@@ -1755,15 +1755,41 @@ retry:
 		    !is_last_spte(iter.old_spte, iter.level))
 			continue;
 
+		/*
+		 * This is a leaf SPTE. Check if the PFN it maps can
+		 * be mapped at a higher level.
+		 */
 		pfn = spte_to_pfn(iter.old_spte);
-		if (kvm_is_reserved_pfn(pfn) ||
-		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
-							    pfn, PG_LEVEL_NUM))
+
+		if (kvm_is_reserved_pfn(pfn))
 			continue;
 
+		max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+							      iter.gfn, pfn, PG_LEVEL_NUM);
+
+		WARN_ON(max_mapping_level < iter.level);
+
+		/*
+		 * If this page is already mapped at the highest
+		 * viable level, there's nothing more to do.
+		 */
+		if (max_mapping_level == iter.level)
+			continue;
+
+		/*
+		 * The page can be remapped at a higher level, so step
+		 * up to zap the parent SPTE.
+		 */
+		while (max_mapping_level > iter.level)
+			tdp_iter_step_up(&iter);
+
 		/* Note, a successful atomic zap also does a remote TLB flush. */
-		if (tdp_mmu_zap_spte_atomic(kvm, &iter))
-			goto retry;
+		tdp_mmu_zap_spte_atomic(kvm, &iter);
+
+		/*
+		 * If the atomic zap fails, the iter will recurse back into
+		 * the same subtree to retry.
+		 */
 	}
 
 	rcu_read_unlock();
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bed5e1692cef..3361258640a2 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -982,7 +982,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
 		WARN_ON(!svm->tsc_scaling_enabled);
 		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
-		svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
 	}
 
 	svm->nested.ctl.nested_cr3 = 0;
@@ -1387,7 +1387,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
 	vcpu->arch.tsc_scaling_ratio =
 		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
 					       svm->tsc_ratio_msr);
-	svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+	__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
 }
 
 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 200045f71df0..1dc02cdf6960 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -465,11 +465,24 @@ static int has_svm(void)
 	return 1;
 }
 
+void __svm_write_tsc_multiplier(u64 multiplier)
+{
+	preempt_disable();
+
+	if (multiplier == __this_cpu_read(current_tsc_ratio))
+		goto out;
+
+	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+	__this_cpu_write(current_tsc_ratio, multiplier);
+out:
+	preempt_enable();
+}
+
 static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
 	if (tsc_scaling)
-		wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
+		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 
 	cpu_svm_disable();
@@ -515,8 +528,7 @@ static int svm_hardware_enable(void)
 		 * Set the default value, even if we don't use TSC scaling
 		 * to avoid having stale value in the msr
 		 */
-		wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
-		__this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT);
+		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 	}
 
@@ -999,11 +1011,12 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
 {
-	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+	__svm_write_tsc_multiplier(multiplier);
 }
 
+
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
 					      struct vcpu_svm *svm)
@@ -1363,13 +1376,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 		sev_es_prepare_switch_to_guest(hostsa);
 	}
 
-	if (tsc_scaling) {
-		u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-		if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
-			__this_cpu_write(current_tsc_ratio, tsc_ratio);
-			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
-		}
-	}
+	if (tsc_scaling)
+		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
 
 	if (likely(tsc_aux_uret_slot >= 0))
 		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
@@ -4255,6 +4263,8 @@ out:
 
 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
+	if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
+		vcpu->arch.at_instruction_boundary = true;
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 21c5460e947a..500348c1cb35 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -590,7 +590,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 			       bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
+void __svm_write_tsc_multiplier(u64 multiplier);
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
 				       struct vmcb_control_area *control);
 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a07e8cd753ec..9bd86ecccdab 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6547,6 +6547,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 		return;
 
 	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+	vcpu->arch.at_instruction_boundary = true;
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e9473c7c7390..03fbfbbec460 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -296,6 +296,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, nested_run),
 	STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
 	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+	STATS_DESC_COUNTER(VCPU, preemption_reported),
+	STATS_DESC_COUNTER(VCPU, preemption_other),
 	STATS_DESC_ICOUNTER(VCPU, guest_mode)
 };
 
@@ -4625,6 +4627,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct kvm_memslots *slots;
 	static const u8 preempted = KVM_VCPU_PREEMPTED;
 
+	/*
+	 * The vCPU can be marked preempted if and only if the VM-Exit was on
+	 * an instruction boundary and will not trigger guest emulation of any
+	 * kind (see vcpu_run). Vendor specific code controls (conservatively)
+	 * when this is true, for example allowing the vCPU to be marked
+	 * preempted if and only if the VM-Exit was due to a host interrupt.
+	 */
+	if (!vcpu->arch.at_instruction_boundary) {
+		vcpu->stat.preemption_other++;
+		return;
+	}
+
+	vcpu->stat.preemption_reported++;
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -4654,19 +4669,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	if (vcpu->preempted && !vcpu->arch.guest_state_protected)
-		vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+	if (vcpu->preempted) {
+		if (!vcpu->arch.guest_state_protected)
+			vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
-	/*
-	 * Take the srcu lock as memslots will be accessed to check the gfn
-	 * cache generation against the memslots generation.
-	 */
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	if (kvm_xen_msr_enabled(vcpu->kvm))
-		kvm_xen_runstate_set_preempted(vcpu);
-	else
-		kvm_steal_time_set_preempted(vcpu);
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		/*
+		 * Take the srcu lock as memslots will be accessed to check the gfn
+		 * cache generation against the memslots generation.
+		 */
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		if (kvm_xen_msr_enabled(vcpu->kvm))
+			kvm_xen_runstate_set_preempted(vcpu);
+		else
+			kvm_steal_time_set_preempted(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	}
 
 	static_call(kvm_x86_vcpu_put)(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
@@ -10422,6 +10439,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
+		/*
+		 * If another guest vCPU requests a PV TLB flush in the middle
+		 * of instruction emulation, the rest of the emulation could
+		 * use a stale page translation. Assume that any code after
+		 * this point can start executing an instruction.
+		 */
+		vcpu->arch.at_instruction_boundary = false;
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
 		} else {
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index ee5c4ae0755c..532a535a9e99 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -159,8 +159,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
 	 * behalf of the vCPU. Only if the VMM does actually block
 	 * does it need to enter RUNSTATE_blocked.
 	 */
-	if (vcpu->preempted)
-		kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+	if (WARN_ON_ONCE(!vcpu->preempted))
+		return;
+
+	kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
 }
 
 /* 32-bit compatibility definitions, also used natively in 32-bit build */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 11350e2fd736..9f27e14e185f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -12,7 +12,6 @@
 #include <linux/swiotlb.h>
 #include <linux/cc_platform.h>
 #include <linux/mem_encrypt.h>
-#include <linux/virtio_config.h>
 
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
@@ -87,9 +86,3 @@ void __init mem_encrypt_init(void)
 
 	print_mem_encrypt_feature_info();
 }
-
-int arch_has_restricted_virtio_memory_access(void)
-{
-	return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
-}
-EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e8f7953fda83..f6d038e2cd8e 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/virtio_config.h>
 #include <linux/cc_platform.h>
+#include <linux/platform-feature.h>
 
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -242,6 +243,9 @@ void __init sev_setup_arch(void)
 	size = total_mem * 6 / 100;
 	size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
 	swiotlb_adjust_size(size);
+
+	/* Set restricted memory access for virtio. */
+	platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
 }
 
 static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 517a9d8d8f94..8b71b1dd7639 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -195,6 +195,8 @@ static void __init xen_hvm_guest_init(void)
 	if (xen_pv_domain())
 		return;
 
+	xen_set_restricted_virtio_memory_access();
+
 	init_hvm_pv_info();
 
 	reserve_shared_info();
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f33a4421e7cd..e3297b15701c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -109,6 +109,8 @@ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
 static void __init xen_pv_init_platform(void)
 {
+	xen_set_restricted_virtio_memory_access();
+
 	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
 	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);