diff options
Diffstat (limited to 'arch')
132 files changed, 2283 insertions, 948 deletions
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index f5c42a8fcf9c..53520f8cb904 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -430,8 +430,13 @@ register_cpus(void) arch_initcall(register_cpus); #ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_reboot_handler(int unused) +{ + machine_halt(); +} + static const struct sysrq_key_op srm_sysrq_reboot_op = { - .handler = machine_halt, + .handler = sysrq_reboot_handler, .help_msg = "reboot(b)", .action_msg = "Resetting", .enable_mask = SYSRQ_ENABLE_BOOT, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7f9d38444d6d..8a46ed3ab429 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1299,6 +1299,14 @@ config COMPAT_VDSO You must have a 32-bit build of glibc 2.22 or later for programs to seamlessly take advantage of this. +config THUMB2_COMPAT_VDSO + bool "Compile the 32-bit vDSO for Thumb-2 mode" if EXPERT + depends on COMPAT_VDSO + default y + help + Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y, + otherwise with '-marm'. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on SYSCTL @@ -1740,8 +1748,9 @@ config ARM64_DEBUG_PRIORITY_MASKING endif config RELOCATABLE - bool + bool "Build a relocatable kernel image" if EXPERT select ARCH_HAS_RELR + default y help This builds the kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index b263e239cb59..a45366c3909b 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> +#include <linux/stddef.h> #include <asm/cputype.h> #include <asm/io.h> @@ -31,14 +32,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 0c9b5fc4ba0a..352aaebf4198 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -81,12 +81,39 @@ extern u32 __kvm_get_mdcr_el2(void); extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; -/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ +/* + * Obtain the PC-relative address of a kernel symbol + * s: symbol + * + * The goal of this macro is to return a symbol's address based on a + * PC-relative computation, as opposed to a loading the VA from a + * constant pool or something similar. This works well for HYP, as an + * absolute VA is guaranteed to be wrong. Only use this if trying to + * obtain the address of a symbol (i.e. not something you obtained by + * following a pointer). + */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr; \ + asm("adrp %0, %1\n" \ + "add %0, %0, :lo12:%1\n" \ + : "=r" (addr) : "S" (&s)); \ + addr; \ + }) + +/* + * Home-grown __this_cpu_{ptr,read} variants that always work at HYP, + * provided that sym is really a *symbol* and not a pointer obtained from + * a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps + * sparse quiet. + */ #define __hyp_this_cpu_ptr(sym) \ ({ \ - void *__ptr = hyp_symbol_addr(sym); \ + void *__ptr; \ + __verify_pcpu_ptr(&sym); \ + __ptr = hyp_symbol_addr(sym); \ __ptr += read_sysreg(tpidr_el2); \ - (typeof(&sym))__ptr; \ + (typeof(sym) __kernel __force *)__ptr; \ }) #define __hyp_this_cpu_read(sym) \ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6ea53e6e8b26..4d0f8ea600ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -112,12 +112,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) -{ - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index abbdf9703e20..c3e6fcc664b1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -284,9 +284,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - /* Pointer to host CPU context */ - struct kvm_cpu_context *host_cpu_context; - struct thread_info *host_thread_info; /* hyp VA */ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ @@ -404,8 +401,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); * CP14 and CP15 live in the same array, as they are backed by the * same system registers. */ -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) +#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 094260aaafdd..b12bfc1f051a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -108,26 +108,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) /* - * Obtain the PC-relative address of a kernel symbol - * s: symbol - * - * The goal of this macro is to return a symbol's address based on a - * PC-relative computation, as opposed to a loading the VA from a - * constant pool or something similar. This works well for HYP, as an - * absolute VA is guaranteed to be wrong. Only use this if trying to - * obtain the address of a symbol (i.e. not something you obtained by - * following a pointer). - */ -#define hyp_symbol_addr(s) \ - ({ \ - typeof(s) *addr; \ - asm("adrp %0, %1\n" \ - "add %0, %0, :lo12:%1\n" \ - : "=r" (addr) : "S" (&s)); \ - addr; \ - }) - -/* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 15e80c876d46..5df49366e9ab 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -130,7 +130,7 @@ static int clear_os_lock(unsigned int cpu) return 0; } -static int debug_monitors_init(void) +static int __init debug_monitors_init(void) { return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, "arm64/debug_monitors:starting", diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8618faa82e6d..86a5cf9bc19a 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -69,7 +69,8 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) if (addr == FTRACE_ADDR) return &plt[FTRACE_PLT_IDX]; - if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) + if (addr == FTRACE_REGS_ADDR && + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) return &plt[FTRACE_REGS_PLT_IDX]; #endif return NULL; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3fd2c11c09fc..93b3844cf442 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -319,6 +319,10 @@ void __init setup_arch(char **cmdline_p) xen_early_init(); efi_init(); + + if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0) + pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!"); + arm64_memblock_init(); paging_init(); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 3964738ebbde..7ea1e827e505 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -105,6 +105,14 @@ VDSO_CFLAGS += -D__uint128_t='void*' VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) VDSO_CFLAGS += -Wno-int-to-pointer-cast +# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is +# unreliable. +ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y) +VDSO_CFLAGS += -mthumb -fomit-frame-pointer +else +VDSO_CFLAGS += -marm +endif + VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c index 0a356aa91aa1..40a62a99fbf8 100644 --- a/arch/arm64/kvm/aarch32.c +++ b/arch/arm64/kvm/aarch32.c @@ -33,6 +33,26 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.sysregs_loaded_on_cpu) { + kvm_arch_vcpu_put(vcpu); + return true; + } + + preempt_enable(); + return false; +} + +static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) +{ + if (loaded) { + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + } +} + /* * When an exception is taken, most CPSR fields are left unchanged in the * handler. However, some are explicitly overridden (e.g. M[4:0]). @@ -155,7 +175,10 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) void kvm_inject_undef32(struct kvm_vcpu *vcpu) { + bool loaded = pre_fault_synchronize(vcpu); + prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); + post_fault_synchronize(vcpu, loaded); } /* @@ -168,6 +191,9 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 vect_offset; u32 *far, *fsr; bool is_lpae; + bool loaded; + + loaded = pre_fault_synchronize(vcpu); if (is_pabt) { vect_offset = 12; @@ -191,6 +217,8 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, /* no need to shuffle FS[4] into DFSR[10] as its 0 */ *fsr = DFSR_FSC_EXTABT_nLPAE; } + + post_fault_synchronize(vcpu, loaded); } void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7a57381c05e8..90cb90561446 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -144,11 +144,6 @@ out_fail_alloc: return ret; } -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; @@ -340,10 +335,8 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { int *last_ran; - kvm_host_data_t *cpu_data; last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); - cpu_data = this_cpu_ptr(&kvm_host_data); /* * We might get preempted before the vCPU actually runs, but @@ -355,7 +348,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } vcpu->cpu = cpu; - vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); @@ -370,7 +362,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) else vcpu_set_wfx_traps(vcpu); - vcpu_ptrauth_setup_lazy(vcpu); + if (vcpu_has_ptrauth(vcpu)) + vcpu_ptrauth_disable(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -990,11 +983,17 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Ensure a rebooted VM will fault in RAM pages and detect if the * guest MMU is turned off and flush the caches as needed. * - * S2FWB enforces all memory accesses to RAM being cacheable, we - * ensure that the cache is always coherent. + * S2FWB enforces all memory accesses to RAM being cacheable, + * ensuring that the data side is always coherent. We still + * need to invalidate the I-cache though, as FWB does *not* + * imply CTR_EL0.DIC. */ - if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) - stage2_unmap_vm(vcpu->kvm); + if (vcpu->arch.has_run_once) { + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) + stage2_unmap_vm(vcpu->kvm); + else + __flush_icache_all(); + } vcpu_reset_hcr(vcpu); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index eb194696ef62..5a02d4c90559 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -162,40 +162,14 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - -/* - * Handle the guest trying to use a ptrauth instruction, or trying to access a - * ptrauth register. - */ -void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *ctxt; - - if (vcpu_has_ptrauth(vcpu)) { - vcpu_ptrauth_enable(vcpu); - ctxt = vcpu->arch.host_cpu_context; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - } else { - kvm_inject_undefined(vcpu); - } -} - /* * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into - * a NOP). + * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all + * that we can do is give the guest an UNDEF. */ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_arm_vcpu_ptrauth_trap(vcpu); + kvm_inject_undefined(vcpu); return 1; } diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 0fc9872a1467..e95af204fec7 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -185,7 +185,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); @@ -207,7 +207,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 676b6585e5ae..db1c4487d95d 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -490,6 +490,64 @@ static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } +static bool __hyp_text esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(regs, key) \ +({ \ + regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ +}) + +static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt->sys_regs, APIA); + __ptrauth_save_key(ctxt->sys_regs, APIB); + __ptrauth_save_key(ctxt->sys_regs, APDA); + __ptrauth_save_key(ctxt->sys_regs, APDB); + __ptrauth_save_key(ctxt->sys_regs, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -524,6 +582,9 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (__hyp_handle_fpsimd(vcpu)) return true; + if (__hyp_handle_ptrauth(vcpu)) + return true; + if (!__populate_fault_info(vcpu)) return true; @@ -642,7 +703,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = vcpu->arch.host_cpu_context; + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -747,7 +808,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) vcpu = kern_hyp_va(vcpu); - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index ea5d22fbdacf..cc7e957f5b2c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -39,7 +39,6 @@ static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); @@ -123,7 +122,6 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) isb(); } - write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); @@ -267,12 +265,13 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) */ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; if (!has_vhe()) return; + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; __sysreg_save_user_state(host_ctxt); /* @@ -303,12 +302,13 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) */ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; if (!has_vhe()) return; + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; deactivate_traps_vhe_put(); __sysreg_save_el1_state(guest_ctxt); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index e71d00bb5271..b5ae3a5d509e 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -163,15 +163,13 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events) */ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *host_ctxt; struct kvm_host_data *host; u32 events_guest, events_host; if (!has_vhe()) return; - host_ctxt = vcpu->arch.host_cpu_context; - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + host = this_cpu_ptr(&kvm_host_data); events_guest = host->pmu_events.events_guest; events_host = host->pmu_events.events_host; @@ -184,15 +182,13 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) */ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *host_ctxt; struct kvm_host_data *host; u32 events_guest, events_host; if (!has_vhe()) return; - host_ctxt = vcpu->arch.host_cpu_context; - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + host = this_cpu_ptr(&kvm_host_data); events_guest = host->pmu_events.events_guest; events_host = host->pmu_events.events_host; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 80985439bfb2..baf5ce9225ce 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -78,7 +78,6 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) switch (reg) { case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; @@ -118,7 +117,6 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) switch (reg) { case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; @@ -1034,16 +1032,13 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - kvm_arm_vcpu_ptrauth_trap(vcpu); - /* - * Return false for both cases as we never skip the trapped - * instruction: - * - * - Either we re-execute the same key register access instruction - * after enabling ptrauth. - * - Or an UNDEF is injected as ptrauth is not supported/enabled. + * If we land here, that is because we didn't fixup the access on exit + * by allowing the PtrAuth sysregs. The only way this happens is when + * the guest does not have PtrAuth support enabled. */ + kvm_inject_undefined(vcpu); + return false; } @@ -1319,10 +1314,16 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + int reg = r->reg; + + /* See the 32bit mapping in kvm_host.h */ + if (p->is_aarch32) + reg = r->reg / 2; + if (p->is_write) - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + vcpu_write_sys_reg(vcpu, p->regval, reg); else - p->regval = vcpu_read_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, reg); return true; } diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 9cb6b4c8355a..aa9d356451eb 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -27,6 +27,14 @@ static bool access_actlr(struct kvm_vcpu *vcpu, return ignore_write(vcpu, p); p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); + + if (p->is_aarch32) { + if (r->Op2 & 2) + p->regval = upper_32_bits(p->regval); + else + p->regval = lower_32_bits(p->regval); + } + return true; } @@ -47,6 +55,8 @@ static const struct sys_reg_desc genericv8_cp15_regs[] = { /* ACTLR */ { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), access_actlr }, + { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011), + access_actlr }, }; static struct kvm_sys_reg_target_table genericv8_target_table = { diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 62b0eb6cf69a..84eab0f5e00a 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -216,8 +216,10 @@ static int __init mcf_pci_init(void) /* Keep a virtual mapping to IO/config space active */ iospace = (unsigned long) ioremap(PCI_IO_PA, PCI_IO_SIZE); - if (iospace == 0) + if (iospace == 0) { + pci_free_host_bridge(bridge); return -ENODEV; + } pr_info("Coldfire: PCI IO/config window mapped to 0x%x\n", (u32) iospace); diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 27fa9465d19d..2b746f55f419 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -48,7 +48,6 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_ROM=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PLATRAM=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index a24cfe4a0d32..dcfb69361408 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -42,7 +42,7 @@ static inline int _access_ok(unsigned long addr, unsigned long size) __put_user_asm(__pu_err, __pu_val, ptr, l); \ break; \ case 8: \ - memcpy(ptr, &__pu_val, sizeof (*(ptr))); \ + memcpy((void __force *)ptr, &__pu_val, sizeof(*(ptr))); \ break; \ default: \ __pu_err = __put_user_bad(); \ @@ -60,7 +60,7 @@ extern int __put_user_bad(void); * aliasing issues. */ -#define __ptr(x) ((unsigned long *)(x)) +#define __ptr(x) ((unsigned long __user *)(x)) #define __put_user_asm(err,x,ptr,bwl) \ __asm__ ("move" #bwl " %0,%1" \ @@ -85,7 +85,7 @@ extern int __put_user_bad(void); u64 l; \ __typeof__(*(ptr)) t; \ } __gu_val; \ - memcpy(&__gu_val.l, ptr, sizeof(__gu_val.l)); \ + memcpy(&__gu_val.l, (const void __force *)ptr, sizeof(__gu_val.l)); \ (x) = __gu_val.t; \ break; \ } \ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index eb1e86c30d0c..26c63e8161f0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1403,6 +1403,7 @@ config CPU_LOONGSON64 select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB select SWIOTLB + select HAVE_KVM help The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor cores implements the MIPS64R2 instruction set with many extensions, diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index caecbae4b599..724dfddcab92 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -682,6 +682,9 @@ #ifndef cpu_guest_has_htw #define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW) #endif +#ifndef cpu_guest_has_ldpte +#define cpu_guest_has_ldpte (cpu_data[0].guest.options & MIPS_CPU_LDPTE) +#endif #ifndef cpu_guest_has_mvh #define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH) #endif diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index e28b5a946e26..363e7a89d173 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -23,6 +23,8 @@ #include <asm/inst.h> #include <asm/mipsregs.h> +#include <kvm/iodev.h> + /* MIPS KVM register ids */ #define MIPS_CP0_32(_R, _S) \ (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S))) @@ -66,9 +68,11 @@ #define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3) #define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) #define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) +#define KVM_REG_MIPS_CP0_CONFIG6 MIPS_CP0_32(16, 6) #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) #define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2) #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) +#define KVM_REG_MIPS_CP0_DIAG MIPS_CP0_32(22, 0) #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) #define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) #define KVM_REG_MIPS_CP0_KSCRATCH2 MIPS_CP0_64(31, 3) @@ -78,8 +82,8 @@ #define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7) -#define KVM_MAX_VCPUS 8 -#define KVM_USER_MEM_SLOTS 8 +#define KVM_MAX_VCPUS 16 +#define KVM_USER_MEM_SLOTS 16 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 0 @@ -171,6 +175,9 @@ struct kvm_vcpu_stat { u64 vz_ghfc_exits; u64 vz_gpa_exits; u64 vz_resvd_exits; +#ifdef CONFIG_CPU_LOONGSON64 + u64 vz_cpucfg_exits; +#endif #endif u64 halt_successful_poll; u64 halt_attempted_poll; @@ -183,11 +190,39 @@ struct kvm_vcpu_stat { struct kvm_arch_memory_slot { }; +#ifdef CONFIG_CPU_LOONGSON64 +struct ipi_state { + uint32_t status; + uint32_t en; + uint32_t set; + uint32_t clear; + uint64_t buf[4]; +}; + +struct loongson_kvm_ipi; + +struct ipi_io_device { + int node_id; + struct loongson_kvm_ipi *ipi; + struct kvm_io_device device; +}; + +struct loongson_kvm_ipi { + spinlock_t lock; + struct kvm *kvm; + struct ipi_state ipistate[16]; + struct ipi_io_device dev_ipi[4]; +}; +#endif + struct kvm_arch { /* Guest physical mm */ struct mm_struct gpa_mm; /* Mask of CPUs needing GPA ASID flush */ cpumask_t asid_flush_mask; +#ifdef CONFIG_CPU_LOONGSON64 + struct loongson_kvm_ipi ipi; +#endif }; #define N_MIPS_COPROC_REGS 32 @@ -225,6 +260,7 @@ struct mips_coproc { #define MIPS_CP0_WATCH_LO 18 #define MIPS_CP0_WATCH_HI 19 #define MIPS_CP0_TLB_XCONTEXT 20 +#define MIPS_CP0_DIAG 22 #define MIPS_CP0_ECC 26 #define MIPS_CP0_CACHE_ERR 27 #define MIPS_CP0_TAG_LO 28 @@ -276,8 +312,12 @@ enum emulation_result { #define MIPS3_PG_SHIFT 6 #define MIPS3_PG_FRAME 0x3fffffc0 +#if defined(CONFIG_64BIT) +#define VPN2_MASK GENMASK(cpu_vmbits - 1, 13) +#else #define VPN2_MASK 0xffffe000 -#define KVM_ENTRYHI_ASID MIPS_ENTRYHI_ASID +#endif +#define KVM_ENTRYHI_ASID cpu_asid_mask(&boot_cpu_data) #define TLB_IS_GLOBAL(x) ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) @@ -892,6 +932,10 @@ void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, unsigned int count); void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, unsigned int count); +#ifdef CONFIG_CPU_LOONGSON64 +void kvm_loongson_clear_guest_vtlb(void); +void kvm_loongson_clear_guest_ftlb(void); +#endif #endif void kvm_mips_suspend_mm(int cpu); @@ -1131,6 +1175,8 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, /* Misc */ extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq); static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 796dbb86575b..20d6d40c59a4 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1038,6 +1038,8 @@ /* Disable Branch Return Cache */ #define R10K_DIAG_D_BRC (_ULCAST_(1) << 22) +/* Flush BTB */ +#define LOONGSON_DIAG_BTB (_ULCAST_(1) << 1) /* Flush ITLB */ #define LOONGSON_DIAG_ITLB (_ULCAST_(1) << 2) /* Flush DTLB */ @@ -2874,7 +2876,9 @@ __BUILD_SET_C0(status) __BUILD_SET_C0(cause) __BUILD_SET_C0(config) __BUILD_SET_C0(config5) +__BUILD_SET_C0(config6) __BUILD_SET_C0(config7) +__BUILD_SET_C0(diag) __BUILD_SET_C0(intcontrol) __BUILD_SET_C0(intctl) __BUILD_SET_C0(srsmap) diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 98f97c85e059..43d1faa02933 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -1012,6 +1012,16 @@ struct loongson3_lsdc2_format { /* Loongson-3 overridden ldc2/sdc2 Load/Store fo ;)))))) }; +struct loongson3_lscsr_format { /* Loongson-3 CPUCFG&CSR read/write format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int rs : 5, + __BITFIELD_FIELD(unsigned int fr : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + __BITFIELD_FIELD(unsigned int fd : 5, + __BITFIELD_FIELD(unsigned int func : 6, + ;)))))) +}; + /* * MIPS16e instruction formats (16-bit length) */ @@ -1114,6 +1124,7 @@ union mips_instruction { struct mm16_r5_format mm16_r5_format; struct loongson3_lswc2_format loongson3_lswc2_format; struct loongson3_lsdc2_format loongson3_lsdc2_format; + struct loongson3_lscsr_format loongson3_lscsr_format; }; union mips16e_instruction { diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6b93162d7c5a..def1659fe262 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -2017,8 +2017,10 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c) if (cfg2 & LOONGSON_CFG2_LEXT2) c->ases |= MIPS_ASE_LOONGSON_EXT2; - if (cfg2 & LOONGSON_CFG2_LSPW) + if (cfg2 & LOONGSON_CFG2_LSPW) { c->options |= MIPS_CPU_LDPTE; + c->guest.options |= MIPS_CPU_LDPTE; + } if (cfg3 & LOONGSON_CFG3_LCAMP) c->ases |= MIPS_ASE_LOONGSON_CAM; @@ -2074,6 +2076,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->writecombine = _CACHE_UNCACHED_ACCELERATED; c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); + c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ break; case PRID_IMP_LOONGSON_64G: c->cputype = CPU_LOONGSON64; diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index b91d145aa2d5..d697752a5723 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select EXPORT_UASM select PREEMPT_NOTIFIERS select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO select MMU_NOTIFIER diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 01affc1d21c5..506c4ac0ba1c 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for KVM support for MIPS # -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm @@ -13,6 +13,9 @@ kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ fpu.o kvm-objs += hypcall.o kvm-objs += mmu.o +ifdef CONFIG_CPU_LOONGSON64 +kvm-objs += loongson_ipi.o +endif ifdef CONFIG_KVM_MIPS_VZ kvm-objs += vz.o diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 7ccf9b096783..5ae82d925197 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1600,9 +1600,11 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { + int r; enum emulation_result er; u32 rt; void *data = run->mmio.data; + unsigned int imme; unsigned long curr_pc; /* @@ -1660,15 +1662,229 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, vcpu->arch.gprs[rt], *(u8 *)data); break; + case swl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + *(u32 *)data = ((*(u32 *)data) & 0xffffff00) | + (vcpu->arch.gprs[rt] >> 24); + break; + case 1: + *(u32 *)data = ((*(u32 *)data) & 0xffff0000) | + (vcpu->arch.gprs[rt] >> 16); + break; + case 2: + *(u32 *)data = ((*(u32 *)data) & 0xff000000) | + (vcpu->arch.gprs[rt] >> 8); + break; + case 3: + *(u32 *)data = vcpu->arch.gprs[rt]; + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SWL: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + + case swr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + *(u32 *)data = vcpu->arch.gprs[rt]; + break; + case 1: + *(u32 *)data = ((*(u32 *)data) & 0xff) | + (vcpu->arch.gprs[rt] << 8); + break; + case 2: + *(u32 *)data = ((*(u32 *)data) & 0xffff) | + (vcpu->arch.gprs[rt] << 16); + break; + case 3: + *(u32 *)data = ((*(u32 *)data) & 0xffffff) | + (vcpu->arch.gprs[rt] << 24); + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SWR: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + + case sdl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff00) | + ((vcpu->arch.gprs[rt] >> 56) & 0xff); + break; + case 1: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff0000) | + ((vcpu->arch.gprs[rt] >> 48) & 0xffff); + break; + case 2: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffff000000) | + ((vcpu->arch.gprs[rt] >> 40) & 0xffffff); + break; + case 3: + *(u64 *)data = ((*(u64 *)data) & 0xffffffff00000000) | + ((vcpu->arch.gprs[rt] >> 32) & 0xffffffff); + break; + case 4: + *(u64 *)data = ((*(u64 *)data) & 0xffffff0000000000) | + ((vcpu->arch.gprs[rt] >> 24) & 0xffffffffff); + break; + case 5: + *(u64 *)data = ((*(u64 *)data) & 0xffff000000000000) | + ((vcpu->arch.gprs[rt] >> 16) & 0xffffffffffff); + break; + case 6: + *(u64 *)data = ((*(u64 *)data) & 0xff00000000000000) | + ((vcpu->arch.gprs[rt] >> 8) & 0xffffffffffffff); + break; + case 7: + *(u64 *)data = vcpu->arch.gprs[rt]; + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SDL: eaddr: %#lx, gpr: %#lx, data: %llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; + + case sdr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + *(u64 *)data = vcpu->arch.gprs[rt]; + break; + case 1: + *(u64 *)data = ((*(u64 *)data) & 0xff) | + (vcpu->arch.gprs[rt] << 8); + break; + case 2: + *(u64 *)data = ((*(u64 *)data) & 0xffff) | + (vcpu->arch.gprs[rt] << 16); + break; + case 3: + *(u64 *)data = ((*(u64 *)data) & 0xffffff) | + (vcpu->arch.gprs[rt] << 24); + break; + case 4: + *(u64 *)data = ((*(u64 *)data) & 0xffffffff) | + (vcpu->arch.gprs[rt] << 32); + break; + case 5: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffff) | + (vcpu->arch.gprs[rt] << 40); + break; + case 6: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff) | + (vcpu->arch.gprs[rt] << 48); + break; + case 7: + *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff) | + (vcpu->arch.gprs[rt] << 56); + break; + default: + break; + } + + kvm_debug("[%#lx] OP_SDR: eaddr: %#lx, gpr: %#lx, data: %llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; + +#ifdef CONFIG_CPU_LOONGSON64 + case sdc2_op: + rt = inst.loongson3_lsdc2_format.rt; + switch (inst.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden sdc2 instructions. + * opcode1 instruction + * 0x0 gssbx: store 1 bytes from GPR + * 0x1 gsshx: store 2 bytes from GPR + * 0x2 gsswx: store 4 bytes from GPR + * 0x3 gssdx: store 8 bytes from GPR + */ + case 0x0: + run->mmio.len = 1; + *(u8 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSBX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u8 *)data); + break; + case 0x1: + run->mmio.len = 2; + *(u16 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSSHX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u16 *)data); + break; + case 0x2: + run->mmio.len = 4; + *(u32 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSWX: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u32 *)data); + break; + case 0x3: + run->mmio.len = 8; + *(u64 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_GSSDX: eaddr: %#lx, gpr: %#lx, data: %#llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); + break; + default: + kvm_err("Godson Exteneded GS-Store not yet supported (inst=0x%08x)\n", + inst.word); + break; + } + break; +#endif default: kvm_err("Store not yet supported (inst=0x%08x)\n", inst.word); goto out_fail; } - run->mmio.is_write = 1; vcpu->mmio_needed = 1; + run->mmio.is_write = 1; vcpu->mmio_is_write = 1; + + r = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, + run->mmio.phys_addr, run->mmio.len, data); + + if (!r) { + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + return EMULATE_DO_MMIO; out_fail: @@ -1681,9 +1897,11 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { + int r; enum emulation_result er; unsigned long curr_pc; u32 op, rt; + unsigned int imme; rt = inst.i_format.rt; op = inst.i_format.opcode; @@ -1736,6 +1954,162 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, run->mmio.len = 1; break; + case lwl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + vcpu->mmio_needed = 3; /* 1 byte */ + break; + case 1: + vcpu->mmio_needed = 4; /* 2 bytes */ + break; + case 2: + vcpu->mmio_needed = 5; /* 3 bytes */ + break; + case 3: + vcpu->mmio_needed = 6; /* 4 bytes */ + break; + default: + break; + } + break; + + case lwr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x3); + + run->mmio.len = 4; + imme = vcpu->arch.host_cp0_badvaddr & 0x3; + switch (imme) { + case 0: + vcpu->mmio_needed = 7; /* 4 bytes */ + break; + case 1: + vcpu->mmio_needed = 8; /* 3 bytes */ + break; + case 2: + vcpu->mmio_needed = 9; /* 2 bytes */ + break; + case 3: + vcpu->mmio_needed = 10; /* 1 byte */ + break; + default: + break; + } + break; + + case ldl_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + vcpu->mmio_needed = 11; /* 1 byte */ + break; + case 1: + vcpu->mmio_needed = 12; /* 2 bytes */ + break; + case 2: + vcpu->mmio_needed = 13; /* 3 bytes */ + break; + case 3: + vcpu->mmio_needed = 14; /* 4 bytes */ + break; + case 4: + vcpu->mmio_needed = 15; /* 5 bytes */ + break; + case 5: + vcpu->mmio_needed = 16; /* 6 bytes */ + break; + case 6: + vcpu->mmio_needed = 17; /* 7 bytes */ + break; + case 7: + vcpu->mmio_needed = 18; /* 8 bytes */ + break; + default: + break; + } + break; + + case ldr_op: + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr) & (~0x7); + + run->mmio.len = 8; + imme = vcpu->arch.host_cp0_badvaddr & 0x7; + switch (imme) { + case 0: + vcpu->mmio_needed = 19; /* 8 bytes */ + break; + case 1: + vcpu->mmio_needed = 20; /* 7 bytes */ + break; + case 2: + vcpu->mmio_needed = 21; /* 6 bytes */ + break; + case 3: + vcpu->mmio_needed = 22; /* 5 bytes */ + break; + case 4: + vcpu->mmio_needed = 23; /* 4 bytes */ + break; + case 5: + vcpu->mmio_needed = 24; /* 3 bytes */ + break; + case 6: + vcpu->mmio_needed = 25; /* 2 bytes */ + break; + case 7: + vcpu->mmio_needed = 26; /* 1 byte */ + break; + default: + break; + } + break; + +#ifdef CONFIG_CPU_LOONGSON64 + case ldc2_op: + rt = inst.loongson3_lsdc2_format.rt; + switch (inst.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden ldc2 instructions. + * opcode1 instruction + * 0x0 gslbx: store 1 bytes from GPR + * 0x1 gslhx: store 2 bytes from GPR + * 0x2 gslwx: store 4 bytes from GPR + * 0x3 gsldx: store 8 bytes from GPR + */ + case 0x0: + run->mmio.len = 1; + vcpu->mmio_needed = 27; /* signed */ + break; + case 0x1: + run->mmio.len = 2; + vcpu->mmio_needed = 28; /* signed */ + break; + case 0x2: + run->mmio.len = 4; + vcpu->mmio_needed = 29; /* signed */ + break; + case 0x3: + run->mmio.len = 8; + vcpu->mmio_needed = 30; /* signed */ + break; + default: + kvm_err("Godson Exteneded GS-Load for float not yet supported (inst=0x%08x)\n", + inst.word); + break; + } + break; +#endif + default: kvm_err("Load not yet supported (inst=0x%08x)\n", inst.word); @@ -1745,6 +2119,16 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, run->mmio.is_write = 0; vcpu->mmio_is_write = 0; + + r = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, + run->mmio.phys_addr, run->mmio.len, run->mmio.data); + + if (!r) { + kvm_mips_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + return EMULATE_DO_MMIO; } @@ -2591,28 +2975,125 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, switch (run->mmio.len) { case 8: - *gpr = *(s64 *)run->mmio.data; + switch (vcpu->mmio_needed) { + case 11: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff) | + (((*(s64 *)run->mmio.data) & 0xff) << 56); + break; + case 12: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff) | + (((*(s64 *)run->mmio.data) & 0xffff) << 48); + break; + case 13: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff) | + (((*(s64 *)run->mmio.data) & 0xffffff) << 40); + break; + case 14: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff) | + (((*(s64 *)run->mmio.data) & 0xffffffff) << 32); + break; + case 15: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) | + (((*(s64 *)run->mmio.data) & 0xffffffffff) << 24); + break; + case 16: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) | + (((*(s64 *)run->mmio.data) & 0xffffffffffff) << 16); + break; + case 17: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) | + (((*(s64 *)run->mmio.data) & 0xffffffffffffff) << 8); + break; + case 18: + case 19: + *gpr = *(s64 *)run->mmio.data; + break; + case 20: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff00000000000000) | + ((((*(s64 *)run->mmio.data)) >> 8) & 0xffffffffffffff); + break; + case 21: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff000000000000) | + ((((*(s64 *)run->mmio.data)) >> 16) & 0xffffffffffff); + break; + case 22: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff0000000000) | + ((((*(s64 *)run->mmio.data)) >> 24) & 0xffffffffff); + break; + case 23: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff00000000) | + ((((*(s64 *)run->mmio.data)) >> 32) & 0xffffffff); + break; + case 24: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff000000) | + ((((*(s64 *)run->mmio.data)) >> 40) & 0xffffff); + break; + case 25: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff0000) | + ((((*(s64 *)run->mmio.data)) >> 48) & 0xffff); + break; + case 26: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff00) | + ((((*(s64 *)run->mmio.data)) >> 56) & 0xff); + break; + default: + *gpr = *(s64 *)run->mmio.data; + } break; case 4: - if (vcpu->mmio_needed == 2) - *gpr = *(s32 *)run->mmio.data; - else + switch (vcpu->mmio_needed) { + case 1: *gpr = *(u32 *)run->mmio.data; + break; + case 2: + *gpr = *(s32 *)run->mmio.data; + break; + case 3: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) | + (((*(s32 *)run->mmio.data) & 0xff) << 24); + break; + case 4: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) | + (((*(s32 *)run->mmio.data) & 0xffff) << 16); + break; + case 5: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) | + (((*(s32 *)run->mmio.data) & 0xffffff) << 8); + break; + case 6: + case 7: + *gpr = *(s32 *)run->mmio.data; + break; + case 8: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff000000) | + ((((*(s32 *)run->mmio.data)) >> 8) & 0xffffff); + break; + case 9: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff0000) | + ((((*(s32 *)run->mmio.data)) >> 16) & 0xffff); + break; + case 10: + *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff00) | + ((((*(s32 *)run->mmio.data)) >> 24) & 0xff); + break; + default: + *gpr = *(s32 *)run->mmio.data; + } break; case 2: - if (vcpu->mmio_needed == 2) - *gpr = *(s16 *) run->mmio.data; - else + if (vcpu->mmio_needed == 1) *gpr = *(u16 *)run->mmio.data; + else + *gpr = *(s16 *)run->mmio.data; break; case 1: - if (vcpu->mmio_needed == 2) - *gpr = *(s8 *) run->mmio.data; + if (vcpu->mmio_needed == 1) + *gpr = *(u8 *)run->mmio.data; else - *gpr = *(u8 *) run->mmio.data; + *gpr = *(s8 *)run->mmio.data; break; } diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index 16e1c93b484f..fd716942e302 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -56,6 +56,7 @@ #define C0_BADVADDR 8, 0 #define C0_BADINSTR 8, 1 #define C0_BADINSTRP 8, 2 +#define C0_PGD 9, 7 #define C0_ENTRYHI 10, 0 #define C0_GUESTCTL1 10, 4 #define C0_STATUS 12, 0 @@ -307,7 +308,10 @@ static void *kvm_mips_build_enter_guest(void *addr) #ifdef CONFIG_KVM_MIPS_VZ /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */ - UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); + if (cpu_has_ldpte) + UASM_i_MFC0(&p, K0, C0_PWBASE); + else + UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1); /* @@ -469,8 +473,10 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) u32 *p = addr; struct uasm_label labels[2]; struct uasm_reloc relocs[2]; +#ifndef CONFIG_CPU_LOONGSON64 struct uasm_label *l = labels; struct uasm_reloc *r = relocs; +#endif memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -490,6 +496,16 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) */ preempt_disable(); +#ifdef CONFIG_CPU_LOONGSON64 + UASM_i_MFC0(&p, K1, C0_PGD); + uasm_i_lddir(&p, K0, K1, 3); /* global page dir */ +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_lddir(&p, K1, K0, 1); /* middle page dir */ +#endif + uasm_i_ldpte(&p, K1, 0); /* even */ + uasm_i_ldpte(&p, K1, 1); /* odd */ + uasm_i_tlbwr(&p); +#else /* * Now for the actual refill bit. A lot of this can be common with the * Linux TLB refill handler, however we don't need to handle so many @@ -512,6 +528,7 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) build_get_ptep(&p, K0, K1); build_update_entries(&p, K0, K1); build_tlb_write_entry(&p, &l, &r, tlb_random); +#endif preempt_enable(); diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index 7257e8b6f5a9..d28c2c9c343e 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -61,27 +61,8 @@ void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, * the EXC code will be set when we are actually * delivering the interrupt: */ - switch (intr) { - case 2: - kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); - /* Queue up an INT exception for the core */ - kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IO); - break; - - case 3: - kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1)); - kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); - break; - - case 4: - kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2)); - kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); - break; - - default: - break; - } - + kvm_set_c0_guest_cause(vcpu->arch.cop0, 1 << (intr + 8)); + kvm_mips_queue_irq(vcpu, kvm_irq_to_priority(intr)); } void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, @@ -89,26 +70,8 @@ void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, { int intr = (int)irq->irq; - switch (intr) { - case -2: - kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0)); - kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IO); - break; - - case -3: - kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1)); - kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); - break; - - case -4: - kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2)); - kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); - break; - - default: - break; - } - + kvm_clear_c0_guest_cause(vcpu->arch.cop0, 1 << (-intr + 8)); + kvm_mips_dequeue_irq(vcpu, kvm_irq_to_priority(-intr)); } /* Deliver the interrupt of the corresponding priority, if possible. */ @@ -116,50 +79,20 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, u32 cause) { int allowed = 0; - u32 exccode; + u32 exccode, ie; struct kvm_vcpu_arch *arch = &vcpu->arch; struct mips_coproc *cop0 = vcpu->arch.cop0; - switch (priority) { - case MIPS_EXC_INT_TIMER: - if ((kvm_read_c0_guest_status(cop0) & ST0_IE) - && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) - && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) { - allowed = 1; - exccode = EXCCODE_INT; - } - break; - - case MIPS_EXC_INT_IO: - if ((kvm_read_c0_guest_status(cop0) & ST0_IE) - && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) - && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) { - allowed = 1; - exccode = EXCCODE_INT; - } - break; - - case MIPS_EXC_INT_IPI_1: - if ((kvm_read_c0_guest_status(cop0) & ST0_IE) - && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) - && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) { - allowed = 1; - exccode = EXCCODE_INT; - } - break; - - case MIPS_EXC_INT_IPI_2: - if ((kvm_read_c0_guest_status(cop0) & ST0_IE) - && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) - && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) { - allowed = 1; - exccode = EXCCODE_INT; - } - break; + if (priority == MIPS_EXC_MAX) + return 0; - default: - break; + ie = 1 << (kvm_priority_to_irq[priority] + 8); + if ((kvm_read_c0_guest_status(cop0) & ST0_IE) + && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL))) + && (kvm_read_c0_guest_status(cop0) & ie)) { + allowed = 1; + exccode = EXCCODE_INT; } /* Are we allowed to deliver the interrupt ??? */ diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index 3bf0a49725e8..c3e878ca3e07 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -21,11 +21,12 @@ #define MIPS_EXC_NMI 5 #define MIPS_EXC_MCHK 6 #define MIPS_EXC_INT_TIMER 7 -#define MIPS_EXC_INT_IO 8 -#define MIPS_EXC_EXECUTE 9 -#define MIPS_EXC_INT_IPI_1 10 -#define MIPS_EXC_INT_IPI_2 11 -#define MIPS_EXC_MAX 12 +#define MIPS_EXC_INT_IO_1 8 +#define MIPS_EXC_INT_IO_2 9 +#define MIPS_EXC_EXECUTE 10 +#define MIPS_EXC_INT_IPI_1 11 +#define MIPS_EXC_INT_IPI_2 12 +#define MIPS_EXC_MAX 13 /* XXXSL More to follow */ #define C_TI (_ULCAST_(1) << 30) @@ -38,6 +39,9 @@ #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) #endif +extern u32 *kvm_priority_to_irq; +u32 kvm_irq_to_priority(u32 irq); + void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); int kvm_mips_pending_timer(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kvm/loongson_ipi.c b/arch/mips/kvm/loongson_ipi.c new file mode 100644 index 000000000000..3681fc8fba38 --- /dev/null +++ b/arch/mips/kvm/loongson_ipi.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Loongson-3 Virtual IPI interrupt support. + * + * Copyright (C) 2019 Loongson Technologies, Inc. All rights reserved. + * + * Authors: Chen Zhu <zhuchen@loongson.cn> + * Authors: Huacai Chen <chenhc@lemote.com> + */ + +#include <linux/kvm_host.h> + +#define IPI_BASE 0x3ff01000ULL + +#define CORE0_STATUS_OFF 0x000 +#define CORE0_EN_OFF 0x004 +#define CORE0_SET_OFF 0x008 +#define CORE0_CLEAR_OFF 0x00c +#define CORE0_BUF_20 0x020 +#define CORE0_BUF_28 0x028 +#define CORE0_BUF_30 0x030 +#define CORE0_BUF_38 0x038 + +#define CORE1_STATUS_OFF 0x100 +#define CORE1_EN_OFF 0x104 +#define CORE1_SET_OFF 0x108 +#define CORE1_CLEAR_OFF 0x10c +#define CORE1_BUF_20 0x120 +#define CORE1_BUF_28 0x128 +#define CORE1_BUF_30 0x130 +#define CORE1_BUF_38 0x138 + +#define CORE2_STATUS_OFF 0x200 +#define CORE2_EN_OFF 0x204 +#define CORE2_SET_OFF 0x208 +#define CORE2_CLEAR_OFF 0x20c +#define CORE2_BUF_20 0x220 +#define CORE2_BUF_28 0x228 +#define CORE2_BUF_30 0x230 +#define CORE2_BUF_38 0x238 + +#define CORE3_STATUS_OFF 0x300 +#define CORE3_EN_OFF 0x304 +#define CORE3_SET_OFF 0x308 +#define CORE3_CLEAR_OFF 0x30c +#define CORE3_BUF_20 0x320 +#define CORE3_BUF_28 0x328 +#define CORE3_BUF_30 0x330 +#define CORE3_BUF_38 0x338 + +static int loongson_vipi_read(struct loongson_kvm_ipi *ipi, + gpa_t addr, int len, void *val) +{ + uint32_t core = (addr >> 8) & 3; + uint32_t node = (addr >> 44) & 3; + uint32_t id = core + node * 4; + uint64_t offset = addr & 0xff; + void *pbuf; + struct ipi_state *s = &(ipi->ipistate[id]); + + BUG_ON(offset & (len - 1)); + + switch (offset) { + case CORE0_STATUS_OFF: + *(uint64_t *)val = s->status; + break; + + case CORE0_EN_OFF: + *(uint64_t *)val = s->en; + break; + + case CORE0_SET_OFF: + *(uint64_t *)val = 0; + break; + + case CORE0_CLEAR_OFF: + *(uint64_t *)val = 0; + break; + + case CORE0_BUF_20 ... CORE0_BUF_38: + pbuf = (void *)s->buf + (offset - 0x20); + if (len == 8) + *(uint64_t *)val = *(uint64_t *)pbuf; + else /* Assume len == 4 */ + *(uint32_t *)val = *(uint32_t *)pbuf; + break; + + default: + pr_notice("%s with unknown addr %llx\n", __func__, addr); + break; + } + + return 0; +} + +static int loongson_vipi_write(struct loongson_kvm_ipi *ipi, + gpa_t addr, int len, const void *val) +{ + uint32_t core = (addr >> 8) & 3; + uint32_t node = (addr >> 44) & 3; + uint32_t id = core + node * 4; + uint64_t data, offset = addr & 0xff; + void *pbuf; + struct kvm *kvm = ipi->kvm; + struct kvm_mips_interrupt irq; + struct ipi_state *s = &(ipi->ipistate[id]); + + data = *(uint64_t *)val; + BUG_ON(offset & (len - 1)); + + switch (offset) { + case CORE0_STATUS_OFF: + break; + + case CORE0_EN_OFF: + s->en = data; + break; + + case CORE0_SET_OFF: + s->status |= data; + irq.cpu = id; + irq.irq = 6; + kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq); + break; + + case CORE0_CLEAR_OFF: + s->status &= ~data; + if (!s->status) { + irq.cpu = id; + irq.irq = -6; + kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq); + } + break; + + case CORE0_BUF_20 ... CORE0_BUF_38: + pbuf = (void *)s->buf + (offset - 0x20); + if (len == 8) + *(uint64_t *)pbuf = (uint64_t)data; + else /* Assume len == 4 */ + *(uint32_t *)pbuf = (uint32_t)data; + break; + + default: + pr_notice("%s with unknown addr %llx\n", __func__, addr); + break; + } + + return 0; +} + +static int kvm_ipi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + unsigned long flags; + struct loongson_kvm_ipi *ipi; + struct ipi_io_device *ipi_device; + + ipi_device = container_of(dev, struct ipi_io_device, device); + ipi = ipi_device->ipi; + + spin_lock_irqsave(&ipi->lock, flags); + loongson_vipi_read(ipi, addr, len, val); + spin_unlock_irqrestore(&ipi->lock, flags); + + return 0; +} + +static int kvm_ipi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + unsigned long flags; + struct loongson_kvm_ipi *ipi; + struct ipi_io_device *ipi_device; + + ipi_device = container_of(dev, struct ipi_io_device, device); + ipi = ipi_device->ipi; + + spin_lock_irqsave(&ipi->lock, flags); + loongson_vipi_write(ipi, addr, len, val); + spin_unlock_irqrestore(&ipi->lock, flags); + + return 0; +} + +static const struct kvm_io_device_ops kvm_ipi_ops = { + .read = kvm_ipi_read, + .write = kvm_ipi_write, +}; + +void kvm_init_loongson_ipi(struct kvm *kvm) +{ + int i; + unsigned long addr; + struct loongson_kvm_ipi *s; + struct kvm_io_device *device; + + s = &kvm->arch.ipi; + s->kvm = kvm; + spin_lock_init(&s->lock); + + /* + * Initialize IPI device + */ + for (i = 0; i < 4; i++) { + device = &s->dev_ipi[i].device; + kvm_iodevice_init(device, &kvm_ipi_ops); + addr = (((unsigned long)i) << 44) + IPI_BASE; + mutex_lock(&kvm->slots_lock); + kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, 0x400, device); + mutex_unlock(&kvm->slots_lock); + s->dev_ipi[i].ipi = s; + s->dev_ipi[i].node_id = i; + } +} diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2416fa40b687..521bd5891e84 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -67,6 +67,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("vz_ghfc", vz_ghfc_exits), VCPU_STAT("vz_gpa", vz_gpa_exits), VCPU_STAT("vz_resvd", vz_resvd_exits), + VCPU_STAT("vz_cpucfg", vz_cpucfg_exits), #endif VCPU_STAT("halt_successful_poll", halt_successful_poll), VCPU_STAT("halt_attempted_poll", halt_attempted_poll), @@ -129,6 +130,8 @@ int kvm_arch_check_processor_compat(void *opaque) return 0; } +extern void kvm_init_loongson_ipi(struct kvm *kvm); + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { switch (type) { @@ -148,6 +151,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.gpa_mm.pgd) return -ENOMEM; +#ifdef CONFIG_CPU_LOONGSON64 + kvm_init_loongson_ipi(kvm); +#endif + return 0; } @@ -490,7 +497,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, int intr = (int)irq->irq; struct kvm_vcpu *dvcpu = NULL; - if (intr == 3 || intr == -3 || intr == 4 || intr == -4) + if (intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_1] || + intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_2] || + intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_1]) || + intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_2])) kvm_debug("%s: CPU: %d, INTR: %d\n", __func__, irq->cpu, (int)intr); @@ -499,10 +509,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, else dvcpu = vcpu->kvm->vcpus[irq->cpu]; - if (intr == 2 || intr == 3 || intr == 4) { + if (intr == 2 || intr == 3 || intr == 4 || intr == 6) { kvm_mips_callbacks->queue_io_int(dvcpu, irq); - } else if (intr == -2 || intr == -3 || intr == -4) { + } else if (intr == -2 || intr == -3 || intr == -4 || intr == -6) { kvm_mips_callbacks->dequeue_io_int(dvcpu, irq); } else { kvm_err("%s: invalid interrupt ioctl (%d:%d)\n", __func__, @@ -1620,6 +1630,34 @@ static struct notifier_block kvm_mips_csr_die_notifier = { .notifier_call = kvm_mips_csr_die_notify, }; +static u32 kvm_default_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO_1] = C_IRQ0, + [MIPS_EXC_INT_IPI_1] = C_IRQ1, + [MIPS_EXC_INT_IPI_2] = C_IRQ2, +}; + +static u32 kvm_loongson3_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO_1] = C_IRQ0, + [MIPS_EXC_INT_IO_2] = C_IRQ1, + [MIPS_EXC_INT_IPI_1] = C_IRQ4, +}; + +u32 *kvm_priority_to_irq = kvm_default_priority_to_irq; + +u32 kvm_irq_to_priority(u32 irq) +{ + int i; + + for (i = MIPS_EXC_INT_TIMER; i < MIPS_EXC_MAX; i++) { + if (kvm_priority_to_irq[i] == (1 << (irq + 8))) + return i; + } + + return MIPS_EXC_MAX; +} + static int __init kvm_mips_init(void) { int ret; @@ -1638,6 +1676,9 @@ static int __init kvm_mips_init(void) if (ret) return ret; + if (boot_cpu_type() == CPU_LOONGSON64) + kvm_priority_to_irq = kvm_loongson3_priority_to_irq; + register_die_notifier(&kvm_mips_csr_die_notifier); return 0; diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index fc8aee05d10b..1c1fbce3f566 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -20,6 +20,7 @@ #include <asm/cpu.h> #include <asm/bootinfo.h> +#include <asm/mipsregs.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/tlb.h> @@ -621,6 +622,46 @@ void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, } EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb); +#ifdef CONFIG_CPU_LOONGSON64 +void kvm_loongson_clear_guest_vtlb(void) +{ + int idx = read_gc0_index(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + write_gc0_index(0); + guest_tlbinvf(); + write_gc0_index(idx); + + clear_root_gid(); + set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); +} +EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_vtlb); + +void kvm_loongson_clear_guest_ftlb(void) +{ + int i; + int idx = read_gc0_index(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + for (i = current_cpu_data.tlbsizevtlb; + i < (current_cpu_data.tlbsizevtlb + + current_cpu_data.tlbsizeftlbsets); + i++) { + write_gc0_index(i); + guest_tlbinvf(); + } + write_gc0_index(idx); + + clear_root_gid(); + set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); +} +EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_ftlb); +#endif + #endif /** diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 60763efa723e..34ad0b46e610 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -529,6 +529,9 @@ static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MIPS_TE: r = 1; break; + case KVM_CAP_IOEVENTFD: + r = 1; + break; default: r = 0; break; diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 51f51009a53f..d9c462c14163 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -29,6 +29,7 @@ #include <linux/kvm_host.h> #include "interrupt.h" +#include "loongson_regs.h" #include "trace.h" @@ -126,6 +127,11 @@ static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu) return mask; } +static inline unsigned int kvm_vz_config6_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return MIPS_CONF6_LOONGSON_INTIMER | MIPS_CONF6_LOONGSON_EXTIMER; +} + /* * VZ optionally allows these additional Config bits to be written by root: * Config: M, [MT] @@ -180,6 +186,12 @@ static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu) return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP; } +static inline unsigned int kvm_vz_config6_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config6_guest_wrmask(vcpu) | + MIPS_CONF6_LOONGSON_SFBEN | MIPS_CONF6_LOONGSON_FTLBDIS; +} + static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva) { /* VZ guest has already converted gva to gpa */ @@ -225,23 +237,7 @@ static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu, * interrupts are asynchronous to vcpu execution therefore defer guest * cp0 accesses */ - switch (intr) { - case 2: - kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO); - break; - - case 3: - kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); - break; - - case 4: - kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); - break; - - default: - break; - } - + kvm_vz_queue_irq(vcpu, kvm_irq_to_priority(intr)); } static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, @@ -253,44 +249,22 @@ static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, * interrupts are asynchronous to vcpu execution therefore defer guest * cp0 accesses */ - switch (intr) { - case -2: - kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO); - break; - - case -3: - kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); - break; - - case -4: - kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); - break; - - default: - break; - } - + kvm_vz_dequeue_irq(vcpu, kvm_irq_to_priority(-intr)); } -static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = { - [MIPS_EXC_INT_TIMER] = C_IRQ5, - [MIPS_EXC_INT_IO] = C_IRQ0, - [MIPS_EXC_INT_IPI_1] = C_IRQ1, - [MIPS_EXC_INT_IPI_2] = C_IRQ2, -}; - static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, u32 cause) { u32 irq = (priority < MIPS_EXC_MAX) ? - kvm_vz_priority_to_irq[priority] : 0; + kvm_priority_to_irq[priority] : 0; switch (priority) { case MIPS_EXC_INT_TIMER: set_gc0_cause(C_TI); break; - case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IO_1: + case MIPS_EXC_INT_IO_2: case MIPS_EXC_INT_IPI_1: case MIPS_EXC_INT_IPI_2: if (cpu_has_guestctl2) @@ -311,7 +285,7 @@ static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, u32 cause) { u32 irq = (priority < MIPS_EXC_MAX) ? - kvm_vz_priority_to_irq[priority] : 0; + kvm_priority_to_irq[priority] : 0; switch (priority) { case MIPS_EXC_INT_TIMER: @@ -329,7 +303,8 @@ static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, } break; - case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IO_1: + case MIPS_EXC_INT_IO_2: case MIPS_EXC_INT_IPI_1: case MIPS_EXC_INT_IPI_2: /* Clear GuestCtl2.VIP irq if not using Hardware Clear */ @@ -966,7 +941,8 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, (sel == 2 || /* SRSCtl */ sel == 3)) || /* SRSMap */ (rd == MIPS_CP0_CONFIG && - (sel == 7)) || /* Config7 */ + (sel == 6 || /* Config6 */ + sel == 7)) || /* Config7 */ (rd == MIPS_CP0_LLADDR && (sel == 2) && /* MAARI */ cpu_guest_has_maar && @@ -974,6 +950,11 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, (rd == MIPS_CP0_ERRCTL && (sel == 0))) { /* ErrCtl */ val = cop0->reg[rd][sel]; +#ifdef CONFIG_CPU_LOONGSON64 + } else if (rd == MIPS_CP0_DIAG && + (sel == 0)) { /* Diag */ + val = cop0->reg[rd][sel]; +#endif } else { val = 0; er = EMULATE_FAIL; @@ -1036,9 +1017,40 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, cpu_guest_has_maar && !cpu_guest_has_dyn_maar) { kvm_write_maari(vcpu, val); + } else if (rd == MIPS_CP0_CONFIG && + (sel == 6)) { + cop0->reg[rd][sel] = (int)val; } else if (rd == MIPS_CP0_ERRCTL && (sel == 0)) { /* ErrCtl */ /* ignore the written value */ +#ifdef CONFIG_CPU_LOONGSON64 + } else if (rd == MIPS_CP0_DIAG && + (sel == 0)) { /* Diag */ + unsigned long flags; + + local_irq_save(flags); + if (val & LOONGSON_DIAG_BTB) { + /* Flush BTB */ + set_c0_diag(LOONGSON_DIAG_BTB); + } + if (val & LOONGSON_DIAG_ITLB) { + /* Flush ITLB */ + set_c0_diag(LOONGSON_DIAG_ITLB); + } + if (val & LOONGSON_DIAG_DTLB) { + /* Flush DTLB */ + set_c0_diag(LOONGSON_DIAG_DTLB); + } + if (val & LOONGSON_DIAG_VTLB) { + /* Flush VTLB */ + kvm_loongson_clear_guest_vtlb(); + } + if (val & LOONGSON_DIAG_FTLB) { + /* Flush FTLB */ + kvm_loongson_clear_guest_ftlb(); + } + local_irq_restore(flags); +#endif } else { er = EMULATE_FAIL; } @@ -1129,6 +1141,77 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, return EMULATE_FAIL; } +#ifdef CONFIG_CPU_LOONGSON64 +static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + unsigned int rs, rd; + unsigned int hostcfg; + unsigned long curr_pc; + enum emulation_result er = EMULATE_DONE; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + rs = inst.loongson3_lscsr_format.rs; + rd = inst.loongson3_lscsr_format.rd; + switch (inst.loongson3_lscsr_format.fr) { + case 0x8: /* Read CPUCFG */ + ++vcpu->stat.vz_cpucfg_exits; + hostcfg = read_cpucfg(vcpu->arch.gprs[rs]); + + switch (vcpu->arch.gprs[rs]) { + case LOONGSON_CFG0: + vcpu->arch.gprs[rd] = 0x14c000; + break; + case LOONGSON_CFG1: + hostcfg &= (LOONGSON_CFG1_FP | LOONGSON_CFG1_MMI | + LOONGSON_CFG1_MSA1 | LOONGSON_CFG1_MSA2 | + LOONGSON_CFG1_SFBP); + vcpu->arch.gprs[rd] = hostcfg; + break; + case LOONGSON_CFG2: + hostcfg &= (LOONGSON_CFG2_LEXT1 | LOONGSON_CFG2_LEXT2 | + LOONGSON_CFG2_LEXT3 | LOONGSON_CFG2_LSPW); + vcpu->arch.gprs[rd] = hostcfg; + break; + case LOONGSON_CFG3: + vcpu->arch.gprs[rd] = hostcfg; + break; + default: + /* Don't export any other advanced features to guest */ + vcpu->arch.gprs[rd] = 0; + break; + } + break; + + default: + kvm_err("lwc2 emulate not impl %d rs %lx @%lx\n", + inst.loongson3_lscsr_format.fr, vcpu->arch.gprs[rs], curr_pc); + er = EMULATE_FAIL; + break; + } + + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) { + kvm_err("[%#lx]%s: unsupported lwc2 instruction 0x%08x 0x%08x\n", + curr_pc, __func__, inst.word, inst.loongson3_lscsr_format.fr); + + vcpu->arch.pc = curr_pc; + } + + return er; +} +#endif + static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, struct kvm_vcpu *vcpu) { @@ -1158,6 +1241,11 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); break; #endif +#ifdef CONFIG_CPU_LOONGSON64 + case lwc2_op: + er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); + break; +#endif case spec3_op: switch (inst.spec3_format.func) { #ifdef CONFIG_CPU_MIPSR6 @@ -1652,6 +1740,7 @@ static u64 kvm_vz_get_one_regs[] = { KVM_REG_MIPS_CP0_CONFIG3, KVM_REG_MIPS_CP0_CONFIG4, KVM_REG_MIPS_CP0_CONFIG5, + KVM_REG_MIPS_CP0_CONFIG6, #ifdef CONFIG_64BIT KVM_REG_MIPS_CP0_XCONTEXT, #endif @@ -1706,7 +1795,7 @@ static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu) ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); if (cpu_guest_has_segments) ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments); - if (cpu_guest_has_htw) + if (cpu_guest_has_htw || cpu_guest_has_ldpte) ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw); if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) ret += 1 + ARRAY_SIZE(vcpu->arch.maar); @@ -1755,7 +1844,7 @@ static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) return -EFAULT; indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments); } - if (cpu_guest_has_htw) { + if (cpu_guest_has_htw || cpu_guest_has_ldpte) { if (copy_to_user(indices, kvm_vz_get_one_regs_htw, sizeof(kvm_vz_get_one_regs_htw))) return -EFAULT; @@ -1878,17 +1967,17 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, *v = read_gc0_segctl2(); break; case KVM_REG_MIPS_CP0_PWBASE: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; *v = read_gc0_pwbase(); break; case KVM_REG_MIPS_CP0_PWFIELD: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; *v = read_gc0_pwfield(); break; case KVM_REG_MIPS_CP0_PWSIZE: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; *v = read_gc0_pwsize(); break; @@ -1896,7 +1985,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, *v = (long)read_gc0_wired(); break; case KVM_REG_MIPS_CP0_PWCTL: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; *v = read_gc0_pwctl(); break; @@ -1979,6 +2068,9 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, return -EINVAL; *v = read_gc0_config5(); break; + case KVM_REG_MIPS_CP0_CONFIG6: + *v = kvm_read_sw_gc0_config6(cop0); + break; case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) return -EINVAL; @@ -2101,17 +2193,17 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, write_gc0_segctl2(v); break; case KVM_REG_MIPS_CP0_PWBASE: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; write_gc0_pwbase(v); break; case KVM_REG_MIPS_CP0_PWFIELD: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; write_gc0_pwfield(v); break; case KVM_REG_MIPS_CP0_PWSIZE: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; write_gc0_pwsize(v); break; @@ -2119,7 +2211,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, change_gc0_wired(MIPSR6_WIRED_WIRED, v); break; case KVM_REG_MIPS_CP0_PWCTL: - if (!cpu_guest_has_htw) + if (!cpu_guest_has_htw && !cpu_guest_has_ldpte) return -EINVAL; write_gc0_pwctl(v); break; @@ -2248,6 +2340,14 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, write_gc0_config5(v); } break; + case KVM_REG_MIPS_CP0_CONFIG6: + cur = kvm_read_sw_gc0_config6(cop0); + change = (cur ^ v) & kvm_vz_config6_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_sw_gc0_config6(cop0, (int)v); + } + break; case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) return -EINVAL; @@ -2580,7 +2680,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } /* restore HTW registers */ - if (cpu_guest_has_htw) { + if (cpu_guest_has_htw || cpu_guest_has_ldpte) { kvm_restore_gc0_pwbase(cop0); kvm_restore_gc0_pwfield(cop0); kvm_restore_gc0_pwsize(cop0); @@ -2597,7 +2697,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * prevents a SC on the next VCPU from succeeding by matching a LL on * the previous VCPU. */ - if (cpu_guest_has_rw_llb) + if (vcpu->kvm->created_vcpus > 1) write_gc0_lladdr(0); return 0; @@ -2685,8 +2785,8 @@ static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) } /* save HTW registers if enabled in guest */ - if (cpu_guest_has_htw && - kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) { + if (cpu_guest_has_ldpte || (cpu_guest_has_htw && + kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW)) { kvm_save_gc0_pwbase(cop0); kvm_save_gc0_pwfield(cop0); kvm_save_gc0_pwsize(cop0); @@ -2853,8 +2953,12 @@ static int kvm_vz_hardware_enable(void) write_c0_guestctl0(MIPS_GCTL0_CP0 | (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) | MIPS_GCTL0_CG | MIPS_GCTL0_CF); - if (cpu_has_guestctl0ext) - set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + if (cpu_has_guestctl0ext) { + if (current_cpu_type() != CPU_LOONGSON64) + set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + else + clear_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + } if (cpu_has_guestid) { write_c0_guestctl1(0); @@ -2871,6 +2975,12 @@ static int kvm_vz_hardware_enable(void) if (cpu_has_guestctl2) clear_c0_guestctl2(0x3f << 10); +#ifdef CONFIG_CPU_LOONGSON64 + /* Control guest CCA attribute */ + if (cpu_has_csr()) + csr_writel(csr_readl(0xffffffec) | 0x1, 0xffffffec); +#endif + return 0; } @@ -2927,6 +3037,9 @@ static int kvm_vz_check_extension(struct kvm *kvm, long ext) r = 2; break; #endif + case KVM_CAP_IOEVENTFD: + r = 1; + break; default: r = 0; break; diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index a42dd09c6578..d8a087cf2b42 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -252,6 +252,7 @@ static int do_signal(struct pt_regs *regs) switch (retval) { case ERESTART_RESTARTBLOCK: restart = -2; + fallthrough; case ERESTARTNOHAND: case ERESTARTSYS: case ERESTARTNOINTR: diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8dd24c7692a0..d32ec9ae73bd 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -155,12 +155,11 @@ extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); -extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned long addr, - unsigned long status); +extern int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, + unsigned long addr, unsigned long status); extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long valid); -extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); @@ -174,8 +173,7 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); -extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, +extern int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, gva_t eaddr, void *to, void *from, @@ -234,7 +232,7 @@ extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); -extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu); extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, @@ -300,12 +298,12 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); -int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu, +int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); -long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu); +long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 337047ba4a56..7e2d061d0445 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -795,7 +795,6 @@ struct kvm_vcpu_arch { struct mmio_hpte_cache_entry *pgfault_cache; struct task_struct *run_task; - struct kvm_run *kvm_run; spinlock_t vpa_update_lock; struct kvmppc_vpa vpa; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 94f5a32acaf1..ccf66b3a4c1d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -58,28 +58,28 @@ enum xlate_readwrite { XLATE_WRITE /* check for write permissions */ }; -extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); +extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); -extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_loads(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend); -extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int rs, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_store(struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu, int rs, unsigned int bytes, int is_default_endian); @@ -90,10 +90,9 @@ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_emulate_instruction(struct kvm_run *run, - struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); -extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); @@ -267,7 +266,7 @@ struct kvmppc_ops { void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); - int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); + int (*vcpu_run)(struct kvm_vcpu *vcpu); int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); int (*check_requests)(struct kvm_vcpu *vcpu); @@ -291,7 +290,7 @@ struct kvmppc_ops { int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); - int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, + int (*emulate_op)(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 37508a356f28..41fedec69ac3 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -755,9 +755,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } EXPORT_SYMBOL_GPL(kvmppc_set_msr); -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); + return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu); } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index eae259ee49af..9b6323ec8e60 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -18,7 +18,7 @@ extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index ddfc4c90ebb6..7c5a1812a1c3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -412,7 +412,7 @@ static int instruction_is_store(unsigned int instr) return (instr & mask) != 0; } -int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { u32 last_inst; @@ -472,10 +472,10 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed = gpa; vcpu->arch.vaddr_accessed = ea; - return kvmppc_emulate_mmio(run, vcpu); + return kvmppc_emulate_mmio(vcpu); } -int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; @@ -498,7 +498,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte_t pte, *ptep; if (kvm_is_radix(kvm)) - return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); + return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr); /* * Real-mode code has already searched the HPT and found the @@ -518,7 +518,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa_base = r & HPTE_R_RPN & ~(psize - 1); gfn_base = gpa_base >> PAGE_SHIFT; gpa = gpa_base | (ea & (psize - 1)); - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, dsisr & DSISR_ISSTORE); } } @@ -554,7 +554,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, dsisr & DSISR_ISSTORE); /* diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 2a2fad9a1c13..3cb0c9843d01 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -353,7 +353,13 @@ static struct kmem_cache *kvm_pmd_cache; static pte_t *kvmppc_pte_alloc(void) { - return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + pte_t *pte; + + pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + /* pmd_populate() will only reference _pa(pte). */ + kmemleak_ignore(pte); + + return pte; } static void kvmppc_pte_free(pte_t *ptep) @@ -363,7 +369,13 @@ static void kvmppc_pte_free(pte_t *ptep) static pmd_t *kvmppc_pmd_alloc(void) { - return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + pmd_t *pmd; + + pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + /* pud_populate() will only reference _pa(pmd). */ + kmemleak_ignore(pmd); + + return pmd; } static void kvmppc_pmd_free(pmd_t *pmdp) @@ -417,9 +429,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, * Callers are responsible for flushing the PWC. * * When page tables are being unmapped/freed as part of page fault path - * (full == false), ptes are not expected. There is code to unmap them - * and emit a warning if encountered, but there may already be data - * corruption due to the unexpected mappings. + * (full == false), valid ptes are generally not expected; however, there + * is one situation where they arise, which is when dirty page logging is + * turned off for a memslot while the VM is running. The new memslot + * becomes visible to page faults before the memslot commit function + * gets to flush the memslot, which can lead to a 2MB page mapping being + * installed for a guest physical address where there are already 64kB + * (or 4kB) mappings (of sub-pages of the same 2MB page). */ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, unsigned int lpid) @@ -433,7 +449,6 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, for (it = 0; it < PTRS_PER_PTE; ++it, ++p) { if (pte_val(*p) == 0) continue; - WARN_ON_ONCE(1); kvmppc_unmap_pte(kvm, p, pte_pfn(*p) << PAGE_SHIFT, PAGE_SHIFT, NULL, lpid); @@ -891,7 +906,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, return ret; } -int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; @@ -937,7 +952,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_data_storage(vcpu, ea, dsisr); return RESUME_GUEST; } - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { @@ -1142,6 +1157,11 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, kvm->arch.lpid); gpa += PAGE_SIZE; } + /* + * Increase the mmu notifier sequence number to prevent any page + * fault that read the memslot earlier from writing a PTE. + */ + kvm->mmu_notifier_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 50555ad1db93..1a529df0ab44 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -73,6 +73,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct kvmppc_spapr_tce_iommu_table *stit, *tmp; struct iommu_table_group *table_group = NULL; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { table_group = iommu_group_get_iommudata(grp); @@ -87,7 +88,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, kref_put(&stit->kref, kvm_spapr_tce_liobn_put); } } + cond_resched_rcu(); } + rcu_read_unlock(); } extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, @@ -105,12 +108,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!f.file) return -EBADF; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { if (stt == f.file->private_data) { found = true; break; } } + rcu_read_unlock(); fdput(f); @@ -143,6 +148,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbl) return -EINVAL; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { if (tbl != stit->tbl) continue; @@ -150,14 +156,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!kref_get_unless_zero(&stit->kref)) { /* stit is being destroyed */ iommu_tce_table_put(tbl); + rcu_read_unlock(); return -ENOTTY; } /* * The table is already known to this KVM, we just increased * its KVM reference counter and can return. */ + rcu_read_unlock(); return 0; } + rcu_read_unlock(); stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { @@ -365,18 +374,19 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { unsigned long hpa = 0; struct mm_iommu_table_group_mem_t *mem; long shift = stit->tbl->it_page_shift; mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift); - if (!mem) - return H_TOO_HARD; - - if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) + if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) { + rcu_read_unlock(); return H_TOO_HARD; + } } + rcu_read_unlock(); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index dad71d276b91..0effd48c8f4d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -235,7 +235,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) #endif -int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; @@ -371,13 +371,13 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) break; - run->papr_hcall.nr = cmd; + vcpu->run->papr_hcall.nr = cmd; for (i = 0; i < 9; ++i) { ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); - run->papr_hcall.args[i] = gpr; + vcpu->run->papr_hcall.args[i] = gpr; } - run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL; vcpu->arch.hcall_needed = 1; emulated = EMULATE_EXIT_USER; break; @@ -629,7 +629,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, } if (emulated == EMULATE_FAIL) - emulated = kvmppc_emulate_paired_single(run, vcpu); + emulated = kvmppc_emulate_paired_single(vcpu); return emulated; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7f5d58663f13..6bf66649ab92 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1094,9 +1094,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmppc_h_svm_init_done(vcpu->kvm); break; case H_SVM_INIT_ABORT: - ret = H_UNSUPPORTED; - if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_init_abort(vcpu->kvm); + /* + * Even if that call is made by the Ultravisor, the SSR1 value + * is the guest context one, with the secure bit clear as it has + * not yet been secured. So we can't check it here. + * Instead the kvm->arch.secure_guest flag is checked inside + * kvmppc_h_svm_init_abort(). + */ + ret = kvmppc_h_svm_init_abort(vcpu->kvm); break; default: @@ -1151,8 +1156,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } -static int kvmppc_emulate_debug_inst(struct kvm_run *run, - struct kvm_vcpu *vcpu) +static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu) { u32 last_inst; @@ -1166,8 +1170,8 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run, } if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = kvmppc_get_pc(vcpu); + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu); return RESUME_HOST; } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); @@ -1268,9 +1272,10 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } -static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, struct task_struct *tsk) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; vcpu->stat.sum_exits++; @@ -1405,7 +1410,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, swab32(vcpu->arch.emul_inst) : vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { - r = kvmppc_emulate_debug_inst(run, vcpu); + r = kvmppc_emulate_debug_inst(vcpu); } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -1457,7 +1462,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1515,7 +1520,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(run, vcpu); + r = kvmhv_nested_page_fault(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: @@ -1525,7 +1530,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(run, vcpu); + r = kvmhv_nested_page_fault(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; @@ -2929,7 +2934,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) ret = RESUME_GUEST; if (vcpu->arch.trap) - ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, + ret = kvmppc_handle_exit_hv(vcpu, vcpu->arch.run_task); vcpu->arch.ret = ret; @@ -3894,15 +3899,16 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) return r; } -static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int n_ceded, i, r; struct kvmppc_vcore *vc; struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); - kvm_run->exit_reason = 0; + run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; kvmppc_update_vpas(vcpu); @@ -3914,7 +3920,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) spin_lock(&vc->lock); vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.kvm_run = kvm_run; vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; @@ -3947,8 +3952,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) r = kvmhv_setup_mmu(vcpu); spin_lock(&vc->lock); if (r) { - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - kvm_run->fail_entry. + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry. hardware_entry_failure_reason = 0; vcpu->arch.ret = r; break; @@ -3967,7 +3972,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); v->stat.signal_exits++; - v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; + v->run->exit_reason = KVM_EXIT_INTR; v->arch.ret = -EINTR; wake_up(&v->arch.cpu_run); } @@ -4008,7 +4013,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { kvmppc_remove_runnable(vc, vcpu); vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; } @@ -4019,15 +4024,15 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } - trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); + trace_kvmppc_run_vcpu_exit(vcpu); spin_unlock(&vc->lock); return vcpu->arch.ret; } -int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, - struct kvm_vcpu *vcpu, u64 time_limit, +int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { + struct kvm_run *run = vcpu->run; int trap, r, pcpu; int srcu_idx, lpid; struct kvmppc_vcore *vc; @@ -4036,14 +4041,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, trace_kvmppc_run_vcpu_enter(vcpu); - kvm_run->exit_reason = 0; + run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.kvm_run = kvm_run; vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; @@ -4161,9 +4165,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, r = RESUME_GUEST; if (trap) { if (!nested) - r = kvmppc_handle_exit_hv(kvm_run, vcpu, current); + r = kvmppc_handle_exit_hv(vcpu, current); else - r = kvmppc_handle_nested_exit(kvm_run, vcpu); + r = kvmppc_handle_nested_exit(vcpu); } vcpu->arch.ret = r; @@ -4173,7 +4177,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { if (signal_pending(current)) { vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; break; } @@ -4189,13 +4193,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, done: kvmppc_remove_runnable(vc, vcpu); - trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); + trace_kvmppc_run_vcpu_exit(vcpu); return vcpu->arch.ret; sigpend: vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: local_irq_enable(); @@ -4203,8 +4207,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, goto done; } -static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r; int srcu_idx; unsigned long ebb_regs[3] = {}; /* shut up GCC */ @@ -4288,10 +4293,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) */ if (kvm->arch.threads_indep && kvm_is_radix(kvm) && !no_mixing_hpt_and_radix) - r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0, + r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); else - r = kvmppc_run_vcpu(run, vcpu); + r = kvmppc_run_vcpu(vcpu); if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { @@ -4301,7 +4306,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&kvm->srcu); - r = kvmppc_book3s_hv_page_fault(run, vcpu, + r = kvmppc_book3s_hv_page_fault(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&kvm->srcu, srcu_idx); } else if (r == RESUME_PASSTHROUGH) { @@ -4975,7 +4980,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) } /* We don't need to emulate any privileged instructions or dcbz */ -static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index a221cae792dc..0989751c9d5e 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -290,8 +290,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp, - lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ @@ -1270,8 +1269,7 @@ static inline int kvmppc_radix_shift_to_level(int shift) } /* called with gp->tlb_lock held */ -static long int __kvmhv_nested_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, +static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp) { struct kvm *kvm = vcpu->kvm; @@ -1354,7 +1352,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, } /* passthrough of emulated MMIO case */ - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { if (writing) { @@ -1429,8 +1427,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level, mmu_seq, gp->shadow_lpid, rmapp, &n_rmap); - if (n_rmap) - kfree(n_rmap); + kfree(n_rmap); if (ret == -EAGAIN) ret = RESUME_GUEST; /* Let the guest try again */ @@ -1441,13 +1438,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, return RESUME_GUEST; } -long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu) +long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) { struct kvm_nested_guest *gp = vcpu->arch.nested; long int ret; mutex_lock(&gp->tlb_lock); - ret = __kvmhv_nested_page_fault(run, vcpu, gp); + ret = __kvmhv_nested_page_fault(vcpu, gp); mutex_unlock(&gp->tlb_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index f91224ea034a..09d8119024db 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -749,6 +749,20 @@ static u64 kvmppc_get_secmem_size(void) const __be32 *prop; u64 size = 0; + /* + * First try the new ibm,secure-memory nodes which supersede the + * secure-memory-ranges property. + * If we found some, no need to read the deprecated ones. + */ + for_each_compatible_node(np, NULL, "ibm,secure-memory") { + prop = of_get_property(np, "reg", &len); + if (!prop) + continue; + size += of_read_number(prop + 2, 2); + } + if (size) + return size; + np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); if (!np) goto out; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bf0282775e37..a11436720a8c 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -169,7 +169,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } -static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; @@ -188,7 +188,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs, len, 1); goto done_load; } @@ -213,7 +213,7 @@ done_load: return emulated; } -static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; @@ -248,7 +248,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (r < 0) { kvmppc_inject_pf(vcpu, addr, true); } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_store(run, vcpu, val, len, 1); + emulated = kvmppc_handle_store(vcpu, val, len, 1); } else { emulated = EMULATE_DONE; } @@ -259,7 +259,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; @@ -279,12 +279,12 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs, 4, 1); vcpu->arch.qpr[rs] = tmp[1]; goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs, 8, 1); goto done_load; } @@ -302,7 +302,7 @@ done_load: return emulated; } -static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; @@ -318,10 +318,10 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (r < 0) { kvmppc_inject_pf(vcpu, addr, true); } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); + emulated = kvmppc_handle_store(vcpu, tmp[0], 4, 1); } else if (r == EMULATE_DO_MMIO) { u64 val = ((u64)tmp[0] << 32) | tmp[1]; - emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); + emulated = kvmppc_handle_store(vcpu, val, 8, 1); } else { emulated = EMULATE_DONE; } @@ -618,7 +618,7 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, return EMULATE_DONE; } -int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_DONE; @@ -680,7 +680,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); break; } case OP_PSQ_LU: @@ -690,7 +690,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -703,7 +703,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); break; } case OP_PSQ_STU: @@ -713,7 +713,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -733,7 +733,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); break; } case OP_4X_PS_CMPO0: @@ -747,7 +747,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -824,7 +824,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); break; } case OP_4XW_PSQ_STUX: @@ -834,7 +834,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -922,7 +922,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -930,7 +930,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -941,7 +941,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -949,7 +949,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -960,7 +960,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -968,7 +968,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -979,7 +979,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -987,7 +987,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1001,7 +1001,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -1010,7 +1010,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -1022,7 +1022,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -1031,7 +1031,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1043,7 +1043,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -1052,7 +1052,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -1064,7 +1064,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -1073,7 +1073,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1085,7 +1085,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE_LOW); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a0f6813f4560..ef54f917bdaf 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -700,7 +700,7 @@ static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } -int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu, ulong eaddr, int vec) { bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); @@ -795,7 +795,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* The guest's PTE is not mapped yet. Map on the host */ if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) { /* Exit KVM if mapping failed */ - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return RESUME_HOST; } if (data) @@ -808,7 +808,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.mmio_exits++; vcpu->arch.paddr_accessed = pte.raddr; vcpu->arch.vaddr_accessed = pte.eaddr; - r = kvmppc_emulate_mmio(run, vcpu); + r = kvmppc_emulate_mmio(vcpu); if ( r == RESUME_HOST_NV ) r = RESUME_HOST; } @@ -992,7 +992,7 @@ static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac) enum emulation_result er = EMULATE_FAIL; if (!(kvmppc_get_msr(vcpu) & MSR_PR)) - er = kvmppc_emulate_instruction(vcpu->run, vcpu); + er = kvmppc_emulate_instruction(vcpu); if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) { /* Couldn't emulate, trigger interrupt in guest */ @@ -1089,8 +1089,7 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu) } } -static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr) { enum emulation_result er; ulong flags; @@ -1124,7 +1123,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, } vcpu->stat.emulated_inst_exits++; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_instruction(vcpu); switch (er) { case EMULATE_DONE: r = RESUME_GUEST_NV; @@ -1139,7 +1138,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; + vcpu->run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; case EMULATE_EXIT_USER: @@ -1198,7 +1197,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, /* only care about PTEG not found errors, but leave NX alone */ if (shadow_srr1 & 0x40000000) { int idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); + r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); vcpu->stat.sp_instruc++; } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && @@ -1248,7 +1247,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { int idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); + r = kvmppc_handle_pagefault(vcpu, dar, exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); } else { kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr); @@ -1292,7 +1291,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_PROGRAM: case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); + r = kvmppc_exit_pr_progint(vcpu, exit_nr); break; case BOOK3S_INTERRUPT_SYSCALL: { @@ -1370,7 +1369,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); if (emul == EMULATE_DONE) - r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); + r = kvmppc_exit_pr_progint(vcpu, exit_nr); else r = RESUME_GUEST; @@ -1825,8 +1824,9 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) vfree(vcpu_book3s); } -static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int ret; #ifdef CONFIG_ALTIVEC unsigned long uninitialized_var(vrsave); @@ -1834,7 +1834,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { - kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = -EINVAL; goto out; } @@ -1861,7 +1861,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(kvm_run, vcpu); + ret = __kvmppc_vcpu_run(run, vcpu); kvmppc_clear_debug(vcpu); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 888afe8d35cc..c0d62a917e20 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -729,13 +729,14 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) return r; } -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int ret, s; struct debug_reg debug; if (!vcpu->arch.sane) { - kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } @@ -777,7 +778,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(kvm_run, vcpu); + ret = __kvmppc_vcpu_run(run, vcpu); /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ @@ -799,11 +800,11 @@ out: return ret; } -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int emulation_exit(struct kvm_vcpu *vcpu) { enum emulation_result er; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_instruction(vcpu); switch (er) { case EMULATE_DONE: /* don't overwrite subtypes, just account kvm_stats */ @@ -820,8 +821,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + vcpu->run->hw.hardware_exit_reason = ~0ULL << 32; + vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst; kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; @@ -833,8 +834,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) } } -static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_handle_debug(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; @@ -953,7 +955,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } -static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu, enum emulation_result emulated, u32 last_inst) { switch (emulated) { @@ -965,8 +967,8 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, __func__, vcpu->arch.regs.nip); /* For debugging, encode the failing instruction and * report it to userspace. */ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= last_inst; + vcpu->run->hw.hardware_exit_reason = ~0ULL << 32; + vcpu->run->hw.hardware_exit_reason |= last_inst; kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; @@ -1023,7 +1025,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; if (emulated != EMULATE_DONE) { - r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + r = kvmppc_resume_inst_load(vcpu, emulated, last_inst); goto out; } @@ -1083,7 +1085,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_HV_PRIV: - r = emulation_exit(run, vcpu); + r = emulation_exit(vcpu); break; case BOOKE_INTERRUPT_PROGRAM: @@ -1093,7 +1095,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * We are here because of an SW breakpoint instr, * so lets return to host to handle. */ - r = kvmppc_handle_debug(run, vcpu); + r = kvmppc_handle_debug(vcpu); run->exit_reason = KVM_EXIT_DEBUG; kvmppc_account_exit(vcpu, DEBUG_EXITS); break; @@ -1114,7 +1116,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - r = emulation_exit(run, vcpu); + r = emulation_exit(vcpu); break; case BOOKE_INTERRUPT_FP_UNAVAIL: @@ -1281,7 +1283,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * actually RAM. */ vcpu->arch.paddr_accessed = gpaddr; vcpu->arch.vaddr_accessed = eaddr; - r = kvmppc_emulate_mmio(run, vcpu); + r = kvmppc_emulate_mmio(vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } @@ -1332,7 +1334,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } case BOOKE_INTERRUPT_DEBUG: { - r = kvmppc_handle_debug(run, vcpu); + r = kvmppc_handle_debug(vcpu); if (r == RESUME_HOST) run->exit_reason = KVM_EXIT_DEBUG; kvmppc_account_exit(vcpu, DEBUG_EXITS); diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 65b4d337d337..be9da96d9f06 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -70,7 +70,7 @@ void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); -int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); @@ -94,16 +94,12 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, - struct kvm_vcpu *vcpu, +extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, - struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 689ff5f90e9e..d8d38aca71bd 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -39,7 +39,7 @@ static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.csrr1); } -int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 3d0d3ec5be96..64eb833e9f02 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -83,16 +83,16 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) } #endif -static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; switch (get_oc(inst)) { case EHPRIV_OC_DEBUG: - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = vcpu->arch.regs.nip; - run->debug.arch.status = 0; + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.address = vcpu->arch.regs.nip; + vcpu->run->debug.arch.status = 0; kvmppc_account_exit(vcpu, DEBUG_EXITS); emulated = EMULATE_EXIT_USER; *advance = 0; @@ -125,7 +125,7 @@ static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst, return EMULATE_FAIL; } -int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; @@ -182,8 +182,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_EHPRIV: - emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, - advance); + emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance); break; default: @@ -197,7 +196,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, } if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + emulated = kvmppc_booke_emulate_op(vcpu, inst, advance); return emulated; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 6fca38ca791f..ee1147c98cd8 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -191,7 +191,7 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. */ -int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu) { u32 inst; int rs, rt, sprn; @@ -270,9 +270,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) * these are illegal instructions. */ if (inst == KVMPPC_INST_SW_BREAKPOINT) { - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.status = 0; - run->debug.arch.address = kvmppc_get_pc(vcpu); + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.status = 0; + vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu); emulated = EMULATE_EXIT_USER; advance = 0; } else @@ -285,7 +285,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } if (emulated == EMULATE_FAIL) { - emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst, &advance); if (emulated == EMULATE_AGAIN) { advance = 0; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 135d0e686622..48272a9b9c30 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -71,7 +71,6 @@ static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu) */ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; u32 inst; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; @@ -104,10 +103,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int instr_byte_swap = op.type & BYTEREV; if (op.type & SIGNEXT) - emulated = kvmppc_handle_loads(run, vcpu, + emulated = kvmppc_handle_loads(vcpu, op.reg, size, !instr_byte_swap); else - emulated = kvmppc_handle_load(run, vcpu, + emulated = kvmppc_handle_load(vcpu, op.reg, size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -124,10 +123,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 1; if (op.type & SIGNEXT) - emulated = kvmppc_handle_loads(run, vcpu, + emulated = kvmppc_handle_loads(vcpu, KVM_MMIO_REG_FPR|op.reg, size, 1); else - emulated = kvmppc_handle_load(run, vcpu, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR|op.reg, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -164,12 +163,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (size == 16) { vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_vmx_load(run, - vcpu, KVM_MMIO_REG_VMX|op.reg, + emulated = kvmppc_handle_vmx_load(vcpu, + KVM_MMIO_REG_VMX|op.reg, 8, 1); } else { vcpu->arch.mmio_vmx_copy_nums = 1; - emulated = kvmppc_handle_vmx_load(run, vcpu, + emulated = kvmppc_handle_vmx_load(vcpu, KVM_MMIO_REG_VMX|op.reg, size, 1); } @@ -217,7 +216,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) io_size_each = op.element_size; } - emulated = kvmppc_handle_vsx_load(run, vcpu, + emulated = kvmppc_handle_vsx_load(vcpu, KVM_MMIO_REG_VSX|op.reg, io_size_each, 1, op.type & SIGNEXT); break; @@ -227,8 +226,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) /* if need byte reverse, op.val has been reversed by * analyse_instr(). */ - emulated = kvmppc_handle_store(run, vcpu, op.val, - size, 1); + emulated = kvmppc_handle_store(vcpu, op.val, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); @@ -250,7 +248,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (op.type & FPCONV) vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, + emulated = kvmppc_handle_store(vcpu, VCPU_FPR(vcpu, op.reg), size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -290,12 +288,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (size == 16) { vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_vmx_store(run, - vcpu, op.reg, 8, 1); + emulated = kvmppc_handle_vmx_store(vcpu, + op.reg, 8, 1); } else { vcpu->arch.mmio_vmx_copy_nums = 1; - emulated = kvmppc_handle_vmx_store(run, - vcpu, op.reg, size, 1); + emulated = kvmppc_handle_vmx_store(vcpu, + op.reg, size, 1); } break; @@ -338,7 +336,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) io_size_each = op.element_size; } - emulated = kvmppc_handle_vsx_store(run, vcpu, + emulated = kvmppc_handle_vsx_store(vcpu, op.reg, io_size_each, 1); break; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27ccff612903..dd7d141e33e8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -279,7 +279,7 @@ out: } EXPORT_SYMBOL_GPL(kvmppc_sanity_check); -int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) { enum emulation_result er; int r; @@ -295,7 +295,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_GUEST; break; case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; + vcpu->run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store * instructions modify register state. */ /* Future optimization: only reload non-volatiles if they were @@ -1107,9 +1107,9 @@ static inline u32 dp_to_sp(u64 fprd) #define dp_to_sp(x) (x) #endif /* CONFIG_PPC_FPU */ -static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; u64 uninitialized_var(gpr); if (run->mmio.len > sizeof(gpr)) { @@ -1219,10 +1219,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } -static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int sign_extend) { + struct kvm_run *run = vcpu->run; int idx, ret; bool host_swabbed; @@ -1256,7 +1257,7 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { - kvmppc_complete_mmio_load(vcpu, run); + kvmppc_complete_mmio_load(vcpu); vcpu->mmio_needed = 0; return EMULATE_DONE; } @@ -1264,24 +1265,24 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } -int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { - return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0); + return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0); } EXPORT_SYMBOL_GPL(kvmppc_handle_load); /* Same as above, but sign extends */ -int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_loads(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { - return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); + return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1); } #ifdef CONFIG_VSX -int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend) { @@ -1292,13 +1293,13 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; while (vcpu->arch.mmio_vsx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, + emulated = __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, mmio_sign_extend); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vsx_copy_nums--; vcpu->arch.mmio_vsx_offset++; @@ -1307,9 +1308,10 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, } #endif /* CONFIG_VSX */ -int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_store(struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian) { + struct kvm_run *run = vcpu->run; void *data = run->mmio.data; int idx, ret; bool host_swabbed; @@ -1423,7 +1425,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) return result; } -int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu, int rs, unsigned int bytes, int is_default_endian) { u64 val; @@ -1439,13 +1441,13 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) return EMULATE_FAIL; - emulated = kvmppc_handle_store(run, vcpu, + emulated = kvmppc_handle_store(vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vsx_copy_nums--; vcpu->arch.mmio_vsx_offset++; @@ -1454,19 +1456,19 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; enum emulation_result emulated = EMULATE_FAIL; int r; vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr, + emulated = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1, vcpu->arch.mmio_sign_extend); } else { - emulated = kvmppc_handle_vsx_store(run, vcpu, + emulated = kvmppc_handle_vsx_store(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } @@ -1490,7 +1492,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC -int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { enum emulation_result emulated = EMULATE_DONE; @@ -1499,13 +1501,13 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, + emulated = __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; vcpu->arch.mmio_vmx_offset++; } @@ -1585,7 +1587,7 @@ int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int rs, unsigned int bytes, int is_default_endian) { u64 val = 0; @@ -1620,12 +1622,12 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; } - emulated = kvmppc_handle_store(run, vcpu, val, bytes, + emulated = kvmppc_handle_store(vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; vcpu->arch.mmio_vmx_offset++; } @@ -1633,19 +1635,19 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; enum emulation_result emulated = EMULATE_FAIL; int r; vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_vmx_load(run, vcpu, + emulated = kvmppc_handle_vmx_load(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } else { - emulated = kvmppc_handle_vmx_store(run, vcpu, + emulated = kvmppc_handle_vmx_store(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } @@ -1775,7 +1777,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) - kvmppc_complete_mmio_load(vcpu, run); + kvmppc_complete_mmio_load(vcpu); #ifdef CONFIG_VSX if (vcpu->arch.mmio_vsx_copy_nums > 0) { vcpu->arch.mmio_vsx_copy_nums--; @@ -1783,7 +1785,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } if (vcpu->arch.mmio_vsx_copy_nums > 0) { - r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); + r = kvmppc_emulate_mmio_vsx_loadstore(vcpu); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; goto out; @@ -1797,7 +1799,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } if (vcpu->arch.mmio_vmx_copy_nums > 0) { - r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run); + r = kvmppc_emulate_mmio_vmx_loadstore(vcpu); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; goto out; @@ -1830,7 +1832,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (run->immediate_exit) r = -EINTR; else - r = kvmppc_vcpu_run(run, vcpu); + r = kvmppc_vcpu_run(vcpu); kvm_sigset_deactivate(vcpu); diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 8a1e3b0047f1..4a61a971c34e 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -472,9 +472,9 @@ TRACE_EVENT(kvmppc_run_vcpu_enter, ); TRACE_EVENT(kvmppc_run_vcpu_exit, - TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + TP_PROTO(struct kvm_vcpu *vcpu), - TP_ARGS(vcpu, run), + TP_ARGS(vcpu), TP_STRUCT__entry( __field(int, vcpu_id) @@ -484,7 +484,7 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->exit = run->exit_reason; + __entry->exit = vcpu->run->exit_reason; __entry->ret = vcpu->arch.ret; ), diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 25db70be4c9c..266a6ca5e15e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window, return; } - use_mm(window->mm); + kthread_use_mm(window->mm); rc = copy_to_user(csb_addr, &csb, sizeof(csb)); /* * User space polls on csb.flags (first byte). So add barrier @@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window, smp_mb(); rc = copy_to_user(csb_addr, &csb, sizeof(u8)); } - unuse_mm(window->mm); + kthread_unuse_mm(window->mm); put_task_struct(tsk); /* Success */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c733007b90ab..128192e14ff2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,64 +12,70 @@ config 32BIT config RISCV def_bool y - select OF - select OF_EARLY_FLATTREE - select OF_IRQ + select ARCH_CLOCKSOURCE_INIT select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_MMIOWB + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select CLONE_BACKWARDS select COMMON_CLK + select EDAC_SUPPORT + select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS + select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_IOREMAP + select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER if MMU select GENERIC_STRNLEN_USER if MMU - select GENERIC_SMP_IDLE_THREAD - select GENERIC_ATOMIC64 if !64BIT - select GENERIC_IOREMAP - select GENERIC_PTDUMP if MMU + select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_KASAN if MMU && 64BIT + select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB_QXFER_PKT + select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS + select HAVE_COPY_THREAD_TLS select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_EBPF_JIT if MMU select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_VDSO if MMU && 64BIT + select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN - select SPARSE_IRQ - select SYSCTL_EXCEPTION_TRACE - select HAVE_ARCH_TRACEHOOK - select HAVE_PCI select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES - select THREAD_INFO_IN_TASK + select OF + select OF_EARLY_FLATTREE + select OF_IRQ select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI + select RISCV_INTC select RISCV_TIMER - select GENERIC_IRQ_MULTI_HANDLER - select GENERIC_ARCH_TOPOLOGY if SMP - select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_MMIOWB - select ARCH_HAS_DEBUG_VIRTUAL if MMU - select HAVE_EBPF_JIT if MMU - select EDAC_SUPPORT - select ARCH_HAS_GIGANTIC_PAGE - select ARCH_HAS_SET_DIRECT_MAP - select ARCH_HAS_SET_MEMORY - select ARCH_HAS_STRICT_KERNEL_RWX if MMU - select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT - select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU - select HAVE_ARCH_MMAP_RND_BITS if MMU - select ARCH_HAS_GCOV_PROFILE_ALL - select HAVE_COPY_THREAD_TLS - select HAVE_ARCH_KASAN if MMU && 64BIT - select HAVE_ARCH_KGDB - select HAVE_ARCH_KGDB_QXFER_PKT + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -196,11 +202,11 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE if MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select SWIOTLB if MMU endchoice diff --git a/arch/riscv/include/asm/clocksource.h b/arch/riscv/include/asm/clocksource.h new file mode 100644 index 000000000000..482185566b0c --- /dev/null +++ b/arch/riscv/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +#include <asm/vdso/clocksource.h> + +#endif diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 6e1b0e0325eb..9807ad164015 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -10,11 +10,6 @@ #include <linux/interrupt.h> #include <linux/linkage.h> -#define NR_IRQS 0 - -void riscv_timer_interrupt(void); -void riscv_software_interrupt(void); - #include <asm-generic/irq.h> #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3ddb798264f1..bdddcd5c1b71 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,8 @@ #include <linux/const.h> +#include <vdso/processor.h> + #include <asm/ptrace.h> /* @@ -58,16 +60,6 @@ static inline void release_thread(struct task_struct *dead_task) extern unsigned long get_wchan(struct task_struct *p); -static inline void cpu_relax(void) -{ -#ifdef __riscv_muldiv - int dummy; - /* In lieu of a halt instruction, induce a long-latency stall. */ - __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); -#endif - barrier(); -} - static inline void wait_for_interrupt(void) { __asm__ __volatile__ ("wfi"); @@ -75,6 +67,7 @@ static inline void wait_for_interrupt(void) struct device_node; int riscv_of_processor_hartid(struct device_node *node); +int riscv_of_parent_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index f4c7cfda6b7f..40bb1c15a731 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -28,6 +28,9 @@ void show_ipi_stats(struct seq_file *p, int prec); /* SMP initialization hook for setup_arch */ void __init setup_smp(void); +/* Called from C code, this handles an IPI. */ +void handle_IPI(struct pt_regs *regs); + /* Hook for the generic smp_call_function_many() routine. */ void arch_send_call_function_ipi_mask(struct cpumask *mask); diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 7a7fce63c474..8454f746bbfd 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,8 +10,10 @@ #include <linux/types.h> +#ifndef GENERIC_TIME_VSYSCALL struct vdso_data { }; +#endif /* * The VDSO symbols are mapped into Linux so we can just use regular symbol diff --git a/arch/riscv/include/asm/vdso/clocksource.h b/arch/riscv/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..df6ea65c1dec --- /dev/null +++ b/arch/riscv/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSOCLOCKSOURCE_H +#define __ASM_VDSOCLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_ARCHTIMER + +#endif diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..c8e818688ec1 --- /dev/null +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <asm/csr.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct __kernel_old_timeval *tv asm("a0") = _tv; + register struct timezone *tz asm("a1") = _tz; + register long ret asm("a0"); + register long nr asm("a7") = __NR_gettimeofday; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(tv), "r"(tz), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long ret asm("a0"); + register long nr asm("a7") = __NR_clock_gettime; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(clkid), "r"(ts), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long ret asm("a0"); + register long nr asm("a7") = __NR_clock_getres; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(clkid), "r"(ts), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + /* + * The purpose of csr_read(CSR_TIME) is to trap the system into + * M-mode to obtain the value of CSR_TIME. Hence, unlike other + * architecture, no fence instructions surround the csr_read() + */ + return csr_read(CSR_TIME); +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h new file mode 100644 index 000000000000..82a5693b1861 --- /dev/null +++ b/arch/riscv/include/asm/vdso/processor.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +static inline void cpu_relax(void) +{ +#ifdef __riscv_muldiv + int dummy; + /* In lieu of a halt instruction, induce a long-latency stall. */ + __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); +#endif + barrier(); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..82fd5d83bd60 --- /dev/null +++ b/arch/riscv/include/asm/vdso/vsyscall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void) +{ + return vdso_data; +} + +#define __arch_get_k_vdso_data __riscv_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 40a3c442ac5f..6d59e6906fdd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -44,6 +44,22 @@ int riscv_of_processor_hartid(struct device_node *node) return hart; } +/* + * Find hart ID of the CPU DT node under which given DT node falls. + * + * To achieve this, we walk up the DT tree until we find an active + * RISC-V core (HART) node and extract the cpuid from it. + */ +int riscv_of_parent_hartid(struct device_node *node) +{ + for (; node; node = node->parent) { + if (of_device_is_compatible(node, "riscv")) + return riscv_of_processor_hartid(node); + } + + return -1; +} + #ifdef CONFIG_PROC_FS static void print_isa(struct seq_file *f, const char *isa) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 56d071b2c0a1..cae7e6d4c7ef 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -106,7 +106,9 @@ _save_context: /* Handle interrupts */ move a0, sp /* pt_regs */ - tail do_IRQ + la a1, handle_arch_irq + REG_L a1, (a1) + jr a1 1: /* * Exceptions run with interrupts enabled or disabled depending on the diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 345c4f2eba13..7207fa08d78f 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -7,7 +7,6 @@ #include <linux/interrupt.h> #include <linux/irqchip.h> -#include <linux/irqdomain.h> #include <linux/seq_file.h> #include <asm/smp.h> @@ -17,37 +16,9 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - switch (regs->cause & ~CAUSE_IRQ_FLAG) { - case RV_IRQ_TIMER: - riscv_timer_interrupt(); - break; -#ifdef CONFIG_SMP - case RV_IRQ_SOFT: - /* - * We only use software interrupts to pass IPIs, so if a non-SMP - * system gets one, then we don't know what to do. - */ - riscv_software_interrupt(); - break; -#endif - case RV_IRQ_EXT: - handle_arch_irq(regs); - break; - default: - pr_alert("unexpected interrupt cause 0x%lx", regs->cause); - BUG(); - } - irq_exit(); - - set_irq_regs(old_regs); -} - void __init init_IRQ(void) { irqchip_init(); + if (!handle_arch_irq) + panic("No interrupt controller found."); } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 5805791cd5b5..d4a64dfed342 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -11,6 +11,7 @@ #include <asm/kprobes.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> +#include <asm/patch.h> struct patch_insn { void *addr; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index a65a8fa0c22d..b1d4f452f843 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -123,11 +123,14 @@ static inline void clear_ipi(void) clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); } -void riscv_software_interrupt(void) +void handle_IPI(struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; unsigned long *stats = ipi_data[smp_processor_id()].stats; + irq_enter(); + clear_ipi(); while (true) { @@ -138,7 +141,7 @@ void riscv_software_interrupt(void) ops = xchg(pending_ipis, 0); if (ops == 0) - return; + goto done; if (ops & (1 << IPI_RESCHEDULE)) { stats[IPI_RESCHEDULE]++; @@ -160,6 +163,10 @@ void riscv_software_interrupt(void) /* Order data access and bit testing. */ mb(); } + +done: + irq_exit(); + set_irq_regs(old_regs); } static const char * const ipi_names[] = { diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 6a53c02e9c73..4d3a1048ad8b 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -26,3 +26,12 @@ void __init time_init(void) lpj_fine = riscv_timebase / HZ; timer_probe(); } + +void clocksource_arch_init(struct clocksource *cs) +{ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY + cs->vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER; +#else + cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; +#endif +} diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 5080fdf8c296..ecec1778e3a4 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -183,6 +183,4 @@ void trap_init(void) csr_write(CSR_SCRATCH, 0); /* Set the exception vector address */ csr_write(CSR_TVEC, &handle_exception); - /* Enable interrupts */ - csr_write(CSR_IE, IE_SIE); } diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e827fae3bf90..678204231700 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -11,8 +11,12 @@ #include <linux/slab.h> #include <linux/binfmts.h> #include <linux/err.h> - +#include <asm/page.h> +#ifdef GENERIC_TIME_VSYSCALL +#include <vdso/datapage.h> +#else #include <asm/vdso.h> +#endif extern char vdso_start[], vdso_end[]; @@ -26,7 +30,7 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -static struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = &vdso_data_store.data; static int __init vdso_init(void) { @@ -75,13 +79,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, */ mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_len, + ret = + install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), vdso_pagelist); - if (unlikely(ret)) + if (unlikely(ret)) { mm->context.vdso = NULL; + goto end; + } + vdso_base += (vdso_pages << PAGE_SHIFT); + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); + + if (unlikely(ret)) + mm->context.vdso = NULL; end: mmap_write_unlock(mm); return ret; @@ -91,5 +104,8 @@ const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) return "[vdso]"; + if (vma->vm_mm && (vma->vm_start == + (long)vma->vm_mm->context.vdso + PAGE_SIZE)) + return "[vdso_data]"; return NULL; } diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 4c8b2a4a6a70..38ba55b0eb9d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only # Copied from arch/tile/kernel/vdso/Makefile +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT +include $(srctree)/lib/vdso/Makefile # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT -vdso-syms += gettimeofday -vdso-syms += clock_gettime -vdso-syms += clock_getres +vdso-syms += vgettimeofday endif vdso-syms += getcpu vdso-syms += flush_icache @@ -14,6 +16,10 @@ vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) +endif + # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S deleted file mode 100644 index 91378a52eb22..000000000000 --- a/arch/riscv/kernel/vdso/clock_getres.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__vdso_clock_getres) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_clock_getres - ecall - ret - .cfi_endproc -ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S deleted file mode 100644 index 5371fd9bc01f..000000000000 --- a/arch/riscv/kernel/vdso/clock_gettime.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__vdso_clock_gettime) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_clock_gettime - ecall - ret - .cfi_endproc -ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S deleted file mode 100644 index e6fb8af88632..000000000000 --- a/arch/riscv/kernel/vdso/gettimeofday.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__vdso_gettimeofday) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_gettimeofday - ecall - ret - .cfi_endproc -ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index f66a091cb890..e6f558bca71b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -2,11 +2,13 @@ /* * Copyright (C) 2012 Regents of the University of California */ +#include <asm/page.h> OUTPUT_ARCH(riscv) SECTIONS { + PROVIDE(_vdso_data = . + PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..d264943e2e47 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copied from arch/arm64/kernel/vdso/vgettimeofday.c + * + * Copyright (C) 2018 ARM Ltd. + * Copyright (C) 2020 SiFive + */ + +#include <linux/time.h> +#include <linux/types.h> + +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9996f49959bd..f4adb3684f3d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -480,17 +480,6 @@ static void __init setup_vm_final(void) csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); } - -void free_initmem(void) -{ - unsigned long init_begin = (unsigned long)__init_begin; - unsigned long init_end = (unsigned long)__init_end; - - /* Make the region as non-execuatble. */ - set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3d554887794e..cee3cb6455a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -978,7 +978,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d0ff26d157bc..d47c19718615 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3923,11 +3923,13 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, } } -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); + + return true; } void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a16c45460f1b..dde744682e63 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -232,6 +232,7 @@ config X86 select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS + select HAVE_ARCH_KCSAN if X86_64 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI @@ -821,14 +822,6 @@ config PVH This option enables the PVH entry point for guest virtual machines as specified in the x86/HVM direct boot ABI. -config KVM_DEBUG_FS - bool "Enable debug information for KVM Guests in debugfs" - depends on KVM_GUEST && DEBUG_FS - ---help--- - This option enables collection of various statistics for KVM guest. - Statistics are displayed in debugfs filesystem. Enabling this option - may incur significant overhead. - config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" depends on PARAVIRT diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 4c5355684321..fe605205b4ce 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -9,7 +9,9 @@ # Changed by many, many contributors over the years. # +# Sanitizer runtimes are unavailable and cannot be linked for early boot code. KASAN_SANITIZE := n +KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Kernel does not boot with kcov instrumentation here. diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5f7c262bcc99..7619742f91c9 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -17,7 +17,9 @@ # (see scripts/Makefile.lib size_append) # compressed vmlinux.bin.all + u32 size of vmlinux.bin.all +# Sanitizer runtimes are unavailable and cannot be linked for early boot code. KASAN_SANITIZE := n +KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index f4d57782c14b..bd3f14175193 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -557,14 +557,34 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) return false; } - if (!__rcu_is_watching()) { + /* + * If this entry hit the idle task invoke rcu_irq_enter() whether + * RCU is watching or not. + * + * Interupts can nest when the first interrupt invokes softirq + * processing on return which enables interrupts. + * + * Scheduler ticks in the idle task can mark quiescent state and + * terminate a grace period, if and only if the timer interrupt is + * not nested into another interrupt. + * + * Checking for __rcu_is_watching() here would prevent the nesting + * interrupt to invoke rcu_irq_enter(). If that nested interrupt is + * the tick then rcu_flavor_sched_clock_irq() would wrongfully + * assume that it is the first interupt and eventually claim + * quiescient state and end grace periods prematurely. + * + * Unconditionally invoke rcu_irq_enter() so RCU state stays + * consistent. + * + * TINY_RCU does not support EQS, so let the compiler eliminate + * this part when enabled. + */ + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { /* * If RCU is not watching then the same careful * sequence vs. lockdep and tracing is required * as in enter_from_user_mode(). - * - * This only happens for IRQs that hit the idle - * loop, i.e. if idle is not using MWAIT. */ lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter(); @@ -576,9 +596,10 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) } /* - * If RCU is watching then RCU only wants to check - * whether it needs to restart the tick in NOHZ - * mode. + * If RCU is watching then RCU only wants to check whether it needs + * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() + * already contains a warning when RCU is not watching, so no point + * in having another one here. */ instrumentation_begin(); rcu_irq_enter_check_tick(); diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 54e03ab26ff3..04e65f0698f6 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -10,8 +10,11 @@ ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE include $(srctree)/lib/vdso/Makefile KBUILD_CFLAGS += $(DISABLE_LTO) + +# Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n UBSAN_SANITIZE := n +KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. @@ -29,6 +32,9 @@ vobjs32-y += vdso32/vclock_gettime.o # files to link into kernel obj-y += vma.o +KASAN_SANITIZE_vma.o := y +UBSAN_SANITIZE_vma.o := y +KCSAN_SANITIZE_vma.o := y OBJECT_FILES_NON_STANDARD_vma.o := n # vDSO images to build diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 0367efdc5b7a..35460fef39b8 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,8 +201,12 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr) return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } -static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { + /* + * Because this is a plain access, we need to disable KCSAN here to + * avoid double instrumentation via instrumented bitops. + */ return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d203c541a65a..cf51c50eb356 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -391,8 +391,8 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST -#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_IST -#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_IST +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST @@ -543,7 +543,6 @@ SYM_CODE_END(spurious_entries_start) DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); -DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); @@ -561,6 +560,7 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); /* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8f1e94f29a16..a338a6deb950 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -89,6 +89,8 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1da5858501ca..f8998e97457f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1306,7 +1306,6 @@ struct kvm_arch_async_pf { extern u64 __read_mostly host_efer; extern struct kvm_x86_ops kvm_x86_ops; -extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) @@ -1671,7 +1670,7 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm); void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 9a6dc9b4ec99..fb81fea99093 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + /* * x86 specific delta calculation. * diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8ef4369a4f06..e77261db2391 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,6 +28,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n +# With some compiler versions the generated code results in boot hangs, caused +# by several compilation units. To be safe, disable all instrumentation. +KCSAN_SANITIZE := n + OBJECT_FILES_NON_STANDARD_test_nx.o := y OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 9244377ed454..e0e2f020ec02 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2051,7 +2051,7 @@ void __init init_apic_mappings(void) unsigned int new_apicid; if (apic_validate_deadline_timer()) - pr_debug("TSC deadline timer available\n"); + pr_info("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 7dc4ad68eb41..dba6a83bc349 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,6 +13,9 @@ endif KCOV_INSTRUMENT_common.o := n KCOV_INSTRUMENT_perf_event.o := n +# As above, instrumenting secondary CPU boot code causes boot hangs. +KCSAN_SANITIZE_common.o := n + # Make sure load_percpu_segment has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b6f887be440c..0b71970d2d3d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); + + spectre_v2_user_ibpb = mode; } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. + */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* * Indirect branch speculation is always disabled in strict - * mode. + * mode. It can neither be enabled if it was force-disabled + * by a previous prctl call. + */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + task_spec_ib_force_disable(task)) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - return PR_SPEC_DISABLE; - default: + } else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1594,7 +1610,7 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 63926c94eb5f..c25a67a34bd3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn) static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), {} }; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 4d13c57f370a..983cd53ed4c9 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -991,7 +991,15 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + + /* + * SETUP_EFI is supplied by kexec and does not need to be + * reserved. + */ + if (data->type != SETUP_EFI) + e820__range_update_kexec(pa_data, + sizeof(*data) + data->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); if (data->type == SETUP_INDIRECT && ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d00f7c430e65..df63786e7bfa 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -21,7 +21,6 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/kprobes.h> -#include <linux/debugfs.h> #include <linux/nmi.h> #include <linux/swait.h> #include <asm/timer.h> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3a98ff36f411..2de365f15684 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -476,7 +476,7 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); static DEFINE_PER_CPU(unsigned long, nmi_dr7); -DEFINE_IDTENTRY_NMI(exc_nmi) +DEFINE_IDTENTRY_RAW(exc_nmi) { if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) return; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8e3d0347b664..f362ce0d5ac0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e040ba6be27b..0ec7ced727fe 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 371a6b348e44..e42faa792c07 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,10 +25,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7febae381b91..af75109485c2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -97,24 +97,6 @@ int is_valid_bugaddr(unsigned long addr) return ud == INSN_UD0 || ud == INSN_UD2; } -int fixup_bug(struct pt_regs *regs, int trapnr) -{ - if (trapnr != X86_TRAP_UD) - return 0; - - switch (report_bug(regs->ip, regs)) { - case BUG_TRAP_TYPE_NONE: - case BUG_TRAP_TYPE_BUG: - break; - - case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD2; - return 1; - } - - return 0; -} - static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) @@ -190,13 +172,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - /* - * WARN*()s end up here; fix them up before we call the - * notifier chain. - */ - if (!user_mode(regs) && fixup_bug(regs, trapnr)) - return; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -241,9 +216,46 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY(exc_invalid_op) +DEFINE_IDTENTRY_RAW(exc_invalid_op) { + bool rcu_exit; + + /* + * Handle BUG/WARN like NMIs instead of like normal idtentries: + * if we bugged/warned in a bad RCU context, for example, the last + * thing we want is to BUG/WARN again in the idtentry code, ad + * infinitum. + */ + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { + enum bug_trap_type type; + + nmi_enter(); + instrumentation_begin(); + trace_hardirqs_off_finish(); + type = report_bug(regs->ip, regs); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); + instrumentation_end(); + nmi_exit(); + + if (type == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * Else, if this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } + + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); handle_invalid_op(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); } DEFINE_IDTENTRY(exc_coproc_segment_overrun) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b4c6b6f35548..3bfc8dd8a43d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) /* * On 64-bit, align RODATA to 2MB so we retain large page mappings for diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 253b8e875ccd..8a294f9747aa 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -181,17 +181,14 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, r = -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) goto out; - r = -ENOMEM; if (cpuid->nent) { - cpuid_entries = - vmalloc(array_size(sizeof(struct kvm_cpuid_entry), - cpuid->nent)); - if (!cpuid_entries) - goto out; - r = -EFAULT; - if (copy_from_user(cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry))) + cpuid_entries = vmemdup_user(entries, + array_size(sizeof(struct kvm_cpuid_entry), + cpuid->nent)); + if (IS_ERR(cpuid_entries)) { + r = PTR_ERR(cpuid_entries); goto out; + } } for (i = 0; i < cpuid->nent; i++) { vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; @@ -211,8 +208,8 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, kvm_x86_ops.cpuid_update(vcpu); r = kvm_update_cpuid(vcpu); + kvfree(cpuid_entries); out: - vfree(cpuid_entries); return r; } @@ -325,7 +322,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_7_ECX, - F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) | + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ @@ -334,6 +331,13 @@ void kvm_set_cpu_caps(void) if (cpuid_ecx(7) & F(LA57)) kvm_cpu_cap_set(X86_FEATURE_LA57); + /* + * PKU not yet implemented for shadow paging and requires OSPKE + * to be set on the host. Clear it if that is not the case + */ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + kvm_cpu_cap_clear(X86_FEATURE_PKU); + kvm_cpu_cap_mask(CPUID_7_EDX, F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | @@ -426,7 +430,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); struct kvm_cpuid_array { struct kvm_cpuid_entry2 *entries; - const int maxnent; + int maxnent; int nent; }; @@ -870,7 +874,6 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_array array = { .nent = 0, - .maxnent = cpuid->nent, }; int r, i; @@ -887,6 +890,8 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, if (!array.entries) return -ENOMEM; + array.maxnent = cpuid->nent; + for (i = 0; i < ARRAY_SIZE(funcs); i++) { r = get_cpuid_func(&array, funcs[i], type); if (r) diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 018aebce33ff..7e818d64bb4d 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -43,22 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); -void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, + debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu, &vcpu_tsc_offset_fops); if (lapic_in_kernel(vcpu)) debugfs_create_file("lapic_timer_advance_ns", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_timer_advance_ns_fops); if (kvm_has_tsc_control) { debugfs_create_file("tsc-scaling-ratio", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_tsc_scaling_fops); debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_tsc_scaling_frac_fops); } } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index de5476f8683e..d0e2825ae617 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4800,8 +4800,12 @@ static const struct opcode twobyte_table[256] = { GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), N, N, N, N, N, N, - D(ImplicitOps | ModRM | SrcMem | NoAccess), - N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), + D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */ + D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, + D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */ + D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */ + D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */ + D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */ /* 0x20 - 0x2F */ DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index febca334c320..a6e218c6140d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -462,7 +462,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, if (channel == 3) { /* Read-Back Command. */ for (channel = 0; channel < 3; channel++) { - s = &pit_state->channels[channel]; if (val & (2 << channel)) { if (!(val & 0x20)) pit_latch_count(pit, channel); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8a6db11dcb43..6bceafb19108 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -258,7 +258,7 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm) /* Only a few fields of int_ctl are written by the processor. */ mask = V_IRQ_MASK | V_TPR_MASK; if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && - is_intercept(svm, SVM_EXIT_VINTR)) { + is_intercept(svm, INTERCEPT_VINTR)) { /* * In order to request an interrupt window, L0 is usurping * svm->vmcb->control.int_ctl and possibly setting V_IRQ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7502cd65528f..8ccfa4197d9c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1378,6 +1378,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm) /* Drop int_ctl fields related to VINTR injection. */ svm->vmcb->control.int_ctl &= mask; if (is_guest_mode(&svm->vcpu)) { + svm->nested.hsave->control.int_ctl &= mask; + WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != (svm->nested.ctl.int_ctl & V_TPR_MASK)); svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; @@ -1999,7 +2001,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value) */ if (vgif_enabled(svm)) clr_intercept(svm, INTERCEPT_STGI); - if (is_intercept(svm, SVM_EXIT_VINTR)) + if (is_intercept(svm, INTERCEPT_VINTR)) svm_clear_vintr(svm); enable_gif(svm); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2e7238a57fc1..d1af20b050a8 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4624,19 +4624,24 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) } } -static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) +static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer, + int *ret) { gva_t gva; struct x86_exception e; + int r; if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmcs_read32(VMX_INSTRUCTION_INFO), false, - sizeof(*vmpointer), &gva)) - return 1; + sizeof(*vmpointer), &gva)) { + *ret = 1; + return -EINVAL; + } - if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; + r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e); + if (r != X86EMUL_CONTINUE) { + *ret = vmx_handle_memory_failure(vcpu, r, &e); + return -EINVAL; } return 0; @@ -4764,8 +4769,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_get_vmptr(vcpu, &vmptr)) - return 1; + if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret)) + return ret; /* * SDM 3: 24.11.5 @@ -4838,12 +4843,13 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) u32 zero = 0; gpa_t vmptr; u64 evmcs_gpa; + int r; if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_get_vmptr(vcpu, &vmptr)) - return 1; + if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) + return r; if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, @@ -4902,7 +4908,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) u64 value; gva_t gva = 0; short offset; - int len; + int len, r; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -4943,10 +4949,9 @@ static int handle_vmread(struct kvm_vcpu *vcpu) instr_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); } return nested_vmx_succeed(vcpu); @@ -4987,7 +4992,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) unsigned long field; short offset; gva_t gva; - int len; + int len, r; /* * The value to write might be 32 or 64 bits, depending on L1's long @@ -5017,10 +5022,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, exit_qualification, instr_info, false, len, &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_read_guest_virt(vcpu, gva, &value, len, &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); } field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); @@ -5103,12 +5107,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); gpa_t vmptr; + int r; if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_get_vmptr(vcpu, &vmptr)) - return 1; + if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) + return r; if (!page_address_valid(vcpu, vmptr)) return nested_vmx_failValid(vcpu, @@ -5170,6 +5175,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; gva_t gva; + int r; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -5181,11 +5187,11 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) true, sizeof(gpa_t), &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, - sizeof(gpa_t), &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, + sizeof(gpa_t), &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); + return nested_vmx_succeed(vcpu); } @@ -5209,7 +5215,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) struct { u64 eptp, gpa; } operand; - int i; + int i, r; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || @@ -5236,10 +5242,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); /* * Nested EPT roots are always held through guest_mmu, @@ -5291,6 +5296,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 gla; } operand; u16 vpid02; + int r; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -5318,10 +5324,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); + if (operand.vpid >> 16) return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); @@ -5666,7 +5672,7 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info; - switch (exit_reason) { + switch ((u16)exit_reason) { case EXIT_REASON_EXCEPTION_NMI: intr_info = vmx_get_intr_info(vcpu); if (is_nmi(intr_info)) @@ -5727,7 +5733,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); u32 intr_info; - switch (exit_reason) { + switch ((u16)exit_reason) { case EXIT_REASON_EXCEPTION_NMI: intr_info = vmx_get_intr_info(vcpu); if (is_nmi(intr_info)) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index d33d890b605f..bdcce65c7a1d 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -181,7 +181,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) ret = pmu->version > 1; break; case MSR_IA32_PERF_CAPABILITIES: - ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM); + ret = 1; break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2b5ba6063a2d..36c771728c8c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1600,6 +1600,32 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) return 1; } +/* + * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns + * KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM. Return value + * indicates whether exit to userspace is needed. + */ +int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, + struct x86_exception *e) +{ + if (r == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_emulated_page_fault(vcpu, e); + return 1; + } + + /* + * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED + * while handling a VMX instruction KVM could've handled the request + * correctly by exiting to userspace and performing I/O but there + * doesn't seem to be a real use-case behind such requests, just return + * KVM_EXIT_INTERNAL_ERROR for now. + */ + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + + return 0; +} /* * Recognizes a pending MTF VM-exit and records the nested state for later @@ -5486,6 +5512,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) u64 pcid; u64 gla; } operand; + int r; if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { kvm_queue_exception(vcpu, UD_VECTOR); @@ -5508,10 +5535,9 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) sizeof(operand), &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_emulated_page_fault(vcpu, &e); - return 1; - } + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); + if (r != X86EMUL_CONTINUE) + return vmx_handle_memory_failure(vcpu, r, &e); if (operand.pcid >> 12 != 0) { kvm_inject_gp(vcpu, 0); @@ -7282,10 +7308,6 @@ static __init void vmx_set_cpu_caps(void) if (vmx_pt_mode_is_host_guest()) kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT); - /* PKU is not yet implemented for shadow paging. */ - if (enable_ept && boot_cpu_has(X86_FEATURE_OSPKE)) - kvm_cpu_cap_check_and_set(X86_FEATURE_PKU); - if (vmx_umip_emulated()) kvm_cpu_cap_set(X86_FEATURE_UMIP); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 672c28f17e49..8a83b5edc820 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -355,6 +355,8 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); +int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, + struct x86_exception *e); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9e41b5135340..00c88c2f34e4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -239,8 +239,7 @@ u64 __read_mostly host_xcr0; u64 __read_mostly supported_xcr0; EXPORT_SYMBOL_GPL(supported_xcr0); -struct kmem_cache *x86_fpu_cache; -EXPORT_SYMBOL_GPL(x86_fpu_cache); +static struct kmem_cache *x86_fpu_cache; static struct kmem_cache *x86_emulator_cache; @@ -5647,13 +5646,6 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, /* kvm_write_guest_virt_system can pull in tons of pages. */ vcpu->arch.l1tf_flush_l1d = true; - /* - * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED - * is returned, but our callers are not ready for that and they blindly - * call kvm_inject_page_fault. Ensure that they at least do not leak - * uninitialized kernel stack memory into cr2 and error code. - */ - memset(exception, 0, sizeof(*exception)); return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } @@ -7018,7 +7010,7 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r && ctxt->tf) + if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); if (kvm_x86_ops.update_emulated_instruction) kvm_x86_ops.update_emulated_instruction(vcpu); @@ -8277,9 +8269,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, - bool blockable) +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { unsigned long apic_address; @@ -8290,8 +8281,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); - - return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) @@ -9962,13 +9951,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) if (!slot || !slot->npages) return 0; - /* - * Stuff a non-canonical value to catch use-after-delete. This - * ends up being 0 on 32-bit KVM, but there's no better - * alternative. - */ - hva = (unsigned long)(0xdeadull << 48); old_npages = slot->npages; + hva = 0; } for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { @@ -10140,43 +10124,65 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } static void kvm_mmu_slot_apply_flags(struct kvm *kvm, - struct kvm_memory_slot *new) + struct kvm_memory_slot *old, + struct kvm_memory_slot *new, + enum kvm_mr_change change) { - /* Still write protect RO slot */ - if (new->flags & KVM_MEM_READONLY) { - kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); + /* + * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot. + * See comments below. + */ + if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY)) return; - } /* - * Call kvm_x86_ops dirty logging hooks when they are valid. - * - * kvm_x86_ops.slot_disable_log_dirty is called when: - * - * - KVM_MR_CREATE with dirty logging is disabled - * - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag - * - * The reason is, in case of PML, we need to set D-bit for any slots - * with dirty logging disabled in order to eliminate unnecessary GPA - * logging in PML buffer (and potential PML buffer full VMEXIT). This - * guarantees leaving PML enabled during guest's lifetime won't have - * any additional overhead from PML when guest is running with dirty - * logging disabled for memory slots. + * Dirty logging tracks sptes in 4k granularity, meaning that large + * sptes have to be split. If live migration is successful, the guest + * in the source machine will be destroyed and large sptes will be + * created in the destination. However, if the guest continues to run + * in the source machine (for example if live migration fails), small + * sptes will remain around and cause bad performance. * - * kvm_x86_ops.slot_enable_log_dirty is called when switching new slot - * to dirty logging mode. + * Scan sptes if dirty logging has been stopped, dropping those + * which can be collapsed into a single large-page spte. Later + * page faults will create the large-page sptes. * - * If kvm_x86_ops dirty logging hooks are invalid, use write protect. + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() + */ + if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_zap_collapsible_sptes(kvm, new); + + /* + * Enable or disable dirty logging for the slot. * - * In case of write protect: + * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old + * slot have been zapped so no dirty logging updates are needed for + * the old slot. + * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible + * any mappings that might be created in it will consume the + * properties of the new slot and do not need to be updated here. * - * Write protect all pages for dirty logging. + * When PML is enabled, the kvm_x86_ops dirty logging hooks are + * called to enable/disable dirty logging. * - * All the sptes including the large sptes which point to this - * slot are set to readonly. We can not create any new large - * spte on this slot until the end of the logging. + * When disabling dirty logging with PML enabled, the D-bit is set + * for sptes in the slot in order to prevent unnecessary GPA + * logging in the PML buffer (and potential PML buffer full VMEXIT). + * This guarantees leaving PML enabled for the guest's lifetime + * won't have any additional overhead from PML when the guest is + * running with dirty logging disabled. * + * When enabling dirty logging, large sptes are write-protected + * so they can be split on first write. New large sptes cannot + * be created for this slot until the end of the logging. * See the comments in fast_page_fault(). + * For small sptes, nothing is done if the dirty log is in the + * initial-all-set state. Otherwise, depending on whether pml + * is enabled the D-bit or the W-bit will be cleared. */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { if (kvm_x86_ops.slot_enable_log_dirty) { @@ -10213,39 +10219,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_calculate_default_mmu_pages(kvm)); /* - * Dirty logging tracks sptes in 4k granularity, meaning that large - * sptes have to be split. If live migration is successful, the guest - * in the source machine will be destroyed and large sptes will be - * created in the destination. However, if the guest continues to run - * in the source machine (for example if live migration fails), small - * sptes will remain around and cause bad performance. - * - * Scan sptes if dirty logging has been stopped, dropping those - * which can be collapsed into a single large-page spte. Later - * page faults will create the large-page sptes. - * - * There is no need to do this in any of the following cases: - * CREATE: No dirty mappings will already exist. - * MOVE/DELETE: The old mappings will already have been cleaned up by - * kvm_arch_flush_shadow_memslot() - */ - if (change == KVM_MR_FLAGS_ONLY && - (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) - kvm_mmu_zap_collapsible_sptes(kvm, new); - - /* - * Set up write protection and/or dirty logging for the new slot. - * - * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have - * been zapped so no dirty logging staff is needed for old slot. For - * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the - * new and it's also covered when dealing with the new slot. - * * FIXME: const-ify all uses of struct kvm_memory_slot. */ - if (change != KVM_MR_DELETE) - kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); + kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change); /* Free the arrays associated with the old memslot. */ if (change == KVM_MR_MOVE) @@ -10530,7 +10506,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) return kvm_arch_interrupt_allowed(vcpu); } -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { struct x86_exception fault; @@ -10547,6 +10523,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, fault.address = work->arch.token; fault.async_page_fault = true; kvm_inject_page_fault(vcpu, &fault); + return true; } else { /* * It is not possible to deliver a paravirtualized asynchronous @@ -10557,6 +10534,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, * fault is retried, hopefully the page will be ready in the host. */ kvm_make_request(KVM_REQ_APF_HALT, vcpu); + return false; } } @@ -10574,7 +10552,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, kvm_del_async_pf_gfn(vcpu, work->arch.gfn); trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); - if (kvm_pv_async_pf_enabled(vcpu) && + if ((work->wakeup_all || work->notpresent_injected) && + kvm_pv_async_pf_enabled(vcpu) && !apf_put_user_ready(vcpu, work->arch.token)) { vcpu->arch.apf.pageready_pending = true; kvm_apic_set_irq(vcpu, &irq, NULL); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 5246db42de45..6110bce7237b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -6,10 +6,19 @@ # Produces uninteresting flaky coverage. KCOV_INSTRUMENT_delay.o := n +# KCSAN uses udelay for introducing watchpoint delay; avoid recursion. +KCSAN_SANITIZE_delay.o := n +ifdef CONFIG_KCSAN +# In case KCSAN+lockdep+ftrace are enabled, disable ftrace for delay.o to avoid +# lockdep -> [other libs] -> KCSAN -> udelay -> ftrace -> lockdep recursion. +CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) +endif + # Early boot use of cmdline; don't instrument it ifdef CONFIG_AMD_MEM_ENCRYPT KCOV_INSTRUMENT_cmdline.o := n KASAN_SANITIZE_cmdline.o := n +KCSAN_SANITIZE_cmdline.o := n ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cmdline.o = -pg diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 98f7c6fa2eaa..f7fd0e868c9c 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -7,6 +7,10 @@ KCOV_INSTRUMENT_mem_encrypt_identity.o := n KASAN_SANITIZE_mem_encrypt.o := n KASAN_SANITIZE_mem_encrypt_identity.o := n +# Disable KCSAN entirely, because otherwise we get warnings that some functions +# reference __initdata sections. +KCSAN_SANITIZE := n + ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b991aa4bdfae..1d6cb07f4f86 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -204,8 +204,19 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; - if (fixup_bug(regs, trapnr)) - return; + if (trapnr == X86_TRAP_UD) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * If this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", diff --git a/arch/x86/purgatory/.gitignore b/arch/x86/purgatory/.gitignore new file mode 100644 index 000000000000..d2be1500671d --- /dev/null +++ b/arch/x86/purgatory/.gitignore @@ -0,0 +1 @@ +purgatory.chk diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index fb4ee5444379..b04e6e72a592 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -14,10 +14,18 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE CFLAGS_sha256.o := -D__DISABLE_EXPORTS -LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib -targets += purgatory.ro - +# When linking purgatory.ro with -r unresolved symbols are not checked, +# also link a purgatory.chk binary without -r to check for unresolved symbols. +PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib +LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS) +LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS) +targets += purgatory.ro purgatory.chk + +# Sanitizer, etc. runtimes are unavailable and cannot be linked here. +GCOV_PROFILE := n KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCSAN_SANITIZE := n KCOV_INSTRUMENT := n # These are adjustments to the compiler flags used for objects that @@ -25,7 +33,7 @@ KCOV_INSTRUMENT := n PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) +PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not @@ -58,12 +66,15 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) +$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE + $(call if_changed,ld) + targets += kexec-purgatory.c quiet_cmd_bin2c = BIN2C $@ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ -$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE $(call if_changed,bin2c) obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 682c895753d9..6b1f3a4eeb44 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -6,7 +6,10 @@ # for more details. # # + +# Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n +KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y subdir- := rm diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index b11ec5d8f8ac..83f1b6a56449 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -6,7 +6,10 @@ # for more details. # # + +# Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n +KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. |