diff options
Diffstat (limited to 'arch/arm64/kvm/hyp')
27 files changed, 2162 insertions, 1702 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8c9880783839..f54f0e89a71c 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,18 +3,12 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) - -obj-$(CONFIG_KVM) += hyp.o - -hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +incdir := $(srctree)/$(src)/include +subdir-asflags-y := -I$(incdir) +subdir-ccflags-y := -I$(incdir) \ + -fno-stack-protector \ + -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) + +obj-$(CONFIG_KVM) += vhe/ nvhe/ +obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index 25c0e47d57cb..ae56d8a4b382 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -44,14 +44,14 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; int cond; /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) + if (kvm_vcpu_get_esr(vcpu) >> 30) return true; /* Is condition field valid? */ @@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) * * IT[7:0] -> CPSR[26:25],CPSR[15:10] */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90186cf6473e..ee32a7743389 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -16,12 +16,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_ptrauth.h> -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text - .pushsection .hyp.text, "ax" /* * We treat x18 as callee-saved as the host may use it as a platform diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 5b8ff517ff10..01f114aa47b0 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -9,7 +9,6 @@ #include <asm/fpsimdmacros.h> .text - .pushsection .hyp.text, "ax" SYM_FUNC_START(__fpsimd_save_state) fpsimd_save x0, 1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9c5cfb04170e..689fccbc9de7 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -16,7 +16,6 @@ #include <asm/mmu.h> .text - .pushsection .hyp.text, "ax" .macro do_el2_call /* @@ -40,6 +39,7 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap +#ifdef __KVM_NVHE_HYPERVISOR__ mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC @@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2 eret sb +#endif /* __KVM_NVHE_HYPERVISOR__ */ el1_hvc_guest: /* @@ -180,6 +181,7 @@ el2_error: eret sb +#ifdef __KVM_NVHE_HYPERVISOR__ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -189,6 +191,7 @@ SYM_FUNC_START(__hyp_do_panic) eret sb SYM_FUNC_END(__hyp_do_panic) +#endif SYM_CODE_START(__hyp_panic) get_host_ctxt x0, x1 @@ -318,20 +321,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) - - .popsection - -SYM_CODE_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_CODE_END(__smccc_workaround_1_smc) #endif diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index e95af204fec7..0297dc63988c 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -4,6 +4,9 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define __ARM64_KVM_HYP_DEBUG_SR_H__ + #include <linux/compiler.h> #include <linux/kvm_host.h> @@ -85,53 +88,8 @@ default: write_debug(ptr[0], reg, 0); \ } -static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) -{ - u64 reg; - - /* Clear pmscr in case of early return */ - *pmscr_el1 = 0; - - /* SPE present on this CPU? */ - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), - ID_AA64DFR0_PMSVER_SHIFT)) - return; - - /* Yes; is it owned by EL3? */ - reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) - return; - - /* No; is the host actually using the thing? */ - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) - return; - - /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); - isb(); - - /* Now drain all buffered data to memory */ - psb_csync(); - dsb(nsh); -} - -static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) -{ - if (!pmscr_el1) - return; - - /* The host page table is installed, but not yet synchronised */ - isb(); - - /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); -} - -static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, save_debug(dbg->dbg_wcr, dbgwcr, wrps); save_debug(dbg->dbg_wvr, dbgwvr, wrps); - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } -static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -165,23 +122,16 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, restore_debug(dbg->dbg_wcr, dbgwcr, wrps); restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); } -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. - */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -190,20 +140,17 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); } -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -212,13 +159,10 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000000..426ef65601dd --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +extern const char __hyp_panic_string[]; + +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) +{ + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); + + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); +} + +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static inline void __deactivate_traps_common(void) +{ + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} + +static inline void ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline void __activate_vm(struct kvm_s2_mmu *mmu) +{ + __load_guest_stage2(mmu); +} + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg(par_el1); + asm volatile("at s1e1r, %0" : : "r" (far)); + isb(); + + tmp = read_sysreg(par_el1); + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + u64 hpfar, far; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + +/* Check for an FPSIMD/SVE trap and handle as appropriate */ +static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +{ + bool vhe, sve_guest, sve_host; + u8 esr_ec; + + if (!system_supports_fpsimd()) + return false; + + /* + * Currently system_supports_sve() currently implies has_vhe(), + * so the check is redundant. However, has_vhe() can be determined + * statically and helps the compiler remove dead code. + */ + if (has_vhe() && system_supports_sve()) { + sve_guest = vcpu_has_sve(vcpu); + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; + vhe = true; + } else { + sve_guest = false; + sve_host = false; + vhe = has_vhe(); + } + + esr_ec = kvm_vcpu_trap_get_class(vcpu); + if (esr_ec != ESR_ELx_EC_FP_ASIMD && + esr_ec != ESR_ELx_EC_SVE) + return false; + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest) + if (esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + if (vhe) { + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; + + if (sve_guest) + reg |= CPACR_EL1_ZEN; + + write_sysreg(reg, cpacr_el1); + } else { + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + } + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init(). + */ + if (sve_host) { + struct thread_struct *thread = container_of( + vcpu->arch.host_fpsimd_state, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); + } else { + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + if (sve_guest) { + sve_load_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); + } else { + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + } + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) + +static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt, APIA); + __ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the + * main run loop. + */ +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + * Similarly for trapped SVE accesses. + */ + if (__hyp_handle_fpsimd(vcpu)) + return true; + + if (__hyp_handle_ptrauth(vcpu)) + return true; + + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_dabt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + + goto exit; + } + } + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + } + +exit: + /* Return to the host kernel and handle the exit */ + return false; +} + +static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000000..7a986030145f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); +} + +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); +} + +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); +} + +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); +} + +static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); +} + +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). + */ + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } + + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); +} + +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. + */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); +} + +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); + + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); +} + +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); + + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile new file mode 100644 index 000000000000..aef76487edc2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_NVHE_HYPERVISOR__ +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o + +obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) +extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) + +$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) +$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE + $(call if_changed_rule,as_o_S) +$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE + $(call if_changed,hypcopy) + +# Disable reordering functions by GCC (enabled at -O2). +# This pass puts functions into '.text.*' sections to aid the linker +# in optimizing ELF layout. See HYPCOPY comment below for more info. +ccflags-y += $(call cc-option,-fno-reorder-functions) + +# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names +# and relevant ELF section names to avoid clashes with VHE code/data. +# +# Hyp code is assumed to be in the '.text' section of the input object +# files (with the exception of specialized sections such as +# '.hyp.idmap.text'). This assumption may be broken by a compiler that +# divides code into sections like '.text.unlikely' so as to optimize +# ELF layout. HYPCOPY checks that no such sections exist in the input +# using `objdump`, otherwise they would be linked together with other +# kernel code and not memory-mapped correctly at runtime. +quiet_cmd_hypcopy = HYPCOPY $@ + cmd_hypcopy = \ + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ + +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + +# KVM nVHE code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# Skip objtool checking for this directory because nVHE code is compiled with +# non-standard build rules. +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c new file mode 100644 index 000000000000..91a711aa8382 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/debug-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static void __debug_save_spe(u64 *pmscr_el1) +{ + u64 reg; + + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) + return; + + /* No; is the host actually using the thing? */ + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static void __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + /* Disable and flush SPE data generation */ + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S new file mode 100644 index 000000000000..d9434e90c06d --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sysreg.h> +#include <asm/virt.h> + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +SYM_CODE_START(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP pgd + * x1: HYP stack + * x2: HYP vectors + * x3: per-CPU offset + */ +__do_hyp_init: + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.lo __kvm_handle_stub_hvc + + phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif + msr ttbr0_el2, x4 + + mrs x4, tcr_el1 + mov_q x5, TCR_EL2_MASK + and x4, x4, x5 + mov x5, #TCR_EL2_RES1 + orr x4, x4, x5 + + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + ldr_l x5, idmap_t0sz + bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + + /* + * Set the PS bits in TCR_EL2. + */ + tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 + + msr tcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + /* Invalidate the stale TLBs from Bootloader */ + tlbi alle2 + dsb sy + + /* + * Preserve all the RES1 bits while setting the default flags, + * as well as the EE bit on BE. Drop the A flag since the compiler + * is allowed to generate unaligned accesses. + */ + mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) +CPU_BE( orr x4, x4, #SCTLR_ELx_EE) +alternative_if ARM64_HAS_ADDRESS_AUTH + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + orr x4, x4, x5 +alternative_else_nop_endif + msr sctlr_el2, x4 + isb + + /* Set the stack and new vectors */ + kern_hyp_va x1 + mov sp, x1 + msr vbar_el2, x2 + + /* Set tpidr_el2 for use by HYP */ + msr tpidr_el2, x3 + + /* Hello, World! */ + eret +SYM_CODE_END(__kvm_hyp_init) + +SYM_CODE_START(__kvm_handle_stub_hvc) + cmp x0, #HVC_SOFT_RESTART + b.ne 1f + + /* This is where we're about to jump, staying at EL2 */ + msr elr_el2, x1 + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) + msr spsr_el2, x0 + + /* Shuffle the arguments, and don't come back */ + mov x0, x2 + mov x1, x3 + mov x2, x4 + b reset + +1: cmp x0, #HVC_RESET_VECTORS + b.ne 1f + + /* + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. + */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ + mrs x5, sctlr_el2 + mov_q x6, SCTLR_ELx_FLAGS + bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround + msr sctlr_el2, x5 + isb + + /* Install stub vectors */ + adr_l x5, __hyp_stub_vectors + msr vbar_el2, x5 + eret + +1: /* Bad stub call */ + mov_q x0, HVC_STUB_ERR + eret + +SYM_CODE_END(__kvm_handle_stub_hvc) + + .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c new file mode 100644 index 000000000000..341be2f2f312 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/switch.h> +#include <hyp/sysreg-sr.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + if (!update_fp_enabled(vcpu)) { + val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cptr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } +} + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u64 mdcr_el2; + + ___deactivate_traps(vcpu); + + mdcr_el2 = read_sysreg(mdcr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. + */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + + __deactivate_traps_common(); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static void __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/* Restore VGICv3 state on non_VEH systems */ +static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/** + * Disable host events, enable guest events + */ +static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + +/* Switch to the guest for legacy non-VHE systems */ +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool pmu_switch_needed; + u64 exit_code; + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + pmr_sync(); + } + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + + __sysreg_save_state_nvhe(host_ctxt); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + __sysreg_save_state_nvhe(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __hyp_vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state_nvhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); + + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + + return exit_code; +} + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; + unsigned long str_va; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(host_ctxt); + } + + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); + + __hyp_do_panic(str_va, + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c new file mode 100644 index 000000000000..88a25fc8fcd3 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * Non-VHE: Both host and guest must save everything. + */ + +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} + +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} + +void __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index fb5c0be33223..9072e71693ba 100644 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -10,7 +10,7 @@ #include <asm/kvm_hyp.h> -void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) +void __kvm_timer_set_cntvoff(u64 cntvoff) { write_sysreg(cntvoff, cntvoff_el2); } @@ -19,7 +19,7 @@ void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +void __timer_disable_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +void __timer_enable_traps(struct kvm_vcpu *vcpu) { u64 val; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c new file mode 100644 index 000000000000..69eae608d670 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + u64 tcr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + + __load_guest_stage2(mmu); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + write_sysreg(0, vttbr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S new file mode 100644 index 000000000000..b0441dbdf68b --- /dev/null +++ b/arch/arm64/kvm/hyp/smccc_wa.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2018 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/arm-smccc.h> +#include <linux/linkage.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + /* + * This is not executed directly and is instead copied into the vectors + * by install_bp_hardening_cb(). + */ + .data + .pushsection .rodata + .global __smccc_workaround_1_smc +SYM_DATA_START(__smccc_workaround_1_smc) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ + .org 1b +SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c deleted file mode 100644 index db1c4487d95d..000000000000 --- a/arch/arm64/kvm/hyp/switch.c +++ /dev/null @@ -1,936 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/arm-smccc.h> -#include <linux/kvm_host.h> -#include <linux/types.h> -#include <linux/jump_label.h> -#include <uapi/linux/psci.h> - -#include <kvm/arm_psci.h> - -#include <asm/barrier.h> -#include <asm/cpufeature.h> -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/fpsimd.h> -#include <asm/debug-monitors.h> -#include <asm/processor.h> -#include <asm/thread_info.h> - -/* Check whether the FP regs were dirtied while in the host-side run loop: */ -static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) -{ - /* - * When the system doesn't support FP/SIMD, we cannot rely on - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an - * abort on the very first access to FP and thus we should never - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always - * trap the accesses. - */ - if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_FP_HOST); - - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); -} - -/* Save the 32-bit only FPSIMD system register state */ -static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) -{ - if (!vcpu_el1_is_32bit(vcpu)) - return; - - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); -} - -static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) -{ - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to - * it will cause an exception. - */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { - write_sysreg(1 << 30, fpexc32_el2); - isb(); - } -} - -static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) -{ - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ - write_sysreg(1 << 15, hstr_el2); - - /* - * Make sure we trap PMU access from EL0 to EL2. Also sanitize - * PMSELR_EL0 to make sure it never contains the cycle - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at - * EL1 instead of being trapped to EL2. - */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); -} - -static void __hyp_text __deactivate_traps_common(void) -{ - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); -} - -static void activate_traps_vhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (update_fp_enabled(vcpu)) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - val &= ~CPACR_EL1_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} -NOKPROBE_SYMBOL(activate_traps_vhe); - -static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - __activate_traps_common(vcpu); - - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cptr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; - - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * configured and enabled. We can now restore the guest's S1 - * configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } -} - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) -{ - u64 hcr = vcpu->arch.hcr_el2; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) - hcr |= HCR_TVM; - - write_sysreg(hcr, hcr_el2); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (has_vhe()) - activate_traps_vhe(vcpu); - else - __activate_traps_nvhe(vcpu); -} - -static void deactivate_traps_vhe(void) -{ - extern char vectors[]; /* kernel exception vectors */ - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL2/EL0 translation regime used by - * the host. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); -} -NOKPROBE_SYMBOL(deactivate_traps_vhe); - -static void __hyp_text __deactivate_traps_nvhe(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * Set the TCR and SCTLR registers in the exact opposite - * sequence as __activate_traps_nvhe (first prevent walks, - * then force the MMU on). A generous sprinkling of isb() - * ensure that things happen in this exact order. - */ - val = read_sysreg_el1(SYS_TCR); - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); - isb(); - val = read_sysreg_el1(SYS_SCTLR); - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); - isb(); - } - - __deactivate_traps_common(); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - - write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); -} - -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -{ - /* - * If we pended a virtual abort, preserve it until it gets - * cleared. See D1.14.3 (Virtual Interrupts) for details, but - * the crucial bit is "On taking a vSError interrupt, - * HCR_EL2.VSE is cleared to 0." - */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; - } - - if (has_vhe()) - deactivate_traps_vhe(); - else - __deactivate_traps_nvhe(); -} - -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) -{ - __activate_traps_common(vcpu); -} - -void deactivate_traps_vhe_put(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); -} - -static void __hyp_text __activate_vm(struct kvm *kvm) -{ - __load_guest_stage2(kvm); -} - -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) -{ - write_sysreg(0, vttbr_el2); -} - -/* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -/* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) -{ - u8 ec; - u64 esr; - u64 hpfar, far; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; - return true; -} - -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) -{ - bool vhe, sve_guest, sve_host; - u8 hsr_ec; - - if (!system_supports_fpsimd()) - return false; - - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; - } else { - sve_guest = false; - sve_host = false; - vhe = has_vhe(); - } - - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) - return false; - - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) - return false; - - /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - - if (sve_guest) - reg |= CPACR_EL1_ZEN; - - write_sysreg(reg, cpacr_el1); - } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } - - isb(); - - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). - */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - - if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); - } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); - } - - /* Skip restoring fpexc32 for AArch64 guests */ - if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); - - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; - - return true; -} - -static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) -{ - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); - int rt = kvm_vcpu_sys_get_rt(vcpu); - u64 val = vcpu_get_reg(vcpu, rt); - - /* - * The normal sysreg handling code expects to see the traps, - * let's not do anything here. - */ - if (vcpu->arch.hcr_el2 & HCR_TVM) - return false; - - switch (sysreg) { - case SYS_SCTLR_EL1: - write_sysreg_el1(val, SYS_SCTLR); - break; - case SYS_TTBR0_EL1: - write_sysreg_el1(val, SYS_TTBR0); - break; - case SYS_TTBR1_EL1: - write_sysreg_el1(val, SYS_TTBR1); - break; - case SYS_TCR_EL1: - write_sysreg_el1(val, SYS_TCR); - break; - case SYS_ESR_EL1: - write_sysreg_el1(val, SYS_ESR); - break; - case SYS_FAR_EL1: - write_sysreg_el1(val, SYS_FAR); - break; - case SYS_AFSR0_EL1: - write_sysreg_el1(val, SYS_AFSR0); - break; - case SYS_AFSR1_EL1: - write_sysreg_el1(val, SYS_AFSR1); - break; - case SYS_MAIR_EL1: - write_sysreg_el1(val, SYS_MAIR); - break; - case SYS_AMAIR_EL1: - write_sysreg_el1(val, SYS_AMAIR); - break; - case SYS_CONTEXTIDR_EL1: - write_sysreg_el1(val, SYS_CONTEXTIDR); - break; - default: - return false; - } - - __kvm_skip_instr(vcpu); - return true; -} - -static bool __hyp_text esr_is_ptrauth_trap(u32 esr) -{ - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - -static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *ctxt; - u64 val; - - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) - return false; - - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - - vcpu_ptrauth_enable(vcpu); - - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); - - return true; -} - -/* - * Return true when we were able to fixup the guest exit and should return to - * the guest, false when we should restore the host state and return to the - * main run loop. - */ -static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - - /* - * We're using the raw exception code in order to only process - * the trap if no SError is pending. We will come back to the - * same PC once the SError has been injected, and replay the - * trapping instruction. - */ - if (*exit_code != ARM_EXCEPTION_TRAP) - goto exit; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - return true; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. - */ - if (__hyp_handle_fpsimd(vcpu)) - return true; - - if (__hyp_handle_ptrauth(vcpu)) - return true; - - if (!__populate_fault_info(vcpu)) - return true; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - } - -exit: - /* Return to the host kernel and handle the exit */ - return false; -} - -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - -/** - * Disable host events, enable guest events - */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - -/* Switch to the guest for VHE systems running in EL2 */ -static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - u64 exit_code; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - sysreg_save_host_state_vhe(host_ctxt); - - /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). - */ - __activate_vm(vcpu->kvm); - __activate_traps(vcpu); - - sysreg_restore_guest_state_vhe(guest_ctxt); - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - sysreg_save_guest_state_vhe(guest_ctxt); - - __deactivate_traps(vcpu); - - sysreg_restore_host_state_vhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - __debug_switch_to_host(vcpu); - - return exit_code; -} -NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); - -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - int ret; - - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); - - ret = __kvm_vcpu_run_vhe(vcpu); - - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); - - return ret; -} - -/* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool pmu_switch_needed; - u64 exit_code; - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - pmr_sync(); - } - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); - - __sysreg_save_state_nvhe(host_ctxt); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - * - * Also, and in order to be able to deal with erratum #1319537 (A57) - * and #1319367 (A72), we must ensure that all VM-related sysreg are - * restored before we enable S2 translation. - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); - - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - __sysreg_save_state_nvhe(guest_ctxt); - __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); - __hyp_vgic_save_state(vcpu); - - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - - __sysreg_restore_state_nvhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - /* - * This must come after restoring the host sysregs, since a non-VHE - * system may enable SPE here and make use of the TTBRs. - */ - __debug_switch_to_host(vcpu); - - if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); - - /* Returning to host will clear PSR.I, remask PMR if needed */ - if (system_uses_irq_prio_masking()) - gic_write_pmr(GIC_PRIO_IRQOFF); - - return exit_code; -} - -static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; - -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *__host_ctxt) -{ - struct kvm_vcpu *vcpu; - unsigned long str_va; - - vcpu = __host_ctxt->__hyp_running_vcpu; - - if (read_sysreg(vttbr_el2)) { - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_state_nvhe(__host_ctxt); - } - - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. - */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} - -static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) -{ - struct kvm_vcpu *vcpu; - vcpu = host_ctxt->__hyp_running_vcpu; - - __deactivate_traps(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - - panic(__hyp_panic_string, - spsr, elr, - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} -NOKPROBE_SYMBOL(__hyp_call_panic_vhe); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) -{ - u64 spsr = read_sysreg_el2(SYS_SPSR); - u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); - - if (!has_vhe()) - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); - else - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); - - unreachable(); -} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c deleted file mode 100644 index cc7e957f5b2c..000000000000 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -/* - * Non-VHE: Both host and guest must save everything. - * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and - * pstate, which are handled as part of the el2 return state) on every - * switch (sp_el0 is being dealt with in the assembly code). - * tpidr_el0 and tpidrro_el0 only need to be switched when going - * to host userspace or a different VCPU. EL1 registers only need to be - * switched when potentially going to run a different VCPU. The latter two - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. - */ - -static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); -} - -static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); -} - -static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); -} - -static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) -{ - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_el1_state(ctxt); - __sysreg_save_common_state(ctxt); - __sysreg_save_user_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} - -void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); - -void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); - -static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); -} - -static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -} - -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - - if (has_vhe() || - !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } else if (!ctxt->__hyp_running_vcpu) { - /* - * Must only be done for guest registers, hence the context - * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with __activate_traps_nvhe(). - */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | - TCR_EPD1_MASK | TCR_EPD0_MASK), - SYS_TCR); - isb(); - } - - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - - if (!has_vhe() && - cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && - ctxt->__hyp_running_vcpu) { - /* - * Must only be done for host registers, hence the context - * test. Pairs with __deactivate_traps_nvhe(). - */ - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * deconfigured and disabled. We can now restore the host's - * S1 configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } - - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -} - -static void __hyp_text -__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) -{ - u64 pstate = ctxt->gp_regs.regs.pstate; - u64 mode = pstate & PSR_AA32_MODE_MASK; - - /* - * Safety check to ensure we're setting the CPU up to enter the guest - * in a less privileged mode. - * - * If we are attempting a return to EL2 or higher in AArch64 state, - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that - * we'll take an illegal exception state exception immediately after - * the ERET to the guest. Attempts to return to AArch32 Hyp will - * result in an illegal exception return because EL2's execution state - * is determined by SCR_EL3.RW. - */ - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) - pstate = PSR_MODE_EL2h | PSR_IL_BIT; - - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_el1_state(ctxt); - __sysreg_restore_common_state(ctxt); - __sysreg_restore_user_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} - -void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); - -void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); - -void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); -} - -void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); -} - -/** - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Load system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_load() function - * and loading system register state early avoids having to load them on - * every entry to the VM. - */ -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __sysreg_save_user_state(host_ctxt); - - /* - * Load guest EL1 and user state - * - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = true; - - activate_traps_vhe_load(vcpu); -} - -/** - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Save guest system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_put() function - * and deferring saving system register state until we're no longer running the - * VCPU avoids having to save them on every exit from the VM. - */ -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); - - __sysreg_save_el1_state(guest_ctxt); - __sysreg_save_user_state(guest_ctxt); - __sysreg32_save_state(vcpu); - - /* Restore host user state */ - __sysreg_restore_user_state(host_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = false; -} - -void __hyp_text __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c deleted file mode 100644 index d063a576d511..000000000000 --- a/arch/arm64/kvm/hyp/tlb.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/irqflags.h> - -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/tlbflush.h> - -struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; -}; - -static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - u64 val; - - local_irq_save(cxt->flags); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* - * For CPUs that are affected by ARM errata 1165522 or 1530923, - * we cannot trust stage-1 to be in a correct state at that - * point. Since we do not want to force a full load of the - * vcpu state, we prevent the EL1 page-table walker to - * allocate new TLBs. This is done by setting the EPD bits - * in the TCR_EL1 register. We also need to prevent it to - * allocate IPA->PA walks, so we enable the S1 MMU... - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); - val |= SCTLR_ELx_M; - write_sysreg_el1(val, SYS_SCTLR); - } - - /* - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and - * most TLB operations target EL2/EL0. In order to affect the - * guest TLBs (EL1/EL0), we need to change one of these two - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so - * let's flip TGE before executing the TLB operation. - * - * ARM erratum 1165522 requires some special handling (again), - * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already - * has an ISB in order to deal with this. - */ - __load_guest_stage2(kvm); - val = read_sysreg(hcr_el2); - val &= ~HCR_TGE; - write_sysreg(val, hcr_el2); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - isb(); - } - - /* __load_guest_stage2() includes an ISB for the workaround. */ - __load_guest_stage2(kvm); - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); -} - -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_guest_vhe(kvm, cxt); - else - __tlb_switch_to_guest_nvhe(kvm, cxt); -} - -static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - /* - * We're done with the TLB operation, let's restore the host's - * view of HCR_EL2. - */ - write_sysreg(0, vttbr_el2); - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - isb(); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); - } - - local_irq_restore(cxt->flags); -} - -static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - write_sysreg(0, vttbr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - } -} - -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_host_vhe(kvm, cxt); - else - __tlb_switch_to_host_nvhe(kvm, cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - ipa >>= 12; - __tlbi(ipas2e1is, ipa); - - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb(ish); - __tlbi(vmalle1is); - dsb(ish); - isb(); - - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. - */ - if (!has_vhe() && icache_is_vpipt()) - __flush_icache_all(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalls12e1is); - dsb(ish); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); - struct tlb_inv_context cxt; - - /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalle1); - dsb(nsh); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_flush_vm_context(void) -{ - dsb(ishst); - __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - - dsb(ish); -} diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 4f3a087e36d5..bd1bab551d48 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -13,7 +13,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +static bool __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); @@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) * 0: Not a GICV access * -1: Illegal GICV access successfully performed */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct vgic_dist *vgic = &kvm->arch.vgic; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 10ed539835c1..5a0073511efb 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -16,7 +16,7 @@ #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) -static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +static u64 __gic_v3_get_lr(unsigned int lr) { switch (lr & 0xf) { case 0: @@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) unreachable(); } -static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +static void __gic_v3_set_lr(u64 val, int lr) { switch (lr & 0xf) { case 0: @@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +static void __vgic_v3_write_ap0rn(u32 val, int n) { switch (n) { case 0: @@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) } } -static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +static void __vgic_v3_write_ap1rn(u32 val, int n) { switch (n) { case 0: @@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) } } -static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +static u32 __vgic_v3_read_ap0rn(int n) { u32 val; @@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n) return val; } -static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +static u32 __vgic_v3_read_ap1rn(int n) { u32 val; @@ -194,7 +194,7 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n) return val; } -void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; @@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; int i; @@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) { /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a @@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) { u64 val; @@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_init_lrs(void) +void __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); int i; @@ -403,30 +403,30 @@ void __hyp_text __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +u64 __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -u64 __hyp_text __vgic_v3_read_vmcr(void) +u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } -static int __hyp_text __vgic_v3_bpr_min(void) +static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } -static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; return crm != 8; @@ -434,9 +434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) #define GICv3_IDLE_PRIORITY 0xff -static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, - u32 vmcr, - u64 *lr_val) +static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; u8 priority = GICv3_IDLE_PRIORITY; @@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } -static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, - int intid, u64 *lr_val) +static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; int i; @@ -494,7 +493,7 @@ static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, return -1; } -static int __hyp_text __vgic_v3_get_highest_active_priority(void) +static int __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; } -static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; @@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) * Convert a priority to a preemption level, taking the relevant BPR * into account by zeroing the sub-priority bits. */ -static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) { unsigned int bpr; @@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) * matter what the guest does with its BPR, we can always set/get the * same value of a priority. */ -static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) { u8 pre, ap; u32 val; @@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } -static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +static int __vgic_v3_clear_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; u8 lr_prio, pmr; @@ -661,7 +660,7 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } -static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +static void __vgic_v3_clear_active_lr(int lr, u64 lr_val) { lr_val &= ~ICH_LR_ACTIVE_BIT; if (lr_val & ICH_LR_HW) { @@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) __gic_v3_set_lr(lr_val, lr); } -static void __hyp_text __vgic_v3_bump_eoicount(void) +static void __vgic_v3_bump_eoicount(void) { u32 hcr; @@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void) write_gicreg(hcr, ICH_HCR_EL2); } -static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -707,7 +705,7 @@ static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); } -static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); } -static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); } -static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); } -static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min() - 1; @@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); @@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val; @@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val = vcpu_get_reg(vcpu, rt); @@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int __vgic_v3_write_ap1rn(val, n); } -static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; int lr, lr_grp, grp; @@ -915,16 +906,14 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } -static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vmcr &= ICH_VMCR_PMR_MASK; vmcr >>= ICH_VMCR_PMR_SHIFT; vcpu_set_reg(vcpu, rt, vmcr); } -static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = __vgic_v3_get_highest_active_priority(); vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vtr, val; @@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; u32 esr; @@ -992,7 +978,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) bool is_read; u32 sysreg; - esr = kvm_vcpu_get_hsr(vcpu); + esr = kvm_vcpu_get_esr(vcpu); if (vcpu_mode_is_32bit(vcpu)) { if (!kvm_condition_valid(vcpu)) { __kvm_skip_instr(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile new file mode 100644 index 000000000000..461e97c375cc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_VHE_HYPERVISOR__ +ccflags-y := -D__KVM_VHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c new file mode 100644 index 000000000000..f1e2e5a00933 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/debug-sr.h> + +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c new file mode 100644 index 000000000000..c52d714e0d75 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/switch.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~CPACR_EL1_ZEN; + + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + + val |= CPTR_EL2_TAM; + + if (update_fp_enabled(vcpu)) { + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } else { + val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cpacr_el1); + + write_sysreg(kvm_get_hyp_vector(), vbar_el1); +} +NOKPROBE_SYMBOL(__activate_traps); + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + extern char vectors[]; /* kernel exception vectors */ + + ___deactivate_traps(vcpu); + + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by + * the host. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); + write_sysreg(vectors, vbar_el1); +} +NOKPROBE_SYMBOL(__deactivate_traps); + +void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +{ + __activate_traps_common(vcpu); +} + +void deactivate_traps_vhe_put(void) +{ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); + + __deactivate_traps_common(); +} + +/* Switch to the guest for VHE systems running in EL2 */ +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + sysreg_save_host_state_vhe(host_ctxt); + + /* + * ARM erratum 1165522 requires us to configure both stage 1 and + * stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. + * + * We have already configured the guest's stage 1 translation in + * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm + * before __activate_traps, because __activate_vm configures + * stage 2 translation, and __activate_traps clear HCR_EL2.TGE + * (among other things). + */ + __activate_vm(vcpu->arch.hw_mmu); + __activate_traps(vcpu); + + sysreg_restore_guest_state_vhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + sysreg_save_guest_state_vhe(guest_ctxt); + + __deactivate_traps(vcpu); + + sysreg_restore_host_state_vhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + __debug_switch_to_host(vcpu); + + return exit_code; +} +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); + + return ret; +} + +static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, + struct kvm_cpu_context *host_ctxt) +{ + struct kvm_vcpu *vcpu; + vcpu = host_ctxt->__hyp_running_vcpu; + + __deactivate_traps(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); + + panic(__hyp_panic_string, + spsr, elr, + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); +} +NOKPROBE_SYMBOL(__hyp_call_panic); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + + __hyp_call_panic(spsr, elr, par, host_ctxt); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c new file mode 100644 index 000000000000..996471e4c138 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and + * pstate, which are handled as part of the el2 return state) on every + * switch (sp_el0 is being dealt with in the assembly code). + * tpidr_el0 and tpidrro_el0 only need to be switched when going + * to host userspace or a different VCPU. EL1 registers only need to be + * switched when potentially going to run a different VCPU. The latter two + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. + */ + +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); + +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); + +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); + +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); + +/** + * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __sysreg_save_user_state(host_ctxt); + + /* + * Load guest EL1 and user state + * + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_user_state(guest_ctxt); + __sysreg_restore_el1_state(guest_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = true; + + activate_traps_vhe_load(vcpu); +} + +/** + * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. + */ +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + deactivate_traps_vhe_put(); + + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); + __sysreg32_save_state(vcpu); + + /* Restore host user state */ + __sysreg_restore_user_state(host_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = false; +} diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c new file mode 100644 index 000000000000..4cda674a8be6 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <asm/kvm_hyp.h> + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c new file mode 100644 index 000000000000..fd7895945bbc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/irqflags.h> + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + unsigned long flags; + u64 tcr; + u64 sctlr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + u64 val; + + local_irq_save(cxt->flags); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that + * point. Since we do not want to force a full load of the + * vcpu state, we prevent the EL1 page-table walker to + * allocate new TLBs. This is done by setting the EPD bits + * in the TCR_EL1 register. We also need to prevent it to + * allocate IPA->PA walks, so we enable the S1 MMU... + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + } + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + * + * ARM erratum 1165522 requires some special handling (again), + * as we need to make sure both stages of translation are in + * place before clearing TGE. __load_guest_stage2() already + * has an ISB in order to deal with this. + */ + __load_guest_stage2(mmu); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + isb(); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Restore the registers to what they were */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + } + + local_irq_restore(cxt->flags); +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} |