Diffstat (limited to 'arch/arm64/kvm/hyp')
27 files changed, 3512 insertions, 1629 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ea710f674cb6..f54f0e89a71c 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,28 +3,12 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) - -KVM=../../../../virt/kvm - -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o - -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o -obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += entry.o -obj-$(CONFIG_KVM_ARM_HOST) += switch.o -obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o -obj-$(CONFIG_KVM_ARM_HOST) += tlb.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +incdir := $(srctree)/$(src)/include +subdir-asflags-y := -I$(incdir) +subdir-ccflags-y := -I$(incdir) \ + -fno-stack-protector \ + -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) + +obj-$(CONFIG_KVM) += vhe/ nvhe/ +obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c new file mode 100644 index 000000000000..ae56d8a4b382 --- /dev/null +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_esr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. 
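+ * ITSTATE is rebuilt here from CPSR[15:10] (IT[7:2]) and CPSR[26:25] (IT[1:0]), the same mapping documented at kvm_adjust_itstate() below. + * The cc_map test at the end of this function then behaves exactly as in ARM mode: for EQ (cond 0) with only Z set, cpsr_cond is 0b0100, and + * (cc_map[0] >> 4) & 1 == (0xF0F0 >> 4) & 1 == 1, so the instruction should execute.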
*/ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + u32 pc = *vcpu_pc(vcpu); + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + pc += 2; + else + pc += 4; + + *vcpu_pc(vcpu) = pc; + + kvm_adjust_itstate(vcpu); +} diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90186cf6473e..76e7eaf4675e 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -16,12 +16,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_ptrauth.h> -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text - .pushsection .hyp.text, "ax" /* * We treat x18 as callee-saved as the host may use it as a platform @@ -198,20 +196,23 @@ alternative_endif // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. - .global abort_guest_exit_start abort_guest_exit_start: isb - .global abort_guest_exit_end abort_guest_exit_end: msr daifset, #4 // Mask aborts + ret + + _kvm_extable abort_guest_exit_start, 9997f + _kvm_extable abort_guest_exit_end, 9997f +9997: + msr daifset, #4 // Mask aborts + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) - // If the exception took place, restore the EL1 exception - // context so that we can report some information. - // Merge the exception code with the SError pending bit. - tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + // restore the EL1 exception context so that we can report some + // information. Merge the exception code with the SError pending bit. 
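+ // (x2, x3 and x4 hold the EL1 exception context being restored below.)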
msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 5b8ff517ff10..01f114aa47b0 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -9,7 +9,6 @@ #include <asm/fpsimdmacros.h> .text - .pushsection .hyp.text, "ax" SYM_FUNC_START(__fpsimd_save_state) fpsimd_save x0, 1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9c5cfb04170e..46b4dab933d0 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -15,8 +15,31 @@ #include <asm/kvm_mmu.h> #include <asm/mmu.h> +.macro save_caller_saved_regs_vect + /* x0 and x1 were saved in the vector entry */ + stp x2, x3, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x16, x17, [sp, #-16]! +.endm + +.macro restore_caller_saved_regs_vect + ldp x16, x17, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + .text - .pushsection .hyp.text, "ax" .macro do_el2_call /* @@ -40,6 +63,7 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap +#ifdef __KVM_NVHE_HYPERVISOR__ mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC @@ -74,6 +98,7 @@ el1_sync: // Guest trapped into EL2 eret sb +#endif /* __KVM_NVHE_HYPERVISOR__ */ el1_hvc_guest: /* @@ -142,13 +167,19 @@ el1_error: b __guest_exit el2_sync: - /* Check for illegal exception return, otherwise panic */ + /* Check for illegal exception return */ mrs x0, spsr_el2 + tbnz x0, #20, 1f + + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + bl kvm_unexpected_el2_exception + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect - /* if this was something else, then panic! */ - tst x0, #PSR_IL_BIT - b.eq __hyp_panic + eret +1: /* Let's attempt a recovery from the illegal exception return */ get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IL @@ -156,30 +187,18 @@ el2_sync: el2_error: - ldp x0, x1, [sp], #16 + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + + bl kvm_unexpected_el2_exception + + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect - /* - * Only two possibilities: - * 1) Either we come from the exit path, having just unmasked - * PSTATE.A: change the return code to an EL2 fault, and - * carry on, as we're already in a sane state to handle it. - * 2) Or we come from anywhere else, and that's a bug: we panic. - * - * For (1), x0 contains the original return code and x1 doesn't - * contain anything meaningful at that stage. We can reuse them - * as temp registers. - * For (2), who cares? 
- */ - mrs x0, elr_el2 - adr x1, abort_guest_exit_start - cmp x0, x1 - adr x1, abort_guest_exit_end - ccmp x0, x1, #4, ne - b.ne __hyp_panic - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) eret sb +#ifdef __KVM_NVHE_HYPERVISOR__ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -189,6 +208,7 @@ SYM_FUNC_START(__hyp_do_panic) eret sb SYM_FUNC_END(__hyp_do_panic) +#endif SYM_CODE_START(__hyp_panic) get_host_ctxt x0, x1 @@ -318,20 +338,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) - - .popsection - -SYM_CODE_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_CODE_END(__smccc_workaround_1_smc) #endif diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 0fc9872a1467..5e28ea6aa097 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -4,6 +4,9 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define __ARM64_KVM_HYP_DEBUG_SR_H__ + #include <linux/compiler.h> #include <linux/kvm_host.h> @@ -18,120 +21,75 @@ #define save_debug(ptr,reg,nr) \ switch (nr) { \ case 15: ptr[15] = read_debug(reg, 15); \ - /* Fall through */ \ + fallthrough; \ case 14: ptr[14] = read_debug(reg, 14); \ - /* Fall through */ \ + fallthrough; \ case 13: ptr[13] = read_debug(reg, 13); \ - /* Fall through */ \ + fallthrough; \ case 12: ptr[12] = read_debug(reg, 12); \ - /* Fall through */ \ + fallthrough; \ case 11: ptr[11] = read_debug(reg, 11); \ - /* Fall through */ \ + fallthrough; \ case 10: ptr[10] = read_debug(reg, 10); \ - /* Fall through */ \ + fallthrough; \ case 9: ptr[9] = read_debug(reg, 9); \ - /* Fall through */ \ + fallthrough; \ case 8: ptr[8] = read_debug(reg, 8); \ - /* Fall through */ \ + fallthrough; \ case 7: ptr[7] = read_debug(reg, 7); \ - /* Fall through */ \ + fallthrough; \ case 6: ptr[6] = read_debug(reg, 6); \ - /* Fall through */ \ + fallthrough; \ case 5: ptr[5] = read_debug(reg, 5); \ - /* Fall through */ \ + fallthrough; \ case 4: ptr[4] = read_debug(reg, 4); \ - /* Fall through */ \ + fallthrough; \ case 3: ptr[3] = read_debug(reg, 3); \ - /* Fall through */ \ + fallthrough; \ case 2: ptr[2] = read_debug(reg, 2); \ - /* Fall through */ \ + fallthrough; \ case 1: ptr[1] = read_debug(reg, 1); \ - /* Fall through */ \ + fallthrough; \ default: ptr[0] = read_debug(reg, 0); \ } #define restore_debug(ptr,reg,nr) \ switch (nr) { \ case 15: write_debug(ptr[15], reg, 15); \ - /* Fall through */ \ + fallthrough; \ case 14: write_debug(ptr[14], reg, 14); \ - /* Fall through */ \ + fallthrough; \ case 13: write_debug(ptr[13], reg, 13); \ - /* Fall through */ \ + fallthrough; \ case 12: write_debug(ptr[12], reg, 12); \ - /* Fall through */ \ + fallthrough; \ case 11: write_debug(ptr[11], reg, 11); \ - /* Fall through */ \ + fallthrough; \ case 10: write_debug(ptr[10], reg, 10); \ - /* Fall through */ \ + fallthrough; \ case 9: write_debug(ptr[9], reg, 9); \ - /* Fall through */ \ + fallthrough; \ case 8: write_debug(ptr[8], reg, 8); \ - /* Fall through */ \ + fallthrough; \ case 7: write_debug(ptr[7], reg, 7); \ - /* Fall through */ \ + fallthrough; \ case 6: 
write_debug(ptr[6], reg, 6); \ - /* Fall through */ \ + fallthrough; \ case 5: write_debug(ptr[5], reg, 5); \ - /* Fall through */ \ + fallthrough; \ case 4: write_debug(ptr[4], reg, 4); \ - /* Fall through */ \ + fallthrough; \ case 3: write_debug(ptr[3], reg, 3); \ - /* Fall through */ \ + fallthrough; \ case 2: write_debug(ptr[2], reg, 2); \ - /* Fall through */ \ + fallthrough; \ case 1: write_debug(ptr[1], reg, 1); \ - /* Fall through */ \ + fallthrough; \ default: write_debug(ptr[0], reg, 0); \ } -static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) -{ - u64 reg; - - /* Clear pmscr in case of early return */ - *pmscr_el1 = 0; - - /* SPE present on this CPU? */ - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), - ID_AA64DFR0_PMSVER_SHIFT)) - return; - - /* Yes; is it owned by EL3? */ - reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) - return; - - /* No; is the host actually using the thing? */ - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) - return; - - /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); - isb(); - - /* Now drain all buffered data to memory */ - psb_csync(); - dsb(nsh); -} - -static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) -{ - if (!pmscr_el1) - return; - - /* The host page table is installed, but not yet synchronised */ - isb(); - - /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); -} - -static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, save_debug(dbg->dbg_wcr, dbgwcr, wrps); save_debug(dbg->dbg_wvr, dbgwvr, wrps); - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } -static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -165,60 +122,47 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, restore_debug(dbg->dbg_wcr, dbgwcr, wrps); restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); } -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. 
- */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); } -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000000..0261308bf944 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/extable.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +extern const char __hyp_panic_string[]; + +extern struct exception_table_entry __start___kvm_ex_table; +extern struct exception_table_entry __stop___kvm_ex_table; + +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) +{ + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. 
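+ * Clearing KVM_ARM64_FP_HOST as well is deliberate: once _TIF_FOREIGN_FPSTATE is set, the registers no longer hold the host task's + * FP state, so there is nothing for the next FP trap to save on its behalf.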
+ */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); + + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); +} + +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static inline void __deactivate_traps_common(void) +{ + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} + +static inline void ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline void __activate_vm(struct kvm_s2_mmu *mmu) +{ + __load_guest_stage2(mmu); +} + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... 
+ */ + par = read_sysreg(par_el1); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg(par_el1); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + u64 hpfar, far; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases is true: + * 1. The fault was due to a permission fault + * 2. The processor carries erratum 834220 + * + * Therefore, for all non-S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + +/* Check for an FPSIMD/SVE trap and handle as appropriate */ +static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +{ + bool vhe, sve_guest, sve_host; + u8 esr_ec; + + if (!system_supports_fpsimd()) + return false; + + /* + * Currently, system_supports_sve() implies has_vhe(), + * so the check is redundant. However, has_vhe() can be determined + * statically and helps the compiler remove dead code. + */ + if (has_vhe() && system_supports_sve()) { + sve_guest = vcpu_has_sve(vcpu); + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; + vhe = true; + } else { + sve_guest = false; + sve_host = false; + vhe = has_vhe(); + } + + esr_ec = kvm_vcpu_trap_get_class(vcpu); + if (esr_ec != ESR_ELx_EC_FP_ASIMD && + esr_ec != ESR_ELx_EC_SVE) + return false; + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest) + if (esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + if (vhe) { + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; + + if (sve_guest) + reg |= CPACR_EL1_ZEN; + + write_sysreg(reg, cpacr_el1); + } else { + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + } + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init().
+ */ + if (sve_host) { + struct thread_struct *thread = container_of( + vcpu->arch.host_fpsimd_state, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); + } else { + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + if (sve_guest) { + sve_load_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); + } else { + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + } + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) + +static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt, APIA); + __ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to 
the + * main run loop. + */ +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + * Similarly for trapped SVE accesses. + */ + if (__hyp_handle_fpsimd(vcpu)) + return true; + + if (__hyp_handle_ptrauth(vcpu)) + return true; + + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_abt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + + goto exit; + } + } + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + } + +exit: + /* Return to the host kernel and handle the exit */ + return false; +} + +static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. 
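+ * The SMC below (ARM_SMCCC_ARCH_WORKAROUND_2 with arg 1) re-enables it before we return to the host.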
+ */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + +static inline void __kvm_unexpected_el2_exception(void) +{ + unsigned long addr, fixup; + struct kvm_cpu_context *host_ctxt; + struct exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = hyp_symbol_addr(__start___kvm_ex_table); + end = hyp_symbol_addr(__stop___kvm_ex_table); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + hyp_panic(host_ctxt); +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000000..7a986030145f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); +} + +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); +} + +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); +} + +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); +} + 
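+/* + * The remaining restore helpers mirror their save counterparts above; the per-mode backends compose them as needed (see e.g. + * __sysreg_restore_state_nvhe() in nvhe/sysreg-sr.c). + */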
+static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); +} + +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). + */ + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } + + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); +} + +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. 
+ */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); +} + +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); + + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); +} + +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); + + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile new file mode 100644 index 000000000000..aef76487edc2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_NVHE_HYPERVISOR__ +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o + +obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) +extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) + +$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) +$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE + $(call if_changed_rule,as_o_S) +$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE + $(call if_changed,hypcopy) + +# Disable reordering functions by GCC (enabled at -O2). +# This pass puts functions into '.text.*' sections to aid the linker +# in optimizing ELF layout. See HYPCOPY comment below for more info. +ccflags-y += $(call cc-option,-fno-reorder-functions) + +# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names +# and relevant ELF section names to avoid clashes with VHE code/data. +# +# Hyp code is assumed to be in the '.text' section of the input object +# files (with the exception of specialized sections such as +# '.hyp.idmap.text'). This assumption may be broken by a compiler that +# divides code into sections like '.text.unlikely' so as to optimize +# ELF layout. HYPCOPY checks that no such sections exist in the input +# using `objdump`, otherwise they would be linked together with other +# kernel code and not memory-mapped correctly at runtime. 
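+# +# For example, hyp_panic() built in this directory becomes __kvm_nvhe_hyp_panic in the final object, so it can never collide with the +# host/VHE symbol of the same name.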
+quiet_cmd_hypcopy = HYPCOPY $@ + cmd_hypcopy = \ + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ + +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + +# KVM nVHE code is run at a different exception level with a different memory map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# Skip objtool checking for this directory because nVHE code is compiled with +# non-standard build rules. +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c new file mode 100644 index 000000000000..91a711aa8382 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/debug-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static void __debug_save_spe(u64 *pmscr_el1) +{ + u64 reg; + + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) + return; + + /* No; is the host actually using the thing?
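+ * (PMBLIMITR_EL1.E is the profiling buffer enable bit.)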
*/ + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static void __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + /* Disable and flush SPE data generation */ + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S new file mode 100644 index 000000000000..d9434e90c06d --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sysreg.h> +#include <asm/virt.h> + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +SYM_CODE_START(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP pgd + * x1: HYP stack + * x2: HYP vectors + * x3: per-CPU offset + */ +__do_hyp_init: + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.lo __kvm_handle_stub_hvc + + phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif + msr ttbr0_el2, x4 + + mrs x4, tcr_el1 + mov_q x5, TCR_EL2_MASK + and x4, x4, x5 + mov x5, #TCR_EL2_RES1 + orr x4, x4, x5 + + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + ldr_l x5, idmap_t0sz + bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + + /* + * Set the PS bits in TCR_EL2. 
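+ * tcr_compute_pa_size reads the implemented PA range from ID_AA64MMFR0_EL1 and programs it into the PS field.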
+ */ + tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 + + msr tcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + /* Invalidate the stale TLBs from Bootloader */ + tlbi alle2 + dsb sy + + /* + * Preserve all the RES1 bits while setting the default flags, + * as well as the EE bit on BE. Drop the A flag since the compiler + * is allowed to generate unaligned accesses. + */ + mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) +CPU_BE( orr x4, x4, #SCTLR_ELx_EE) +alternative_if ARM64_HAS_ADDRESS_AUTH + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + orr x4, x4, x5 +alternative_else_nop_endif + msr sctlr_el2, x4 + isb + + /* Set the stack and new vectors */ + kern_hyp_va x1 + mov sp, x1 + msr vbar_el2, x2 + + /* Set tpidr_el2 for use by HYP */ + msr tpidr_el2, x3 + + /* Hello, World! */ + eret +SYM_CODE_END(__kvm_hyp_init) + +SYM_CODE_START(__kvm_handle_stub_hvc) + cmp x0, #HVC_SOFT_RESTART + b.ne 1f + + /* This is where we're about to jump, staying at EL2 */ + msr elr_el2, x1 + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) + msr spsr_el2, x0 + + /* Shuffle the arguments, and don't come back */ + mov x0, x2 + mov x1, x3 + mov x2, x4 + b reset + +1: cmp x0, #HVC_RESET_VECTORS + b.ne 1f + + /* + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. + */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ + mrs x5, sctlr_el2 + mov_q x6, SCTLR_ELx_FLAGS + bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround + msr sctlr_el2, x5 + isb + + /* Install stub vectors */ + adr_l x5, __hyp_stub_vectors + msr vbar_el2, x5 + eret + +1: /* Bad stub call */ + mov_q x0, HVC_STUB_ERR + eret + +SYM_CODE_END(__kvm_handle_stub_hvc) + + .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c new file mode 100644 index 000000000000..0970442d2dbc --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/switch.h> +#include <hyp/sysreg-sr.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + if (!update_fp_enabled(vcpu)) { + val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cptr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. 
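+ * This pairs with __sysreg_restore_el1_state(), which wrote the guest's TCR_EL1 with the EPD bits set so that no S1 walks could start + * before this point.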
+ */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } +} + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u64 mdcr_el2; + + ___deactivate_traps(vcpu); + + mdcr_el2 = read_sysreg(mdcr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence to __activate_traps (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensures that things happen in this exact order. + */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + + __deactivate_traps_common(); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static void __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/* Restore VGICv3 state on non-VHE systems */ +static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/** + * Disable host events, enable guest events + */ +static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + +/* Switch to the guest for legacy non-VHE systems */ +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool pmu_switch_needed; + u64 exit_code; + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal interrupts of lower priority to the CPU, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this.
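+ * PMR is therefore set to GIC_PRIO_IRQON (with GIC_PRIO_PSR_I_SET) below, so pending interrupts can still be signalled and force an + * exit from the guest.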
+ */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + pmr_sync(); + } + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + + __sysreg_save_state_nvhe(host_ctxt); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + __sysreg_save_state_nvhe(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __hyp_vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state_nvhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); + + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + + return exit_code; +} + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; + unsigned long str_va; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(host_ctxt); + } + + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); + + __hyp_do_panic(str_va, + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); + unreachable(); +} + +asmlinkage void kvm_unexpected_el2_exception(void) +{ + return __kvm_unexpected_el2_exception(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c new file mode 100644 index 000000000000..88a25fc8fcd3 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * Non-VHE: Both host and guest must save everything. 
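+ * Host and guest share the EL1 register state here, so each world switch is a full save/restore.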
+ */ + +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} + +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} + +void __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c new file mode 100644 index 000000000000..9072e71693ba --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __timer_disable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __timer_enable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c new file mode 100644 index 000000000000..b15d65a42042 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + u64 tcr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + + /* + * __load_guest_stage2() includes an ISB only when the AT + * workaround is applied. Take care of the opposite condition, + * ensuring that we always have an ISB, but not two ISBs back + * to back. 
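+ * (The ALTERNATIVE that follows thus patches in an isb when the workaround is not applied, and a nop when it is.)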
+ */ + __load_guest_stage2(mmu); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + write_sysreg(0, vttbr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. 
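+ * (a stale line tagged with an old VMID could otherwise be hit again
+ * once that VMID is recycled for a different guest)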
+ * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S new file mode 100644 index 000000000000..b0441dbdf68b --- /dev/null +++ b/arch/arm64/kvm/hyp/smccc_wa.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2018 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/arm-smccc.h> +#include <linux/linkage.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + /* + * This is not executed directly and is instead copied into the vectors + * by install_bp_hardening_cb(). + */ + .data + .pushsection .rodata + .global __smccc_workaround_1_smc +SYM_DATA_START(__smccc_workaround_1_smc) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ + .org 1b +SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c deleted file mode 100644 index 8a1e81a400e0..000000000000 --- a/arch/arm64/kvm/hyp/switch.c +++ /dev/null @@ -1,875 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/arm-smccc.h> -#include <linux/kvm_host.h> -#include <linux/types.h> -#include <linux/jump_label.h> -#include <uapi/linux/psci.h> - -#include <kvm/arm_psci.h> - -#include <asm/barrier.h> -#include <asm/cpufeature.h> -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/fpsimd.h> -#include <asm/debug-monitors.h> -#include <asm/processor.h> -#include <asm/thread_info.h> - -/* Check whether the FP regs were dirtied while in the host-side run loop: */ -static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) -{ - /* - * When the system doesn't support FP/SIMD, we cannot rely on - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an - * abort on the very first access to FP and thus we should never - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always - * trap the accesses. - */ - if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_FP_HOST); - - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); -} - -/* Save the 32-bit only FPSIMD system register state */ -static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) -{ - if (!vcpu_el1_is_32bit(vcpu)) - return; - - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); -} - -static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) -{ - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to - * it will cause an exception. 
- */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { - write_sysreg(1 << 30, fpexc32_el2); - isb(); - } -} - -static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) -{ - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ - write_sysreg(1 << 15, hstr_el2); - - /* - * Make sure we trap PMU access from EL0 to EL2. Also sanitize - * PMSELR_EL0 to make sure it never contains the cycle - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at - * EL1 instead of being trapped to EL2. - */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); -} - -static void __hyp_text __deactivate_traps_common(void) -{ - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); -} - -static void activate_traps_vhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (update_fp_enabled(vcpu)) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - val &= ~CPACR_EL1_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} -NOKPROBE_SYMBOL(activate_traps_vhe); - -static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - __activate_traps_common(vcpu); - - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cptr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; - - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * configured and enabled. We can now restore the guest's S1 - * configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } -} - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) -{ - u64 hcr = vcpu->arch.hcr_el2; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) - hcr |= HCR_TVM; - - write_sysreg(hcr, hcr_el2); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (has_vhe()) - activate_traps_vhe(vcpu); - else - __activate_traps_nvhe(vcpu); -} - -static void deactivate_traps_vhe(void) -{ - extern char vectors[]; /* kernel exception vectors */ - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL2/EL0 translation regime used by - * the host. 
- */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); - - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); -} -NOKPROBE_SYMBOL(deactivate_traps_vhe); - -static void __hyp_text __deactivate_traps_nvhe(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { - u64 val; - - /* - * Set the TCR and SCTLR registers in the exact opposite - * sequence as __activate_traps_nvhe (first prevent walks, - * then force the MMU on). A generous sprinkling of isb() - * ensure that things happen in this exact order. - */ - val = read_sysreg_el1(SYS_TCR); - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); - isb(); - val = read_sysreg_el1(SYS_SCTLR); - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); - isb(); - } - - __deactivate_traps_common(); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - - write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); -} - -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -{ - /* - * If we pended a virtual abort, preserve it until it gets - * cleared. See D1.14.3 (Virtual Interrupts) for details, but - * the crucial bit is "On taking a vSError interrupt, - * HCR_EL2.VSE is cleared to 0." - */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; - } - - if (has_vhe()) - deactivate_traps_vhe(); - else - __deactivate_traps_nvhe(); -} - -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) -{ - __activate_traps_common(vcpu); -} - -void deactivate_traps_vhe_put(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); -} - -static void __hyp_text __activate_vm(struct kvm *kvm) -{ - __load_guest_stage2(kvm); -} - -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) -{ - write_sysreg(0, vttbr_el2); -} - -/* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(vcpu); - __vgic_v3_deactivate_traps(vcpu); - } -} - -/* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(vcpu); - __vgic_v3_restore_state(vcpu); - } -} - -static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... 
- */ - par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) -{ - u8 ec; - u64 esr; - u64 hpfar, far; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; - return true; -} - -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) -{ - bool vhe, sve_guest, sve_host; - u8 hsr_ec; - - if (!system_supports_fpsimd()) - return false; - - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; - } else { - sve_guest = false; - sve_host = false; - vhe = has_vhe(); - } - - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) - return false; - - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) - return false; - - /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - - if (sve_guest) - reg |= CPACR_EL1_ZEN; - - write_sysreg(reg, cpacr_el1); - } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } - - isb(); - - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). 
- */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - - if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); - } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); - } - - /* Skip restoring fpexc32 for AArch64 guests */ - if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); - - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; - - return true; -} - -static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) -{ - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); - int rt = kvm_vcpu_sys_get_rt(vcpu); - u64 val = vcpu_get_reg(vcpu, rt); - - /* - * The normal sysreg handling code expects to see the traps, - * let's not do anything here. - */ - if (vcpu->arch.hcr_el2 & HCR_TVM) - return false; - - switch (sysreg) { - case SYS_SCTLR_EL1: - write_sysreg_el1(val, SYS_SCTLR); - break; - case SYS_TTBR0_EL1: - write_sysreg_el1(val, SYS_TTBR0); - break; - case SYS_TTBR1_EL1: - write_sysreg_el1(val, SYS_TTBR1); - break; - case SYS_TCR_EL1: - write_sysreg_el1(val, SYS_TCR); - break; - case SYS_ESR_EL1: - write_sysreg_el1(val, SYS_ESR); - break; - case SYS_FAR_EL1: - write_sysreg_el1(val, SYS_FAR); - break; - case SYS_AFSR0_EL1: - write_sysreg_el1(val, SYS_AFSR0); - break; - case SYS_AFSR1_EL1: - write_sysreg_el1(val, SYS_AFSR1); - break; - case SYS_MAIR_EL1: - write_sysreg_el1(val, SYS_MAIR); - break; - case SYS_AMAIR_EL1: - write_sysreg_el1(val, SYS_AMAIR); - break; - case SYS_CONTEXTIDR_EL1: - write_sysreg_el1(val, SYS_CONTEXTIDR); - break; - default: - return false; - } - - __kvm_skip_instr(vcpu); - return true; -} - -/* - * Return true when we were able to fixup the guest exit and should return to - * the guest, false when we should restore the host state and return to the - * main run loop. - */ -static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - - /* - * We're using the raw exception code in order to only process - * the trap if no SError is pending. We will come back to the - * same PC once the SError has been injected, and replay the - * trapping instruction. - */ - if (*exit_code != ARM_EXCEPTION_TRAP) - goto exit; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - return true; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. 
- */ - if (__hyp_handle_fpsimd(vcpu)) - return true; - - if (!__populate_fault_info(vcpu)) - return true; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - } - -exit: - /* Return to the host kernel and handle the exit */ - return false; -} - -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - -/** - * Disable host events, enable guest events - */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - -/* Switch to the guest for VHE systems running in EL2 */ -static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - u64 exit_code; - - host_ctxt = vcpu->arch.host_cpu_context; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - sysreg_save_host_state_vhe(host_ctxt); - - /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. 
We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). - */ - __activate_vm(vcpu->kvm); - __activate_traps(vcpu); - - sysreg_restore_guest_state_vhe(guest_ctxt); - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - sysreg_save_guest_state_vhe(guest_ctxt); - - __deactivate_traps(vcpu); - - sysreg_restore_host_state_vhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - __debug_switch_to_host(vcpu); - - return exit_code; -} -NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); - -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - int ret; - - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); - - ret = __kvm_vcpu_run_vhe(vcpu); - - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); - - return ret; -} - -/* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool pmu_switch_needed; - u64 exit_code; - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - pmr_sync(); - } - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); - - __sysreg_save_state_nvhe(host_ctxt); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - * - * Also, and in order to be able to deal with erratum #1319537 (A57) - * and #1319367 (A72), we must ensure that all VM-related sysreg are - * restored before we enable S2 translation. - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); - - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! 
*/ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - __sysreg_save_state_nvhe(guest_ctxt); - __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); - __hyp_vgic_save_state(vcpu); - - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - - __sysreg_restore_state_nvhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - /* - * This must come after restoring the host sysregs, since a non-VHE - * system may enable SPE here and make use of the TTBRs. - */ - __debug_switch_to_host(vcpu); - - if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); - - /* Returning to host will clear PSR.I, remask PMR if needed */ - if (system_uses_irq_prio_masking()) - gic_write_pmr(GIC_PRIO_IRQOFF); - - return exit_code; -} - -static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; - -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *__host_ctxt) -{ - struct kvm_vcpu *vcpu; - unsigned long str_va; - - vcpu = __host_ctxt->__hyp_running_vcpu; - - if (read_sysreg(vttbr_el2)) { - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_state_nvhe(__host_ctxt); - } - - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. - */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} - -static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) -{ - struct kvm_vcpu *vcpu; - vcpu = host_ctxt->__hyp_running_vcpu; - - __deactivate_traps(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - - panic(__hyp_panic_string, - spsr, elr, - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} -NOKPROBE_SYMBOL(__hyp_call_panic_vhe); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) -{ - u64 spsr = read_sysreg_el2(SYS_SPSR); - u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); - - if (!has_vhe()) - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); - else - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); - - unreachable(); -} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c deleted file mode 100644 index 6d2df9fe0b5d..000000000000 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ /dev/null @@ -1,331 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -/* - * Non-VHE: Both host and guest must save everything. - * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and - * pstate, which are handled as part of the el2 return state) on every - * switch (sp_el0 is being dealt with in the assembly code). - * tpidr_el0 and tpidrro_el0 only need to be switched when going - * to host userspace or a different VCPU. EL1 registers only need to be - * switched when potentially going to run a different VCPU. The latter two - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. 
- */ - -static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); -} - -static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); -} - -static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); -} - -static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) -{ - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_el1_state(ctxt); - __sysreg_save_common_state(ctxt); - __sysreg_save_user_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} - -void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); - -void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); - -static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); -} - -static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -} - -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - - if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } else if (!ctxt->__hyp_running_vcpu) { - /* - * Must only be done for guest registers, hence the context - * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with __activate_traps_nvhe(). 
- */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | - TCR_EPD1_MASK | TCR_EPD0_MASK), - SYS_TCR); - isb(); - } - - write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && - ctxt->__hyp_running_vcpu) { - /* - * Must only be done for host registers, hence the context - * test. Pairs with __deactivate_traps_nvhe(). - */ - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * deconfigured and disabled. We can now restore the host's - * S1 configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } - - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -} - -static void __hyp_text -__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) -{ - u64 pstate = ctxt->gp_regs.regs.pstate; - u64 mode = pstate & PSR_AA32_MODE_MASK; - - /* - * Safety check to ensure we're setting the CPU up to enter the guest - * in a less privileged mode. - * - * If we are attempting a return to EL2 or higher in AArch64 state, - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that - * we'll take an illegal exception state exception immediately after - * the ERET to the guest. Attempts to return to AArch32 Hyp will - * result in an illegal exception return because EL2's execution state - * is determined by SCR_EL3.RW. 
- */ - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) - pstate = PSR_MODE_EL2h | PSR_IL_BIT; - - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_el1_state(ctxt); - __sysreg_restore_common_state(ctxt); - __sysreg_restore_user_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} - -void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); - -void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); - -void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); -} - -void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); -} - -/** - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Load system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_load() function - * and loading system register state early avoids having to load them on - * every entry to the VM. - */ -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - - if (!has_vhe()) - return; - - __sysreg_save_user_state(host_ctxt); - - /* - * Load guest EL1 and user state - * - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = true; - - activate_traps_vhe_load(vcpu); -} - -/** - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Save guest system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. 
This function is called from KVM's vcpu_put() function - * and deferring saving system register state until we're no longer running the - * VCPU avoids having to save them on every exit from the VM. - */ -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - - if (!has_vhe()) - return; - - deactivate_traps_vhe_put(); - - __sysreg_save_el1_state(guest_ctxt); - __sysreg_save_user_state(guest_ctxt); - __sysreg32_save_state(vcpu); - - /* Restore host user state */ - __sysreg_restore_user_state(host_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = false; -} - -void __hyp_text __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c deleted file mode 100644 index ceaddbe4279f..000000000000 --- a/arch/arm64/kvm/hyp/tlb.c +++ /dev/null @@ -1,241 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/irqflags.h> - -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/tlbflush.h> - -struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; -}; - -static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - u64 val; - - local_irq_save(cxt->flags); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { - /* - * For CPUs that are affected by ARM errata 1165522 or 1530923, - * we cannot trust stage-1 to be in a correct state at that - * point. Since we do not want to force a full load of the - * vcpu state, we prevent the EL1 page-table walker to - * allocate new TLBs. This is done by setting the EPD bits - * in the TCR_EL1 register. We also need to prevent it to - * allocate IPA->PA walks, so we enable the S1 MMU... - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); - val |= SCTLR_ELx_M; - write_sysreg_el1(val, SYS_SCTLR); - } - - /* - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and - * most TLB operations target EL2/EL0. In order to affect the - * guest TLBs (EL1/EL0), we need to change one of these two - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so - * let's flip TGE before executing the TLB operation. - * - * ARM erratum 1165522 requires some special handling (again), - * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already - * has an ISB in order to deal with this. - */ - __load_guest_stage2(kvm); - val = read_sysreg(hcr_el2); - val &= ~HCR_TGE; - write_sysreg(val, hcr_el2); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { - u64 val; - - /* - * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. 
- */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - isb(); - } - - __load_guest_stage2(kvm); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_guest_vhe(kvm, cxt); - else - __tlb_switch_to_guest_nvhe(kvm, cxt); -} - -static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - /* - * We're done with the TLB operation, let's restore the host's - * view of HCR_EL2. - */ - write_sysreg(0, vttbr_el2); - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - isb(); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { - /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); - } - - local_irq_restore(cxt->flags); -} - -static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - write_sysreg(0, vttbr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - } -} - -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_host_vhe(kvm, cxt); - else - __tlb_switch_to_host_nvhe(kvm, cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - ipa >>= 12; - __tlbi(ipas2e1is, ipa); - - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb(ish); - __tlbi(vmalle1is); - dsb(ish); - isb(); - - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. 
- */ - if (!has_vhe() && icache_is_vpipt()) - __flush_icache_all(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalls12e1is); - dsb(ish); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); - struct tlb_inv_context cxt; - - /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalle1); - dsb(nsh); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_flush_vm_context(void) -{ - dsb(ishst); - __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. - * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - - dsb(ish); -} diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 4f3a087e36d5..bd1bab551d48 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -13,7 +13,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +static bool __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); @@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) * 0: Not a GICV access * -1: Illegal GICV access successfully performed */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct vgic_dist *vgic = &kvm->arch.vgic; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..452f4cacd674 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,1099 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) + +static u64 __gic_v3_get_lr(unsigned int lr) +{ + switch (lr & 0xf) { + case 0: + return read_gicreg(ICH_LR0_EL2); + case 1: + return read_gicreg(ICH_LR1_EL2); + case 2: + return read_gicreg(ICH_LR2_EL2); + case 3: + return read_gicreg(ICH_LR3_EL2); + case 4: + return read_gicreg(ICH_LR4_EL2); + case 5: + return read_gicreg(ICH_LR5_EL2); + case 6: + return read_gicreg(ICH_LR6_EL2); + case 7: + return read_gicreg(ICH_LR7_EL2); + case 8: + return read_gicreg(ICH_LR8_EL2); + case 9: + return read_gicreg(ICH_LR9_EL2); + case 10: + return read_gicreg(ICH_LR10_EL2); + case 11: + return read_gicreg(ICH_LR11_EL2); + case 12: + return read_gicreg(ICH_LR12_EL2); + case 13: + return read_gicreg(ICH_LR13_EL2); + case 14: + return read_gicreg(ICH_LR14_EL2); + case 15: + return read_gicreg(ICH_LR15_EL2); + } + + unreachable(); +} + +static 
void __gic_v3_set_lr(u64 val, int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ write_gicreg(val, ICH_LR0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_LR1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_LR2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_LR3_EL2);
+ break;
+ case 4:
+ write_gicreg(val, ICH_LR4_EL2);
+ break;
+ case 5:
+ write_gicreg(val, ICH_LR5_EL2);
+ break;
+ case 6:
+ write_gicreg(val, ICH_LR6_EL2);
+ break;
+ case 7:
+ write_gicreg(val, ICH_LR7_EL2);
+ break;
+ case 8:
+ write_gicreg(val, ICH_LR8_EL2);
+ break;
+ case 9:
+ write_gicreg(val, ICH_LR9_EL2);
+ break;
+ case 10:
+ write_gicreg(val, ICH_LR10_EL2);
+ break;
+ case 11:
+ write_gicreg(val, ICH_LR11_EL2);
+ break;
+ case 12:
+ write_gicreg(val, ICH_LR12_EL2);
+ break;
+ case 13:
+ write_gicreg(val, ICH_LR13_EL2);
+ break;
+ case 14:
+ write_gicreg(val, ICH_LR14_EL2);
+ break;
+ case 15:
+ write_gicreg(val, ICH_LR15_EL2);
+ break;
+ }
+}
+
+static void __vgic_v3_write_ap0rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP0R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP0R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP0R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP0R3_EL2);
+ break;
+ }
+}
+
+static void __vgic_v3_write_ap1rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP1R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP1R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP1R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP1R3_EL2);
+ break;
+ }
+}
+
+static u32 __vgic_v3_read_ap0rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP0R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP0R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP0R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP0R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+static u32 __vgic_v3_read_ap1rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP1R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP1R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP1R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP1R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 used_lrs = cpu_if->used_lrs;
+
+ /*
+ * Make sure stores to the GIC via the memory-mapped interface
+ * are now visible to the system register interface when reading the
+ * LRs, and when reading back the VMCR on non-VHE systems.
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ dsb(sy);
+ isb();
+ }
+ }
+
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
+ int i;
+ u32 elrsr;
+
+ elrsr = read_gicreg(ICH_ELRSR_EL2);
+
+ write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
+
+ for (i = 0; i < used_lrs; i++) {
+ if (elrsr & (1 << i))
+ cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
+ else
+ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+
+ __gic_v3_set_lr(0, i);
+ }
+ }
+}
+
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 used_lrs = cpu_if->used_lrs;
+ int i;
+
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+
+ for (i = 0; i < used_lrs; i++)
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
+
+ /*
+ * Ensure that writes to the LRs, and on non-VHE systems ensure that
+ * the write to the VMCR in __vgic_v3_activate_traps(), will have
+ * reached the (re)distributors. This ensures the guest will read the
+ * correct values from the memory-mapped interface.
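+ *
+ * This only matters for the memory-mapped view: a guest running with
+ * ICC_SRE_EL1.SRE set uses the system register interface instead,
+ * hence the !vgic_sre check below.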
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ isb();
+ dsb(sy);
+ }
+ }
+}
+
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
+ * Group0 interrupt (as generated in GICv2 mode) to be
+ * delivered as a FIQ to the guest, with potentially fatal
+ * consequences. So we must make sure that ICC_SRE_EL1 has
+ * actually been programmed with the value we want before
+ * starting to mess with the rest of the GIC, and VMCR_EL2 in
+ * particular. This logic must be called before
+ * __vgic_v3_restore_state().
+ */
+ if (!cpu_if->vgic_sre) {
+ write_gicreg(0, ICC_SRE_EL1);
+ isb();
+ write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+ if (has_vhe()) {
+ /*
+ * Ensure that the write to the VMCR will have reached
+ * the (re)distributors. This ensures the guest will
+ * read the correct values from the memory-mapped
+ * interface.
+ */
+ isb();
+ dsb(sy);
+ }
+ }
+
+ /*
+ * Prevent the guest from touching the GIC system registers if
+ * SRE isn't enabled for GICv3 emulation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
+
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected.
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+}
+
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 val;
+
+ if (!cpu_if->vgic_sre) {
+ cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+ }
+
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+
+ if (!cpu_if->vgic_sre) {
+ /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
+ isb();
+ write_gicreg(1, ICC_SRE_EL1);
+ }
+
+ /*
+ * If we were trapping system registers, we enabled the VGIC even if
+ * no interrupts were being injected, and we disable it again here.
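+ * This mirrors the ICH_HCR_EL2 write in __vgic_v3_activate_traps().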
+ */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); +} + +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + fallthrough; + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + fallthrough; + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + fallthrough; + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + fallthrough; + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + fallthrough; + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + fallthrough; + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + fallthrough; + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + fallthrough; + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } +} + +void __vgic_v3_init_lrs(void) +{ + int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); + int i; + + for (i = 0; i <= max_lr_idx; i++) + __gic_v3_set_lr(0, i); +} + +u64 __vgic_v3_get_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +u64 __vgic_v3_read_vmcr(void) +{ + return read_gicreg(ICH_VMCR_EL2); +} + +void __vgic_v3_write_vmcr(u32 vmcr) +{ + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static int __vgic_v3_bpr_min(void) +{ + /* See Pseudocode for VPriorityGroup */ + return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); +} + +static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_esr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? 
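+ * (on the GIC, a lower numerical value means a higher priority)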
*/ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + +static int __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static unsigned int __vgic_v3_get_bpr0(u32 vmcr) +{ + return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; +} + +static unsigned int __vgic_v3_get_bpr1(u32 vmcr) +{ + unsigned int bpr; + + if (vmcr & ICH_VMCR_CBPR_MASK) { + bpr = __vgic_v3_get_bpr0(vmcr); + if (bpr < 7) + bpr++; + } else { + bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + } + + return bpr; +} + +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minimal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. + */ +static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static int __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? 
+		c1 = ap1 ? __ffs(ap1) : 32;
+
+		/* Always clear the LSB, which is the highest priority */
+		if (c0 < c1) {
+			ap0 &= ~BIT(c0);
+			__vgic_v3_write_ap0rn(ap0, i);
+			hap += c0;
+		} else {
+			ap1 &= ~BIT(c1);
+			__vgic_v3_write_ap1rn(ap1, i);
+			hap += c1;
+		}
+
+		/* Rescale to 8 bits of priority */
+		return hap << __vgic_v3_bpr_min();
+	}
+
+	return GICv3_IDLE_PRIORITY;
+}
+
+static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 lr_val;
+	u8 lr_prio, pmr;
+	int lr, grp;
+
+	grp = __vgic_v3_get_group(vcpu);
+
+	lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+	if (lr < 0)
+		goto spurious;
+
+	if (grp != !!(lr_val & ICH_LR_GROUP))
+		goto spurious;
+
+	pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+	lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+	if (pmr <= lr_prio)
+		goto spurious;
+
+	if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp))
+		goto spurious;
+
+	lr_val &= ~ICH_LR_STATE;
+	/* No active state for LPIs */
+	if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
+		lr_val |= ICH_LR_ACTIVE_BIT;
+	__gic_v3_set_lr(lr_val, lr);
+	__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
+	vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+	return;
+
+spurious:
+	vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+{
+	lr_val &= ~ICH_LR_ACTIVE_BIT;
+	if (lr_val & ICH_LR_HW) {
+		u32 pid;
+
+		pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
+		gic_write_dir(pid);
+	}
+
+	__gic_v3_set_lr(lr_val, lr);
+}
+
+static void __vgic_v3_bump_eoicount(void)
+{
+	u32 hcr;
+
+	hcr = read_gicreg(ICH_HCR_EL2);
+	hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+	write_gicreg(hcr, ICH_HCR_EL2);
+}
+
+static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 vid = vcpu_get_reg(vcpu, rt);
+	u64 lr_val;
+	int lr;
+
+	/* EOImode == 0, nothing to be done here */
+	if (!(vmcr & ICH_VMCR_EOIM_MASK))
+		return;
+
+	/* No deactivate to be performed on an LPI */
+	if (vid >= VGIC_MIN_LPI)
+		return;
+
+	lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+	if (lr == -1) {
+		__vgic_v3_bump_eoicount();
+		return;
+	}
+
+	__vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 vid = vcpu_get_reg(vcpu, rt);
+	u64 lr_val;
+	u8 lr_prio, act_prio;
+	int lr, grp;
+
+	grp = __vgic_v3_get_group(vcpu);
+
+	/* Drop priority in any case */
+	act_prio = __vgic_v3_clear_highest_active_priority();
+
+	/* If EOIing an LPI, no deactivate to be performed */
+	if (vid >= VGIC_MIN_LPI)
+		return;
+
+	/* EOImode == 1, nothing to be done here */
+	if (vmcr & ICH_VMCR_EOIM_MASK)
+		return;
+
+	lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+	if (lr == -1) {
+		__vgic_v3_bump_eoicount();
+		return;
+	}
+
+	lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+	/* If priorities or group do not match, the guest has fscked-up. */
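+	/* Ignore the EOI rather than deactivating an unrelated interrupt. */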
+	if (grp != !!(lr_val & ICH_LR_GROUP) ||
+	    __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio)
+		return;
+
+	/* Let's now perform the deactivation */
+	__vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
+}
+
+static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
+}
+
+static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 val = vcpu_get_reg(vcpu, rt);
+
+	if (val & 1)
+		vmcr |= ICH_VMCR_ENG0_MASK;
+	else
+		vmcr &= ~ICH_VMCR_ENG0_MASK;
+
+	__vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 val = vcpu_get_reg(vcpu, rt);
+
+	if (val & 1)
+		vmcr |= ICH_VMCR_ENG1_MASK;
+	else
+		vmcr &= ~ICH_VMCR_ENG1_MASK;
+
+	__vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
+}
+
+static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
+}
+
+static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 val = vcpu_get_reg(vcpu, rt);
+	u8 bpr_min = __vgic_v3_bpr_min() - 1;
+
+	/* Enforce BPR limiting */
+	if (val < bpr_min)
+		val = bpr_min;
+
+	val <<= ICH_VMCR_BPR0_SHIFT;
+	val &= ICH_VMCR_BPR0_MASK;
+	vmcr &= ~ICH_VMCR_BPR0_MASK;
+	vmcr |= val;
+
+	__vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 val = vcpu_get_reg(vcpu, rt);
+	u8 bpr_min = __vgic_v3_bpr_min();
+
+	if (vmcr & ICH_VMCR_CBPR_MASK)
+		return;
+
+	/* Enforce BPR limiting */
+	if (val < bpr_min)
+		val = bpr_min;
+
+	val <<= ICH_VMCR_BPR1_SHIFT;
+	val &= ICH_VMCR_BPR1_MASK;
+	vmcr &= ~ICH_VMCR_BPR1_MASK;
+	vmcr |= val;
+
+	__vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+	u32 val;
+
+	if (!__vgic_v3_get_group(vcpu))
+		val = __vgic_v3_read_ap0rn(n);
+	else
+		val = __vgic_v3_read_ap1rn(n);
+
+	vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+	u32 val = vcpu_get_reg(vcpu, rt);
+
+	if (!__vgic_v3_get_group(vcpu))
+		__vgic_v3_write_ap0rn(val, n);
+	else
+		__vgic_v3_write_ap1rn(val, n);
+}
+
+static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
+				 u32 vmcr, int rt)
+{
+	__vgic_v3_read_apxrn(vcpu, rt, 0);
+}
+
+static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
+				 u32 vmcr, int rt)
+{
+	__vgic_v3_read_apxrn(vcpu, rt, 1);
+}
+
+static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_read_apxrn(vcpu, rt, 2);
+}
+
+static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_read_apxrn(vcpu, rt, 3);
+}
+
+static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_write_apxrn(vcpu, rt, 0);
+}
+
+static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_write_apxrn(vcpu, rt, 1);
+}
+
+static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_write_apxrn(vcpu, rt, 2);
+}
+
+static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	__vgic_v3_write_apxrn(vcpu, rt, 3);
+}
+
+static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u64 lr_val;
+	int lr, lr_grp, grp;
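+
+	/*
+	 * Return the INTID of the highest priority pending interrupt if it
+	 * matches the accessed group (ICC_HPPIR0_EL1 vs ICC_HPPIR1_EL1),
+	 * and the spurious INTID otherwise.
+	 */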
+
+	grp = __vgic_v3_get_group(vcpu);
+
+	lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+	if (lr == -1)
+		goto spurious;
+
+	lr_grp = !!(lr_val & ICH_LR_GROUP);
+	if (lr_grp != grp)
+		lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+spurious:
+	vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+}
+
+static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	vmcr &= ICH_VMCR_PMR_MASK;
+	vmcr >>= ICH_VMCR_PMR_SHIFT;
+	vcpu_set_reg(vcpu, rt, vmcr);
+}
+
+static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 val = vcpu_get_reg(vcpu, rt);
+
+	val <<= ICH_VMCR_PMR_SHIFT;
+	val &= ICH_VMCR_PMR_MASK;
+	vmcr &= ~ICH_VMCR_PMR_MASK;
+	vmcr |= val;
+
+	write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 val = __vgic_v3_get_highest_active_priority();
+	vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 vtr, val;
+
+	vtr = read_gicreg(ICH_VTR_EL2);
+	/* PRIbits */
+	val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
+	/* IDbits */
+	val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+	/* SEIS */
+	val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
+	/* A3V */
+	val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
+	/* EOImode */
+	val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT;
+	/* CBPR */
+	val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+
+	vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+	u32 val = vcpu_get_reg(vcpu, rt);
+
+	if (val & ICC_CTLR_EL1_CBPR_MASK)
+		vmcr |= ICH_VMCR_CBPR_MASK;
+	else
+		vmcr &= ~ICH_VMCR_CBPR_MASK;
+
+	if (val & ICC_CTLR_EL1_EOImode_MASK)
+		vmcr |= ICH_VMCR_EOIM_MASK;
+	else
+		vmcr &= ~ICH_VMCR_EOIM_MASK;
+
+	write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+	int rt;
+	u32 esr;
+	u32 vmcr;
+	void (*fn)(struct kvm_vcpu *, u32, int);
+	bool is_read;
+	u32 sysreg;
+
+	esr = kvm_vcpu_get_esr(vcpu);
+	if (vcpu_mode_is_32bit(vcpu)) {
+		if (!kvm_condition_valid(vcpu)) {
+			__kvm_skip_instr(vcpu);
+			return 1;
+		}
+
+		sysreg = esr_cp15_to_sysreg(esr);
+	} else {
+		sysreg = esr_sys64_to_sysreg(esr);
+	}
+
+	is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+	switch (sysreg) {
+	case SYS_ICC_IAR0_EL1:
+	case SYS_ICC_IAR1_EL1:
+		if (unlikely(!is_read))
+			return 0;
+		fn = __vgic_v3_read_iar;
+		break;
+	case SYS_ICC_EOIR0_EL1:
+	case SYS_ICC_EOIR1_EL1:
+		if (unlikely(is_read))
+			return 0;
+		fn = __vgic_v3_write_eoir;
+		break;
+	case SYS_ICC_IGRPEN1_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_igrpen1;
+		else
+			fn = __vgic_v3_write_igrpen1;
+		break;
+	case SYS_ICC_BPR1_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_bpr1;
+		else
+			fn = __vgic_v3_write_bpr1;
+		break;
+	case SYS_ICC_AP0Rn_EL1(0):
+	case SYS_ICC_AP1Rn_EL1(0):
+		if (is_read)
+			fn = __vgic_v3_read_apxr0;
+		else
+			fn = __vgic_v3_write_apxr0;
+		break;
+	case SYS_ICC_AP0Rn_EL1(1):
+	case SYS_ICC_AP1Rn_EL1(1):
+		if (is_read)
+			fn = __vgic_v3_read_apxr1;
+		else
+			fn = __vgic_v3_write_apxr1;
+		break;
+	case SYS_ICC_AP0Rn_EL1(2):
+	case SYS_ICC_AP1Rn_EL1(2):
+		if (is_read)
+			fn = __vgic_v3_read_apxr2;
+		else
+			fn = __vgic_v3_write_apxr2;
+		break;
+	case SYS_ICC_AP0Rn_EL1(3):
+	case SYS_ICC_AP1Rn_EL1(3):
+		if (is_read)
+			fn = __vgic_v3_read_apxr3;
+		else
+			fn = __vgic_v3_write_apxr3;
+		break;
+	case SYS_ICC_HPPIR0_EL1:
+	case SYS_ICC_HPPIR1_EL1:
+		if (unlikely(!is_read))
+			return 0;
+		fn = __vgic_v3_read_hppir;
+		break;
+	case SYS_ICC_IGRPEN0_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_igrpen0;
+		else
+			fn = __vgic_v3_write_igrpen0;
+		break;
+	case SYS_ICC_BPR0_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_bpr0;
+		else
+			fn = __vgic_v3_write_bpr0;
+		break;
+	case SYS_ICC_DIR_EL1:
+		if (unlikely(is_read))
+			return 0;
+		fn = __vgic_v3_write_dir;
+		break;
+	case SYS_ICC_RPR_EL1:
+		if (unlikely(!is_read))
+			return 0;
+		fn = __vgic_v3_read_rpr;
+		break;
+	case SYS_ICC_CTLR_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_ctlr;
+		else
+			fn = __vgic_v3_write_ctlr;
+		break;
+	case SYS_ICC_PMR_EL1:
+		if (is_read)
+			fn = __vgic_v3_read_pmr;
+		else
+			fn = __vgic_v3_write_pmr;
+		break;
+	default:
+		return 0;
+	}
+
+	vmcr = __vgic_v3_read_vmcr();
+	rt = kvm_vcpu_sys_get_rt(vcpu);
+	fn(vcpu, vmcr, rt);
+
+	__kvm_skip_instr(vcpu);
+
+	return 1;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
new file mode 100644
index 000000000000..461e97c375cc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/VHE part
+#
+
+asflags-y := -D__KVM_VHE_HYPERVISOR__
+ccflags-y := -D__KVM_VHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+	 ../fpsimd.o ../hyp-entry.o
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
new file mode 100644
index 000000000000..f1e2e5a00933
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_hyp.h>
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	__debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+	__debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+	return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
new file mode 100644
index 000000000000..c1da4f86ccac
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	___activate_traps(vcpu);
+
+	val = read_sysreg(cpacr_el1);
+	val |= CPACR_EL1_TTA;
+	val &= ~CPACR_EL1_ZEN;
+
+	/*
+	 * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+	 * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+	 * except for some missing controls, such as TAM.
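+	 * (TAM traps EL0/EL1 accesses to the Activity Monitors registers.)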
+	 * In this case, CPTR_EL2.TAM has the same position with or without
+	 * VHE (HCR.E2H == 1), which allows us to use the CPTR_EL2.TAM
+	 * shift value here to trap AMU accesses.
+	 */
+
+	val |= CPTR_EL2_TAM;
+
+	if (update_fp_enabled(vcpu)) {
+		if (vcpu_has_sve(vcpu))
+			val |= CPACR_EL1_ZEN;
+	} else {
+		val &= ~CPACR_EL1_FPEN;
+		__activate_traps_fpsimd32(vcpu);
+	}
+
+	write_sysreg(val, cpacr_el1);
+
+	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+NOKPROBE_SYMBOL(__activate_traps);
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+	extern char vectors[];	/* kernel exception vectors */
+
+	___deactivate_traps(vcpu);
+
+	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
+	/*
+	 * ARM errata 1165522 and 1530923 require the actual execution of the
+	 * above before we can switch to the EL2/EL0 translation regime used by
+	 * the host.
+	 */
+	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+	write_sysreg(vectors, vbar_el1);
+}
+NOKPROBE_SYMBOL(__deactivate_traps);
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+	__activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+	u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+	mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+		    MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+		    MDCR_EL2_TPMS;
+
+	write_sysreg(mdcr_el2, mdcr_el2);
+
+	__deactivate_traps_common();
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	u64 exit_code;
+
+	host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+	host_ctxt->__hyp_running_vcpu = vcpu;
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	sysreg_save_host_state_vhe(host_ctxt);
+
+	/*
+	 * ARM erratum 1165522 requires us to configure both stage 1 and
+	 * stage 2 translation for the guest context before we clear
+	 * HCR_EL2.TGE.
+	 *
+	 * We have already configured the guest's stage 1 translation in
+	 * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm
+	 * before __activate_traps, because __activate_vm configures
+	 * stage 2 translation, and __activate_traps clears HCR_EL2.TGE
+	 * (among other things).
+	 */
+	__activate_vm(vcpu->arch.hw_mmu);
+	__activate_traps(vcpu);
+
+	sysreg_restore_guest_state_vhe(guest_ctxt);
+	__debug_switch_to_guest(vcpu);
+
+	__set_guest_arch_workaround_state(vcpu);
+
+	do {
+		/* Jump in the fire! */
+		exit_code = __guest_enter(vcpu, host_ctxt);
+
+		/* And we're baaack! */
+	} while (fixup_guest_exit(vcpu, &exit_code));
+
+	__set_host_arch_workaround_state(vcpu);
+
+	sysreg_save_guest_state_vhe(guest_ctxt);
+
+	__deactivate_traps(vcpu);
+
+	sysreg_restore_host_state_vhe(host_ctxt);
+
+	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+		__fpsimd_save_fpexc32(vcpu);
+
+	__debug_switch_to_host(vcpu);
+
+	return exit_code;
+}
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	local_daif_mask();
+
+	/*
+	 * Having IRQs masked via PMR when entering the guest means the GIC
+	 * will not signal lower-priority interrupts to the CPU, and the
+	 * only way to get out will be via guest exceptions.
+	 * Naturally, we want to avoid this.
+	 *
+	 * local_daif_mask() already sets GIC_PRIO_PSR_I_SET; we just need a
+	 * dsb to ensure that the redistributor forwards EL2 IRQs to the CPU.
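+	 * pmr_sync() provides such a barrier on systems that require it,
+	 * and is a no-op otherwise.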
+	 */
+	pmr_sync();
+
+	ret = __kvm_vcpu_run_vhe(vcpu);
+
+	/*
+	 * local_daif_restore() takes care to properly restore PSTATE.DAIF
+	 * and the GIC PMR if the host is using IRQ priorities.
+	 */
+	local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+	/*
+	 * When we exit from the guest we change a number of CPU configuration
+	 * parameters, such as traps. Make sure these changes take effect
+	 * before running the host or additional guests.
+	 */
+	isb();
+
+	return ret;
+}
+
+static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
+			     struct kvm_cpu_context *host_ctxt)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = host_ctxt->__hyp_running_vcpu;
+
+	__deactivate_traps(vcpu);
+	sysreg_restore_host_state_vhe(host_ctxt);
+
+	panic(__hyp_panic_string,
+	      spsr, elr,
+	      read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
+	      read_sysreg(hpfar_el2), par, vcpu);
+}
+NOKPROBE_SYMBOL(__hyp_call_panic);
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+	u64 spsr = read_sysreg_el2(SYS_SPSR);
+	u64 elr = read_sysreg_el2(SYS_ELR);
+	u64 par = read_sysreg(par_el1);
+
+	__hyp_call_panic(spsr, elr, par, host_ctxt);
+	unreachable();
+}
+
+asmlinkage void kvm_unexpected_el2_exception(void)
+{
+	return __kvm_unexpected_el2_exception();
+}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
new file mode 100644
index 000000000000..996471e4c138
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
+ */
+
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_save_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
+
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_save_common_state(ctxt);
+	__sysreg_save_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
+
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_restore_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
+
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_restore_common_state(ctxt);
+	__sysreg_restore_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
+
+/**
+ * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
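+ * The corresponding save is performed by kvm_vcpu_put_sysregs_vhe() below.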
+ */
+void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+	struct kvm_cpu_context *host_ctxt;
+
+	host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+	__sysreg_save_user_state(host_ctxt);
+
+	/*
+	 * Load guest EL1 and user state
+	 *
+	 * We must restore the 32-bit state before the sysregs, thanks
+	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+	 */
+	__sysreg32_restore_state(vcpu);
+	__sysreg_restore_user_state(guest_ctxt);
+	__sysreg_restore_el1_state(guest_ctxt);
+
+	vcpu->arch.sysregs_loaded_on_cpu = true;
+
+	activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+	struct kvm_cpu_context *host_ctxt;
+
+	host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+	deactivate_traps_vhe_put();
+
+	__sysreg_save_el1_state(guest_ctxt);
+	__sysreg_save_user_state(guest_ctxt);
+	__sysreg32_save_state(vcpu);
+
+	/* Restore host user state */
+	__sysreg_restore_user_state(host_ctxt);
+
+	vcpu->arch.sysregs_loaded_on_cpu = false;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c
new file mode 100644
index 000000000000..4cda674a8be6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+
+void __kvm_timer_set_cntvoff(u64 cntvoff)
+{
+	write_sysreg(cntvoff, cntvoff_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
new file mode 100644
index 000000000000..fd7895945bbc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/irqflags.h>
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+	unsigned long flags;
+	u64 tcr;
+	u64 sctlr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+				  struct tlb_inv_context *cxt)
+{
+	u64 val;
+
+	local_irq_save(cxt->flags);
+
+	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+		/*
+		 * For CPUs that are affected by ARM errata 1165522 or 1530923,
+		 * we cannot trust stage-1 to be in a correct state at that
+		 * point. Since we do not want to force a full load of the
+		 * vcpu state, we prevent the EL1 page-table walker from
+		 * allocating new TLBs. This is done by setting the EPD bits
+		 * in the TCR_EL1 register. We also need to prevent it from
+		 * allocating IPA->PA walks, so we enable the S1 MMU...
+		 */
+		val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+		val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+		write_sysreg_el1(val, SYS_TCR);
+		val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+		val |= SCTLR_ELx_M;
+		write_sysreg_el1(val, SYS_SCTLR);
+	}
+
+	/*
+	 * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+	 * most TLB operations target EL2/EL0.
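+	 * A TLBI issued at this point would therefore apply to the host's
+	 * EL2&0 regime rather than to the guest's.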
+	 * In order to affect the guest TLBs (EL1/EL0), we need to change
+	 * one of these two bits. Changing E2H is impossible (goodbye
+	 * TTBR1_EL2), so let's flip TGE before executing the TLB operation.
+	 *
+	 * ARM erratum 1165522 requires some special handling (again),
+	 * as we need to make sure both stages of translation are in
+	 * place before clearing TGE. __load_guest_stage2() already
+	 * has an ISB in order to deal with this.
+	 */
+	__load_guest_stage2(mmu);
+	val = read_sysreg(hcr_el2);
+	val &= ~HCR_TGE;
+	write_sysreg(val, hcr_el2);
+	isb();
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+	/*
+	 * We're done with the TLB operation, let's restore the host's
+	 * view of HCR_EL2.
+	 */
+	write_sysreg(0, vttbr_el2);
+	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+	isb();
+
+	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+		/* Restore the registers to what they were */
+		write_sysreg_el1(cxt->tcr, SYS_TCR);
+		write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+	}
+
+	local_irq_restore(cxt->flags);
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+			      phys_addr_t ipa, int level)
+{
+	struct tlb_inv_context cxt;
+
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	ipa >>= 12;
+	__tlbi_level(ipas2e1is, ipa, level);
+
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb(ish);
+	__tlbi(vmalle1is);
+	dsb(ish);
+	isb();
+
+	__tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+	struct tlb_inv_context cxt;
+
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	__tlbi(vmalls12e1is);
+	dsb(ish);
+	isb();
+
+	__tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+	struct tlb_inv_context cxt;
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	__tlbi(vmalle1);
+	dsb(nsh);
+	isb();
+
+	__tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+	dsb(ishst);
+	__tlbi(alle1is);
+
+	/*
+	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
+	 * is necessary across a VMID rollover.
+	 *
+	 * VPIPT caches constrain lookup and maintenance to the active VMID,
+	 * so we need to invalidate lines with a stale VMID to avoid an ABA
+	 * race after multiple rollovers.
+	 */
+	if (icache_is_vpipt())
+		asm volatile("ic ialluis");
+
+	dsb(ish);
+}
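For reference, the preemption-level bookkeeping in vgic-v3-sr.c above (__vgic_v3_pri_to_pre() and __vgic_v3_set_active_priority()) reduces to a mask and a shift. The following standalone sketch reproduces that arithmetic for one made-up example; pri_to_pre() is a local helper written for this illustration, not a kernel API, and the register values are assumptions:

/* Illustration of the vgic-v3 preemption arithmetic; not kernel code. */
#include <stdio.h>
#include <stdint.h>

/* Zero the sub-priority bits, as __vgic_v3_pri_to_pre() does. */
static uint8_t pri_to_pre(uint8_t pri, unsigned int bpr)
{
	return pri & (0xffu << bpr);
}

int main(void)
{
	unsigned int nr_pre_bits = 5;           /* ICH_VTR_EL2.PREbits + 1 (example) */
	unsigned int bpr_min = 8 - nr_pre_bits; /* cf. __vgic_v3_bpr_min() */
	uint8_t pri = 0x98;                     /* example guest priority */

	/* With the BPR at its minimum, the preemption level keeps 5 bits. */
	uint8_t pre = pri_to_pre(pri, bpr_min);

	/*
	 * Normalize with the minimal BPR and locate the active-priority
	 * bit, as in __vgic_v3_set_active_priority(): register index is
	 * ap / 32, bit within it is ap % 32.
	 */
	unsigned int ap = pre >> bpr_min;

	printf("pre=0x%02x -> ICH_AP0R%u_EL2 bit %u\n", pre, ap / 32, ap % 32);
	return 0;
}

This prints "pre=0x98 -> ICH_AP0R0_EL2 bit 19"; a Group-1 interrupt would use ICH_AP1Rn_EL2 instead, with an identical computation.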