summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm/hyp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r--arch/arm64/kvm/hyp/Makefile34
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c140
-rw-r--r--arch/arm64/kvm/hyp/entry.S19
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S1
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S86
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h (renamed from arch/arm64/kvm/hyp/debug-sr.c)152
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h542
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h193
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile62
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c77
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S172
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c277
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c46
-rw-r--r--arch/arm64/kvm/hyp/nvhe/timer-sr.c48
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c161
-rw-r--r--arch/arm64/kvm/hyp/smccc_wa.S32
-rw-r--r--arch/arm64/kvm/hyp/switch.c875
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c331
-rw-r--r--arch/arm64/kvm/hyp/tlb.c241
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c4
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c1099
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile11
-rw-r--r--arch/arm64/kvm/hyp/vhe/debug-sr.c26
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c224
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c114
-rw-r--r--arch/arm64/kvm/hyp/vhe/timer-sr.c12
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c162
27 files changed, 3512 insertions, 1629 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..f54f0e89a71c 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,28 +3,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
- $(DISABLE_STACKLEAK_PLUGIN)
-
-KVM=../../../../virt/kvm
-
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
-
-obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
-obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += switch.o
-obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
-obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-
-# KVM code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+incdir := $(srctree)/$(src)/include
+subdir-asflags-y := -I$(incdir)
+subdir-ccflags-y := -I$(incdir) \
+ -fno-stack-protector \
+ -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN)
+
+obj-$(CONFIG_KVM) += vhe/ nvhe/
+obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
new file mode 100644
index 000000000000..ae56d8a4b382
--- /dev/null
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyp portion of the (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+ 0xF0F0, /* EQ == Z set */
+ 0x0F0F, /* NE */
+ 0xCCCC, /* CS == C set */
+ 0x3333, /* CC */
+ 0xFF00, /* MI == N set */
+ 0x00FF, /* PL */
+ 0xAAAA, /* VS == V set */
+ 0x5555, /* VC */
+ 0x0C0C, /* HI == C set && Z clear */
+ 0xF3F3, /* LS == C clear || Z set */
+ 0xAA55, /* GE == (N==V) */
+ 0x55AA, /* LT == (N!=V) */
+ 0x0A05, /* GT == (!Z && (N==V)) */
+ 0xF5FA, /* LE == (Z || (N!=V)) */
+ 0xFFFF, /* AL always */
+ 0 /* NV */
+};
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr;
+ u32 cpsr_cond;
+ int cond;
+
+ /* Top two bits non-zero? Unconditional. */
+ if (kvm_vcpu_get_esr(vcpu) >> 30)
+ return true;
+
+ /* Is condition field valid? */
+ cond = kvm_vcpu_get_condition(vcpu);
+ if (cond == 0xE)
+ return true;
+
+ cpsr = *vcpu_cpsr(vcpu);
+
+ if (cond < 0) {
+ /* This can happen in Thumb mode: examine IT state. */
+ unsigned long it;
+
+ it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+ /* it == 0 => unconditional. */
+ if (it == 0)
+ return true;
+
+ /* The cond for this insn works out as the top 4 bits. */
+ cond = (it >> 4);
+ }
+
+ cpsr_cond = cpsr >> 28;
+
+ if (!((cc_map[cond] >> cpsr_cond) & 1))
+ return false;
+
+ return true;
+}
+
+/**
+ * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu: The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long itbits, cond;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_arm = !(cpsr & PSR_AA32_T_BIT);
+
+ if (is_arm || !(cpsr & PSR_AA32_IT_MASK))
+ return;
+
+ cond = (cpsr & 0xe000) >> 13;
+ itbits = (cpsr & 0x1c00) >> (10 - 2);
+ itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+ /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+ if ((itbits & 0x7) == 0)
+ itbits = cond = 0;
+ else
+ itbits = (itbits << 1) & 0x1f;
+
+ cpsr &= ~PSR_AA32_IT_MASK;
+ cpsr |= cond << 13;
+ cpsr |= (itbits & 0x1c) << (10 - 2);
+ cpsr |= (itbits & 0x3) << 25;
+ *vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ u32 pc = *vcpu_pc(vcpu);
+ bool is_thumb;
+
+ is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT);
+ if (is_thumb && !is_wide_instr)
+ pc += 2;
+ else
+ pc += 4;
+
+ *vcpu_pc(vcpu) = pc;
+
+ kvm_adjust_itstate(vcpu);
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 90186cf6473e..76e7eaf4675e 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -16,12 +16,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_ptrauth.h>
-#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
-#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8)
.text
- .pushsection .hyp.text, "ax"
/*
* We treat x18 as callee-saved as the host may use it as a platform
@@ -198,20 +196,23 @@ alternative_endif
// This is our single instruction exception window. A pending
// SError is guaranteed to occur at the earliest when we unmask
// it, and at the latest just after the ISB.
- .global abort_guest_exit_start
abort_guest_exit_start:
isb
- .global abort_guest_exit_end
abort_guest_exit_end:
msr daifset, #4 // Mask aborts
+ ret
+
+ _kvm_extable abort_guest_exit_start, 9997f
+ _kvm_extable abort_guest_exit_end, 9997f
+9997:
+ msr daifset, #4 // Mask aborts
+ mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT)
- // If the exception took place, restore the EL1 exception
- // context so that we can report some information.
- // Merge the exception code with the SError pending bit.
- tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f
+ // restore the EL1 exception context so that we can report some
+ // information. Merge the exception code with the SError pending bit.
msr elr_el2, x2
msr esr_el2, x3
msr spsr_el2, x4
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 5b8ff517ff10..01f114aa47b0 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -9,7 +9,6 @@
#include <asm/fpsimdmacros.h>
.text
- .pushsection .hyp.text, "ax"
SYM_FUNC_START(__fpsimd_save_state)
fpsimd_save x0, 1
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9c5cfb04170e..46b4dab933d0 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -15,8 +15,31 @@
#include <asm/kvm_mmu.h>
#include <asm/mmu.h>
+.macro save_caller_saved_regs_vect
+ /* x0 and x1 were saved in the vector entry */
+ stp x2, x3, [sp, #-16]!
+ stp x4, x5, [sp, #-16]!
+ stp x6, x7, [sp, #-16]!
+ stp x8, x9, [sp, #-16]!
+ stp x10, x11, [sp, #-16]!
+ stp x12, x13, [sp, #-16]!
+ stp x14, x15, [sp, #-16]!
+ stp x16, x17, [sp, #-16]!
+.endm
+
+.macro restore_caller_saved_regs_vect
+ ldp x16, x17, [sp], #16
+ ldp x14, x15, [sp], #16
+ ldp x12, x13, [sp], #16
+ ldp x10, x11, [sp], #16
+ ldp x8, x9, [sp], #16
+ ldp x6, x7, [sp], #16
+ ldp x4, x5, [sp], #16
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+.endm
+
.text
- .pushsection .hyp.text, "ax"
.macro do_el2_call
/*
@@ -40,6 +63,7 @@ el1_sync: // Guest trapped into EL2
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
+#ifdef __KVM_NVHE_HYPERVISOR__
mrs x1, vttbr_el2 // If vttbr is valid, the guest
cbnz x1, el1_hvc_guest // called HVC
@@ -74,6 +98,7 @@ el1_sync: // Guest trapped into EL2
eret
sb
+#endif /* __KVM_NVHE_HYPERVISOR__ */
el1_hvc_guest:
/*
@@ -142,13 +167,19 @@ el1_error:
b __guest_exit
el2_sync:
- /* Check for illegal exception return, otherwise panic */
+ /* Check for illegal exception return */
mrs x0, spsr_el2
+ tbnz x0, #20, 1f
+
+ save_caller_saved_regs_vect
+ stp x29, x30, [sp, #-16]!
+ bl kvm_unexpected_el2_exception
+ ldp x29, x30, [sp], #16
+ restore_caller_saved_regs_vect
- /* if this was something else, then panic! */
- tst x0, #PSR_IL_BIT
- b.eq __hyp_panic
+ eret
+1:
/* Let's attempt a recovery from the illegal exception return */
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IL
@@ -156,30 +187,18 @@ el2_sync:
el2_error:
- ldp x0, x1, [sp], #16
+ save_caller_saved_regs_vect
+ stp x29, x30, [sp, #-16]!
+
+ bl kvm_unexpected_el2_exception
+
+ ldp x29, x30, [sp], #16
+ restore_caller_saved_regs_vect
- /*
- * Only two possibilities:
- * 1) Either we come from the exit path, having just unmasked
- * PSTATE.A: change the return code to an EL2 fault, and
- * carry on, as we're already in a sane state to handle it.
- * 2) Or we come from anywhere else, and that's a bug: we panic.
- *
- * For (1), x0 contains the original return code and x1 doesn't
- * contain anything meaningful at that stage. We can reuse them
- * as temp registers.
- * For (2), who cares?
- */
- mrs x0, elr_el2
- adr x1, abort_guest_exit_start
- cmp x0, x1
- adr x1, abort_guest_exit_end
- ccmp x0, x1, #4, ne
- b.ne __hyp_panic
- mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT)
eret
sb
+#ifdef __KVM_NVHE_HYPERVISOR__
SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
@@ -189,6 +208,7 @@ SYM_FUNC_START(__hyp_do_panic)
eret
sb
SYM_FUNC_END(__hyp_do_panic)
+#endif
SYM_CODE_START(__hyp_panic)
get_host_ctxt x0, x1
@@ -318,20 +338,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs)
1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
.org 1b
SYM_CODE_END(__bp_harden_hyp_vecs)
-
- .popsection
-
-SYM_CODE_START(__smccc_workaround_1_smc)
- esb
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- smc #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
- .org 1b
-SYM_CODE_END(__smccc_workaround_1_smc)
#endif
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 0fc9872a1467..5e28ea6aa097 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -4,6 +4,9 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__
+#define __ARM64_KVM_HYP_DEBUG_SR_H__
+
#include <linux/compiler.h>
#include <linux/kvm_host.h>
@@ -18,120 +21,75 @@
#define save_debug(ptr,reg,nr) \
switch (nr) { \
case 15: ptr[15] = read_debug(reg, 15); \
- /* Fall through */ \
+ fallthrough; \
case 14: ptr[14] = read_debug(reg, 14); \
- /* Fall through */ \
+ fallthrough; \
case 13: ptr[13] = read_debug(reg, 13); \
- /* Fall through */ \
+ fallthrough; \
case 12: ptr[12] = read_debug(reg, 12); \
- /* Fall through */ \
+ fallthrough; \
case 11: ptr[11] = read_debug(reg, 11); \
- /* Fall through */ \
+ fallthrough; \
case 10: ptr[10] = read_debug(reg, 10); \
- /* Fall through */ \
+ fallthrough; \
case 9: ptr[9] = read_debug(reg, 9); \
- /* Fall through */ \
+ fallthrough; \
case 8: ptr[8] = read_debug(reg, 8); \
- /* Fall through */ \
+ fallthrough; \
case 7: ptr[7] = read_debug(reg, 7); \
- /* Fall through */ \
+ fallthrough; \
case 6: ptr[6] = read_debug(reg, 6); \
- /* Fall through */ \
+ fallthrough; \
case 5: ptr[5] = read_debug(reg, 5); \
- /* Fall through */ \
+ fallthrough; \
case 4: ptr[4] = read_debug(reg, 4); \
- /* Fall through */ \
+ fallthrough; \
case 3: ptr[3] = read_debug(reg, 3); \
- /* Fall through */ \
+ fallthrough; \
case 2: ptr[2] = read_debug(reg, 2); \
- /* Fall through */ \
+ fallthrough; \
case 1: ptr[1] = read_debug(reg, 1); \
- /* Fall through */ \
+ fallthrough; \
default: ptr[0] = read_debug(reg, 0); \
}
#define restore_debug(ptr,reg,nr) \
switch (nr) { \
case 15: write_debug(ptr[15], reg, 15); \
- /* Fall through */ \
+ fallthrough; \
case 14: write_debug(ptr[14], reg, 14); \
- /* Fall through */ \
+ fallthrough; \
case 13: write_debug(ptr[13], reg, 13); \
- /* Fall through */ \
+ fallthrough; \
case 12: write_debug(ptr[12], reg, 12); \
- /* Fall through */ \
+ fallthrough; \
case 11: write_debug(ptr[11], reg, 11); \
- /* Fall through */ \
+ fallthrough; \
case 10: write_debug(ptr[10], reg, 10); \
- /* Fall through */ \
+ fallthrough; \
case 9: write_debug(ptr[9], reg, 9); \
- /* Fall through */ \
+ fallthrough; \
case 8: write_debug(ptr[8], reg, 8); \
- /* Fall through */ \
+ fallthrough; \
case 7: write_debug(ptr[7], reg, 7); \
- /* Fall through */ \
+ fallthrough; \
case 6: write_debug(ptr[6], reg, 6); \
- /* Fall through */ \
+ fallthrough; \
case 5: write_debug(ptr[5], reg, 5); \
- /* Fall through */ \
+ fallthrough; \
case 4: write_debug(ptr[4], reg, 4); \
- /* Fall through */ \
+ fallthrough; \
case 3: write_debug(ptr[3], reg, 3); \
- /* Fall through */ \
+ fallthrough; \
case 2: write_debug(ptr[2], reg, 2); \
- /* Fall through */ \
+ fallthrough; \
case 1: write_debug(ptr[1], reg, 1); \
- /* Fall through */ \
+ fallthrough; \
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
-{
- u64 reg;
-
- /* Clear pmscr in case of early return */
- *pmscr_el1 = 0;
-
- /* SPE present on this CPU? */
- if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
- ID_AA64DFR0_PMSVER_SHIFT))
- return;
-
- /* Yes; is it owned by EL3? */
- reg = read_sysreg_s(SYS_PMBIDR_EL1);
- if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
- return;
-
- /* No; is the host actually using the thing? */
- reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
- if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
- return;
-
- /* Yes; save the control register and disable data generation */
- *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
- write_sysreg_s(0, SYS_PMSCR_EL1);
- isb();
-
- /* Now drain all buffered data to memory */
- psb_csync();
- dsb(nsh);
-}
-
-static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
-{
- if (!pmscr_el1)
- return;
-
- /* The host page table is installed, but not yet synchronised */
- isb();
-
- /* Re-enable data generation */
- write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
-}
-
-static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
save_debug(dbg->dbg_wcr, dbgwcr, wrps);
save_debug(dbg->dbg_wvr, dbgwvr, wrps);
- ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+ ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1);
}
-static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -165,60 +122,47 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
- write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1);
}
-void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- /*
- * Non-VHE: Disable and flush SPE data generation
- * VHE: The vcpu can run, but it can't hide.
- */
- if (!has_vhe())
- __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, host_dbg, host_ctxt);
- __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_save_state(host_dbg, host_ctxt);
+ __debug_restore_state(guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!has_vhe())
- __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, guest_dbg, guest_ctxt);
- __debug_restore_state(vcpu, host_dbg, host_ctxt);
+ __debug_save_state(guest_dbg, guest_ctxt);
+ __debug_restore_state(host_dbg, host_ctxt);
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
-u32 __hyp_text __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
+#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
new file mode 100644
index 000000000000..0261308bf944
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SWITCH_H__
+#define __ARM64_KVM_HYP_SWITCH_H__
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/extable.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+extern const char __hyp_panic_string[];
+
+extern struct exception_table_entry __start___kvm_ex_table;
+extern struct exception_table_entry __stop___kvm_ex_table;
+
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When the system doesn't support FP/SIMD, we cannot rely on
+ * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+ * abort on the very first access to FP and thus we should never
+ * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+ * trap the accesses.
+ */
+ if (!system_supports_fpsimd() ||
+ vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
+
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+}
+
+/* Save the 32-bit only FPSIMD system register state */
+static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
+}
+
+static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
+ */
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+ write_sysreg(1 << 30, fpexc32_el2);
+ isb();
+ }
+}
+
+static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+ write_sysreg(1 << 15, hstr_el2);
+
+ /*
+ * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+ * PMSELR_EL0 to make sure it never contains the cycle
+ * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+ * EL1 instead of being trapped to EL2.
+ */
+ write_sysreg(0, pmselr_el0);
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static inline void __deactivate_traps_common(void)
+{
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
+
+static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+}
+
+static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If we pended a virtual abort, preserve it until it gets
+ * cleared. See D1.14.3 (Virtual Interrupts) for details, but
+ * the crucial bit is "On taking a vSError interrupt,
+ * HCR_EL2.VSE is cleared to 0."
+ */
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
+}
+
+static inline void __activate_vm(struct kvm_s2_mmu *mmu)
+{
+ __load_guest_stage2(mmu);
+}
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ if (!__kvm_at("s1e1r", far))
+ tmp = read_sysreg(par_el1);
+ else
+ tmp = SYS_PAR_EL1_F; /* back to the guest */
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u8 ec;
+ u64 esr;
+ u64 hpfar, far;
+
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+{
+ bool vhe, sve_guest, sve_host;
+ u8 esr_ec;
+
+ if (!system_supports_fpsimd())
+ return false;
+
+ /*
+ * Currently system_supports_sve() currently implies has_vhe(),
+ * so the check is redundant. However, has_vhe() can be determined
+ * statically and helps the compiler remove dead code.
+ */
+ if (has_vhe() && system_supports_sve()) {
+ sve_guest = vcpu_has_sve(vcpu);
+ sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+ vhe = true;
+ } else {
+ sve_guest = false;
+ sve_host = false;
+ vhe = has_vhe();
+ }
+
+ esr_ec = kvm_vcpu_trap_get_class(vcpu);
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
+ esr_ec != ESR_ELx_EC_SVE)
+ return false;
+
+ /* Don't handle SVE traps for non-SVE vcpus here: */
+ if (!sve_guest)
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD)
+ return false;
+
+ /* Valid trap. Switch the context: */
+
+ if (vhe) {
+ u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+ if (sve_guest)
+ reg |= CPACR_EL1_ZEN;
+
+ write_sysreg(reg, cpacr_el1);
+ } else {
+ write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+ cptr_el2);
+ }
+
+ isb();
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+ /*
+ * In the SVE case, VHE is assumed: it is enforced by
+ * Kconfig and kvm_arch_init().
+ */
+ if (sve_host) {
+ struct thread_struct *thread = container_of(
+ vcpu->arch.host_fpsimd_state,
+ struct thread_struct, uw.fpsimd_state);
+
+ sve_save_state(sve_pffr(thread),
+ &vcpu->arch.host_fpsimd_state->fpsr);
+ } else {
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ }
+
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ }
+
+ if (sve_guest) {
+ sve_load_state(vcpu_sve_pffr(vcpu),
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
+ write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
+ } else {
+ __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ }
+
+ /* Skip restoring fpexc32 for AArch64 guests */
+ if (!(read_sysreg(hcr_el2) & HCR_RW))
+ write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
+
+ vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+ return true;
+}
+
+static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
+static inline bool esr_is_ptrauth_trap(u32 esr)
+{
+ u32 ec = ESR_ELx_EC(esr);
+
+ if (ec == ESR_ELx_EC_PAC)
+ return true;
+
+ if (ec != ESR_ELx_EC_SYS64)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_APIAKEYLO_EL1:
+ case SYS_APIAKEYHI_EL1:
+ case SYS_APIBKEYLO_EL1:
+ case SYS_APIBKEYHI_EL1:
+ case SYS_APDAKEYLO_EL1:
+ case SYS_APDAKEYHI_EL1:
+ case SYS_APDBKEYLO_EL1:
+ case SYS_APDBKEYHI_EL1:
+ case SYS_APGAKEYLO_EL1:
+ case SYS_APGAKEYHI_EL1:
+ return true;
+ }
+
+ return false;
+}
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+} while(0)
+
+static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *ctxt;
+ u64 val;
+
+ if (!vcpu_has_ptrauth(vcpu) ||
+ !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return false;
+
+ ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __ptrauth_save_key(ctxt, APIA);
+ __ptrauth_save_key(ctxt, APIB);
+ __ptrauth_save_key(ctxt, APDA);
+ __ptrauth_save_key(ctxt, APDB);
+ __ptrauth_save_key(ctxt, APGA);
+
+ vcpu_ptrauth_enable(vcpu);
+
+ val = read_sysreg(hcr_el2);
+ val |= (HCR_API | HCR_APK);
+ write_sysreg(val, hcr_el2);
+
+ return true;
+}
+
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+ /*
+ * We're using the raw exception code in order to only process
+ * the trap if no SError is pending. We will come back to the
+ * same PC once the SError has been injected, and replay the
+ * trapping instruction.
+ */
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ * Similarly for trapped SVE accesses.
+ */
+ if (__hyp_handle_fpsimd(vcpu))
+ return true;
+
+ if (__hyp_handle_ptrauth(vcpu))
+ return true;
+
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
+ kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_abt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+
+ goto exit;
+ }
+ }
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+ }
+
+exit:
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_final_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
+static inline void __kvm_unexpected_el2_exception(void)
+{
+ unsigned long addr, fixup;
+ struct kvm_cpu_context *host_ctxt;
+ struct exception_table_entry *entry, *end;
+ unsigned long elr_el2 = read_sysreg(elr_el2);
+
+ entry = hyp_symbol_addr(__start___kvm_ex_table);
+ end = hyp_symbol_addr(__stop___kvm_ex_table);
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+
+ while (entry < end) {
+ addr = (unsigned long)&entry->insn + entry->insn;
+ fixup = (unsigned long)&entry->fixup + entry->fixup;
+
+ if (addr != elr_el2) {
+ entry++;
+ continue;
+ }
+
+ write_sysreg(fixup, elr_el2);
+ return;
+ }
+
+ hyp_panic(host_ctxt);
+}
+
+#endif /* __ARM64_KVM_HYP_SWITCH_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
new file mode 100644
index 000000000000..7a986030145f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__
+#define __ARM64_KVM_HYP_SYSREG_SR_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+}
+
+static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0);
+ ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
+}
+
+static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
+ ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
+ ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
+ ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
+ ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
+ ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
+ ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
+ ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
+ ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
+ ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR);
+ ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR);
+ ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR);
+ ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
+ ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
+ ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
+ ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1);
+ ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
+ ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
+ ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+}
+
+static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+}
+
+static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+}
+
+static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
+}
+
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1);
+
+ if (has_vhe() ||
+ !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with nVHE's __activate_traps().
+ */
+ write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
+ write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+
+ if (!has_vhe() &&
+ cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with nVHE's __deactivate_traps().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+
+ write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+}
+
+static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ u64 pstate = ctxt->regs.pstate;
+ u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+ /*
+ * Safety check to ensure we're setting the CPU up to enter the guest
+ * in a less privileged mode.
+ *
+ * If we are attempting a return to EL2 or higher in AArch64 state,
+ * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+ * we'll take an illegal exception state exception immediately after
+ * the ERET to the guest. Attempts to return to AArch32 Hyp will
+ * result in an illegal exception return because EL2's execution state
+ * is determined by SCR_EL3.RW.
+ */
+ if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+ pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
+ write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
+ write_sysreg_el2(pstate, SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+}
+
+static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt);
+ vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und);
+ vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
+ vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);
+
+ __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
+ __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
+}
+
+static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt);
+ write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und);
+ write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq);
+ write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
+ write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
+}
+
+#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
new file mode 100644
index 000000000000..aef76487edc2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_NVHE_HYPERVISOR__
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
+
+obj-y := $(patsubst %.o,%.hyp.o,$(obj-y))
+extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y))
+
+$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE
+ $(call if_changed_rule,as_o_S)
+$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE
+ $(call if_changed,hypcopy)
+
+# Disable reordering functions by GCC (enabled at -O2).
+# This pass puts functions into '.text.*' sections to aid the linker
+# in optimizing ELF layout. See HYPCOPY comment below for more info.
+ccflags-y += $(call cc-option,-fno-reorder-functions)
+
+# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
+# and relevant ELF section names to avoid clashes with VHE code/data.
+#
+# Hyp code is assumed to be in the '.text' section of the input object
+# files (with the exception of specialized sections such as
+# '.hyp.idmap.text'). This assumption may be broken by a compiler that
+# divides code into sections like '.text.unlikely' so as to optimize
+# ELF layout. HYPCOPY checks that no such sections exist in the input
+# using `objdump`, otherwise they would be linked together with other
+# kernel code and not memory-mapped correctly at runtime.
+quiet_cmd_hypcopy = HYPCOPY $@
+ cmd_hypcopy = \
+ if $(OBJDUMP) -h $< | grep -F '.text.'; then \
+ echo "$@: function reordering not supported in nVHE hyp code" >&2; \
+ /bin/false; \
+ fi; \
+ $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \
+ --rename-section=.text=.hyp.text \
+ $< $@
+
+# Remove ftrace and Shadow Call Stack CFLAGS.
+# This is equivalent to the 'notrace' and '__noscs' annotations.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
+# KVM nVHE code is run at a different exception code with a different map, so
+# compiler instrumentation that inserts callbacks or checks into the code may
+# cause crashes. Just disable it.
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+# Skip objtool checking for this directory because nVHE code is compiled with
+# non-standard build rules.
+OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
new file mode 100644
index 000000000000..91a711aa8382
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static void __debug_save_spe(u64 *pmscr_el1)
+{
+ u64 reg;
+
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
+ return;
+
+ /* Yes; is it owned by EL3? */
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
+ return;
+
+ /* No; is the host actually using the thing? */
+ reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
+ return;
+
+ /* Yes; save the control register and disable data generation */
+ *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+ write_sysreg_s(0, SYS_PMSCR_EL1);
+ isb();
+
+ /* Now drain all buffered data to memory */
+ psb_csync();
+ dsb(nsh);
+}
+
+static void __debug_restore_spe(u64 pmscr_el1)
+{
+ if (!pmscr_el1)
+ return;
+
+ /* The host page table is installed, but not yet synchronised */
+ isb();
+
+ /* Re-enable data generation */
+ write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ /* Disable and flush SPE data generation */
+ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
new file mode 100644
index 000000000000..d9434e90c06d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+ .text
+ .pushsection .hyp.idmap.text, "ax"
+
+ .align 11
+
+SYM_CODE_START(__kvm_hyp_init)
+ ventry __invalid // Synchronous EL2t
+ ventry __invalid // IRQ EL2t
+ ventry __invalid // FIQ EL2t
+ ventry __invalid // Error EL2t
+
+ ventry __invalid // Synchronous EL2h
+ ventry __invalid // IRQ EL2h
+ ventry __invalid // FIQ EL2h
+ ventry __invalid // Error EL2h
+
+ ventry __do_hyp_init // Synchronous 64-bit EL1
+ ventry __invalid // IRQ 64-bit EL1
+ ventry __invalid // FIQ 64-bit EL1
+ ventry __invalid // Error 64-bit EL1
+
+ ventry __invalid // Synchronous 32-bit EL1
+ ventry __invalid // IRQ 32-bit EL1
+ ventry __invalid // FIQ 32-bit EL1
+ ventry __invalid // Error 32-bit EL1
+
+__invalid:
+ b .
+
+ /*
+ * x0: HYP pgd
+ * x1: HYP stack
+ * x2: HYP vectors
+ * x3: per-CPU offset
+ */
+__do_hyp_init:
+ /* Check for a stub HVC call */
+ cmp x0, #HVC_STUB_HCALL_NR
+ b.lo __kvm_handle_stub_hvc
+
+ phys_to_ttbr x4, x0
+alternative_if ARM64_HAS_CNP
+ orr x4, x4, #TTBR_CNP_BIT
+alternative_else_nop_endif
+ msr ttbr0_el2, x4
+
+ mrs x4, tcr_el1
+ mov_q x5, TCR_EL2_MASK
+ and x4, x4, x5
+ mov x5, #TCR_EL2_RES1
+ orr x4, x4, x5
+
+ /*
+ * The ID map may be configured to use an extended virtual address
+ * range. This is only the case if system RAM is out of range for the
+ * currently configured page size and VA_BITS, in which case we will
+ * also need the extended virtual range for the HYP ID map, or we won't
+ * be able to enable the EL2 MMU.
+ *
+ * However, at EL2, there is only one TTBR register, and we can't switch
+ * between translation tables *and* update TCR_EL2.T0SZ at the same
+ * time. Bottom line: we need to use the extended range with *both* our
+ * translation tables.
+ *
+ * So use the same T0SZ value we use for the ID map.
+ */
+ ldr_l x5, idmap_t0sz
+ bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
+ /*
+ * Set the PS bits in TCR_EL2.
+ */
+ tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
+
+ msr tcr_el2, x4
+
+ mrs x4, mair_el1
+ msr mair_el2, x4
+ isb
+
+ /* Invalidate the stale TLBs from Bootloader */
+ tlbi alle2
+ dsb sy
+
+ /*
+ * Preserve all the RES1 bits while setting the default flags,
+ * as well as the EE bit on BE. Drop the A flag since the compiler
+ * is allowed to generate unaligned accesses.
+ */
+ mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
+CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ orr x4, x4, x5
+alternative_else_nop_endif
+ msr sctlr_el2, x4
+ isb
+
+ /* Set the stack and new vectors */
+ kern_hyp_va x1
+ mov sp, x1
+ msr vbar_el2, x2
+
+ /* Set tpidr_el2 for use by HYP */
+ msr tpidr_el2, x3
+
+ /* Hello, World! */
+ eret
+SYM_CODE_END(__kvm_hyp_init)
+
+SYM_CODE_START(__kvm_handle_stub_hvc)
+ cmp x0, #HVC_SOFT_RESTART
+ b.ne 1f
+
+ /* This is where we're about to jump, staying at EL2 */
+ msr elr_el2, x1
+ mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h)
+ msr spsr_el2, x0
+
+ /* Shuffle the arguments, and don't come back */
+ mov x0, x2
+ mov x1, x3
+ mov x2, x4
+ b reset
+
+1: cmp x0, #HVC_RESET_VECTORS
+ b.ne 1f
+
+ /*
+ * Set the HVC_RESET_VECTORS return code before entering the common
+ * path so that we do not clobber x0-x2 in case we are coming via
+ * HVC_SOFT_RESTART.
+ */
+ mov x0, xzr
+reset:
+ /* Reset kvm back to the hyp stub. */
+ mrs x5, sctlr_el2
+ mov_q x6, SCTLR_ELx_FLAGS
+ bic x5, x5, x6 // Clear SCTL_M and etc
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x5
+ isb
+
+ /* Install stub vectors */
+ adr_l x5, __hyp_stub_vectors
+ msr vbar_el2, x5
+ eret
+
+1: /* Bad stub call */
+ mov_q x0, HVC_STUB_ERR
+ eret
+
+SYM_CODE_END(__kvm_handle_stub_hvc)
+
+ .popsection
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
new file mode 100644
index 000000000000..0970442d2dbc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+#include <hyp/sysreg-sr.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
+ if (!update_fp_enabled(vcpu)) {
+ val |= CPTR_EL2_TFP;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cptr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+}
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 mdcr_el2;
+
+ ___deactivate_traps(vcpu);
+
+ mdcr_el2 = read_sysreg(mdcr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
+ __deactivate_traps_common();
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+ mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static void __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/**
+ * Disable host events, enable guest events
+ */
+static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+ return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool pmu_switch_needed;
+ u64 exit_code;
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ */
+ if (system_uses_irq_prio_masking()) {
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ pmr_sync();
+ }
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ __sysreg_save_state_nvhe(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+ __timer_disable_traps(vcpu);
+ __hyp_vgic_save_state(vcpu);
+
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+
+ __sysreg_restore_state_nvhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ /*
+ * This must come after restoring the host sysregs, since a non-VHE
+ * system may enable SPE here and make use of the TTBRs.
+ */
+ __debug_switch_to_host(vcpu);
+
+ if (pmu_switch_needed)
+ __pmu_switch_to_host(host_ctxt);
+
+ /* Returning to host will clear PSR.I, remask PMR if needed */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQOFF);
+
+ return exit_code;
+}
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+ struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
+ unsigned long str_va;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(host_ctxt);
+ }
+
+ /*
+ * Force the panic string to be loaded from the literal pool,
+ * making sure it is a kernel address and not a PC-relative
+ * reference.
+ */
+ asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
+
+ __hyp_do_panic(str_va,
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+ unreachable();
+}
+
+asmlinkage void kvm_unexpected_el2_exception(void)
+{
+ return __kvm_unexpected_el2_exception();
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
new file mode 100644
index 000000000000..88a25fc8fcd3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * Non-VHE: Both host and guest must save everything.
+ */
+
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+
+void __kvm_enable_ssbs(void)
+{
+ u64 tmp;
+
+ asm volatile(
+ "mrs %0, sctlr_el2\n"
+ "orr %0, %0, %1\n"
+ "msr sctlr_el2, %0"
+ : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
new file mode 100644
index 000000000000..9072e71693ba
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <clocksource/arm_arch_timer.h>
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_hyp.h>
+
+void __kvm_timer_set_cntvoff(u64 cntvoff)
+{
+ write_sysreg(cntvoff, cntvoff_el2);
+}
+
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
+void __timer_disable_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
+}
+
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
+void __timer_enable_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ /*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
new file mode 100644
index 000000000000..b15d65a42042
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ u64 tcr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
+ /*
+ * __load_guest_stage2() includes an ISB only when the AT
+ * workaround is applied. Take care of the opposite condition,
+ * ensuring that we always have an ISB, but not two ISBs back
+ * to back.
+ */
+ __load_guest_stage2(mmu);
+ asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /*
+ * If the host is running at EL1 and we have a VPIPT I-cache,
+ * then we must perform I-cache maintenance at EL2 in order for
+ * it to have an effect on the guest. Since the guest cannot hit
+ * I-cache lines allocated with a different VMID, we don't need
+ * to worry about junk out of guest reset (we nuke the I-cache on
+ * VMID rollover), but we do need to be careful when remapping
+ * executable pages for the same guest. This can happen when KSM
+ * takes a CoW fault on an executable page, copies the page into
+ * a page that was previously mapped in the guest and then needs
+ * to invalidate the guest view of the I-cache for that page
+ * from EL1. To solve this, we invalidate the entire I-cache when
+ * unmapping a page from a guest if we have a VPIPT I-cache but
+ * the host is running at EL1. As above, we could do better if
+ * we had the VA.
+ *
+ * The moral of this story is: if you have a VPIPT I-cache, then
+ * you should be running with VHE enabled.
+ */
+ if (icache_is_vpipt())
+ __flush_icache_all();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S
new file mode 100644
index 000000000000..b0441dbdf68b
--- /dev/null
+++ b/arch/arm64/kvm/hyp/smccc_wa.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2018 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/linkage.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+ /*
+ * This is not executed directly and is instead copied into the vectors
+ * by install_bp_hardening_cb().
+ */
+ .data
+ .pushsection .rodata
+ .global __smccc_workaround_1_smc
+SYM_DATA_START(__smccc_workaround_1_smc)
+ esb
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
+ .org 1b
+SYM_DATA_END(__smccc_workaround_1_smc)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
deleted file mode 100644
index 8a1e81a400e0..000000000000
--- a/arch/arm64/kvm/hyp/switch.c
+++ /dev/null
@@ -1,875 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <uapi/linux/psci.h>
-
-#include <kvm/arm_psci.h>
-
-#include <asm/barrier.h>
-#include <asm/cpufeature.h>
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/fpsimd.h>
-#include <asm/debug-monitors.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
-{
- /*
- * When the system doesn't support FP/SIMD, we cannot rely on
- * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
- * abort on the very first access to FP and thus we should never
- * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
- * trap the accesses.
- */
- if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_FP_HOST);
-
- return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
-}
-
-/* Save the 32-bit only FPSIMD system register state */
-static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
-{
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-}
-
-static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
-{
- /*
- * We are about to set CPTR_EL2.TFP to trap all floating point
- * register accesses to EL2, however, the ARM ARM clearly states that
- * traps are only taken to EL2 if the operation would not otherwise
- * trap to EL1. Therefore, always make sure that for 32-bit guests,
- * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
- * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
- * it will cause an exception.
- */
- if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
- write_sysreg(1 << 30, fpexc32_el2);
- isb();
- }
-}
-
-static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
-{
- /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
- write_sysreg(1 << 15, hstr_el2);
-
- /*
- * Make sure we trap PMU access from EL0 to EL2. Also sanitize
- * PMSELR_EL0 to make sure it never contains the cycle
- * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
- * EL1 instead of being trapped to EL2.
- */
- write_sysreg(0, pmselr_el0);
- write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-}
-
-static void __hyp_text __deactivate_traps_common(void)
-{
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
-}
-
-static void activate_traps_vhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_ZEN;
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
-
- val |= CPTR_EL2_TAM;
-
- if (update_fp_enabled(vcpu)) {
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- } else {
- val &= ~CPACR_EL1_FPEN;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-NOKPROBE_SYMBOL(activate_traps_vhe);
-
-static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- __activate_traps_common(vcpu);
-
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
- if (!update_fp_enabled(vcpu)) {
- val |= CPTR_EL2_TFP;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cptr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
- struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
-
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * configured and enabled. We can now restore the guest's S1
- * configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-}
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
-{
- u64 hcr = vcpu->arch.hcr_el2;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
- hcr |= HCR_TVM;
-
- write_sysreg(hcr, hcr_el2);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- if (has_vhe())
- activate_traps_vhe(vcpu);
- else
- __activate_traps_nvhe(vcpu);
-}
-
-static void deactivate_traps_vhe(void)
-{
- extern char vectors[]; /* kernel exception vectors */
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
- /*
- * ARM errata 1165522 and 1530923 require the actual execution of the
- * above before we can switch to the EL2/EL0 translation regime used by
- * the host.
- */
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
-
- write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
- write_sysreg(vectors, vbar_el1);
-}
-NOKPROBE_SYMBOL(deactivate_traps_vhe);
-
-static void __hyp_text __deactivate_traps_nvhe(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
- u64 val;
-
- /*
- * Set the TCR and SCTLR registers in the exact opposite
- * sequence as __activate_traps_nvhe (first prevent walks,
- * then force the MMU on). A generous sprinkling of isb()
- * ensure that things happen in this exact order.
- */
- val = read_sysreg_el1(SYS_TCR);
- write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
- isb();
- val = read_sysreg_el1(SYS_SCTLR);
- write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
- isb();
- }
-
- __deactivate_traps_common();
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK;
- mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
-
- write_sysreg(mdcr_el2, mdcr_el2);
- write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
- write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
-}
-
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-{
- /*
- * If we pended a virtual abort, preserve it until it gets
- * cleared. See D1.14.3 (Virtual Interrupts) for details, but
- * the crucial bit is "On taking a vSError interrupt,
- * HCR_EL2.VSE is cleared to 0."
- */
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
- }
-
- if (has_vhe())
- deactivate_traps_vhe();
- else
- __deactivate_traps_nvhe();
-}
-
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
-{
- __activate_traps_common(vcpu);
-}
-
-void deactivate_traps_vhe_put(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
-
- write_sysreg(mdcr_el2, mdcr_el2);
-
- __deactivate_traps_common();
-}
-
-static void __hyp_text __activate_vm(struct kvm *kvm)
-{
- __load_guest_stage2(kvm);
-}
-
-static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-{
- write_sysreg(0, vttbr_el2);
-}
-
-/* Save VGICv3 state on non-VHE systems */
-static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_save_state(vcpu);
- __vgic_v3_deactivate_traps(vcpu);
- }
-}
-
-/* Restore VGICv3 state on non_VEH systems */
-static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_activate_traps(vcpu);
- __vgic_v3_restore_state(vcpu);
- }
-}
-
-static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg(par_el1);
- asm volatile("at s1e1r, %0" : : "r" (far));
- isb();
-
- tmp = read_sysreg(par_el1);
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
-{
- u8 ec;
- u64 esr;
- u64 hpfar, far;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- vcpu->arch.fault.far_el2 = far;
- vcpu->arch.fault.hpfar_el2 = hpfar;
- return true;
-}
-
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
-{
- bool vhe, sve_guest, sve_host;
- u8 hsr_ec;
-
- if (!system_supports_fpsimd())
- return false;
-
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- vhe = true;
- } else {
- sve_guest = false;
- sve_host = false;
- vhe = has_vhe();
- }
-
- hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
- hsr_ec != ESR_ELx_EC_SVE)
- return false;
-
- /* Don't handle SVE traps for non-SVE vcpus here: */
- if (!sve_guest)
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
- return false;
-
- /* Valid trap. Switch the context: */
-
- if (vhe) {
- u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
-
- if (sve_guest)
- reg |= CPACR_EL1_ZEN;
-
- write_sysreg(reg, cpacr_el1);
- } else {
- write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
- cptr_el2);
- }
-
- isb();
-
- if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- /*
- * In the SVE case, VHE is assumed: it is enforced by
- * Kconfig and kvm_arch_init().
- */
- if (sve_host) {
- struct thread_struct *thread = container_of(
- vcpu->arch.host_fpsimd_state,
- struct thread_struct, uw.fpsimd_state);
-
- sve_save_state(sve_pffr(thread),
- &vcpu->arch.host_fpsimd_state->fpsr);
- } else {
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
- }
-
- vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
- }
-
- if (sve_guest) {
- sve_load_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
- sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
- write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
- } else {
- __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
- }
-
- /* Skip restoring fpexc32 for AArch64 guests */
- if (!(read_sysreg(hcr_el2) & HCR_RW))
- write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
- fpexc32_el2);
-
- vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
-
- return true;
-}
-
-static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
-{
- u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
- int rt = kvm_vcpu_sys_get_rt(vcpu);
- u64 val = vcpu_get_reg(vcpu, rt);
-
- /*
- * The normal sysreg handling code expects to see the traps,
- * let's not do anything here.
- */
- if (vcpu->arch.hcr_el2 & HCR_TVM)
- return false;
-
- switch (sysreg) {
- case SYS_SCTLR_EL1:
- write_sysreg_el1(val, SYS_SCTLR);
- break;
- case SYS_TTBR0_EL1:
- write_sysreg_el1(val, SYS_TTBR0);
- break;
- case SYS_TTBR1_EL1:
- write_sysreg_el1(val, SYS_TTBR1);
- break;
- case SYS_TCR_EL1:
- write_sysreg_el1(val, SYS_TCR);
- break;
- case SYS_ESR_EL1:
- write_sysreg_el1(val, SYS_ESR);
- break;
- case SYS_FAR_EL1:
- write_sysreg_el1(val, SYS_FAR);
- break;
- case SYS_AFSR0_EL1:
- write_sysreg_el1(val, SYS_AFSR0);
- break;
- case SYS_AFSR1_EL1:
- write_sysreg_el1(val, SYS_AFSR1);
- break;
- case SYS_MAIR_EL1:
- write_sysreg_el1(val, SYS_MAIR);
- break;
- case SYS_AMAIR_EL1:
- write_sysreg_el1(val, SYS_AMAIR);
- break;
- case SYS_CONTEXTIDR_EL1:
- write_sysreg_el1(val, SYS_CONTEXTIDR);
- break;
- default:
- return false;
- }
-
- __kvm_skip_instr(vcpu);
- return true;
-}
-
-/*
- * Return true when we were able to fixup the guest exit and should return to
- * the guest, false when we should restore the host state and return to the
- * main run loop.
- */
-static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
- vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
-
- /*
- * We're using the raw exception code in order to only process
- * the trap if no SError is pending. We will come back to the
- * same PC once the SError has been injected, and replay the
- * trapping instruction.
- */
- if (*exit_code != ARM_EXCEPTION_TRAP)
- goto exit;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
- return true;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- return true;
-
- if (!__populate_fault_info(vcpu))
- return true;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_dabt_isextabt(vcpu) &&
- !kvm_vcpu_dabt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
- }
-
-exit:
- /* Return to the host kernel and handle the exit */
- return false;
-}
-
-static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
-{
- if (!cpus_have_final_cap(ARM64_SSBD))
- return false;
-
- return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
-}
-
-static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * The host runs with the workaround always present. If the
- * guest wants it disabled, so be it...
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-#endif
-}
-
-static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * If the guest has disabled the workaround, bring it back on.
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-#endif
-}
-
-/**
- * Disable host events, enable guest events
- */
-static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenclr_el0);
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenset_el0);
-
- return (pmu->events_host || pmu->events_guest);
-}
-
-/**
- * Disable guest events, enable host events
- */
-static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenclr_el0);
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenset_el0);
-}
-
-/* Switch to the guest for VHE systems running in EL2 */
-static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- u64 exit_code;
-
- host_ctxt = vcpu->arch.host_cpu_context;
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- sysreg_save_host_state_vhe(host_ctxt);
-
- /*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs above. We must now call __activate_vm
- * before __activate_traps, because __activate_vm configures
- * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
- * (among other things).
- */
- __activate_vm(vcpu->kvm);
- __activate_traps(vcpu);
-
- sysreg_restore_guest_state_vhe(guest_ctxt);
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- sysreg_save_guest_state_vhe(guest_ctxt);
-
- __deactivate_traps(vcpu);
-
- sysreg_restore_host_state_vhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- __debug_switch_to_host(vcpu);
-
- return exit_code;
-}
-NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
-
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- local_daif_mask();
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-
- ret = __kvm_vcpu_run_vhe(vcpu);
-
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-
- return ret;
-}
-
-/* Switch to the guest for legacy non-VHE systems */
-int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool pmu_switch_needed;
- u64 exit_code;
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- */
- if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- pmr_sync();
- }
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
-
- __sysreg_save_state_nvhe(host_ctxt);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- *
- * Also, and in order to be able to deal with erratum #1319537 (A57)
- * and #1319367 (A72), we must ensure that all VM-related sysreg are
- * restored before we enable S2 translation.
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_state_nvhe(guest_ctxt);
-
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- __sysreg_save_state_nvhe(guest_ctxt);
- __sysreg32_save_state(vcpu);
- __timer_disable_traps(vcpu);
- __hyp_vgic_save_state(vcpu);
-
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
-
- __sysreg_restore_state_nvhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- /*
- * This must come after restoring the host sysregs, since a non-VHE
- * system may enable SPE here and make use of the TTBRs.
- */
- __debug_switch_to_host(vcpu);
-
- if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
-
- /* Returning to host will clear PSR.I, remask PMR if needed */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQOFF);
-
- return exit_code;
-}
-
-static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *__host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- unsigned long str_va;
-
- vcpu = __host_ctxt->__hyp_running_vcpu;
-
- if (read_sysreg(vttbr_el2)) {
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_state_nvhe(__host_ctxt);
- }
-
- /*
- * Force the panic string to be loaded from the literal pool,
- * making sure it is a kernel address and not a PC-relative
- * reference.
- */
- asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
-
- __hyp_do_panic(str_va,
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-
-static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- vcpu = host_ctxt->__hyp_running_vcpu;
-
- __deactivate_traps(vcpu);
- sysreg_restore_host_state_vhe(host_ctxt);
-
- panic(__hyp_panic_string,
- spsr, elr,
- read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
-{
- u64 spsr = read_sysreg_el2(SYS_SPSR);
- u64 elr = read_sysreg_el2(SYS_ELR);
- u64 par = read_sysreg(par_el1);
-
- if (!has_vhe())
- __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
- else
- __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
-
- unreachable();
-}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
deleted file mode 100644
index 6d2df9fe0b5d..000000000000
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ /dev/null
@@ -1,331 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-/*
- * Non-VHE: Both host and guest must save everything.
- *
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
- * pstate, which are handled as part of the el2 return state) on every
- * switch (sp_el0 is being dealt with in the assembly code).
- * tpidr_el0 and tpidrro_el0 only need to be switched when going
- * to host userspace or a different VCPU. EL1 registers only need to be
- * switched when potentially going to run a different VCPU. The latter two
- * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
- */
-
-static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
-}
-
-static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR);
- ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR);
- ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL);
- ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
-
- ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
-}
-
-static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_el1_state(ctxt);
- __sysreg_save_common_state(ctxt);
- __sysreg_save_user_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-
-void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
-
-void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
-
-static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-}
-
-static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
- write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
-
- if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- } else if (!ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for guest registers, hence the context
- * test. We're coming from the host, so SCTLR.M is already
- * set. Pairs with __activate_traps_nvhe().
- */
- write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
- TCR_EPD1_MASK | TCR_EPD0_MASK),
- SYS_TCR);
- isb();
- }
-
- write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
- write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
- write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
- write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
- write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
- write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR);
- write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR);
- write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR);
- write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
- write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR);
- write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL);
- write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
- ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for host registers, hence the context
- * test. Pairs with __deactivate_traps_nvhe().
- */
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * deconfigured and disabled. We can now restore the host's
- * S1 configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-
- write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
- write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
-}
-
-static void __hyp_text
-__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- u64 pstate = ctxt->gp_regs.regs.pstate;
- u64 mode = pstate & PSR_AA32_MODE_MASK;
-
- /*
- * Safety check to ensure we're setting the CPU up to enter the guest
- * in a less privileged mode.
- *
- * If we are attempting a return to EL2 or higher in AArch64 state,
- * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
- * we'll take an illegal exception state exception immediately after
- * the ERET to the guest. Attempts to return to AArch32 Hyp will
- * result in an illegal exception return because EL2's execution state
- * is determined by SCR_EL3.RW.
- */
- if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
- pstate = PSR_MODE_EL2h | PSR_IL_BIT;
-
- write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
- write_sysreg_el2(pstate, SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_el1_state(ctxt);
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_user_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-
-void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
-
-void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
-
-void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
- spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
- spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
- spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
-
- sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
- sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
-}
-
-void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
- write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
- write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
- write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
-
- write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
- write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
-}
-
-/**
- * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Load system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_load() function
- * and loading system register state early avoids having to load them on
- * every entry to the VM.
- */
-void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
-
- if (!has_vhe())
- return;
-
- __sysreg_save_user_state(host_ctxt);
-
- /*
- * Load guest EL1 and user state
- *
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = true;
-
- activate_traps_vhe_load(vcpu);
-}
-
-/**
- * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Save guest system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_put() function
- * and deferring saving system register state until we're no longer running the
- * VCPU avoids having to save them on every exit from the VM.
- */
-void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
-
- if (!has_vhe())
- return;
-
- deactivate_traps_vhe_put();
-
- __sysreg_save_el1_state(guest_ctxt);
- __sysreg_save_user_state(guest_ctxt);
- __sysreg32_save_state(vcpu);
-
- /* Restore host user state */
- __sysreg_restore_user_state(host_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = false;
-}
-
-void __hyp_text __kvm_enable_ssbs(void)
-{
- u64 tmp;
-
- asm volatile(
- "mrs %0, sctlr_el2\n"
- "orr %0, %0, %1\n"
- "msr sctlr_el2, %0"
- : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
-}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
deleted file mode 100644
index ceaddbe4279f..000000000000
--- a/arch/arm64/kvm/hyp/tlb.c
+++ /dev/null
@@ -1,241 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/irqflags.h>
-
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/tlbflush.h>
-
-struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
-};
-
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- u64 val;
-
- local_irq_save(cxt->flags);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
- /*
- * For CPUs that are affected by ARM errata 1165522 or 1530923,
- * we cannot trust stage-1 to be in a correct state at that
- * point. Since we do not want to force a full load of the
- * vcpu state, we prevent the EL1 page-table walker to
- * allocate new TLBs. This is done by setting the EPD bits
- * in the TCR_EL1 register. We also need to prevent it to
- * allocate IPA->PA walks, so we enable the S1 MMU...
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
- val |= SCTLR_ELx_M;
- write_sysreg_el1(val, SYS_SCTLR);
- }
-
- /*
- * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
- * most TLB operations target EL2/EL0. In order to affect the
- * guest TLBs (EL1/EL0), we need to change one of these two
- * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
- * let's flip TGE before executing the TLB operation.
- *
- * ARM erratum 1165522 requires some special handling (again),
- * as we need to make sure both stages of translation are in
- * place before clearing TGE. __load_guest_stage2() already
- * has an ISB in order to deal with this.
- */
- __load_guest_stage2(kvm);
- val = read_sysreg(hcr_el2);
- val &= ~HCR_TGE;
- write_sysreg(val, hcr_el2);
- isb();
-}
-
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
- u64 val;
-
- /*
- * For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- isb();
- }
-
- __load_guest_stage2(kvm);
- isb();
-}
-
-static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_guest_vhe(kvm, cxt);
- else
- __tlb_switch_to_guest_nvhe(kvm, cxt);
-}
-
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- /*
- * We're done with the TLB operation, let's restore the host's
- * view of HCR_EL2.
- */
- write_sysreg(0, vttbr_el2);
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
- isb();
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
- /* Restore the registers to what they were */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
- }
-
- local_irq_restore(cxt->flags);
-}
-
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- write_sysreg(0, vttbr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
- /* Ensure write of the host VMID */
- isb();
- /* Restore the host's TCR_EL1 */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- }
-}
-
-static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_host_vhe(kvm, cxt);
- else
- __tlb_switch_to_host_nvhe(kvm, cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- /*
- * We could do so much better if we had the VA as well.
- * Instead, we invalidate Stage-2 for this IPA, and the
- * whole of Stage-1. Weep...
- */
- ipa >>= 12;
- __tlbi(ipas2e1is, ipa);
-
- /*
- * We have to ensure completion of the invalidation at Stage-2,
- * since a table walk on another CPU could refill a TLB with a
- * complete (S1 + S2) walk based on the old Stage-2 mapping if
- * the Stage-1 invalidation happened first.
- */
- dsb(ish);
- __tlbi(vmalle1is);
- dsb(ish);
- isb();
-
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (!has_vhe() && icache_is_vpipt())
- __flush_icache_all();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalls12e1is);
- dsb(ish);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
- struct tlb_inv_context cxt;
-
- /* Switch to requested VMID */
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalle1);
- dsb(nsh);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_flush_vm_context(void)
-{
- dsb(ishst);
- __tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
- dsb(ish);
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 4f3a087e36d5..bd1bab551d48 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -13,7 +13,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+static bool __is_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
@@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
* 0: Not a GICV access
* -1: Illegal GICV access successfully performed
*/
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_dist *vgic = &kvm->arch.vgic;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
new file mode 100644
index 000000000000..452f4cacd674
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -0,0 +1,1099 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+#define vtr_to_max_lr_idx(v) ((v) & 0xf)
+#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
+#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
+
+static u64 __gic_v3_get_lr(unsigned int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ return read_gicreg(ICH_LR0_EL2);
+ case 1:
+ return read_gicreg(ICH_LR1_EL2);
+ case 2:
+ return read_gicreg(ICH_LR2_EL2);
+ case 3:
+ return read_gicreg(ICH_LR3_EL2);
+ case 4:
+ return read_gicreg(ICH_LR4_EL2);
+ case 5:
+ return read_gicreg(ICH_LR5_EL2);
+ case 6:
+ return read_gicreg(ICH_LR6_EL2);
+ case 7:
+ return read_gicreg(ICH_LR7_EL2);
+ case 8:
+ return read_gicreg(ICH_LR8_EL2);
+ case 9:
+ return read_gicreg(ICH_LR9_EL2);
+ case 10:
+ return read_gicreg(ICH_LR10_EL2);
+ case 11:
+ return read_gicreg(ICH_LR11_EL2);
+ case 12:
+ return read_gicreg(ICH_LR12_EL2);
+ case 13:
+ return read_gicreg(ICH_LR13_EL2);
+ case 14:
+ return read_gicreg(ICH_LR14_EL2);
+ case 15:
+ return read_gicreg(ICH_LR15_EL2);
+ }
+
+ unreachable();
+}
+
+static void __gic_v3_set_lr(u64 val, int lr)
+{
+ switch (lr & 0xf) {
+ case 0:
+ write_gicreg(val, ICH_LR0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_LR1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_LR2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_LR3_EL2);
+ break;
+ case 4:
+ write_gicreg(val, ICH_LR4_EL2);
+ break;
+ case 5:
+ write_gicreg(val, ICH_LR5_EL2);
+ break;
+ case 6:
+ write_gicreg(val, ICH_LR6_EL2);
+ break;
+ case 7:
+ write_gicreg(val, ICH_LR7_EL2);
+ break;
+ case 8:
+ write_gicreg(val, ICH_LR8_EL2);
+ break;
+ case 9:
+ write_gicreg(val, ICH_LR9_EL2);
+ break;
+ case 10:
+ write_gicreg(val, ICH_LR10_EL2);
+ break;
+ case 11:
+ write_gicreg(val, ICH_LR11_EL2);
+ break;
+ case 12:
+ write_gicreg(val, ICH_LR12_EL2);
+ break;
+ case 13:
+ write_gicreg(val, ICH_LR13_EL2);
+ break;
+ case 14:
+ write_gicreg(val, ICH_LR14_EL2);
+ break;
+ case 15:
+ write_gicreg(val, ICH_LR15_EL2);
+ break;
+ }
+}
+
+static void __vgic_v3_write_ap0rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP0R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP0R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP0R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP0R3_EL2);
+ break;
+ }
+}
+
+static void __vgic_v3_write_ap1rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP1R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP1R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP1R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP1R3_EL2);
+ break;
+ }
+}
+
+static u32 __vgic_v3_read_ap0rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP0R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP0R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP0R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP0R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+static u32 __vgic_v3_read_ap1rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP1R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP1R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP1R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP1R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 used_lrs = cpu_if->used_lrs;
+
+ /*
+ * Make sure stores to the GIC via the memory mapped interface
+ * are now visible to the system register interface when reading the
+ * LRs, and when reading back the VMCR on non-VHE systems.
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ dsb(sy);
+ isb();
+ }
+ }
+
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
+ int i;
+ u32 elrsr;
+
+ elrsr = read_gicreg(ICH_ELRSR_EL2);
+
+ write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
+
+ for (i = 0; i < used_lrs; i++) {
+ if (elrsr & (1 << i))
+ cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
+ else
+ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+
+ __gic_v3_set_lr(0, i);
+ }
+ }
+}
+
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 used_lrs = cpu_if->used_lrs;
+ int i;
+
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+
+ for (i = 0; i < used_lrs; i++)
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
+
+ /*
+ * Ensure that writes to the LRs, and on non-VHE systems ensure that
+ * the write to the VMCR in __vgic_v3_activate_traps(), will have
+ * reached the (re)distributors. This ensure the guest will read the
+ * correct values from the memory-mapped interface.
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ isb();
+ dsb(sy);
+ }
+ }
+}
+
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
+ * Group0 interrupt (as generated in GICv2 mode) to be
+ * delivered as a FIQ to the guest, with potentially fatal
+ * consequences. So we must make sure that ICC_SRE_EL1 has
+ * been actually programmed with the value we want before
+ * starting to mess with the rest of the GIC, and VMCR_EL2 in
+ * particular. This logic must be called before
+ * __vgic_v3_restore_state().
+ */
+ if (!cpu_if->vgic_sre) {
+ write_gicreg(0, ICC_SRE_EL1);
+ isb();
+ write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+
+ if (has_vhe()) {
+ /*
+ * Ensure that the write to the VMCR will have reached
+ * the (re)distributors. This ensure the guest will
+ * read the correct values from the memory-mapped
+ * interface.
+ */
+ isb();
+ dsb(sy);
+ }
+ }
+
+ /*
+ * Prevent the guest from touching the GIC system registers if
+ * SRE isn't enabled for GICv3 emulation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
+
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected,
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+}
+
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 val;
+
+ if (!cpu_if->vgic_sre) {
+ cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+ }
+
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+
+ if (!cpu_if->vgic_sre) {
+ /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
+ isb();
+ write_gicreg(1, ICC_SRE_EL1);
+ }
+
+ /*
+ * If we were trapping system registers, we enabled the VGIC even if
+ * no interrupts were being injected, and we disable it again here.
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
+ write_gicreg(0, ICH_HCR_EL2);
+}
+
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 val;
+ u32 nr_pre_bits;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+ cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
+ fallthrough;
+ case 6:
+ cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
+ fallthrough;
+ default:
+ cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+ cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
+ fallthrough;
+ case 6:
+ cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
+ fallthrough;
+ default:
+ cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
+ }
+}
+
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ u64 val;
+ u32 nr_pre_bits;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
+ fallthrough;
+ case 6:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
+ fallthrough;
+ default:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
+ fallthrough;
+ case 6:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
+ fallthrough;
+ default:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
+ }
+}
+
+void __vgic_v3_init_lrs(void)
+{
+ int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
+ int i;
+
+ for (i = 0; i <= max_lr_idx; i++)
+ __gic_v3_set_lr(0, i);
+}
+
+u64 __vgic_v3_get_ich_vtr_el2(void)
+{
+ return read_gicreg(ICH_VTR_EL2);
+}
+
+u64 __vgic_v3_read_vmcr(void)
+{
+ return read_gicreg(ICH_VMCR_EL2);
+}
+
+void __vgic_v3_write_vmcr(u32 vmcr)
+{
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+static int __vgic_v3_bpr_min(void)
+{
+ /* See Pseudocode for VPriorityGroup */
+ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
+}
+
+static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+{
+ u32 esr = kvm_vcpu_get_esr(vcpu);
+ u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+
+ return crm != 8;
+}
+
+#define GICv3_IDLE_PRIORITY 0xff
+
+static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr,
+ u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
+ u8 priority = GICv3_IDLE_PRIORITY;
+ int i, lr = -1;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+ u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* Not pending in the state? */
+ if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT)
+ continue;
+
+ /* Group-0 interrupt, but Group-0 disabled? */
+ if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK))
+ continue;
+
+ /* Group-1 interrupt, but Group-1 disabled? */
+ if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK))
+ continue;
+
+ /* Not the highest priority? */
+ if (lr_prio >= priority)
+ continue;
+
+ /* This is a candidate */
+ priority = lr_prio;
+ *lr_val = val;
+ lr = i;
+ }
+
+ if (lr == -1)
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+ return lr;
+}
+
+static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid,
+ u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
+ int i;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+
+ if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid &&
+ (val & ICH_LR_ACTIVE_BIT)) {
+ *lr_val = val;
+ return i;
+ }
+ }
+
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+ return -1;
+}
+
+static int __vgic_v3_get_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 val;
+
+ /*
+ * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers
+ * contain the active priority levels for this VCPU
+ * for the maximum number of supported priority
+ * levels, and we return the full priority level only
+ * if the BPR is programmed to its minimum, otherwise
+ * we return a combination of the priority level and
+ * subpriority, as determined by the setting of the
+ * BPR, but without the full subpriority.
+ */
+ val = __vgic_v3_read_ap0rn(i);
+ val |= __vgic_v3_read_ap1rn(i);
+ if (!val) {
+ hap += 32;
+ continue;
+ }
+
+ return (hap + __ffs(val)) << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static unsigned int __vgic_v3_get_bpr0(u32 vmcr)
+{
+ return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+}
+
+static unsigned int __vgic_v3_get_bpr1(u32 vmcr)
+{
+ unsigned int bpr;
+
+ if (vmcr & ICH_VMCR_CBPR_MASK) {
+ bpr = __vgic_v3_get_bpr0(vmcr);
+ if (bpr < 7)
+ bpr++;
+ } else {
+ bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ }
+
+ return bpr;
+}
+
+/*
+ * Convert a priority to a preemption level, taking the relevant BPR
+ * into account by zeroing the sub-priority bits.
+ */
+static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+{
+ unsigned int bpr;
+
+ if (!grp)
+ bpr = __vgic_v3_get_bpr0(vmcr) + 1;
+ else
+ bpr = __vgic_v3_get_bpr1(vmcr);
+
+ return pri & (GENMASK(7, 0) << bpr);
+}
+
+/*
+ * The priority value is independent of any of the BPR values, so we
+ * normalize it using the minimal BPR value. This guarantees that no
+ * matter what the guest does with its BPR, we can always set/get the
+ * same value of a priority.
+ */
+static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+{
+ u8 pre, ap;
+ u32 val;
+ int apr;
+
+ pre = __vgic_v3_pri_to_pre(pri, vmcr, grp);
+ ap = pre >> __vgic_v3_bpr_min();
+ apr = ap / 32;
+
+ if (!grp) {
+ val = __vgic_v3_read_ap0rn(apr);
+ __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr);
+ } else {
+ val = __vgic_v3_read_ap1rn(apr);
+ __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr);
+ }
+}
+
+static int __vgic_v3_clear_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 ap0, ap1;
+ int c0, c1;
+
+ ap0 = __vgic_v3_read_ap0rn(i);
+ ap1 = __vgic_v3_read_ap1rn(i);
+ if (!ap0 && !ap1) {
+ hap += 32;
+ continue;
+ }
+
+ c0 = ap0 ? __ffs(ap0) : 32;
+ c1 = ap1 ? __ffs(ap1) : 32;
+
+ /* Always clear the LSB, which is the highest priority */
+ if (c0 < c1) {
+ ap0 &= ~BIT(c0);
+ __vgic_v3_write_ap0rn(ap0, i);
+ hap += c0;
+ } else {
+ ap1 &= ~BIT(c1);
+ __vgic_v3_write_ap1rn(ap1, i);
+ hap += c1;
+ }
+
+ /* Rescale to 8 bits of priority */
+ return hap << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 lr_val;
+ u8 lr_prio, pmr;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+ if (lr < 0)
+ goto spurious;
+
+ if (grp != !!(lr_val & ICH_LR_GROUP))
+ goto spurious;
+
+ pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+ if (pmr <= lr_prio)
+ goto spurious;
+
+ if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp))
+ goto spurious;
+
+ lr_val &= ~ICH_LR_STATE;
+ /* No active state for LPIs */
+ if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ __gic_v3_set_lr(lr_val, lr);
+ __vgic_v3_set_active_priority(lr_prio, vmcr, grp);
+ vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+ return;
+
+spurious:
+ vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+{
+ lr_val &= ~ICH_LR_ACTIVE_BIT;
+ if (lr_val & ICH_LR_HW) {
+ u32 pid;
+
+ pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
+ gic_write_dir(pid);
+ }
+
+ __gic_v3_set_lr(lr_val, lr);
+}
+
+static void __vgic_v3_bump_eoicount(void)
+{
+ u32 hcr;
+
+ hcr = read_gicreg(ICH_HCR_EL2);
+ hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+ write_gicreg(hcr, ICH_HCR_EL2);
+}
+
+static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ int lr;
+
+ /* EOImode == 0, nothing to be done here */
+ if (!(vmcr & ICH_VMCR_EOIM_MASK))
+ return;
+
+ /* No deactivate to be performed on an LPI */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ u8 lr_prio, act_prio;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ /* Drop priority in any case */
+ act_prio = __vgic_v3_clear_highest_active_priority();
+
+ /* If EOIing an LPI, no deactivate to be performed */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ /* EOImode == 1, nothing to be done here */
+ if (vmcr & ICH_VMCR_EOIM_MASK)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* If priorities or group do not match, the guest has fscked-up. */
+ if (grp != !!(lr_val & ICH_LR_GROUP) ||
+ __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio)
+ return;
+
+ /* Let's now perform the deactivation */
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
+}
+
+static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
+}
+
+static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG0_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG0_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG1_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG1_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
+}
+
+static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
+}
+
+static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bpr_min() - 1;
+
+ /* Enforce BPR limiting */
+ if (val < bpr_min)
+ val = bpr_min;
+
+ val <<= ICH_VMCR_BPR0_SHIFT;
+ val &= ICH_VMCR_BPR0_MASK;
+ vmcr &= ~ICH_VMCR_BPR0_MASK;
+ vmcr |= val;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bpr_min();
+
+ if (vmcr & ICH_VMCR_CBPR_MASK)
+ return;
+
+ /* Enforce BPR limiting */
+ if (val < bpr_min)
+ val = bpr_min;
+
+ val <<= ICH_VMCR_BPR1_SHIFT;
+ val &= ICH_VMCR_BPR1_MASK;
+ vmcr &= ~ICH_VMCR_BPR1_MASK;
+ vmcr |= val;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+ u32 val;
+
+ if (!__vgic_v3_get_group(vcpu))
+ val = __vgic_v3_read_ap0rn(n);
+ else
+ val = __vgic_v3_read_ap1rn(n);
+
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ if (!__vgic_v3_get_group(vcpu))
+ __vgic_v3_write_ap0rn(val, n);
+ else
+ __vgic_v3_write_ap1rn(val, n);
+}
+
+static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 0);
+}
+
+static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 1);
+}
+
+static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 2);
+}
+
+static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 3);
+}
+
+static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 0);
+}
+
+static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 1);
+}
+
+static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 2);
+}
+
+static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 3);
+}
+
+static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 lr_val;
+ int lr, lr_grp, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+ if (lr == -1)
+ goto spurious;
+
+ lr_grp = !!(lr_val & ICH_LR_GROUP);
+ if (lr_grp != grp)
+ lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+spurious:
+ vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+}
+
+static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vmcr &= ICH_VMCR_PMR_MASK;
+ vmcr >>= ICH_VMCR_PMR_SHIFT;
+ vcpu_set_reg(vcpu, rt, vmcr);
+}
+
+static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ val <<= ICH_VMCR_PMR_SHIFT;
+ val &= ICH_VMCR_PMR_MASK;
+ vmcr &= ~ICH_VMCR_PMR_MASK;
+ vmcr |= val;
+
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 val = __vgic_v3_get_highest_active_priority();
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 vtr, val;
+
+ vtr = read_gicreg(ICH_VTR_EL2);
+ /* PRIbits */
+ val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
+ /* IDbits */
+ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
+ /* A3V */
+ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
+ /* EOImode */
+ val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT;
+ /* CBPR */
+ val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & ICC_CTLR_EL1_CBPR_MASK)
+ vmcr |= ICH_VMCR_CBPR_MASK;
+ else
+ vmcr &= ~ICH_VMCR_CBPR_MASK;
+
+ if (val & ICC_CTLR_EL1_EOImode_MASK)
+ vmcr |= ICH_VMCR_EOIM_MASK;
+ else
+ vmcr &= ~ICH_VMCR_EOIM_MASK;
+
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ int rt;
+ u32 esr;
+ u32 vmcr;
+ void (*fn)(struct kvm_vcpu *, u32, int);
+ bool is_read;
+ u32 sysreg;
+
+ esr = kvm_vcpu_get_esr(vcpu);
+ if (vcpu_mode_is_32bit(vcpu)) {
+ if (!kvm_condition_valid(vcpu)) {
+ __kvm_skip_instr(vcpu);
+ return 1;
+ }
+
+ sysreg = esr_cp15_to_sysreg(esr);
+ } else {
+ sysreg = esr_sys64_to_sysreg(esr);
+ }
+
+ is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+ switch (sysreg) {
+ case SYS_ICC_IAR0_EL1:
+ case SYS_ICC_IAR1_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_iar;
+ break;
+ case SYS_ICC_EOIR0_EL1:
+ case SYS_ICC_EOIR1_EL1:
+ if (unlikely(is_read))
+ return 0;
+ fn = __vgic_v3_write_eoir;
+ break;
+ case SYS_ICC_IGRPEN1_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_igrpen1;
+ else
+ fn = __vgic_v3_write_igrpen1;
+ break;
+ case SYS_ICC_BPR1_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_bpr1;
+ else
+ fn = __vgic_v3_write_bpr1;
+ break;
+ case SYS_ICC_AP0Rn_EL1(0):
+ case SYS_ICC_AP1Rn_EL1(0):
+ if (is_read)
+ fn = __vgic_v3_read_apxr0;
+ else
+ fn = __vgic_v3_write_apxr0;
+ break;
+ case SYS_ICC_AP0Rn_EL1(1):
+ case SYS_ICC_AP1Rn_EL1(1):
+ if (is_read)
+ fn = __vgic_v3_read_apxr1;
+ else
+ fn = __vgic_v3_write_apxr1;
+ break;
+ case SYS_ICC_AP0Rn_EL1(2):
+ case SYS_ICC_AP1Rn_EL1(2):
+ if (is_read)
+ fn = __vgic_v3_read_apxr2;
+ else
+ fn = __vgic_v3_write_apxr2;
+ break;
+ case SYS_ICC_AP0Rn_EL1(3):
+ case SYS_ICC_AP1Rn_EL1(3):
+ if (is_read)
+ fn = __vgic_v3_read_apxr3;
+ else
+ fn = __vgic_v3_write_apxr3;
+ break;
+ case SYS_ICC_HPPIR0_EL1:
+ case SYS_ICC_HPPIR1_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_hppir;
+ break;
+ case SYS_ICC_IGRPEN0_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_igrpen0;
+ else
+ fn = __vgic_v3_write_igrpen0;
+ break;
+ case SYS_ICC_BPR0_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_bpr0;
+ else
+ fn = __vgic_v3_write_bpr0;
+ break;
+ case SYS_ICC_DIR_EL1:
+ if (unlikely(is_read))
+ return 0;
+ fn = __vgic_v3_write_dir;
+ break;
+ case SYS_ICC_RPR_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_rpr;
+ break;
+ case SYS_ICC_CTLR_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_ctlr;
+ else
+ fn = __vgic_v3_write_ctlr;
+ break;
+ case SYS_ICC_PMR_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_pmr;
+ else
+ fn = __vgic_v3_write_pmr;
+ break;
+ default:
+ return 0;
+ }
+
+ vmcr = __vgic_v3_read_vmcr();
+ rt = kvm_vcpu_sys_get_rt(vcpu);
+ fn(vcpu, vmcr, rt);
+
+ __kvm_skip_instr(vcpu);
+
+ return 1;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
new file mode 100644
index 000000000000..461e97c375cc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_VHE_HYPERVISOR__
+ccflags-y := -D__KVM_VHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
new file mode 100644
index 000000000000..f1e2e5a00933
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_hyp.h>
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
new file mode 100644
index 000000000000..c1da4f86ccac
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_ZEN;
+
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+
+ val |= CPTR_EL2_TAM;
+
+ if (update_fp_enabled(vcpu)) {
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ } else {
+ val &= ~CPACR_EL1_FPEN;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+NOKPROBE_SYMBOL(__activate_traps);
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ extern char vectors[]; /* kernel exception vectors */
+
+ ___deactivate_traps(vcpu);
+
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL2/EL0 translation regime used by
+ * the host.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+ write_sysreg(vectors, vbar_el1);
+}
+NOKPROBE_SYMBOL(__deactivate_traps);
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ u64 exit_code;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ /*
+ * ARM erratum 1165522 requires us to configure both stage 1 and
+ * stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE.
+ *
+ * We have already configured the guest's stage 1 translation in
+ * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm
+ * before __activate_traps, because __activate_vm configures
+ * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
+ * (among other things).
+ */
+ __activate_vm(vcpu->arch.hw_mmu);
+ __activate_traps(vcpu);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_daif_mask();
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
+ */
+ pmr_sync();
+
+ ret = __kvm_vcpu_run_vhe(vcpu);
+
+ /*
+ * local_daif_restore() takes care to properly restore PSTATE.DAIF
+ * and the GIC PMR if the host is using IRQ priorities.
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
+
+ return ret;
+}
+
+static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+}
+NOKPROBE_SYMBOL(__hyp_call_panic);
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+
+ __hyp_call_panic(spsr, elr, par, host_ctxt);
+ unreachable();
+}
+
+asmlinkage void kvm_unexpected_el2_exception(void)
+{
+ return __kvm_unexpected_el2_exception();
+}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
new file mode 100644
index 000000000000..996471e4c138
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
+ */
+
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
+
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
+
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
+
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
+
+/**
+ * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c
new file mode 100644
index 000000000000..4cda674a8be6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+
+void __kvm_timer_set_cntvoff(u64 cntvoff)
+{
+ write_sysreg(cntvoff, cntvoff_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
new file mode 100644
index 000000000000..fd7895945bbc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/irqflags.h>
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ u64 val;
+
+ local_irq_save(cxt->flags);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /*
+ * For CPUs that are affected by ARM errata 1165522 or 1530923,
+ * we cannot trust stage-1 to be in a correct state at that
+ * point. Since we do not want to force a full load of the
+ * vcpu state, we prevent the EL1 page-table walker to
+ * allocate new TLBs. This is done by setting the EPD bits
+ * in the TCR_EL1 register. We also need to prevent it to
+ * allocate IPA->PA walks, so we enable the S1 MMU...
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ }
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ *
+ * ARM erratum 1165522 requires some special handling (again),
+ * as we need to make sure both stages of translation are in
+ * place before clearing TGE. __load_guest_stage2() already
+ * has an ISB in order to deal with this.
+ */
+ __load_guest_stage2(mmu);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ isb();
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Restore the registers to what they were */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ }
+
+ local_irq_restore(cxt->flags);
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}