Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r-- | arch/arm64/kvm/guest.c | 61
-rw-r--r-- | arch/arm64/kvm/handle_exit.c | 7
-rw-r--r-- | arch/arm64/kvm/hyp-init.S | 3
-rw-r--r-- | arch/arm64/kvm/hyp/Makefile | 1
-rw-r--r-- | arch/arm64/kvm/hyp/hyp-entry.S | 16
-rw-r--r-- | arch/arm64/kvm/hyp/s2-setup.c | 90
-rw-r--r-- | arch/arm64/kvm/hyp/switch.c | 13
-rw-r--r-- | arch/arm64/kvm/hyp/sysreg-sr.c | 30
-rw-r--r-- | arch/arm64/kvm/hyp/tlb.c | 4
-rw-r--r-- | arch/arm64/kvm/reset.c | 108
10 files changed, 226 insertions(+), 107 deletions(-)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07256b08226c..dd436a50fce7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id)
 	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
 }
 
+static int validate_core_offset(const struct kvm_one_reg *reg)
+{
+	u64 off = core_reg_offset_from_id(reg->id);
+	int size;
+
+	switch (off) {
+	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
+	case KVM_REG_ARM_CORE_REG(regs.sp):
+	case KVM_REG_ARM_CORE_REG(regs.pc):
+	case KVM_REG_ARM_CORE_REG(regs.pstate):
+	case KVM_REG_ARM_CORE_REG(sp_el1):
+	case KVM_REG_ARM_CORE_REG(elr_el1):
+	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+		size = sizeof(__u64);
+		break;
+
+	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+		size = sizeof(__uint128_t);
+		break;
+
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+		size = sizeof(__u32);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (KVM_REG_SIZE(reg->id) == size &&
+	    IS_ALIGNED(off, size / sizeof(__u32)))
+		return 0;
+
+	return -EINVAL;
+}
+
 static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	/*
@@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
 		return -ENOENT;
 
+	if (validate_core_offset(reg))
+		return -EINVAL;
+
 	if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
 		return -EFAULT;
 
@@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
 		return -ENOENT;
 
+	if (validate_core_offset(reg))
+		return -EINVAL;
+
 	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
 		return -EINVAL;
 
@@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	}
 
 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
-		u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK;
+		u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
 		switch (mode) {
 		case PSR_AA32_MODE_USR:
+			if (!system_supports_32bit_el0())
+				return -EINVAL;
+			break;
 		case PSR_AA32_MODE_FIQ:
 		case PSR_AA32_MODE_IRQ:
 		case PSR_AA32_MODE_SVC:
 		case PSR_AA32_MODE_ABT:
 		case PSR_AA32_MODE_UND:
+			if (!vcpu_el1_is_32bit(vcpu))
+				return -EINVAL;
+			break;
 		case PSR_MODE_EL0t:
 		case PSR_MODE_EL1t:
 		case PSR_MODE_EL1h:
+			if (vcpu_el1_is_32bit(vcpu))
+				return -EINVAL;
 			break;
 		default:
 			err = -EINVAL;
@@ -338,15 +391,15 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
-		};
+		}
 		break;
 	case ARM_CPU_IMP_APM:
 		switch (part_number) {
 		case APM_CPU_PART_POTENZA:
 			return KVM_ARM_TARGET_XGENE_POTENZA;
-		};
+		}
 		break;
-	};
+	}
 
 	/* Return a default generic target */
 	return KVM_ARM_TARGET_GENERIC_V8;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e5e741bfffe1..35a81bebd02b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 */
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		return 0;
+	case ARM_EXCEPTION_IL:
+		/*
+		 * We attempted an illegal exception return.  Guest state must
+		 * have been corrupted somehow.  Give up.
+		 */
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index ea9225160786..4576b86a5579 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -65,6 +65,9 @@ __do_hyp_init:
 	b.lo	__kvm_handle_stub_hvc
 
 	phys_to_ttbr x4, x0
+alternative_if ARM64_HAS_CNP
+	orr	x4, x4, #TTBR_CNP_BIT
+alternative_else_nop_endif
 	msr	ttbr0_el2, x4
 
 	mrs	x4, tcr_el1
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 2fabc2dc1966..82d1904328ad 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
 obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
 obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
 
 # KVM code is run at a different exception code with a different map, so
 # compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 24b4fbafe3e4..b1f14f736962 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -162,6 +162,20 @@ el1_error:
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
+el2_sync:
+	/* Check for illegal exception return, otherwise panic */
+	mrs	x0, spsr_el2
+
+	/* if this was something else, then panic! */
+	tst	x0, #PSR_IL_BIT
+	b.eq	__hyp_panic
+
+	/* Let's attempt a recovery from the illegal exception return */
+	get_vcpu_ptr	x1, x0
+	mov	x0, #ARM_EXCEPTION_IL
+	b	__guest_exit
+
+
 el2_error:
 	ldp	x0, x1, [sp], #16
 
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
 	invalid_vect	el2t_fiq_invalid	// FIQ EL2t
 	invalid_vect	el2t_error_invalid	// Error EL2t
 
-	invalid_vect	el2h_sync_invalid	// Synchronous EL2h
+	valid_vect	el2_sync		// Synchronous EL2h
 	invalid_vect	el2h_irq_invalid	// IRQ EL2h
 	invalid_vect	el2h_fiq_invalid	// FIQ EL2h
 	valid_vect	el2_error		// Error EL2h
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
deleted file mode 100644
index 603e1ee83e89..000000000000
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/types.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_hyp.h>
-
-u32 __hyp_text __init_stage2_translation(void)
-{
-	u64 val = VTCR_EL2_FLAGS;
-	u64 parange;
-	u64 tmp;
-
-	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
-	 * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
-	 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
-	 */
-	parange = read_sysreg(id_aa64mmfr0_el1) & 7;
-	if (parange > ID_AA64MMFR0_PARANGE_MAX)
-		parange = ID_AA64MMFR0_PARANGE_MAX;
-	val |= parange << 16;
-
-	/* Compute the actual PARange... */
-	switch (parange) {
-	case 0:
-		parange = 32;
-		break;
-	case 1:
-		parange = 36;
-		break;
-	case 2:
-		parange = 40;
-		break;
-	case 3:
-		parange = 42;
-		break;
-	case 4:
-		parange = 44;
-		break;
-	case 5:
-	default:
-		parange = 48;
-		break;
-	}
-
-	/*
-	 * ... and clamp it to 40 bits, unless we have some braindead
-	 * HW that implements less than that. In all cases, we'll
-	 * return that value for the rest of the kernel to decide what
-	 * to do.
-	 */
-	val |= 64 - (parange > 40 ? 40 : parange);
-
-	/*
-	 * Check the availability of Hardware Access Flag / Dirty Bit
-	 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
-	 */
-	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
-	if (tmp)
-		val |= VTCR_EL2_HA;
-
-	/*
-	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
-	 * bit in VTCR_EL2.
-	 */
-	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
-	val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
-			VTCR_EL2_VS_16BIT :
-			VTCR_EL2_VS_8BIT;
-
-	write_sysreg(val, vtcr_el2);
-
-	return parange;
-}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d496ef579859..7cc175c88a37 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val &= ~CPACR_EL1_FPEN;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cpacr_el1);
 
@@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 
 	val = CPTR_EL2_DEFAULT;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val |= CPTR_EL2_TFP;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cptr_el2);
 }
@@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
-	__activate_traps_fpsimd32(vcpu);
 	if (has_vhe())
 		activate_traps_vhe(vcpu);
 	else
@@ -195,7 +198,7 @@ void deactivate_traps_vhe_put(void)
 
 static void __hyp_text __activate_vm(struct kvm *kvm)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 }
 
 static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
@@ -260,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
 		return false; /* Translation failed, back to guest */
 
 	/* Convert PAR to HPFAR format */
-	*hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+	*hpfar = PAR_TO_HPFAR(tmp);
 	return true;
 }
 
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9ce223944983..68d6f7c3b237 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 static void __hyp_text
 __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
 {
+	u64 pstate = ctxt->gp_regs.regs.pstate;
+	u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+	/*
+	 * Safety check to ensure we're setting the CPU up to enter the guest
+	 * in a less privileged mode.
+	 *
+	 * If we are attempting a return to EL2 or higher in AArch64 state,
+	 * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+	 * we'll take an illegal exception state exception immediately after
+	 * the ERET to the guest.  Attempts to return to AArch32 Hyp will
+	 * result in an illegal exception return because EL2's execution state
+	 * is determined by SCR_EL3.RW.
+	 */
+	if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+		pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
 	write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
-	write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+	write_sysreg_el2(pstate, spsr);
 
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
 		write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
@@ -288,3 +305,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.sysregs_loaded_on_cpu = false;
 }
+
+void __hyp_text __kvm_enable_ssbs(void)
+{
+	u64 tmp;
+
+	asm volatile(
+	"mrs	%0, sctlr_el2\n"
+	"orr	%0, %0, %1\n"
+	"msr	sctlr_el2, %0"
+	: "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 131c7772703c..4dbd9c69a96d 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
 
 static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
 {
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	__load_guest_stage2(kvm);
 	isb();
 }
 
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e37c78bbe1ca..b72a3dd56204 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -26,6 +26,7 @@
 
 #include <kvm/arm_arch_timer.h>
 
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
@@ -33,6 +34,9 @@
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
 
+/* Maximum phys_shift supported for any VM on this host */
+static u32 kvm_ipa_limit;
+
 /*
  * ARMv8 Reset Values
  */
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
 }
 
 /**
- * kvm_arch_dev_ioctl_check_extension
+ * kvm_arch_vm_ioctl_check_extension
  *
 * We currently assume that the number of HW registers is uniform
 * across all CPUs (see cpuinfo_sanity_check).
 */
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
-	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		break;
+	case KVM_CAP_ARM_VM_IPA_SIZE:
+		r = kvm_ipa_limit;
+		break;
 	default:
 		r = 0;
 	}
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset timer */
 	return kvm_timer_vcpu_reset(vcpu);
 }
+
+void kvm_set_ipa_limit(void)
+{
+	unsigned int ipa_max, pa_max, va_max, parange;
+
+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
+	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+
+	/* Clamp the IPA limit to the PA size supported by the kernel */
+	ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
+	/*
+	 * Since our stage2 table is dependent on the stage1 page table code,
+	 * we must always honor the following condition:
+	 *
+	 *  Number of levels in Stage1 >= Number of levels in Stage2.
+	 *
+	 * So clamp the ipa limit further down to limit the number of levels.
+	 * Since we can concatenate up to 16 tables at entry level, we could
+	 * go up to 4 bits above the maximum VA addressable with the current
+	 * number of levels.
+	 */
+	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
+	va_max += 4;
+
+	if (va_max < ipa_max)
+		ipa_max = va_max;
+
+	/*
+	 * If the final limit is lower than the real physical address
+	 * limit of the CPUs, report the reason.
+	 */
+	if (ipa_max < pa_max)
+		pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
+			(va_max < pa_max) ? "Virtual" : "Physical");
+
+	WARN(ipa_max < KVM_PHYS_SHIFT,
+	     "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
+	kvm_ipa_limit = ipa_max;
+	kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
+}
+
+/*
+ * Configure the VTCR_EL2 for this VM. The VTCR value is common
+ * across all the physical CPUs on the system. We use system wide
+ * sanitised values to fill in different fields, except for Hardware
+ * Management of Access Flags. HA Flag is set unconditionally on
+ * all CPUs, as it is safe to run with or without the feature and
+ * the bit is RES0 on CPUs that don't support it.
+ */
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	u64 vtcr = VTCR_EL2_FLAGS;
+	u32 parange, phys_shift;
+	u8 lvls;
+
+	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+		return -EINVAL;
+
+	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+	if (phys_shift) {
+		if (phys_shift > kvm_ipa_limit ||
+		    phys_shift < 32)
+			return -EINVAL;
+	} else {
+		phys_shift = KVM_PHYS_SHIFT;
+	}
+
+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
+	if (parange > ID_AA64MMFR0_PARANGE_MAX)
+		parange = ID_AA64MMFR0_PARANGE_MAX;
+	vtcr |= parange << VTCR_EL2_PS_SHIFT;
+
+	vtcr |= VTCR_EL2_T0SZ(phys_shift);
+	/*
+	 * Use a minimum 2 level page table to prevent splitting
+	 * host PMD huge pages at stage2.
+	 */
+	lvls = stage2_pgtable_levels(phys_shift);
+	if (lvls < 2)
+		lvls = 2;
+	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+
+	/*
+	 * Enable the Hardware Access Flag management, unconditionally
+	 * on all CPUs. The feature is RES0 on CPUs without the support
+	 * and must be ignored by the CPUs.
+	 */
+	vtcr |= VTCR_EL2_HA;
+
+	/* Set the vmid bits */
+	vtcr |= (kvm_get_vmid_bits() == 16) ?
+		VTCR_EL2_VS_16BIT :
+		VTCR_EL2_VS_8BIT;
+	kvm->arch.vtcr = vtcr;
+	return 0;
+}
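Not part of the patch itself: a minimal userspace sketch of how the configurable IPA size introduced by the reset.c hunks above is meant to be consumed. It relies only on identifiers visible in this diff and the matching uapi headers (KVM_CAP_ARM_VM_IPA_SIZE, KVM_VM_TYPE_ARM_IPA_SIZE); error handling is trimmed to the essentials, and it assumes a host kernel with this series applied.

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm, vm, ipa;

	kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0)
		return 1;

	/*
	 * kvm_arch_vm_ioctl_check_extension() reports kvm_ipa_limit for
	 * KVM_CAP_ARM_VM_IPA_SIZE; 0 means the host predates this series
	 * and only offers the legacy 40-bit default.
	 */
	ipa = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
	printf("host IPA limit: %d bits\n", ipa);

	/*
	 * A non-zero IPA size encoded in the VM type must lie within
	 * [32, kvm_ipa_limit], or kvm_arm_setup_stage2() rejects the
	 * create with -EINVAL. Type 0 keeps the KVM_PHYS_SHIFT default.
	 */
	vm = ioctl(kvm, KVM_CREATE_VM,
		   ipa ? KVM_VM_TYPE_ARM_IPA_SIZE(ipa) : 0UL);

	return vm < 0 ? 1 : 0;
}
```

The guest.c hunks tighten the KVM_GET_ONE_REG/KVM_SET_ONE_REG path in a similar spirit: a core register access whose KVM_REG_SIZE() disagrees with the register's real width, or a PSTATE.M value that doesn't match the vcpu's execution state, now fails with -EINVAL instead of reading or writing out of bounds.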