summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/arm.c5
-rw-r--r--arch/arm64/kvm/hypercalls.c12
-rw-r--r--arch/arm64/kvm/mmu.c15
-rw-r--r--arch/arm64/kvm/nested.c53
-rw-r--r--arch/arm64/kvm/sys_regs.c75
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c28
6 files changed, 158 insertions, 30 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a0d01c46e408..48cafb65d6ac 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -997,6 +997,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+ return -EIO;
+
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
kvm_vcpu_sleep(vcpu);
@@ -1031,6 +1034,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_dirty_ring_check_request(vcpu))
return 0;
+
+ check_nested_vcpu_requests(vcpu);
}
return 1;
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 5763d979d8ca..ee6573befb81 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -317,7 +317,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
* to the guest, and hide SSBS so that the
* guest stays protected.
*/
- if (cpus_have_final_cap(ARM64_SSBS))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
break;
fallthrough;
case SPECTRE_UNAFFECTED:
@@ -428,7 +428,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
* Convert the workaround level into an easy-to-compare number, where higher
* values mean better protection.
*/
-static int get_kernel_wa_level(u64 regid)
+static int get_kernel_wa_level(struct kvm_vcpu *vcpu, u64 regid)
{
switch (regid) {
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
@@ -449,7 +449,7 @@ static int get_kernel_wa_level(u64 regid)
* don't have any FW mitigation if SSBS is there at
* all times.
*/
- if (cpus_have_final_cap(ARM64_SSBS))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
fallthrough;
case SPECTRE_UNAFFECTED:
@@ -486,7 +486,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
- val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ val = get_kernel_wa_level(vcpu, reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
break;
case KVM_REG_ARM_STD_BMAP:
val = READ_ONCE(smccc_feat->std_bmap);
@@ -588,7 +588,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
return -EINVAL;
- if (get_kernel_wa_level(reg->id) < val)
+ if (get_kernel_wa_level(vcpu, reg->id) < val)
return -EINVAL;
return 0;
@@ -624,7 +624,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
* We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
* other way around.
*/
- if (get_kernel_wa_level(reg->id) < wa_level)
+ if (get_kernel_wa_level(vcpu, reg->id) < wa_level)
return -EINVAL;
return 0;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index a509b63bd4dd..0f7658aefa1a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -328,9 +328,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
may_block));
}
-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+ u64 size, bool may_block)
{
- __unmap_stage2_range(mmu, start, size, true);
+ __unmap_stage2_range(mmu, start, size, may_block);
}
void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
@@ -1015,7 +1016,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
- kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
+ kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start, true);
}
hva = vm_end;
} while (hva < reg_end);
@@ -1042,7 +1043,7 @@ void stage2_unmap_vm(struct kvm *kvm)
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap(kvm, true);
write_unlock(&kvm->mmu_lock);
mmap_read_unlock(current->mm);
@@ -1912,7 +1913,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
(range->end - range->start) << PAGE_SHIFT,
range->may_block);
- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap(kvm, range->may_block);
return false;
}
@@ -2179,8 +2180,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
write_lock(&kvm->mmu_lock);
- kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
- kvm_nested_s2_unmap(kvm);
+ kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true);
+ kvm_nested_s2_unmap(kvm, true);
write_unlock(&kvm->mmu_lock);
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f9e30dd34c7a..c4b17d90fc49 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
/* Set the scene for the next search */
kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
- /* Clear the old state */
+ /* Make sure we don't forget to do the laundry */
if (kvm_s2_mmu_valid(s2_mmu))
- kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
+ s2_mmu->pending_unmap = true;
/*
* The virtual VMID (modulo CnP) will be used as a key when matching
@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
out:
atomic_inc(&s2_mmu->refcnt);
+
+ /*
+ * Set the vCPU request to perform an unmap, even if the pending unmap
+ * originates from another vCPU. This guarantees that the MMU has been
+ * completely unmapped before any vCPU actually uses it, and allows
+ * multiple vCPUs to lend a hand with completing the unmap.
+ */
+ if (s2_mmu->pending_unmap)
+ kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);
+
return s2_mmu;
}
@@ -663,6 +673,13 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
{
+ /*
+ * The vCPU kept its reference on the MMU after the last put, keep
+ * rolling with it.
+ */
+ if (vcpu->arch.hw_mmu)
+ return;
+
if (is_hyp_ctxt(vcpu)) {
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
} else {
@@ -674,10 +691,18 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
- if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
+ /*
+ * Keep a reference on the associated stage-2 MMU if the vCPU is
+ * scheduling out and not in WFI emulation, suggesting it is likely to
+ * reuse the MMU sometime soon.
+ */
+ if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI))
+ return;
+
+ if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu))
atomic_dec(&vcpu->arch.hw_mmu->refcnt);
- vcpu->arch.hw_mmu = NULL;
- }
+
+ vcpu->arch.hw_mmu = NULL;
}
/*
@@ -730,7 +755,7 @@ void kvm_nested_s2_wp(struct kvm *kvm)
}
}
-void kvm_nested_s2_unmap(struct kvm *kvm)
+void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
{
int i;
@@ -740,7 +765,7 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
if (kvm_s2_mmu_valid(mmu))
- kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
+ kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
}
}
@@ -1184,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
return 0;
}
+
+void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) {
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ if (mmu->pending_unmap) {
+ kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true);
+ mmu->pending_unmap = false;
+ }
+ write_unlock(&vcpu->kvm->mmu_lock);
+ }
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index dad88e31f953..375052d8cd22 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1527,6 +1527,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
break;
case SYS_ID_AA64PFR2_EL1:
/* We only expose FPMR */
@@ -1550,7 +1558,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
break;
case SYS_ID_AA64MMFR3_EL1:
- val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+ val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
+ ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -2376,7 +2385,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP), },
- ID_SANITISED(ID_AA64PFR1_EL1),
+ ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR |
+ ID_AA64PFR1_EL1_DF2 |
+ ID_AA64PFR1_EL1_MTEX |
+ ID_AA64PFR1_EL1_THE |
+ ID_AA64PFR1_EL1_GCS |
+ ID_AA64PFR1_EL1_MTE_frac |
+ ID_AA64PFR1_EL1_NMI |
+ ID_AA64PFR1_EL1_RNDR_trap |
+ ID_AA64PFR1_EL1_SME |
+ ID_AA64PFR1_EL1_RES0 |
+ ID_AA64PFR1_EL1_MPAM_frac |
+ ID_AA64PFR1_EL1_RAS_frac |
+ ID_AA64PFR1_EL1_MTE)),
ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
@@ -2390,7 +2411,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.get_user = get_id_reg,
.set_user = set_id_aa64dfr0_el1,
.reset = read_sanitised_id_aa64dfr0_el1,
- .val = ID_AA64DFR0_EL1_PMUVer_MASK |
+ /*
+ * Prior to FEAT_Debugv8.9, the architecture defines context-aware
+ * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs).
+ * KVM does not trap + emulate the breakpoint registers, and as such
+ * cannot support a layout that misaligns with the underlying hardware.
+ * While it may be possible to describe a subset that aligns with
+ * hardware, just prevent changes to BRPs and CTX_CMPs altogether for
+ * simplicity.
+ *
+ * See DDI0487K.a, section D2.8.3 Breakpoint types and linking
+ * of breakpoints for more details.
+ */
+ .val = ID_AA64DFR0_EL1_DoubleLock_MASK |
+ ID_AA64DFR0_EL1_WRPs_MASK |
+ ID_AA64DFR0_EL1_PMUVer_MASK |
ID_AA64DFR0_EL1_DebugVer_MASK, },
ID_SANITISED(ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
@@ -2433,6 +2468,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
ID_SANITISED(ID_AA64MMFR4_EL1),
ID_UNALLOCATED(7,5),
@@ -2903,7 +2939,7 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the
* corresponding VMIDs.
*/
- kvm_nested_s2_unmap(vcpu->kvm);
+ kvm_nested_s2_unmap(vcpu->kvm, true);
write_unlock(&vcpu->kvm->mmu_lock);
@@ -2955,7 +2991,30 @@ union tlbi_info {
static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu,
const union tlbi_info *info)
{
- kvm_stage2_unmap_range(mmu, info->range.start, info->range.size);
+ /*
+ * The unmap operation is allowed to drop the MMU lock and block, which
+ * means that @mmu could be used for a different context than the one
+ * currently being invalidated.
+ *
+ * This behavior is still safe, as:
+ *
+ * 1) The vCPU(s) that recycled the MMU are responsible for invalidating
+ * the entire MMU before reusing it, which still honors the intent
+ * of a TLBI.
+ *
+ * 2) Until the guest TLBI instruction is 'retired' (i.e. increment PC
+ * and ERET to the guest), other vCPUs are allowed to use stale
+ * translations.
+ *
+ * 3) Accidentally unmapping an unrelated MMU context is nonfatal, and
+ * at worst may cause more aborts for shadow stage-2 fills.
+ *
+ * Dropping the MMU lock also implies that shadow stage-2 fills could
+ * happen behind the back of the TLBI. This is still safe, though, as
+ * the L1 needs to put its stage-2 in a consistent state before doing
+ * the TLBI.
+ */
+ kvm_stage2_unmap_range(mmu, info->range.start, info->range.size, true);
}
static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -3050,7 +3109,11 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu,
max_size = compute_tlb_inval_range(mmu, info->ipa.addr);
base_addr &= ~(max_size - 1);
- kvm_stage2_unmap_range(mmu, base_addr, max_size);
+ /*
+ * See comment in s2_mmu_unmap_range() for why this is allowed to
+ * reschedule.
+ */
+ kvm_stage2_unmap_range(mmu, base_addr, max_size, true);
}
static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e7c53e8af3d1..d88fdeaf6144 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -417,8 +417,28 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->private_irqs);
vgic_cpu->private_irqs = NULL;
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ /*
+ * If this vCPU is being destroyed because of a failed creation
+ * then unregister the redistributor to avoid leaving behind a
+ * dangling pointer to the vCPU struct.
+ *
+ * vCPUs that have been successfully created (i.e. added to
+ * kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
+ * this function gets called while holding kvm->arch.config_lock
+ * in the VM teardown path and would otherwise introduce a lock
+ * inversion w.r.t. kvm->srcu.
+ *
+ * vCPUs that failed creation are torn down outside of the
+ * kvm->arch.config_lock and do not get unregistered in
+ * kvm_vgic_destroy(), meaning it is both safe and necessary to
+ * do so here.
+ */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
+ vgic_unregister_redist_iodev(vcpu);
+
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ }
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -536,10 +556,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
out:
mutex_unlock(&kvm->arch.config_lock);
out_slots:
- mutex_unlock(&kvm->slots_lock);
-
if (ret)
- kvm_vgic_destroy(kvm);
+ kvm_vm_dead(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
return ret;
}