summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOliver Upton <oliver.upton@linux.dev>2024-10-08 01:30:27 +0200
committerMarc Zyngier <maz@kernel.org>2024-10-08 11:40:27 +0200
commitc268f204f7c5784e84583c1c44d427bac09f517a (patch)
tree10b75651a487bf8ad69cb263cc1b47c0515b22cc
parentKVM: arm64: nv: Do not block when unmapping stage-2 if disallowed (diff)
downloadlinux-c268f204f7c5784e84583c1c44d427bac09f517a.tar.xz
linux-c268f204f7c5784e84583c1c44d427bac09f517a.zip
KVM: arm64: nv: Punt stage-2 recycling to a vCPU request
Currently, when a nested MMU is repurposed for some other MMU context, KVM unmaps everything during vcpu_load() while holding the MMU lock for write. This is quite a performance bottleneck for large nested VMs, as all vCPU scheduling will spin until the unmap completes. Start punting the MMU cleanup to a vCPU request, where it is then possible to periodically release the MMU lock and CPU in the presence of contention. Ensure that no vCPU winds up using a stale MMU by tracking the pending unmap on the S2 MMU itself and requesting an unmap on every vCPU that finds it. Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20241007233028.2236133-4-oliver.upton@linux.dev Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r--arch/arm64/include/asm/kvm_host.h7
-rw-r--r--arch/arm64/include/asm/kvm_nested.h2
-rw-r--r--arch/arm64/kvm/arm.c2
-rw-r--r--arch/arm64/kvm/nested.c28
4 files changed, 37 insertions, 2 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 94cff508874b..bf64fed9820e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -51,6 +51,7 @@
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -212,6 +213,12 @@ struct kvm_s2_mmu {
bool nested_stage2_enabled;
/*
+ * true when this MMU needs to be unmapped before being used for a new
+ * purpose.
+ */
+ bool pending_unmap;
+
+ /*
* 0: Nobody is currently using this, check vttbr for validity
* >0: Somebody is actively using this.
*/
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index e74b90dcfac4..233e65522716 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+
struct kvm_s2_trans {
phys_addr_t output;
unsigned long block_size;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a0d01c46e408..c7b659604bae 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1031,6 +1031,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_dirty_ring_check_request(vcpu))
return 0;
+
+ check_nested_vcpu_requests(vcpu);
}
return 1;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 58d3b998793a..c4b17d90fc49 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
/* Set the scene for the next search */
kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
- /* Clear the old state */
+ /* Make sure we don't forget to do the laundry */
if (kvm_s2_mmu_valid(s2_mmu))
- kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu), false);
+ s2_mmu->pending_unmap = true;
/*
* The virtual VMID (modulo CnP) will be used as a key when matching
@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
out:
atomic_inc(&s2_mmu->refcnt);
+
+ /*
+ * Set the vCPU request to perform an unmap, even if the pending unmap
+ * originates from another vCPU. This guarantees that the MMU has been
+ * completely unmapped before any vCPU actually uses it, and allows
+ * multiple vCPUs to lend a hand with completing the unmap.
+ */
+ if (s2_mmu->pending_unmap)
+ kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);
+
return s2_mmu;
}
@@ -1199,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
return 0;
}
+
+void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) {
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ if (mmu->pending_unmap) {
+ kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true);
+ mmu->pending_unmap = false;
+ }
+ write_unlock(&vcpu->kvm->mmu_lock);
+ }
+}