diff options
author | Oliver Upton <oliver.upton@linux.dev> | 2024-03-07 01:55:53 +0100 |
---|---|---|
committer | Oliver Upton <oliver.upton@linux.dev> | 2024-03-07 01:55:53 +0100 |
commit | 8dbc41105e96641e9c1569f512d19f0046a02463 (patch) | |
tree | c1cbcfd6be578a2748ef881f52ce031085c0864e | |
parent | Merge branch kvm-arm64/vm-configuration into kvmarm/next (diff) | |
parent | KVM: arm64: vgic: Don't acquire the lpi_list_lock in vgic_put_irq() (diff) | |
download | linux-8dbc41105e96641e9c1569f512d19f0046a02463.tar.xz linux-8dbc41105e96641e9c1569f512d19f0046a02463.zip |
Merge branch kvm-arm64/lpi-xarray into kvmarm/next
* kvm-arm64/lpi-xarray:
: xarray-based representation of vgic LPIs
:
: KVM's linked-list of LPI state has proven to be a bottleneck in LPI
: injection paths, due to lock serialization when acquiring / releasing a
: reference on an IRQ.
:
: Start the tedious process of reworking KVM's LPI injection by replacing
: the LPI linked-list with an xarray, leveraging this to allow RCU readers
: to walk it outside of the spinlock.
KVM: arm64: vgic: Don't acquire the lpi_list_lock in vgic_put_irq()
KVM: arm64: vgic: Ensure the irq refcount is nonzero when taking a ref
KVM: arm64: vgic: Rely on RCU protection in vgic_get_lpi()
KVM: arm64: vgic: Free LPI vgic_irq structs in an RCU-safe manner
KVM: arm64: vgic: Use atomics to count LPIs
KVM: arm64: vgic: Get rid of the LPI linked-list
KVM: arm64: vgic-its: Walk the LPI xarray in vgic_copy_lpi_list()
KVM: arm64: vgic-v3: Iterate the xarray to find pending LPIs
KVM: arm64: vgic: Use xarray to find LPI in vgic_get_lpi()
KVM: arm64: vgic: Store LPIs in an xarray
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-debug.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-init.c | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-its.c | 55 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.c | 60 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.h | 15 | ||||
-rw-r--r-- | include/kvm/arm_vgic.h | 9 |
7 files changed, 79 insertions, 69 deletions
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 85606a531dc3..389025ce7749 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); + seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count)); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index df51157420e8..6b1c3b9e8199 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - INIT_LIST_HEAD(&dist->lpi_list_head); INIT_LIST_HEAD(&dist->lpi_translation_cache); raw_spin_lock_init(&dist->lpi_list_lock); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ @@ -366,6 +366,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); + + xa_destroy(&dist->lpi_xa); } static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index c734180a5bcf..2ce842e35e68 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&irq->lpi_list); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + if (ret) { + kfree(irq); + return ERR_PTR(ret); + } + INIT_LIST_HEAD(&irq->ap_list); raw_spin_lock_init(&irq->irq_lock); @@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, * There could be a race with another vgic_add_lpi(), so we need to * check that we don't add a second list entry with the same LPI. */ - list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { - if (oldirq->intid != intid) - continue; - + oldirq = xa_load(&dist->lpi_xa, intid); + if (vgic_try_get_irq_kref(oldirq)) { /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() on the returned pointer once it's - * finished with the IRQ. - */ - vgic_get_irq_kref(irq); + goto out_unlock; + } + ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0)); + if (ret) { + xa_release(&dist->lpi_xa, intid); + kfree(irq); goto out_unlock; } - list_add_tail(&irq->lpi_list, &dist->lpi_list_head); - dist->lpi_list_count++; + atomic_inc(&dist->lpi_count); out_unlock: raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + if (ret) + return ERR_PTR(ret); + /* * We "cache" the configuration table entries in our struct vgic_irq's. * However we only have those structs for mapped IRQs, so we read in @@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, return 0; } +#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1) + /* * Create a snapshot of the current LPIs targeting @vcpu, so that we can * enumerate those LPIs without holding any lock. @@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) { struct vgic_dist *dist = &kvm->arch.vgic; + XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET); struct vgic_irq *irq; unsigned long flags; u32 *intids; @@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) * command). If coming from another path (such as enabling LPIs), * we must be careful not to overrun the array. */ - irq_count = READ_ONCE(dist->lpi_list_count); + irq_count = atomic_read(&dist->lpi_count); intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); if (!intids) return -ENOMEM; raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + rcu_read_lock(); + + xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) { if (i == irq_count) break; /* We don't need to "get" the IRQ, as we hold the list lock. */ @@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) continue; intids[i++] = irq->intid; } + + rcu_read_unlock(); raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); *intid_ptr = intids; @@ -592,8 +604,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); irq = __vgic_its_check_cache(dist, db, devid, eventid); - if (irq) - vgic_get_irq_kref(irq); + if (!vgic_try_get_irq_kref(irq)) + irq = NULL; raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); @@ -637,8 +649,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, * was in the cache, and increment it on the new interrupt. */ if (cte->irq) - __vgic_put_lpi_locked(kvm, cte->irq); + vgic_put_irq(kvm, cte->irq); + /* + * The irq refcount is guaranteed to be nonzero while holding the + * its_lock, as the ITE (and the reference it holds) cannot be freed. + */ + lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); cte->db = db; @@ -669,7 +686,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm) if (!cte->irq) break; - __vgic_put_lpi_locked(kvm, cte->irq); + vgic_put_irq(kvm, cte->irq); cte->irq = NULL; } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 9465d3706ab9..4ea3340786b9 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) struct vgic_irq *irq; gpa_t last_ptr = ~(gpa_t)0; bool vlpi_avail = false; + unsigned long index; int ret = 0; u8 val; @@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) vlpi_avail = true; } - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + xa_for_each(&dist->lpi_xa, index, irq) { int byte_offset, bit_nr; struct kvm_vcpu *vcpu; gpa_t pendbase, ptr; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index db2a95762b1b..f963f410788a 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -30,7 +30,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * its->its_lock (mutex) * vgic_cpu->ap_list_lock must be taken with IRQs disabled * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower @@ -54,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { */ /* - * Iterate over the VM's list of mapped LPIs to find the one with a - * matching interrupt ID and return a reference to the IRQ structure. + * Index the VM's xarray of mapped LPIs and return a reference to the IRQ + * structure. The caller is expected to call vgic_put_irq() later once it's + * finished with the IRQ. */ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = NULL; - unsigned long flags; - - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - if (irq->intid != intid) - continue; + rcu_read_lock(); - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() later once it's finished with the IRQ. - */ - vgic_get_irq_kref(irq); - goto out_unlock; - } - irq = NULL; + irq = xa_load(&dist->lpi_xa, intid); + if (!vgic_try_get_irq_kref(irq)) + irq = NULL; -out_unlock: - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + rcu_read_unlock(); return irq; } @@ -120,22 +111,6 @@ static void vgic_irq_release(struct kref *ref) { } -/* - * Drop the refcount on the LPI. Must be called with lpi_list_lock held. - */ -void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - if (!kref_put(&irq->refcount, vgic_irq_release)) - return; - - list_del(&irq->lpi_list); - dist->lpi_list_count--; - - kfree(irq); -} - void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -144,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) if (irq->intid < VGIC_MIN_LPI) return; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - __vgic_put_lpi_locked(kvm, irq); - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_erase(&dist->lpi_xa, irq->intid); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + atomic_dec(&dist->lpi_count); + kfree_rcu(irq, rcu); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) @@ -404,7 +385,8 @@ retry: /* * Grab a reference to the irq to reflect the fact that it is - * now in the ap_list. + * now in the ap_list. This is safe as the caller must already hold a + * reference on the irq. */ vgic_get_irq_kref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 8d134569d0a1..0c2b82de8fa3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); -void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); @@ -220,12 +219,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); -static inline void vgic_get_irq_kref(struct vgic_irq *irq) +static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) { + if (!irq) + return false; + if (irq->intid < VGIC_MIN_LPI) - return; + return true; - kref_get(&irq->refcount); + return kref_get_unless_zero(&irq->refcount); +} + +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + WARN_ON_ONCE(!vgic_try_get_irq_kref(irq)); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8cc38e836f54..47035946648e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/static_key.h> #include <linux/types.h> +#include <linux/xarray.h> #include <kvm/iodev.h> #include <linux/list.h> #include <linux/jump_label.h> @@ -116,7 +117,7 @@ struct irq_ops { struct vgic_irq { raw_spinlock_t irq_lock; /* Protects the content of the struct */ - struct list_head lpi_list; /* Used to link all LPIs together */ + struct rcu_head rcu; struct list_head ap_list; struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU @@ -273,10 +274,10 @@ struct vgic_dist { */ u64 propbaser; - /* Protects the lpi_list and the count value below. */ + /* Protects the lpi_list. */ raw_spinlock_t lpi_list_lock; - struct list_head lpi_list_head; - int lpi_list_count; + struct xarray lpi_xa; + atomic_t lpi_count; /* LPI translation cache */ struct list_head lpi_translation_cache; |