diff options
Diffstat (limited to 'virt/kvm/eventfd.c')
-rw-r--r-- | virt/kvm/eventfd.c | 308 |
1 files changed, 167 insertions, 141 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa49..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -23,6 +23,7 @@ #include <linux/kvm_host.h> #include <linux/kvm.h> +#include <linux/kvm_irqfd.h> #include <linux/workqueue.h> #include <linux/syscalls.h> #include <linux/wait.h> @@ -34,73 +35,20 @@ #include <linux/srcu.h> #include <linux/slab.h> #include <linux/seqlock.h> +#include <linux/irqbypass.h> #include <trace/events/kvm.h> #include <kvm/iodev.h> #ifdef CONFIG_HAVE_KVM_IRQFD -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -121,12 +69,13 @@ irqfd_inject(struct work_struct *work) static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); kvm = resampler->kvm; kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, @@ -141,9 +90,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; mutex_lock(&kvm->irqfds.resampler_lock); @@ -168,7 +117,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -191,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -198,7 +151,7 @@ irqfd_shutdown(struct work_struct *work) /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? false : true; } @@ -209,7 +162,7 @@ irqfd_is_active(struct _irqfd *irqfd) * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -218,13 +171,23 @@ irqfd_deactivate(struct _irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq_inatomic( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; @@ -238,10 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq.type == KVM_IRQ_ROUTING_MSI) - kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } @@ -274,37 +236,54 @@ static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - int i, n_entries; + int n_entries; n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - e = entries; - for (i = 0; i < n_entries; ++i, ++e) { - /* Only fast-path MSI. */ - if (e->type == KVM_IRQ_ROUTING_MSI) - irqfd->irq_entry = *e; - } + if (n_entries == 1) + irqfd->irq_entry = *e; + else + irqfd->irq_entry.type = 0; write_seqcount_end(&irqfd->irq_entry_sc); } +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} +#endif + static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; @@ -340,7 +319,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -428,6 +407,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * we might race against the POLLHUP */ fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; @@ -469,9 +459,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) } EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ int gsi, idx; trace_kvm_ack_irq(irqchip, pin); @@ -479,10 +478,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } @@ -525,7 +521,7 @@ kvm_eventfd_init(struct kvm *kvm) static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -581,7 +577,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -604,13 +600,23 @@ kvm_irqfd_release(struct kvm *kvm) */ void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { irqfd_update(kvm, irqfd); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + WARN_ON(ret); + } +#endif + } + spin_unlock_irq(&kvm->irqfds.lock); } @@ -771,40 +777,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -843,16 +823,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -860,8 +830,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -873,14 +841,13 @@ fail: } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -901,10 +868,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -918,6 +881,69 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + + return ret; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + int ret; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { |