From 30656177c4080460b936709ff6648f201d7d2c1a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 21 Mar 2018 12:46:21 -0600 Subject: vfio/pci: Add ioeventfd support The ioeventfd here is actually irqfd handling of an ioeventfd such as supported in KVM. A user is able to pre-program a device write to occur when the eventfd triggers. This is yet another instance of eventfd-irqfd triggering between KVM and vfio. The impetus for this is high frequency writes to pages which are virtualized in QEMU. Enabling this near-direct write path for selected registers within the virtualized page can improve performance and reduce overhead. Specifically this is initially targeted at NVIDIA graphics cards where the driver issues a write to an MMIO register within a virtualized region in order to allow the MSI interrupt to re-trigger. Reviewed-by: Peter Xu Reviewed-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'drivers/vfio/pci/vfio_pci.c') diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b0f759476900..c6822149b394 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -305,6 +305,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; struct vfio_pci_dummy_resource *dummy_res, *tmp; + struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp; int i, bar; /* Stop the device from further DMA */ @@ -314,6 +315,15 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) VFIO_IRQ_SET_ACTION_TRIGGER, vdev->irq_type, 0, 0, NULL); + /* Device closed, don't need mutex here */ + list_for_each_entry_safe(ioeventfd, ioeventfd_tmp, + &vdev->ioeventfds_list, next) { + vfio_virqfd_disable(&ioeventfd->virqfd); + list_del(&ioeventfd->next); + kfree(ioeventfd); + } + vdev->ioeventfds_nr = 0; + vdev->virq_disabled = false; for (i = 0; i < vdev->num_regions; i++) @@ -1012,6 +1022,28 @@ hot_reset_release: kfree(groups); return ret; + } else if (cmd == VFIO_DEVICE_IOEVENTFD) { + struct vfio_device_ioeventfd ioeventfd; + int count; + + minsz = offsetofend(struct vfio_device_ioeventfd, fd); + + if (copy_from_user(&ioeventfd, (void __user *)arg, minsz)) + return -EFAULT; + + if (ioeventfd.argsz < minsz) + return -EINVAL; + + if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK) + return -EINVAL; + + count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK; + + if (hweight8(count) != 1 || ioeventfd.fd < -1) + return -EINVAL; + + return vfio_pci_ioeventfd(vdev, ioeventfd.offset, + ioeventfd.data, count, ioeventfd.fd); } return -ENOTTY; @@ -1174,6 +1206,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); + mutex_init(&vdev->ioeventfds_lock); + INIT_LIST_HEAD(&vdev->ioeventfds_list); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { @@ -1215,6 +1249,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); kfree(vdev->region); + mutex_destroy(&vdev->ioeventfds_lock); kfree(vdev); if (vfio_pci_is_vga(pdev)) { -- cgit v1.2.3