diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc.c | 127 | ||||
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_core.c | 174 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_driver.c | 19 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_private.h | 39 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_sysfs.c | 59 | ||||
-rw-r--r-- | drivers/vfio/mdev/vfio_mdev.c | 80 | ||||
-rw-r--r-- | drivers/vfio/pci/Kconfig | 6 | ||||
-rw-r--r-- | drivers/vfio/pci/Makefile | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 278 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_igd.c | 53 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_nvlink2.c | 490 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 15 | ||||
-rw-r--r-- | drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c | 2 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_amba.c | 8 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform.c | 20 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_common.c | 56 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_private.h | 5 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 210 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 71 |
21 files changed, 588 insertions, 1128 deletions
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index f27e25112c40..980e59551301 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -75,7 +75,8 @@ static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev) goto unlock; } - cont_vdev = vfio_device_data(device); + cont_vdev = + container_of(device, struct vfio_fsl_mc_device, vdev); if (!cont_vdev || !cont_vdev->reflck) { vfio_device_put(device); ret = -ENODEV; @@ -135,9 +136,10 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) kfree(vdev->regions); } -static int vfio_fsl_mc_open(void *device_data) +static int vfio_fsl_mc_open(struct vfio_device *core_vdev) { - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); int ret; if (!try_module_get(THIS_MODULE)) @@ -161,9 +163,10 @@ err_reg_init: return ret; } -static void vfio_fsl_mc_release(void *device_data) +static void vfio_fsl_mc_release(struct vfio_device *core_vdev) { - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); int ret; mutex_lock(&vdev->reflck->lock); @@ -197,11 +200,12 @@ static void vfio_fsl_mc_release(void *device_data) module_put(THIS_MODULE); } -static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd, - unsigned long arg) +static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev, + unsigned int cmd, unsigned long arg) { unsigned long minsz; - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); struct fsl_mc_device *mc_dev = vdev->mc_dev; switch (cmd) { @@ -327,10 +331,11 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd, } } -static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf, +static ssize_t vfio_fsl_mc_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos) { - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; struct fsl_mc_device *mc_dev = vdev->mc_dev; @@ -404,10 +409,12 @@ static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data) return 0; } -static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_fsl_mc_write(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) { - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK; struct fsl_mc_device *mc_dev = vdev->mc_dev; @@ -468,9 +475,11 @@ static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region, size, vma->vm_page_prot); } -static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma) +static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) { - struct vfio_fsl_mc_device *vdev = device_data; + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); struct fsl_mc_device *mc_dev = vdev->mc_dev; unsigned int index; @@ -568,23 +577,39 @@ static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev) dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret); goto out_nc_unreg; } + return 0; + +out_nc_unreg: + bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); + return ret; +} +static int vfio_fsl_mc_scan_container(struct fsl_mc_device *mc_dev) +{ + int ret; + + /* non dprc devices do not scan for other devices */ + if (!is_fsl_mc_bus_dprc(mc_dev)) + return 0; ret = dprc_scan_container(mc_dev, false); if (ret) { - dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret); - goto out_dprc_cleanup; + dev_err(&mc_dev->dev, + "VFIO_FSL_MC: Container scanning failed (%d)\n", ret); + dprc_remove_devices(mc_dev, NULL, 0); + return ret; } - return 0; +} + +static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + + if (!is_fsl_mc_bus_dprc(mc_dev)) + return; -out_dprc_cleanup: - dprc_remove_devices(mc_dev, NULL, 0); dprc_cleanup(mc_dev); -out_nc_unreg: bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); - vdev->nb.notifier_call = NULL; - - return ret; } static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) @@ -600,36 +625,50 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) return -EINVAL; } - vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL); + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { ret = -ENOMEM; goto out_group_put; } + vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops); vdev->mc_dev = mc_dev; - - ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev); - if (ret) { - dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n"); - goto out_group_put; - } + mutex_init(&vdev->igate); ret = vfio_fsl_mc_reflck_attach(vdev); if (ret) - goto out_group_dev; + goto out_kfree; ret = vfio_fsl_mc_init_device(vdev); if (ret) goto out_reflck; - mutex_init(&vdev->igate); + ret = vfio_register_group_dev(&vdev->vdev); + if (ret) { + dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n"); + goto out_device; + } + /* + * This triggers recursion into vfio_fsl_mc_probe() on another device + * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the + * vfio_add_group_dev() above. It has no impact on this vdev, so it is + * safe to be after the vfio device is made live. + */ + ret = vfio_fsl_mc_scan_container(mc_dev); + if (ret) + goto out_group_dev; + dev_set_drvdata(dev, vdev); return 0; +out_group_dev: + vfio_unregister_group_dev(&vdev->vdev); +out_device: + vfio_fsl_uninit_device(vdev); out_reflck: vfio_fsl_mc_reflck_put(vdev->reflck); -out_group_dev: - vfio_del_group_dev(dev); +out_kfree: + kfree(vdev); out_group_put: vfio_iommu_group_put(group, dev); return ret; @@ -637,25 +676,17 @@ out_group_put: static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) { - struct vfio_fsl_mc_device *vdev; struct device *dev = &mc_dev->dev; + struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev); - vdev = vfio_del_group_dev(dev); - if (!vdev) - return -EINVAL; - + vfio_unregister_group_dev(&vdev->vdev); mutex_destroy(&vdev->igate); + dprc_remove_devices(mc_dev, NULL, 0); + vfio_fsl_uninit_device(vdev); vfio_fsl_mc_reflck_put(vdev->reflck); - if (is_fsl_mc_bus_dprc(mc_dev)) { - dprc_remove_devices(mc_dev, NULL, 0); - dprc_cleanup(mc_dev); - } - - if (vdev->nb.notifier_call) - bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb); - + kfree(vdev); vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); return 0; diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h index a97ee691ed47..89700e00e77d 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h @@ -36,6 +36,7 @@ struct vfio_fsl_mc_region { }; struct vfio_fsl_mc_device { + struct vfio_device vdev; struct fsl_mc_device *mc_dev; struct notifier_block nb; int refcnt; diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 6de97d25a3f8..2a85d6fcb7dd 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -29,39 +29,39 @@ static DEFINE_MUTEX(mdev_list_lock); struct device *mdev_parent_dev(struct mdev_device *mdev) { - return mdev->parent->dev; + return mdev->type->parent->dev; } EXPORT_SYMBOL(mdev_parent_dev); -void *mdev_get_drvdata(struct mdev_device *mdev) -{ - return mdev->driver_data; -} -EXPORT_SYMBOL(mdev_get_drvdata); - -void mdev_set_drvdata(struct mdev_device *mdev, void *data) -{ - mdev->driver_data = data; -} -EXPORT_SYMBOL(mdev_set_drvdata); - -struct device *mdev_dev(struct mdev_device *mdev) +/* + * Return the index in supported_type_groups that this mdev_device was created + * from. + */ +unsigned int mdev_get_type_group_id(struct mdev_device *mdev) { - return &mdev->dev; + return mdev->type->type_group_id; } -EXPORT_SYMBOL(mdev_dev); +EXPORT_SYMBOL(mdev_get_type_group_id); -struct mdev_device *mdev_from_dev(struct device *dev) +/* + * Used in mdev_type_attribute sysfs functions to return the index in the + * supported_type_groups that the sysfs is called from. + */ +unsigned int mtype_get_type_group_id(struct mdev_type *mtype) { - return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL; + return mtype->type_group_id; } -EXPORT_SYMBOL(mdev_from_dev); +EXPORT_SYMBOL(mtype_get_type_group_id); -const guid_t *mdev_uuid(struct mdev_device *mdev) +/* + * Used in mdev_type_attribute sysfs functions to return the parent struct + * device + */ +struct device *mtype_get_parent_dev(struct mdev_type *mtype) { - return &mdev->uuid; + return mtype->parent->dev; } -EXPORT_SYMBOL(mdev_uuid); +EXPORT_SYMBOL(mtype_get_parent_dev); /* Should be called holding parent_list_lock */ static struct mdev_parent *__find_parent_device(struct device *dev) @@ -75,7 +75,7 @@ static struct mdev_parent *__find_parent_device(struct device *dev) return NULL; } -static void mdev_release_parent(struct kref *kref) +void mdev_release_parent(struct kref *kref) { struct mdev_parent *parent = container_of(kref, struct mdev_parent, ref); @@ -85,31 +85,14 @@ static void mdev_release_parent(struct kref *kref) put_device(dev); } -static struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) -{ - if (parent) - kref_get(&parent->ref); - - return parent; -} - -static void mdev_put_parent(struct mdev_parent *parent) -{ - if (parent) - kref_put(&parent->ref, mdev_release_parent); -} - /* Caller must hold parent unreg_sem read or write lock */ static void mdev_device_remove_common(struct mdev_device *mdev) { - struct mdev_parent *parent; - struct mdev_type *type; + struct mdev_parent *parent = mdev->type->parent; int ret; - type = to_mdev_type(mdev->type_kobj); - mdev_remove_sysfs_files(&mdev->dev, type); + mdev_remove_sysfs_files(mdev); device_del(&mdev->dev); - parent = mdev->parent; lockdep_assert_held(&parent->unreg_sem); ret = parent->ops->remove(mdev); if (ret) @@ -117,17 +100,14 @@ static void mdev_device_remove_common(struct mdev_device *mdev) /* Balances with device_initialize() */ put_device(&mdev->dev); - mdev_put_parent(parent); } static int mdev_device_remove_cb(struct device *dev, void *data) { - if (dev_is_mdev(dev)) { - struct mdev_device *mdev; + struct mdev_device *mdev = mdev_from_dev(dev); - mdev = to_mdev_device(dev); + if (mdev) mdev_device_remove_common(mdev); - } return 0; } @@ -256,8 +236,13 @@ void mdev_unregister_device(struct device *dev) } EXPORT_SYMBOL(mdev_unregister_device); -static void mdev_device_free(struct mdev_device *mdev) +static void mdev_device_release(struct device *dev) { + struct mdev_device *mdev = to_mdev_device(dev); + + /* Pairs with the get in mdev_device_create() */ + kobject_put(&mdev->type->kobj); + mutex_lock(&mdev_list_lock); list_del(&mdev->next); mutex_unlock(&mdev_list_lock); @@ -266,24 +251,11 @@ static void mdev_device_free(struct mdev_device *mdev) kfree(mdev); } -static void mdev_device_release(struct device *dev) -{ - struct mdev_device *mdev = to_mdev_device(dev); - - mdev_device_free(mdev); -} - -int mdev_device_create(struct kobject *kobj, - struct device *dev, const guid_t *uuid) +int mdev_device_create(struct mdev_type *type, const guid_t *uuid) { int ret; struct mdev_device *mdev, *tmp; - struct mdev_parent *parent; - struct mdev_type *type = to_mdev_type(kobj); - - parent = mdev_get_parent(type->parent); - if (!parent) - return -EINVAL; + struct mdev_parent *parent = type->parent; mutex_lock(&mdev_list_lock); @@ -291,50 +263,50 @@ int mdev_device_create(struct kobject *kobj, list_for_each_entry(tmp, &mdev_list, next) { if (guid_equal(&tmp->uuid, uuid)) { mutex_unlock(&mdev_list_lock); - ret = -EEXIST; - goto mdev_fail; + return -EEXIST; } } mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) { mutex_unlock(&mdev_list_lock); - ret = -ENOMEM; - goto mdev_fail; + return -ENOMEM; } + device_initialize(&mdev->dev); + mdev->dev.parent = parent->dev; + mdev->dev.bus = &mdev_bus_type; + mdev->dev.release = mdev_device_release; + mdev->dev.groups = parent->ops->mdev_attr_groups; + mdev->type = type; + /* Pairs with the put in mdev_device_release() */ + kobject_get(&type->kobj); + guid_copy(&mdev->uuid, uuid); list_add(&mdev->next, &mdev_list); mutex_unlock(&mdev_list_lock); - mdev->parent = parent; + ret = dev_set_name(&mdev->dev, "%pUl", uuid); + if (ret) + goto out_put_device; /* Check if parent unregistration has started */ if (!down_read_trylock(&parent->unreg_sem)) { - mdev_device_free(mdev); ret = -ENODEV; - goto mdev_fail; + goto out_put_device; } - device_initialize(&mdev->dev); - mdev->dev.parent = dev; - mdev->dev.bus = &mdev_bus_type; - mdev->dev.release = mdev_device_release; - dev_set_name(&mdev->dev, "%pUl", uuid); - mdev->dev.groups = parent->ops->mdev_attr_groups; - mdev->type_kobj = kobj; - - ret = parent->ops->create(kobj, mdev); + ret = parent->ops->create(mdev); if (ret) - goto ops_create_fail; + goto out_unlock; ret = device_add(&mdev->dev); if (ret) - goto add_fail; + goto out_remove; - ret = mdev_create_sysfs_files(&mdev->dev, type); + ret = mdev_create_sysfs_files(mdev); if (ret) - goto sysfs_fail; + goto out_del; mdev->active = true; dev_dbg(&mdev->dev, "MDEV: created\n"); @@ -342,24 +314,21 @@ int mdev_device_create(struct kobject *kobj, return 0; -sysfs_fail: +out_del: device_del(&mdev->dev); -add_fail: +out_remove: parent->ops->remove(mdev); -ops_create_fail: +out_unlock: up_read(&parent->unreg_sem); +out_put_device: put_device(&mdev->dev); -mdev_fail: - mdev_put_parent(parent); return ret; } -int mdev_device_remove(struct device *dev) +int mdev_device_remove(struct mdev_device *mdev) { - struct mdev_device *mdev, *tmp; - struct mdev_parent *parent; - - mdev = to_mdev_device(dev); + struct mdev_device *tmp; + struct mdev_parent *parent = mdev->type->parent; mutex_lock(&mdev_list_lock); list_for_each_entry(tmp, &mdev_list, next) { @@ -380,7 +349,6 @@ int mdev_device_remove(struct device *dev) mdev->active = false; mutex_unlock(&mdev_list_lock); - parent = mdev->parent; /* Check if parent unregistration has started */ if (!down_read_trylock(&parent->unreg_sem)) return -ENODEV; @@ -390,24 +358,6 @@ int mdev_device_remove(struct device *dev) return 0; } -int mdev_set_iommu_device(struct device *dev, struct device *iommu_device) -{ - struct mdev_device *mdev = to_mdev_device(dev); - - mdev->iommu_device = iommu_device; - - return 0; -} -EXPORT_SYMBOL(mdev_set_iommu_device); - -struct device *mdev_get_iommu_device(struct device *dev) -{ - struct mdev_device *mdev = to_mdev_device(dev); - - return mdev->iommu_device; -} -EXPORT_SYMBOL(mdev_get_iommu_device); - static int __init mdev_init(void) { return mdev_bus_register(); diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c index 0d3223aee20b..041699571b7e 100644 --- a/drivers/vfio/mdev/mdev_driver.c +++ b/drivers/vfio/mdev/mdev_driver.c @@ -39,7 +39,8 @@ static void mdev_detach_iommu(struct mdev_device *mdev) static int mdev_probe(struct device *dev) { - struct mdev_driver *drv = to_mdev_driver(dev->driver); + struct mdev_driver *drv = + container_of(dev->driver, struct mdev_driver, driver); struct mdev_device *mdev = to_mdev_device(dev); int ret; @@ -47,8 +48,8 @@ static int mdev_probe(struct device *dev) if (ret) return ret; - if (drv && drv->probe) { - ret = drv->probe(dev); + if (drv->probe) { + ret = drv->probe(mdev); if (ret) mdev_detach_iommu(mdev); } @@ -58,11 +59,12 @@ static int mdev_probe(struct device *dev) static int mdev_remove(struct device *dev) { - struct mdev_driver *drv = to_mdev_driver(dev->driver); + struct mdev_driver *drv = + container_of(dev->driver, struct mdev_driver, driver); struct mdev_device *mdev = to_mdev_device(dev); - if (drv && drv->remove) - drv->remove(dev); + if (drv->remove) + drv->remove(mdev); mdev_detach_iommu(mdev); @@ -79,16 +81,13 @@ EXPORT_SYMBOL_GPL(mdev_bus_type); /** * mdev_register_driver - register a new MDEV driver * @drv: the driver to register - * @owner: module owner of driver to be registered * * Returns a negative value on error, otherwise 0. **/ -int mdev_register_driver(struct mdev_driver *drv, struct module *owner) +int mdev_register_driver(struct mdev_driver *drv) { /* initialize common driver fields */ - drv->driver.name = drv->name; drv->driver.bus = &mdev_bus_type; - drv->driver.owner = owner; /* register with core */ return driver_register(&drv->driver); diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index 7d922950caaf..6999c89db7b1 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Mediated device interal definitions + * Mediated device internal definitions * * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. * Author: Neo Jia <cjia@nvidia.com> @@ -24,26 +24,12 @@ struct mdev_parent { struct rw_semaphore unreg_sem; }; -struct mdev_device { - struct device dev; - struct mdev_parent *parent; - guid_t uuid; - void *driver_data; - struct list_head next; - struct kobject *type_kobj; - struct device *iommu_device; - bool active; -}; - -#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) -#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) - struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; struct mdev_parent *parent; struct list_head next; - struct attribute_group *group; + unsigned int type_group_id; }; #define to_mdev_type_attr(_attr) \ @@ -54,11 +40,22 @@ struct mdev_type { int parent_create_sysfs_files(struct mdev_parent *parent); void parent_remove_sysfs_files(struct mdev_parent *parent); -int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type); -void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type); +int mdev_create_sysfs_files(struct mdev_device *mdev); +void mdev_remove_sysfs_files(struct mdev_device *mdev); + +int mdev_device_create(struct mdev_type *kobj, const guid_t *uuid); +int mdev_device_remove(struct mdev_device *dev); + +void mdev_release_parent(struct kref *kref); + +static inline void mdev_get_parent(struct mdev_parent *parent) +{ + kref_get(&parent->ref); +} -int mdev_device_create(struct kobject *kobj, - struct device *dev, const guid_t *uuid); -int mdev_device_remove(struct device *dev); +static inline void mdev_put_parent(struct mdev_parent *parent) +{ + kref_put(&parent->ref, mdev_release_parent); +} #endif /* MDEV_PRIVATE_H */ diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 917fd84c1c6f..f5cf1931c54e 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -26,7 +26,7 @@ static ssize_t mdev_type_attr_show(struct kobject *kobj, ssize_t ret = -EIO; if (attr->show) - ret = attr->show(kobj, type->parent->dev, buf); + ret = attr->show(type, attr, buf); return ret; } @@ -39,7 +39,7 @@ static ssize_t mdev_type_attr_store(struct kobject *kobj, ssize_t ret = -EIO; if (attr->store) - ret = attr->store(&type->kobj, type->parent->dev, buf, count); + ret = attr->store(type, attr, buf, count); return ret; } @@ -48,8 +48,9 @@ static const struct sysfs_ops mdev_type_sysfs_ops = { .store = mdev_type_attr_store, }; -static ssize_t create_store(struct kobject *kobj, struct device *dev, - const char *buf, size_t count) +static ssize_t create_store(struct mdev_type *mtype, + struct mdev_type_attribute *attr, const char *buf, + size_t count) { char *str; guid_t uuid; @@ -67,7 +68,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev, if (ret) return ret; - ret = mdev_device_create(kobj, dev, &uuid); + ret = mdev_device_create(mtype, &uuid); if (ret) return ret; @@ -81,6 +82,8 @@ static void mdev_type_release(struct kobject *kobj) struct mdev_type *type = to_mdev_type(kobj); pr_debug("Releasing group %s\n", kobj->name); + /* Pairs with the get in add_mdev_supported_type() */ + mdev_put_parent(type->parent); kfree(type); } @@ -90,9 +93,11 @@ static struct kobj_type mdev_type_ktype = { }; static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, - struct attribute_group *group) + unsigned int type_group_id) { struct mdev_type *type; + struct attribute_group *group = + parent->ops->supported_type_groups[type_group_id]; int ret; if (!group->name) { @@ -105,6 +110,10 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, return ERR_PTR(-ENOMEM); type->kobj.kset = parent->mdev_types_kset; + type->parent = parent; + /* Pairs with the put in mdev_type_release() */ + mdev_get_parent(parent); + type->type_group_id = type_group_id; ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL, "%s-%s", dev_driver_string(parent->dev), @@ -130,9 +139,6 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, ret = -ENOMEM; goto attrs_failed; } - - type->group = group; - type->parent = parent; return type; attrs_failed: @@ -147,8 +153,11 @@ attr_create_failed: static void remove_mdev_supported_type(struct mdev_type *type) { + struct attribute_group *group = + type->parent->ops->supported_type_groups[type->type_group_id]; + sysfs_remove_files(&type->kobj, - (const struct attribute **)type->group->attrs); + (const struct attribute **)group->attrs); kobject_put(type->devices_kobj); sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr); kobject_del(&type->kobj); @@ -162,8 +171,7 @@ static int add_mdev_supported_type_groups(struct mdev_parent *parent) for (i = 0; parent->ops->supported_type_groups[i]; i++) { struct mdev_type *type; - type = add_mdev_supported_type(parent, - parent->ops->supported_type_groups[i]); + type = add_mdev_supported_type(parent, i); if (IS_ERR(type)) { struct mdev_type *ltype, *tmp; @@ -225,6 +233,7 @@ create_err: static ssize_t remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct mdev_device *mdev = to_mdev_device(dev); unsigned long val; if (kstrtoul(buf, 0, &val) < 0) @@ -233,7 +242,7 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, if (val && device_remove_file_self(dev, attr)) { int ret; - ret = mdev_device_remove(dev); + ret = mdev_device_remove(mdev); if (ret) return ret; } @@ -248,34 +257,38 @@ static const struct attribute *mdev_device_attrs[] = { NULL, }; -int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type) +int mdev_create_sysfs_files(struct mdev_device *mdev) { + struct mdev_type *type = mdev->type; + struct kobject *kobj = &mdev->dev.kobj; int ret; - ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev)); + ret = sysfs_create_link(type->devices_kobj, kobj, dev_name(&mdev->dev)); if (ret) return ret; - ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type"); + ret = sysfs_create_link(kobj, &type->kobj, "mdev_type"); if (ret) goto type_link_failed; - ret = sysfs_create_files(&dev->kobj, mdev_device_attrs); + ret = sysfs_create_files(kobj, mdev_device_attrs); if (ret) goto create_files_failed; return ret; create_files_failed: - sysfs_remove_link(&dev->kobj, "mdev_type"); + sysfs_remove_link(kobj, "mdev_type"); type_link_failed: - sysfs_remove_link(type->devices_kobj, dev_name(dev)); + sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev)); return ret; } -void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type) +void mdev_remove_sysfs_files(struct mdev_device *mdev) { - sysfs_remove_files(&dev->kobj, mdev_device_attrs); - sysfs_remove_link(&dev->kobj, "mdev_type"); - sysfs_remove_link(type->devices_kobj, dev_name(dev)); + struct kobject *kobj = &mdev->dev.kobj; + + sysfs_remove_files(kobj, mdev_device_attrs); + sysfs_remove_link(kobj, "mdev_type"); + sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev)); } diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index b52eea128549..922729071c5a 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -21,10 +21,11 @@ #define DRIVER_AUTHOR "NVIDIA Corporation" #define DRIVER_DESC "VFIO based driver for Mediated device" -static int vfio_mdev_open(void *device_data) +static int vfio_mdev_open(struct vfio_device *core_vdev) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; + int ret; if (unlikely(!parent->ops->open)) @@ -40,10 +41,10 @@ static int vfio_mdev_open(void *device_data) return ret; } -static void vfio_mdev_release(void *device_data) +static void vfio_mdev_release(struct vfio_device *core_vdev) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (likely(parent->ops->release)) parent->ops->release(mdev); @@ -51,11 +52,11 @@ static void vfio_mdev_release(void *device_data) module_put(THIS_MODULE); } -static long vfio_mdev_unlocked_ioctl(void *device_data, +static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->ioctl)) return -EINVAL; @@ -63,11 +64,11 @@ static long vfio_mdev_unlocked_ioctl(void *device_data, return parent->ops->ioctl(mdev, cmd, arg); } -static ssize_t vfio_mdev_read(void *device_data, char __user *buf, +static ssize_t vfio_mdev_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->read)) return -EINVAL; @@ -75,11 +76,12 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf, return parent->ops->read(mdev, buf, count, ppos); } -static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_mdev_write(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->write)) return -EINVAL; @@ -87,10 +89,11 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, return parent->ops->write(mdev, buf, count, ppos); } -static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) +static int vfio_mdev_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->mmap)) return -EINVAL; @@ -98,10 +101,10 @@ static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) return parent->ops->mmap(mdev, vma); } -static void vfio_mdev_request(void *device_data, unsigned int count) +static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count) { - struct mdev_device *mdev = device_data; - struct mdev_parent *parent = mdev->parent; + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; if (parent->ops->request) parent->ops->request(mdev, count); @@ -121,27 +124,46 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = { .request = vfio_mdev_request, }; -static int vfio_mdev_probe(struct device *dev) +static int vfio_mdev_probe(struct mdev_device *mdev) { - struct mdev_device *mdev = to_mdev_device(dev); + struct vfio_device *vdev; + int ret; - return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev); + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); + ret = vfio_register_group_dev(vdev); + if (ret) { + kfree(vdev); + return ret; + } + dev_set_drvdata(&mdev->dev, vdev); + return 0; } -static void vfio_mdev_remove(struct device *dev) +static void vfio_mdev_remove(struct mdev_device *mdev) { - vfio_del_group_dev(dev); + struct vfio_device *vdev = dev_get_drvdata(&mdev->dev); + + vfio_unregister_group_dev(vdev); + kfree(vdev); } static struct mdev_driver vfio_mdev_driver = { - .name = "vfio_mdev", + .driver = { + .name = "vfio_mdev", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + }, .probe = vfio_mdev_probe, .remove = vfio_mdev_remove, }; static int __init vfio_mdev_init(void) { - return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE); + return mdev_register_driver(&vfio_mdev_driver); } static void __exit vfio_mdev_exit(void) diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 4abddbebd4b2..53ce78d7d07b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -39,9 +39,3 @@ config VFIO_PCI_IGD and LPC bridge config space. To enable Intel IGD assignment through vfio-pci, say Y. - -config VFIO_PCI_NVLINK2 - def_bool y - depends on VFIO_PCI && PPC_POWERNV && SPAPR_TCE_IOMMU - help - VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index eff97a7cd9f1..3ff42093962f 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -2,7 +2,6 @@ vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o -vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 65e7e6b44578..bd7c482c948a 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -378,7 +378,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; - if (vfio_pci_is_vga(pdev) && pdev->vendor == PCI_VENDOR_ID_INTEL && IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { @@ -389,24 +388,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } } - if (pdev->vendor == PCI_VENDOR_ID_NVIDIA && - IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) { - ret = vfio_pci_nvdia_v100_nvlink2_init(vdev); - if (ret && ret != -ENODEV) { - pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n"); - goto disable_exit; - } - } - - if (pdev->vendor == PCI_VENDOR_ID_IBM && - IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) { - ret = vfio_pci_ibm_npu2_init(vdev); - if (ret && ret != -ENODEV) { - pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n"); - goto disable_exit; - } - } - vfio_pci_probe_mmaps(vdev); return 0; @@ -517,30 +498,29 @@ out: static struct pci_driver vfio_pci_driver; -static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev, - struct vfio_device **pf_dev) +static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev) { struct pci_dev *physfn = pci_physfn(vdev->pdev); + struct vfio_device *pf_dev; if (!vdev->pdev->is_virtfn) return NULL; - *pf_dev = vfio_device_get_from_dev(&physfn->dev); - if (!*pf_dev) + pf_dev = vfio_device_get_from_dev(&physfn->dev); + if (!pf_dev) return NULL; if (pci_dev_driver(physfn) != &vfio_pci_driver) { - vfio_device_put(*pf_dev); + vfio_device_put(pf_dev); return NULL; } - return vfio_device_data(*pf_dev); + return container_of(pf_dev, struct vfio_pci_device, vdev); } static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val) { - struct vfio_device *pf_dev; - struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev); + struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev); if (!pf_vdev) return; @@ -550,12 +530,13 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val) WARN_ON(pf_vdev->vf_token->users < 0); mutex_unlock(&pf_vdev->vf_token->lock); - vfio_device_put(pf_dev); + vfio_device_put(&pf_vdev->vdev); } -static void vfio_pci_release(void *device_data) +static void vfio_pci_release(struct vfio_device *core_vdev) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); mutex_lock(&vdev->reflck->lock); @@ -581,9 +562,10 @@ static void vfio_pci_release(void *device_data) module_put(THIS_MODULE); } -static int vfio_pci_open(void *device_data) +static int vfio_pci_open(struct vfio_device *core_vdev) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -792,15 +774,16 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, } struct vfio_devices { - struct vfio_device **devices; + struct vfio_pci_device **devices; int cur_index; int max_index; }; -static long vfio_pci_ioctl(void *device_data, +static long vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); unsigned long minsz; if (cmd == VFIO_DEVICE_GET_INFO) { @@ -1280,9 +1263,7 @@ reset_info_exit: goto hot_reset_release; for (; mem_idx < devs.cur_index; mem_idx++) { - struct vfio_pci_device *tmp; - - tmp = vfio_device_data(devs.devices[mem_idx]); + struct vfio_pci_device *tmp = devs.devices[mem_idx]; ret = down_write_trylock(&tmp->memory_lock); if (!ret) { @@ -1297,17 +1278,13 @@ reset_info_exit: hot_reset_release: for (i = 0; i < devs.cur_index; i++) { - struct vfio_device *device; - struct vfio_pci_device *tmp; - - device = devs.devices[i]; - tmp = vfio_device_data(device); + struct vfio_pci_device *tmp = devs.devices[i]; if (i < mem_idx) up_write(&tmp->memory_lock); else mutex_unlock(&tmp->vma_lock); - vfio_device_put(device); + vfio_device_put(&tmp->vdev); } kfree(devs.devices); @@ -1402,11 +1379,10 @@ hot_reset_release: return -ENOTTY; } -static ssize_t vfio_pci_rw(void *device_data, char __user *buf, +static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - struct vfio_pci_device *vdev = device_data; if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; @@ -1434,22 +1410,28 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, return -EINVAL; } -static ssize_t vfio_pci_read(void *device_data, char __user *buf, +static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos) { + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); + if (!count) return 0; - return vfio_pci_rw(device_data, buf, count, ppos, false); + return vfio_pci_rw(vdev, buf, count, ppos, false); } -static ssize_t vfio_pci_write(void *device_data, const char __user *buf, +static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) { + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); + if (!count) return 0; - return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); + return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true); } /* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ @@ -1646,9 +1628,10 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = { .fault = vfio_pci_mmap_fault, }; -static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) +static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); struct pci_dev *pdev = vdev->pdev; unsigned int index; u64 phys_len, req_len, pgoff, req_start; @@ -1656,6 +1639,8 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + return -EINVAL; if (vma->vm_end < vma->vm_start) return -EINVAL; if ((vma->vm_flags & VM_SHARED) == 0) @@ -1664,7 +1649,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) int regnum = index - VFIO_PCI_NUM_REGIONS; struct vfio_pci_region *region = vdev->region + regnum; - if (region && region->ops && region->ops->mmap && + if (region->ops && region->ops->mmap && (region->flags & VFIO_REGION_INFO_FLAG_MMAP)) return region->ops->mmap(vdev, region, vma); return -EINVAL; @@ -1714,9 +1699,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return 0; } -static void vfio_pci_request(void *device_data, unsigned int count) +static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); struct pci_dev *pdev = vdev->pdev; mutex_lock(&vdev->igate); @@ -1767,8 +1753,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, return 0; /* No VF token provided or required */ if (vdev->pdev->is_virtfn) { - struct vfio_device *pf_dev; - struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev); + struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev); bool match; if (!pf_vdev) { @@ -1781,7 +1766,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, } if (!vf_token) { - vfio_device_put(pf_dev); + vfio_device_put(&pf_vdev->vdev); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1791,7 +1776,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, match = uuid_equal(uuid, &pf_vdev->vf_token->uuid); mutex_unlock(&pf_vdev->vf_token->lock); - vfio_device_put(pf_dev); + vfio_device_put(&pf_vdev->vdev); if (!match) { pci_info_ratelimited(vdev->pdev, @@ -1830,9 +1815,10 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, #define VF_TOKEN_ARG "vf_token=" -static int vfio_pci_match(void *device_data, char *buf) +static int vfio_pci_match(struct vfio_device *core_vdev, char *buf) { - struct vfio_pci_device *vdev = device_data; + struct vfio_pci_device *vdev = + container_of(core_vdev, struct vfio_pci_device, vdev); bool vf_token = false; uuid_t uuid; int ret; @@ -1922,6 +1908,68 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, return 0; } +static int vfio_pci_vf_init(struct vfio_pci_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + int ret; + + if (!pdev->is_physfn) + return 0; + + vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL); + if (!vdev->vf_token) + return -ENOMEM; + + mutex_init(&vdev->vf_token->lock); + uuid_gen(&vdev->vf_token->uuid); + + vdev->nb.notifier_call = vfio_pci_bus_notifier; + ret = bus_register_notifier(&pci_bus_type, &vdev->nb); + if (ret) { + kfree(vdev->vf_token); + return ret; + } + return 0; +} + +static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev) +{ + if (!vdev->vf_token) + return; + + bus_unregister_notifier(&pci_bus_type, &vdev->nb); + WARN_ON(vdev->vf_token->users); + mutex_destroy(&vdev->vf_token->lock); + kfree(vdev->vf_token); +} + +static int vfio_pci_vga_init(struct vfio_pci_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + int ret; + + if (!vfio_pci_is_vga(pdev)) + return 0; + + ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); + if (ret) + return ret; + vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false)); + return 0; +} + +static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + + if (!vfio_pci_is_vga(pdev)) + return; + vga_client_register(pdev, NULL, NULL, NULL); + vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | + VGA_RSRC_LEGACY_MEM); +} + static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct vfio_pci_device *vdev; @@ -1957,6 +2005,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_group_put; } + vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops); vdev->pdev = pdev; vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); @@ -1968,35 +2017,15 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&vdev->vma_list); init_rwsem(&vdev->memory_lock); - ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); + ret = vfio_pci_reflck_attach(vdev); if (ret) goto out_free; - - ret = vfio_pci_reflck_attach(vdev); + ret = vfio_pci_vf_init(vdev); if (ret) - goto out_del_group_dev; - - if (pdev->is_physfn) { - vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL); - if (!vdev->vf_token) { - ret = -ENOMEM; - goto out_reflck; - } - - mutex_init(&vdev->vf_token->lock); - uuid_gen(&vdev->vf_token->uuid); - - vdev->nb.notifier_call = vfio_pci_bus_notifier; - ret = bus_register_notifier(&pci_bus_type, &vdev->nb); - if (ret) - goto out_vf_token; - } - - if (vfio_pci_is_vga(pdev)) { - vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); - vga_set_legacy_decoding(pdev, - vfio_pci_set_vga_decode(vdev, false)); - } + goto out_reflck; + ret = vfio_pci_vga_init(vdev); + if (ret) + goto out_vf; vfio_pci_probe_power_state(vdev); @@ -2014,15 +2043,21 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vfio_pci_set_power_state(vdev, PCI_D3hot); } - return ret; + ret = vfio_register_group_dev(&vdev->vdev); + if (ret) + goto out_power; + dev_set_drvdata(&pdev->dev, vdev); + return 0; -out_vf_token: - kfree(vdev->vf_token); +out_power: + if (!disable_idle_d3) + vfio_pci_set_power_state(vdev, PCI_D0); +out_vf: + vfio_pci_vf_uninit(vdev); out_reflck: vfio_pci_reflck_put(vdev->reflck); -out_del_group_dev: - vfio_del_group_dev(&pdev->dev); out_free: + kfree(vdev->pm_save); kfree(vdev); out_group_put: vfio_iommu_group_put(group, &pdev->dev); @@ -2031,41 +2066,25 @@ out_group_put: static void vfio_pci_remove(struct pci_dev *pdev) { - struct vfio_pci_device *vdev; + struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev); pci_disable_sriov(pdev); - vdev = vfio_del_group_dev(&pdev->dev); - if (!vdev) - return; - - if (vdev->vf_token) { - WARN_ON(vdev->vf_token->users); - mutex_destroy(&vdev->vf_token->lock); - kfree(vdev->vf_token); - } - - if (vdev->nb.notifier_call) - bus_unregister_notifier(&pci_bus_type, &vdev->nb); + vfio_unregister_group_dev(&vdev->vdev); + vfio_pci_vf_uninit(vdev); vfio_pci_reflck_put(vdev->reflck); + vfio_pci_vga_uninit(vdev); vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); - kfree(vdev->region); - mutex_destroy(&vdev->ioeventfds_lock); if (!disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D0); + mutex_destroy(&vdev->ioeventfds_lock); + kfree(vdev->region); kfree(vdev->pm_save); kfree(vdev); - - if (vfio_pci_is_vga(pdev)) { - vga_client_register(pdev, NULL, NULL, NULL); - vga_set_legacy_decoding(pdev, - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | - VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); - } } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -2078,11 +2097,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, if (device == NULL) return PCI_ERS_RESULT_DISCONNECT; - vdev = vfio_device_data(device); - if (vdev == NULL) { - vfio_device_put(device); - return PCI_ERS_RESULT_DISCONNECT; - } + vdev = container_of(device, struct vfio_pci_device, vdev); mutex_lock(&vdev->igate); @@ -2098,7 +2113,6 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { - struct vfio_pci_device *vdev; struct vfio_device *device; int ret = 0; @@ -2111,12 +2125,6 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) if (!device) return -ENODEV; - vdev = vfio_device_data(device); - if (!vdev) { - vfio_device_put(device); - return -ENODEV; - } - if (nr_virtfn == 0) pci_disable_sriov(pdev); else @@ -2176,7 +2184,7 @@ static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data) return 0; } - vdev = vfio_device_data(device); + vdev = container_of(device, struct vfio_pci_device, vdev); if (vdev->reflck) { vfio_pci_reflck_get(vdev->reflck); @@ -2238,7 +2246,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) return -EBUSY; } - vdev = vfio_device_data(device); + vdev = container_of(device, struct vfio_pci_device, vdev); /* Fault if the device is not unused */ if (vdev->refcnt) { @@ -2246,7 +2254,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) return -EBUSY; } - devs->devices[devs->cur_index++] = device; + devs->devices[devs->cur_index++] = vdev; return 0; } @@ -2268,7 +2276,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) return -EBUSY; } - vdev = vfio_device_data(device); + vdev = container_of(device, struct vfio_pci_device, vdev); /* * Locking multiple devices is prone to deadlock, runaway and @@ -2279,7 +2287,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) return -EBUSY; } - devs->devices[devs->cur_index++] = device; + devs->devices[devs->cur_index++] = vdev; return 0; } @@ -2327,7 +2335,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) /* Does at least one need a reset? */ for (i = 0; i < devs.cur_index; i++) { - tmp = vfio_device_data(devs.devices[i]); + tmp = devs.devices[i]; if (tmp->needs_reset) { ret = pci_reset_bus(vdev->pdev); break; @@ -2336,7 +2344,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) put_devs: for (i = 0; i < devs.cur_index; i++) { - tmp = vfio_device_data(devs.devices[i]); + tmp = devs.devices[i]; /* * If reset was successful, affected devices no longer need @@ -2352,7 +2360,7 @@ put_devs: vfio_pci_set_power_state(tmp, PCI_D3hot); } - vfio_device_put(devs.devices[i]); + vfio_device_put(&tmp->vdev); } kfree(devs.devices); @@ -2409,7 +2417,7 @@ static int __init vfio_pci_init(void) { int ret; - /* Allocate shared config space permision data used by all devices */ + /* Allocate shared config space permission data used by all devices */ ret = vfio_pci_init_perm_bits(); if (ret) return ret; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index a402adee8a21..d57f037f65b8 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -101,7 +101,7 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = { /* * Read/Write Permission Bits - one bit for each bit in capability * Any field can be read if it exists, but what is read depends on - * whether the field is 'virtualized', or just pass thru to the + * whether the field is 'virtualized', or just pass through to the * hardware. Any virtualized field is also virtualized for writes. * Writes are only permitted if they have a 1 bit here. */ diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index e66dfb0178ed..228df565e9bc 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -21,6 +21,10 @@ #define OPREGION_SIZE (8 * 1024) #define OPREGION_PCI_ADDR 0xfc +#define OPREGION_RVDA 0x3ba +#define OPREGION_RVDS 0x3c2 +#define OPREGION_VERSION 0x16 + static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { @@ -58,6 +62,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) u32 addr, size; void *base; int ret; + u16 version; ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr); if (ret) @@ -83,6 +88,54 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) size *= 1024; /* In KB */ + /* + * Support opregion v2.1+ + * When VBT data exceeds 6KB size and cannot be within mailbox #4, then + * the Extended VBT region next to opregion is used to hold the VBT data. + * RVDA (Relative Address of VBT Data from Opregion Base) and RVDS + * (Raw VBT Data Size) from opregion structure member are used to hold the + * address from region base and size of VBT data. RVDA/RVDS are not + * defined before opregion 2.0. + * + * opregion 2.1+: RVDA is unsigned, relative offset from + * opregion base, and should point to the end of opregion. + * otherwise, exposing to userspace to allow read access to everything between + * the OpRegion and VBT is not safe. + * RVDS is defined as size in bytes. + * + * opregion 2.0: rvda is the physical VBT address. + * Since rvda is HPA it cannot be directly used in guest. + * And it should not be practically available for end user,so it is not supported. + */ + version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION)); + if (version >= 0x0200) { + u64 rvda; + u32 rvds; + + rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA)); + rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS)); + if (rvda && rvds) { + /* no support for opregion v2.0 with physical VBT address */ + if (version == 0x0200) { + memunmap(base); + pci_err(vdev->pdev, + "IGD assignment does not support opregion v2.0 with an extended VBT region\n"); + return -EINVAL; + } + + if (rvda != size) { + memunmap(base); + pci_err(vdev->pdev, + "Extended VBT does not follow opregion on version 0x%04x\n", + version); + return -EINVAL; + } + + /* region size for opregion v2.0+: opregion and VBT size. */ + size += rvds; + } + } + if (size != OPREGION_SIZE) { memunmap(base); base = memremap(addr, size, MEMREMAP_WB); diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c deleted file mode 100644 index 9adcf6a8f888..000000000000 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ /dev/null @@ -1,490 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2. - * - * Copyright (C) 2018 IBM Corp. All rights reserved. - * Author: Alexey Kardashevskiy <aik@ozlabs.ru> - * - * Register an on-GPU RAM region for cacheable access. - * - * Derived from original vfio_pci_igd.c: - * Copyright (C) 2016 Red Hat, Inc. All rights reserved. - * Author: Alex Williamson <alex.williamson@redhat.com> - */ - -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/uaccess.h> -#include <linux/vfio.h> -#include <linux/sched/mm.h> -#include <linux/mmu_context.h> -#include <asm/kvm_ppc.h> -#include "vfio_pci_private.h" - -#define CREATE_TRACE_POINTS -#include "trace.h" - -EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault); -EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap); -EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap); - -struct vfio_pci_nvgpu_data { - unsigned long gpu_hpa; /* GPU RAM physical address */ - unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */ - unsigned long useraddr; /* GPU RAM userspace address */ - unsigned long size; /* Size of the GPU RAM window (usually 128GB) */ - struct mm_struct *mm; - struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */ - struct pci_dev *gpdev; - struct notifier_block group_notifier; -}; - -static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev, - char __user *buf, size_t count, loff_t *ppos, bool iswrite) -{ - unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - struct vfio_pci_nvgpu_data *data = vdev->region[i].data; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK; - size_t sizealigned; - void __iomem *ptr; - - if (pos >= vdev->region[i].size) - return -EINVAL; - - count = min(count, (size_t)(vdev->region[i].size - pos)); - - /* - * We map only a bit of GPU RAM for a short time instead of mapping it - * for the guest lifetime as: - * - * 1) we do not know GPU RAM size, only aperture which is 4-8 times - * bigger than actual RAM size (16/32GB RAM vs. 128GB aperture); - * 2) mapping GPU RAM allows CPU to prefetch and if this happens - * before NVLink bridge is reset (which fences GPU RAM), - * hardware management interrupts (HMI) might happen, this - * will freeze NVLink bridge. - * - * This is not fast path anyway. - */ - sizealigned = ALIGN(posoff + count, PAGE_SIZE); - ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned); - if (!ptr) - return -EFAULT; - - if (iswrite) { - if (copy_from_user(ptr + posoff, buf, count)) - count = -EFAULT; - else - *ppos += count; - } else { - if (copy_to_user(buf, ptr + posoff, count)) - count = -EFAULT; - else - *ppos += count; - } - - iounmap(ptr); - - return count; -} - -static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev, - struct vfio_pci_region *region) -{ - struct vfio_pci_nvgpu_data *data = region->data; - long ret; - - /* If there were any mappings at all... */ - if (data->mm) { - if (data->mem) { - ret = mm_iommu_put(data->mm, data->mem); - WARN_ON(ret); - } - - mmdrop(data->mm); - } - - vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY, - &data->group_notifier); - - pnv_npu2_unmap_lpar_dev(data->gpdev); - - kfree(data); -} - -static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf) -{ - vm_fault_t ret; - struct vm_area_struct *vma = vmf->vma; - struct vfio_pci_region *region = vma->vm_private_data; - struct vfio_pci_nvgpu_data *data = region->data; - unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT; - unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT; - unsigned long vm_pgoff = vma->vm_pgoff & - ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); - unsigned long pfn = nv2pg + vm_pgoff + vmf_off; - - ret = vmf_insert_pfn(vma, vmf->address, pfn); - trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT, - vmf->address, ret); - - return ret; -} - -static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = { - .fault = vfio_pci_nvgpu_mmap_fault, -}; - -static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev, - struct vfio_pci_region *region, struct vm_area_struct *vma) -{ - int ret; - struct vfio_pci_nvgpu_data *data = region->data; - - if (data->useraddr) - return -EPERM; - - if (vma->vm_end - vma->vm_start > data->size) - return -EINVAL; - - vma->vm_private_data = region; - vma->vm_flags |= VM_PFNMAP; - vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops; - - /* - * Calling mm_iommu_newdev() here once as the region is not - * registered yet and therefore right initialization will happen now. - * Other places will use mm_iommu_find() which returns - * registered @mem and does not go gup(). - */ - data->useraddr = vma->vm_start; - data->mm = current->mm; - - mmgrab(data->mm); - ret = (int) mm_iommu_newdev(data->mm, data->useraddr, - vma_pages(vma), data->gpu_hpa, &data->mem); - - trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr, - vma->vm_end - vma->vm_start, ret); - - return ret; -} - -static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev, - struct vfio_pci_region *region, struct vfio_info_cap *caps) -{ - struct vfio_pci_nvgpu_data *data = region->data; - struct vfio_region_info_cap_nvlink2_ssatgt cap = { - .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, - .header.version = 1, - .tgt = data->gpu_tgt - }; - - return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); -} - -static const struct vfio_pci_regops vfio_pci_nvgpu_regops = { - .rw = vfio_pci_nvgpu_rw, - .release = vfio_pci_nvgpu_release, - .mmap = vfio_pci_nvgpu_mmap, - .add_capability = vfio_pci_nvgpu_add_capability, -}; - -static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb, - unsigned long action, void *opaque) -{ - struct kvm *kvm = opaque; - struct vfio_pci_nvgpu_data *data = container_of(nb, - struct vfio_pci_nvgpu_data, - group_notifier); - - if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm && - pnv_npu2_map_lpar_dev(data->gpdev, - kvm->arch.lpid, MSR_DR | MSR_PR)) - return NOTIFY_BAD; - - return NOTIFY_OK; -} - -int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev) -{ - int ret; - u64 reg[2]; - u64 tgt = 0; - struct device_node *npu_node, *mem_node; - struct pci_dev *npu_dev; - struct vfio_pci_nvgpu_data *data; - uint32_t mem_phandle = 0; - unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM; - - /* - * PCI config space does not tell us about NVLink presense but - * platform does, use this. - */ - npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0); - if (!npu_dev) - return -ENODEV; - - npu_node = pci_device_to_OF_node(npu_dev); - if (!npu_node) - return -EINVAL; - - if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) - return -ENODEV; - - mem_node = of_find_node_by_phandle(mem_phandle); - if (!mem_node) - return -EINVAL; - - if (of_property_read_variable_u64_array(mem_node, "reg", reg, - ARRAY_SIZE(reg), ARRAY_SIZE(reg)) != - ARRAY_SIZE(reg)) - return -EINVAL; - - if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) { - dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n"); - return -EFAULT; - } - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->gpu_hpa = reg[0]; - data->gpu_tgt = tgt; - data->size = reg[1]; - - dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa, - data->gpu_hpa + data->size - 1); - - data->gpdev = vdev->pdev; - data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier; - - ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY, - &events, &data->group_notifier); - if (ret) - goto free_exit; - - /* - * We have just set KVM, we do not need the listener anymore. - * Also, keeping it registered means that if more than one GPU is - * assigned, we will get several similar notifiers notifying about - * the same device again which does not help with anything. - */ - vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY, - &data->group_notifier); - - ret = vfio_pci_register_dev_region(vdev, - PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, - VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM, - &vfio_pci_nvgpu_regops, - data->size, - VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE | - VFIO_REGION_INFO_FLAG_MMAP, - data); - if (ret) - goto free_exit; - - return 0; -free_exit: - kfree(data); - - return ret; -} - -/* - * IBM NPU2 bridge - */ -struct vfio_pci_npu2_data { - void *base; /* ATSD register virtual address, for emulated access */ - unsigned long mmio_atsd; /* ATSD physical address */ - unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */ - unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */ -}; - -static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev, - char __user *buf, size_t count, loff_t *ppos, bool iswrite) -{ - unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - struct vfio_pci_npu2_data *data = vdev->region[i].data; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - - if (pos >= vdev->region[i].size) - return -EINVAL; - - count = min(count, (size_t)(vdev->region[i].size - pos)); - - if (iswrite) { - if (copy_from_user(data->base + pos, buf, count)) - return -EFAULT; - } else { - if (copy_to_user(buf, data->base + pos, count)) - return -EFAULT; - } - *ppos += count; - - return count; -} - -static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev, - struct vfio_pci_region *region, struct vm_area_struct *vma) -{ - int ret; - struct vfio_pci_npu2_data *data = region->data; - unsigned long req_len = vma->vm_end - vma->vm_start; - - if (req_len != PAGE_SIZE) - return -EINVAL; - - vma->vm_flags |= VM_PFNMAP; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT, - req_len, vma->vm_page_prot); - trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start, - vma->vm_end - vma->vm_start, ret); - - return ret; -} - -static void vfio_pci_npu2_release(struct vfio_pci_device *vdev, - struct vfio_pci_region *region) -{ - struct vfio_pci_npu2_data *data = region->data; - - memunmap(data->base); - kfree(data); -} - -static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev, - struct vfio_pci_region *region, struct vfio_info_cap *caps) -{ - struct vfio_pci_npu2_data *data = region->data; - struct vfio_region_info_cap_nvlink2_ssatgt captgt = { - .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, - .header.version = 1, - .tgt = data->gpu_tgt - }; - struct vfio_region_info_cap_nvlink2_lnkspd capspd = { - .header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, - .header.version = 1, - .link_speed = data->link_speed - }; - int ret; - - ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt)); - if (ret) - return ret; - - return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd)); -} - -static const struct vfio_pci_regops vfio_pci_npu2_regops = { - .rw = vfio_pci_npu2_rw, - .mmap = vfio_pci_npu2_mmap, - .release = vfio_pci_npu2_release, - .add_capability = vfio_pci_npu2_add_capability, -}; - -int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) -{ - int ret; - struct vfio_pci_npu2_data *data; - struct device_node *nvlink_dn; - u32 nvlink_index = 0, mem_phandle = 0; - struct pci_dev *npdev = vdev->pdev; - struct device_node *npu_node = pci_device_to_OF_node(npdev); - struct pci_controller *hose = pci_bus_to_host(npdev->bus); - u64 mmio_atsd = 0; - u64 tgt = 0; - u32 link_speed = 0xff; - - /* - * PCI config space does not tell us about NVLink presense but - * platform does, use this. - */ - if (!pnv_pci_get_gpu_dev(vdev->pdev)) - return -ENODEV; - - if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) - return -ENODEV; - - /* - * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links - * so we can allocate one register per link, using nvlink index as - * a key. - * There is always at least one ATSD register so as long as at least - * NVLink bridge #0 is passed to the guest, ATSD will be available. - */ - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return -ENODEV; - - if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index, - &mmio_atsd)) { - if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0, - &mmio_atsd)) { - dev_warn(&vdev->pdev->dev, "No available ATSD found\n"); - mmio_atsd = 0; - } else { - dev_warn(&vdev->pdev->dev, - "Using fallback ibm,mmio-atsd[0] for ATSD.\n"); - } - } - - if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) { - dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n"); - return -EFAULT; - } - - if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) { - dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n"); - return -EFAULT; - } - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->mmio_atsd = mmio_atsd; - data->gpu_tgt = tgt; - data->link_speed = link_speed; - if (data->mmio_atsd) { - data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT); - if (!data->base) { - ret = -ENOMEM; - goto free_exit; - } - } - - /* - * We want to expose the capability even if this specific NVLink - * did not get its own ATSD register because capabilities - * belong to VFIO regions and normally there will be ATSD register - * assigned to the NVLink bridge. - */ - ret = vfio_pci_register_dev_region(vdev, - PCI_VENDOR_ID_IBM | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE, - VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD, - &vfio_pci_npu2_regops, - data->mmio_atsd ? PAGE_SIZE : 0, - VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE | - VFIO_REGION_INFO_FLAG_MMAP, - data); - if (ret) - goto free_exit; - - return 0; - -free_exit: - if (data->base) - memunmap(data->base); - kfree(data); - - return ret; -} diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9cd1882a05af..5a36272cecbf 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -100,6 +100,7 @@ struct vfio_pci_mmap_vma { }; struct vfio_pci_device { + struct vfio_device vdev; struct pci_dev *pdev; void __iomem *barmap[PCI_STD_NUM_BARS]; bool bar_mmap_supported[PCI_STD_NUM_BARS]; @@ -199,20 +200,6 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev) return -ENODEV; } #endif -#ifdef CONFIG_VFIO_PCI_NVLINK2 -extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev); -extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev); -#else -static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev) -{ - return -ENODEV; -} - -static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) -{ - return -ENODEV; -} -#endif #ifdef CONFIG_S390 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index 09a9453b75c5..63cc7f0b2e4a 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -26,7 +26,7 @@ #define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */ #define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */ -/* DMA Control registe defines */ +/* DMA Control register defines */ #define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ #define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index 3626c2150101..f970eb2a999f 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -66,16 +66,18 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) if (ret) { kfree(vdev->name); kfree(vdev); + return ret; } - return ret; + dev_set_drvdata(&adev->dev, vdev); + return 0; } static void vfio_amba_remove(struct amba_device *adev) { - struct vfio_platform_device *vdev = - vfio_platform_remove_common(&adev->dev); + struct vfio_platform_device *vdev = dev_get_drvdata(&adev->dev); + vfio_platform_remove_common(vdev); kfree(vdev->name); kfree(vdev); } diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 9fb6818cea12..e4027799a154 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -54,23 +54,21 @@ static int vfio_platform_probe(struct platform_device *pdev) vdev->reset_required = reset_required; ret = vfio_platform_probe_common(vdev, &pdev->dev); - if (ret) + if (ret) { kfree(vdev); - - return ret; + return ret; + } + dev_set_drvdata(&pdev->dev, vdev); + return 0; } static int vfio_platform_remove(struct platform_device *pdev) { - struct vfio_platform_device *vdev; - - vdev = vfio_platform_remove_common(&pdev->dev); - if (vdev) { - kfree(vdev); - return 0; - } + struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev); - return -EINVAL; + vfio_platform_remove_common(vdev); + kfree(vdev); + return 0; } static struct platform_driver vfio_platform_driver = { diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index fb4b385191f2..361e5b57e369 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -218,9 +218,10 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev, return -EINVAL; } -static void vfio_platform_release(void *device_data) +static void vfio_platform_release(struct vfio_device *core_vdev) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); mutex_lock(&driver_lock); @@ -244,9 +245,10 @@ static void vfio_platform_release(void *device_data) module_put(vdev->parent_module); } -static int vfio_platform_open(void *device_data) +static int vfio_platform_open(struct vfio_device *core_vdev) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); int ret; if (!try_module_get(vdev->parent_module)) @@ -293,10 +295,12 @@ err_reg: return ret; } -static long vfio_platform_ioctl(void *device_data, +static long vfio_platform_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); + unsigned long minsz; if (cmd == VFIO_DEVICE_GET_INFO) { @@ -455,10 +459,11 @@ err: return -EFAULT; } -static ssize_t vfio_platform_read(void *device_data, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_platform_read(struct vfio_device *core_vdev, + char __user *buf, size_t count, loff_t *ppos) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; @@ -531,10 +536,11 @@ err: return -EFAULT; } -static ssize_t vfio_platform_write(void *device_data, const char __user *buf, +static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; @@ -573,9 +579,10 @@ static int vfio_platform_mmap_mmio(struct vfio_platform_region region, req_len, vma->vm_page_prot); } -static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma) +static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) { - struct vfio_platform_device *vdev = device_data; + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); unsigned int index; index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT); @@ -659,8 +666,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, struct iommu_group *group; int ret; - if (!vdev) - return -EINVAL; + vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops); ret = vfio_platform_acpi_probe(vdev, dev); if (ret) @@ -685,13 +691,13 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, goto put_reset; } - ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); + ret = vfio_register_group_dev(&vdev->vdev); if (ret) goto put_iommu; mutex_init(&vdev->igate); - pm_runtime_enable(vdev->device); + pm_runtime_enable(dev); return 0; put_iommu: @@ -702,19 +708,13 @@ put_reset: } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); -struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) +void vfio_platform_remove_common(struct vfio_platform_device *vdev) { - struct vfio_platform_device *vdev; - - vdev = vfio_del_group_dev(dev); + vfio_unregister_group_dev(&vdev->vdev); - if (vdev) { - pm_runtime_disable(vdev->device); - vfio_platform_put_reset(vdev); - vfio_iommu_group_put(dev->iommu_group, dev); - } - - return vdev; + pm_runtime_disable(vdev->device); + vfio_platform_put_reset(vdev); + vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev); } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 289089910643..a5ba82c8cbc3 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/interrupt.h> +#include <linux/vfio.h> #define VFIO_PLATFORM_OFFSET_SHIFT 40 #define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1) @@ -42,6 +43,7 @@ struct vfio_platform_region { }; struct vfio_platform_device { + struct vfio_device vdev; struct vfio_platform_region *regions; u32 num_regions; struct vfio_platform_irq *irqs; @@ -80,8 +82,7 @@ struct vfio_platform_reset_node { extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, struct device *dev); -extern struct vfio_platform_device *vfio_platform_remove_common - (struct device *dev); +void vfio_platform_remove_common(struct vfio_platform_device *vdev); extern int vfio_platform_irq_init(struct vfio_platform_device *vdev); extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 38779e6fd80c..5e631c359ef2 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -46,7 +46,6 @@ static struct vfio { struct mutex group_lock; struct cdev group_cdev; dev_t group_devt; - wait_queue_head_t release_q; } vfio; struct vfio_iommu_driver { @@ -90,15 +89,6 @@ struct vfio_group { struct blocking_notifier_head notifier; }; -struct vfio_device { - struct kref kref; - struct device *dev; - const struct vfio_device_ops *ops; - struct vfio_group *group; - struct list_head group_next; - void *device_data; -}; - #ifdef CONFIG_VFIO_NOIOMMU static bool noiommu __read_mostly; module_param_named(enable_unsafe_noiommu_mode, @@ -109,8 +99,8 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. Thi /* * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe * and remove functions, any use cases other than acquiring the first - * reference for the purpose of calling vfio_add_group_dev() or removing - * that symmetric reference after vfio_del_group_dev() should use the raw + * reference for the purpose of calling vfio_register_group_dev() or removing + * that symmetric reference after vfio_unregister_group_dev() should use the raw * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put() * removes the device from the dummy group and cannot be nested. */ @@ -532,67 +522,17 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev) /** * Device objects - create, release, get, put, search */ -static -struct vfio_device *vfio_group_create_device(struct vfio_group *group, - struct device *dev, - const struct vfio_device_ops *ops, - void *device_data) -{ - struct vfio_device *device; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) - return ERR_PTR(-ENOMEM); - - kref_init(&device->kref); - device->dev = dev; - device->group = group; - device->ops = ops; - device->device_data = device_data; - dev_set_drvdata(dev, device); - - /* No need to get group_lock, caller has group reference */ - vfio_group_get(group); - - mutex_lock(&group->device_lock); - list_add(&device->group_next, &group->device_list); - group->dev_counter++; - mutex_unlock(&group->device_lock); - - return device; -} - -static void vfio_device_release(struct kref *kref) -{ - struct vfio_device *device = container_of(kref, - struct vfio_device, kref); - struct vfio_group *group = device->group; - - list_del(&device->group_next); - group->dev_counter--; - mutex_unlock(&group->device_lock); - - dev_set_drvdata(device->dev, NULL); - - kfree(device); - - /* vfio_del_group_dev may be waiting for this device */ - wake_up(&vfio.release_q); -} - /* Device reference always implies a group reference */ void vfio_device_put(struct vfio_device *device) { - struct vfio_group *group = device->group; - kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); - vfio_group_put(group); + if (refcount_dec_and_test(&device->refcount)) + complete(&device->comp); } EXPORT_SYMBOL_GPL(vfio_device_put); -static void vfio_device_get(struct vfio_device *device) +static bool vfio_device_try_get(struct vfio_device *device) { - vfio_group_get(device->group); - kref_get(&device->kref); + return refcount_inc_not_zero(&device->refcount); } static struct vfio_device *vfio_group_get_device(struct vfio_group *group, @@ -602,8 +542,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, mutex_lock(&group->device_lock); list_for_each_entry(device, &group->device_list, group_next) { - if (device->dev == dev) { - vfio_device_get(device); + if (device->dev == dev && vfio_device_try_get(device)) { mutex_unlock(&group->device_lock); return device; } @@ -801,14 +740,22 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, /** * VFIO driver API */ -int vfio_add_group_dev(struct device *dev, - const struct vfio_device_ops *ops, void *device_data) +void vfio_init_group_dev(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops) +{ + init_completion(&device->comp); + device->dev = dev; + device->ops = ops; +} +EXPORT_SYMBOL_GPL(vfio_init_group_dev); + +int vfio_register_group_dev(struct vfio_device *device) { + struct vfio_device *existing_device; struct iommu_group *iommu_group; struct vfio_group *group; - struct vfio_device *device; - iommu_group = iommu_group_get(dev); + iommu_group = iommu_group_get(device->dev); if (!iommu_group) return -EINVAL; @@ -827,31 +774,29 @@ int vfio_add_group_dev(struct device *dev, iommu_group_put(iommu_group); } - device = vfio_group_get_device(group, dev); - if (device) { - dev_WARN(dev, "Device already exists on group %d\n", + existing_device = vfio_group_get_device(group, device->dev); + if (existing_device) { + dev_WARN(device->dev, "Device already exists on group %d\n", iommu_group_id(iommu_group)); - vfio_device_put(device); + vfio_device_put(existing_device); vfio_group_put(group); return -EBUSY; } - device = vfio_group_create_device(group, dev, ops, device_data); - if (IS_ERR(device)) { - vfio_group_put(group); - return PTR_ERR(device); - } + /* Our reference on group is moved to the device */ + device->group = group; - /* - * Drop all but the vfio_device reference. The vfio_device holds - * a reference to the vfio_group, which holds a reference to the - * iommu_group. - */ - vfio_group_put(group); + /* Refcounting can't start until the driver calls register */ + refcount_set(&device->refcount, 1); + + mutex_lock(&group->device_lock); + list_add(&device->group_next, &group->device_list); + group->dev_counter++; + mutex_unlock(&group->device_lock); return 0; } -EXPORT_SYMBOL_GPL(vfio_add_group_dev); +EXPORT_SYMBOL_GPL(vfio_register_group_dev); /** * Get a reference to the vfio_device for a device. Even if the @@ -886,7 +831,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, int ret; if (it->ops->match) { - ret = it->ops->match(it->device_data, buf); + ret = it->ops->match(it, buf); if (ret < 0) { device = ERR_PTR(ret); break; @@ -895,9 +840,8 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, ret = !strcmp(dev_name(it->dev), buf); } - if (ret) { + if (ret && vfio_device_try_get(it)) { device = it; - vfio_device_get(device); break; } } @@ -907,32 +851,15 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, } /* - * Caller must hold a reference to the vfio_device - */ -void *vfio_device_data(struct vfio_device *device) -{ - return device->device_data; -} -EXPORT_SYMBOL_GPL(vfio_device_data); - -/* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ -void *vfio_del_group_dev(struct device *dev) +void vfio_unregister_group_dev(struct vfio_device *device) { - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct vfio_device *device = dev_get_drvdata(dev); struct vfio_group *group = device->group; - void *device_data = device->device_data; struct vfio_unbound_dev *unbound; unsigned int i = 0; bool interrupted = false; - - /* - * The group exists so long as we have a device reference. Get - * a group reference and use it to scan for the device going away. - */ - vfio_group_get(group); + long rc; /* * When the device is removed from the group, the group suddenly @@ -945,7 +872,7 @@ void *vfio_del_group_dev(struct device *dev) */ unbound = kzalloc(sizeof(*unbound), GFP_KERNEL); if (unbound) { - unbound->dev = dev; + unbound->dev = device->dev; mutex_lock(&group->unbound_lock); list_add(&unbound->unbound_next, &group->unbound_list); mutex_unlock(&group->unbound_lock); @@ -953,44 +880,33 @@ void *vfio_del_group_dev(struct device *dev) WARN_ON(!unbound); vfio_device_put(device); - - /* - * If the device is still present in the group after the above - * 'put', then it is in use and we need to request it from the - * bus driver. The driver may in turn need to request the - * device from the user. We send the request on an arbitrary - * interval with counter to allow the driver to take escalating - * measures to release the device if it has the ability to do so. - */ - add_wait_queue(&vfio.release_q, &wait); - - do { - device = vfio_group_get_device(group, dev); - if (!device) - break; - + rc = try_wait_for_completion(&device->comp); + while (rc <= 0) { if (device->ops->request) - device->ops->request(device_data, i++); - - vfio_device_put(device); + device->ops->request(device, i++); if (interrupted) { - wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10); + rc = wait_for_completion_timeout(&device->comp, + HZ * 10); } else { - wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10); - if (signal_pending(current)) { + rc = wait_for_completion_interruptible_timeout( + &device->comp, HZ * 10); + if (rc < 0) { interrupted = true; - dev_warn(dev, + dev_warn(device->dev, "Device is currently in use, task" " \"%s\" (%d) " "blocked until device is released", current->comm, task_pid_nr(current)); } } + } - } while (1); + mutex_lock(&group->device_lock); + list_del(&device->group_next); + group->dev_counter--; + mutex_unlock(&group->device_lock); - remove_wait_queue(&vfio.release_q, &wait); /* * In order to support multiple devices per group, devices can be * plucked from the group while other devices in the group are still @@ -1008,11 +924,10 @@ void *vfio_del_group_dev(struct device *dev) if (list_empty(&group->device_list)) wait_event(group->container_q, !group->container); + /* Matches the get in vfio_register_group_dev() */ vfio_group_put(group); - - return device_data; } -EXPORT_SYMBOL_GPL(vfio_del_group_dev); +EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); /** * VFIO base fd, /dev/vfio/vfio @@ -1454,7 +1369,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) if (IS_ERR(device)) return PTR_ERR(device); - ret = device->ops->open(device->device_data); + ret = device->ops->open(device); if (ret) { vfio_device_put(device); return ret; @@ -1466,7 +1381,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) */ ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) { - device->ops->release(device->device_data); + device->ops->release(device); vfio_device_put(device); return ret; } @@ -1476,7 +1391,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) if (IS_ERR(filep)) { put_unused_fd(ret); ret = PTR_ERR(filep); - device->ops->release(device->device_data); + device->ops->release(device); vfio_device_put(device); return ret; } @@ -1633,7 +1548,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - device->ops->release(device->device_data); + device->ops->release(device); vfio_group_try_dissolve_container(device->group); @@ -1650,7 +1565,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, if (unlikely(!device->ops->ioctl)) return -EINVAL; - return device->ops->ioctl(device->device_data, cmd, arg); + return device->ops->ioctl(device, cmd, arg); } static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, @@ -1661,7 +1576,7 @@ static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, if (unlikely(!device->ops->read)) return -EINVAL; - return device->ops->read(device->device_data, buf, count, ppos); + return device->ops->read(device, buf, count, ppos); } static ssize_t vfio_device_fops_write(struct file *filep, @@ -1673,7 +1588,7 @@ static ssize_t vfio_device_fops_write(struct file *filep, if (unlikely(!device->ops->write)) return -EINVAL; - return device->ops->write(device->device_data, buf, count, ppos); + return device->ops->write(device, buf, count, ppos); } static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) @@ -1683,7 +1598,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) if (unlikely(!device->ops->mmap)) return -EINVAL; - return device->ops->mmap(device->device_data, vma); + return device->ops->mmap(device, vma); } static const struct file_operations vfio_device_fops = { @@ -2379,7 +2294,6 @@ static int __init vfio_init(void) mutex_init(&vfio.iommu_drivers_lock); INIT_LIST_HEAD(&vfio.group_list); INIT_LIST_HEAD(&vfio.iommu_drivers_list); - init_waitqueue_head(&vfio.release_q); ret = misc_register(&vfio_dev); if (ret) { diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 45cbfd4879a5..a0747c35a778 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -16,7 +16,7 @@ * IOMMU to support the IOMMU API and have few to no restrictions around * the IOVA range that can be mapped. The Type1 IOMMU is currently * optimized for relatively static mappings of a userspace process with - * userpsace pages pinned into memory. We also assume devices and IOMMU + * userspace pages pinned into memory. We also assume devices and IOMMU * domains are PCI based as the IOMMU API is still centered around a * device/bus interface rather than a group interface. */ @@ -77,7 +77,6 @@ struct vfio_iommu { bool v2; bool nesting; bool dirty_page_tracking; - bool pinned_page_dirty_scope; bool container_open; }; @@ -877,7 +876,7 @@ again: /* * If iommu capable domain exist in the container then all pages are - * already pinned and accounted. Accouting should be done if there is no + * already pinned and accounted. Accounting should be done if there is no * iommu capable domain in the container. */ do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); @@ -960,7 +959,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, bool do_accounting; int i; - if (!iommu || !user_pfn) + if (!iommu || !user_pfn || npage <= 0) return -EINVAL; /* Supported for v2 version only */ @@ -977,13 +976,13 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, iova = user_pfn[i] << PAGE_SHIFT; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); if (!dma) - goto unpin_exit; + break; + vfio_unpin_page_external(dma, iova, do_accounting); } -unpin_exit: mutex_unlock(&iommu->lock); - return i > npage ? npage : (i > 0 ? i : -EINVAL); + return i > 0 ? i : -EINVAL; } static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, @@ -1933,28 +1932,13 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, return ret; } -static struct device *vfio_mdev_get_iommu_device(struct device *dev) -{ - struct device *(*fn)(struct device *dev); - struct device *iommu_device; - - fn = symbol_get(mdev_get_iommu_device); - if (fn) { - iommu_device = fn(dev); - symbol_put(mdev_get_iommu_device); - - return iommu_device; - } - - return NULL; -} - static int vfio_mdev_attach_domain(struct device *dev, void *data) { + struct mdev_device *mdev = to_mdev_device(dev); struct iommu_domain *domain = data; struct device *iommu_device; - iommu_device = vfio_mdev_get_iommu_device(dev); + iommu_device = mdev_get_iommu_device(mdev); if (iommu_device) { if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) return iommu_aux_attach_device(domain, iommu_device); @@ -1967,10 +1951,11 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data) static int vfio_mdev_detach_domain(struct device *dev, void *data) { + struct mdev_device *mdev = to_mdev_device(dev); struct iommu_domain *domain = data; struct device *iommu_device; - iommu_device = vfio_mdev_get_iommu_device(dev); + iommu_device = mdev_get_iommu_device(mdev); if (iommu_device) { if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) iommu_aux_detach_device(domain, iommu_device); @@ -2018,9 +2003,10 @@ static bool vfio_bus_is_mdev(struct bus_type *bus) static int vfio_mdev_iommu_device(struct device *dev, void *data) { + struct mdev_device *mdev = to_mdev_device(dev); struct device **old = data, *new; - new = vfio_mdev_get_iommu_device(dev); + new = mdev_get_iommu_device(mdev); if (!new || (*old && *old != new)) return -EINVAL; @@ -2177,7 +2163,7 @@ static int vfio_iommu_resv_exclude(struct list_head *iova, continue; /* * Insert a new node if current node overlaps with the - * reserve region to exlude that from valid iova range. + * reserve region to exclude that from valid iova range. * Note that, new node is inserted before the current * node and finally the current node is deleted keeping * the list updated and sorted. @@ -2262,7 +2248,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, int ret; bool resv_msi, msi_remap; phys_addr_t resv_msi_base = 0; - struct iommu_domain_geometry geo; + struct iommu_domain_geometry *geo; LIST_HEAD(iova_copy); LIST_HEAD(group_resv_regions); @@ -2330,10 +2316,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } if (iommu->nesting) { - int attr = 1; - - ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING, - &attr); + ret = iommu_enable_nesting(domain->domain); if (ret) goto out_domain; } @@ -2343,10 +2326,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_domain; /* Get aperture info */ - iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo); - - if (vfio_iommu_aper_conflict(iommu, geo.aperture_start, - geo.aperture_end)) { + geo = &domain->domain->geometry; + if (vfio_iommu_aper_conflict(iommu, geo->aperture_start, + geo->aperture_end)) { ret = -EINVAL; goto out_detach; } @@ -2369,8 +2351,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_detach; - ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start, - geo.aperture_end); + ret = vfio_iommu_aper_resize(&iova_copy, geo->aperture_start, + geo->aperture_end); if (ret) goto out_detach; @@ -2503,7 +2485,6 @@ static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, struct list_head *iova_copy) { struct vfio_domain *domain; - struct iommu_domain_geometry geo; struct vfio_iova *node; dma_addr_t start = 0; dma_addr_t end = (dma_addr_t)~0; @@ -2512,12 +2493,12 @@ static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, return; list_for_each_entry(domain, &iommu->domain_list, next) { - iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, - &geo); - if (geo.aperture_start > start) - start = geo.aperture_start; - if (geo.aperture_end < end) - end = geo.aperture_end; + struct iommu_domain_geometry *geo = &domain->domain->geometry; + + if (geo->aperture_start > start) + start = geo->aperture_start; + if (geo->aperture_end < end) + end = geo->aperture_end; } /* Modify aperture limits. The new aper is either same or bigger */ |