summaryrefslogtreecommitdiffstats
path: root/drivers/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc.c127
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_private.h1
-rw-r--r--drivers/vfio/mdev/mdev_core.c174
-rw-r--r--drivers/vfio/mdev/mdev_driver.c19
-rw-r--r--drivers/vfio/mdev/mdev_private.h39
-rw-r--r--drivers/vfio/mdev/mdev_sysfs.c59
-rw-r--r--drivers/vfio/mdev/vfio_mdev.c80
-rw-r--r--drivers/vfio/pci/Kconfig6
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c278
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c53
-rw-r--r--drivers/vfio/pci/vfio_pci_nvlink2.c490
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h15
-rw-r--r--drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c2
-rw-r--r--drivers/vfio/platform/vfio_amba.c8
-rw-r--r--drivers/vfio/platform/vfio_platform.c20
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c56
-rw-r--r--drivers/vfio/platform/vfio_platform_private.h5
-rw-r--r--drivers/vfio/vfio.c210
-rw-r--r--drivers/vfio/vfio_iommu_type1.c71
21 files changed, 588 insertions, 1128 deletions
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index f27e25112c40..980e59551301 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -75,7 +75,8 @@ static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
goto unlock;
}
- cont_vdev = vfio_device_data(device);
+ cont_vdev =
+ container_of(device, struct vfio_fsl_mc_device, vdev);
if (!cont_vdev || !cont_vdev->reflck) {
vfio_device_put(device);
ret = -ENODEV;
@@ -135,9 +136,10 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
kfree(vdev->regions);
}
-static int vfio_fsl_mc_open(void *device_data)
+static int vfio_fsl_mc_open(struct vfio_device *core_vdev)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
if (!try_module_get(THIS_MODULE))
@@ -161,9 +163,10 @@ err_reg_init:
return ret;
}
-static void vfio_fsl_mc_release(void *device_data)
+static void vfio_fsl_mc_release(struct vfio_device *core_vdev)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
mutex_lock(&vdev->reflck->lock);
@@ -197,11 +200,12 @@ static void vfio_fsl_mc_release(void *device_data)
module_put(THIS_MODULE);
}
-static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
- unsigned long arg)
+static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev,
+ unsigned int cmd, unsigned long arg)
{
unsigned long minsz;
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
switch (cmd) {
@@ -327,10 +331,11 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
}
}
-static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+static ssize_t vfio_fsl_mc_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -404,10 +409,12 @@ static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
return 0;
}
-static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_fsl_mc_write(struct vfio_device *core_vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -468,9 +475,11 @@ static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
size, vma->vm_page_prot);
}
-static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
unsigned int index;
@@ -568,23 +577,39 @@ static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret);
goto out_nc_unreg;
}
+ return 0;
+
+out_nc_unreg:
+ bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+ return ret;
+}
+static int vfio_fsl_mc_scan_container(struct fsl_mc_device *mc_dev)
+{
+ int ret;
+
+ /* non dprc devices do not scan for other devices */
+ if (!is_fsl_mc_bus_dprc(mc_dev))
+ return 0;
ret = dprc_scan_container(mc_dev, false);
if (ret) {
- dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
- goto out_dprc_cleanup;
+ dev_err(&mc_dev->dev,
+ "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
+ dprc_remove_devices(mc_dev, NULL, 0);
+ return ret;
}
-
return 0;
+}
+
+static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+ if (!is_fsl_mc_bus_dprc(mc_dev))
+ return;
-out_dprc_cleanup:
- dprc_remove_devices(mc_dev, NULL, 0);
dprc_cleanup(mc_dev);
-out_nc_unreg:
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
- vdev->nb.notifier_call = NULL;
-
- return ret;
}
static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
@@ -600,36 +625,50 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
return -EINVAL;
}
- vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev) {
ret = -ENOMEM;
goto out_group_put;
}
+ vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
vdev->mc_dev = mc_dev;
-
- ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
- if (ret) {
- dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
- goto out_group_put;
- }
+ mutex_init(&vdev->igate);
ret = vfio_fsl_mc_reflck_attach(vdev);
if (ret)
- goto out_group_dev;
+ goto out_kfree;
ret = vfio_fsl_mc_init_device(vdev);
if (ret)
goto out_reflck;
- mutex_init(&vdev->igate);
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret) {
+ dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
+ goto out_device;
+ }
+ /*
+ * This triggers recursion into vfio_fsl_mc_probe() on another device
+ * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the
+ * vfio_add_group_dev() above. It has no impact on this vdev, so it is
+ * safe to be after the vfio device is made live.
+ */
+ ret = vfio_fsl_mc_scan_container(mc_dev);
+ if (ret)
+ goto out_group_dev;
+ dev_set_drvdata(dev, vdev);
return 0;
+out_group_dev:
+ vfio_unregister_group_dev(&vdev->vdev);
+out_device:
+ vfio_fsl_uninit_device(vdev);
out_reflck:
vfio_fsl_mc_reflck_put(vdev->reflck);
-out_group_dev:
- vfio_del_group_dev(dev);
+out_kfree:
+ kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, dev);
return ret;
@@ -637,25 +676,17 @@ out_group_put:
static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
{
- struct vfio_fsl_mc_device *vdev;
struct device *dev = &mc_dev->dev;
+ struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev);
- vdev = vfio_del_group_dev(dev);
- if (!vdev)
- return -EINVAL;
-
+ vfio_unregister_group_dev(&vdev->vdev);
mutex_destroy(&vdev->igate);
+ dprc_remove_devices(mc_dev, NULL, 0);
+ vfio_fsl_uninit_device(vdev);
vfio_fsl_mc_reflck_put(vdev->reflck);
- if (is_fsl_mc_bus_dprc(mc_dev)) {
- dprc_remove_devices(mc_dev, NULL, 0);
- dprc_cleanup(mc_dev);
- }
-
- if (vdev->nb.notifier_call)
- bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
-
+ kfree(vdev);
vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
return 0;
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index a97ee691ed47..89700e00e77d 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -36,6 +36,7 @@ struct vfio_fsl_mc_region {
};
struct vfio_fsl_mc_device {
+ struct vfio_device vdev;
struct fsl_mc_device *mc_dev;
struct notifier_block nb;
int refcnt;
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 6de97d25a3f8..2a85d6fcb7dd 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -29,39 +29,39 @@ static DEFINE_MUTEX(mdev_list_lock);
struct device *mdev_parent_dev(struct mdev_device *mdev)
{
- return mdev->parent->dev;
+ return mdev->type->parent->dev;
}
EXPORT_SYMBOL(mdev_parent_dev);
-void *mdev_get_drvdata(struct mdev_device *mdev)
-{
- return mdev->driver_data;
-}
-EXPORT_SYMBOL(mdev_get_drvdata);
-
-void mdev_set_drvdata(struct mdev_device *mdev, void *data)
-{
- mdev->driver_data = data;
-}
-EXPORT_SYMBOL(mdev_set_drvdata);
-
-struct device *mdev_dev(struct mdev_device *mdev)
+/*
+ * Return the index in supported_type_groups that this mdev_device was created
+ * from.
+ */
+unsigned int mdev_get_type_group_id(struct mdev_device *mdev)
{
- return &mdev->dev;
+ return mdev->type->type_group_id;
}
-EXPORT_SYMBOL(mdev_dev);
+EXPORT_SYMBOL(mdev_get_type_group_id);
-struct mdev_device *mdev_from_dev(struct device *dev)
+/*
+ * Used in mdev_type_attribute sysfs functions to return the index in the
+ * supported_type_groups that the sysfs is called from.
+ */
+unsigned int mtype_get_type_group_id(struct mdev_type *mtype)
{
- return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL;
+ return mtype->type_group_id;
}
-EXPORT_SYMBOL(mdev_from_dev);
+EXPORT_SYMBOL(mtype_get_type_group_id);
-const guid_t *mdev_uuid(struct mdev_device *mdev)
+/*
+ * Used in mdev_type_attribute sysfs functions to return the parent struct
+ * device
+ */
+struct device *mtype_get_parent_dev(struct mdev_type *mtype)
{
- return &mdev->uuid;
+ return mtype->parent->dev;
}
-EXPORT_SYMBOL(mdev_uuid);
+EXPORT_SYMBOL(mtype_get_parent_dev);
/* Should be called holding parent_list_lock */
static struct mdev_parent *__find_parent_device(struct device *dev)
@@ -75,7 +75,7 @@ static struct mdev_parent *__find_parent_device(struct device *dev)
return NULL;
}
-static void mdev_release_parent(struct kref *kref)
+void mdev_release_parent(struct kref *kref)
{
struct mdev_parent *parent = container_of(kref, struct mdev_parent,
ref);
@@ -85,31 +85,14 @@ static void mdev_release_parent(struct kref *kref)
put_device(dev);
}
-static struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
-{
- if (parent)
- kref_get(&parent->ref);
-
- return parent;
-}
-
-static void mdev_put_parent(struct mdev_parent *parent)
-{
- if (parent)
- kref_put(&parent->ref, mdev_release_parent);
-}
-
/* Caller must hold parent unreg_sem read or write lock */
static void mdev_device_remove_common(struct mdev_device *mdev)
{
- struct mdev_parent *parent;
- struct mdev_type *type;
+ struct mdev_parent *parent = mdev->type->parent;
int ret;
- type = to_mdev_type(mdev->type_kobj);
- mdev_remove_sysfs_files(&mdev->dev, type);
+ mdev_remove_sysfs_files(mdev);
device_del(&mdev->dev);
- parent = mdev->parent;
lockdep_assert_held(&parent->unreg_sem);
ret = parent->ops->remove(mdev);
if (ret)
@@ -117,17 +100,14 @@ static void mdev_device_remove_common(struct mdev_device *mdev)
/* Balances with device_initialize() */
put_device(&mdev->dev);
- mdev_put_parent(parent);
}
static int mdev_device_remove_cb(struct device *dev, void *data)
{
- if (dev_is_mdev(dev)) {
- struct mdev_device *mdev;
+ struct mdev_device *mdev = mdev_from_dev(dev);
- mdev = to_mdev_device(dev);
+ if (mdev)
mdev_device_remove_common(mdev);
- }
return 0;
}
@@ -256,8 +236,13 @@ void mdev_unregister_device(struct device *dev)
}
EXPORT_SYMBOL(mdev_unregister_device);
-static void mdev_device_free(struct mdev_device *mdev)
+static void mdev_device_release(struct device *dev)
{
+ struct mdev_device *mdev = to_mdev_device(dev);
+
+ /* Pairs with the get in mdev_device_create() */
+ kobject_put(&mdev->type->kobj);
+
mutex_lock(&mdev_list_lock);
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock);
@@ -266,24 +251,11 @@ static void mdev_device_free(struct mdev_device *mdev)
kfree(mdev);
}
-static void mdev_device_release(struct device *dev)
-{
- struct mdev_device *mdev = to_mdev_device(dev);
-
- mdev_device_free(mdev);
-}
-
-int mdev_device_create(struct kobject *kobj,
- struct device *dev, const guid_t *uuid)
+int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
{
int ret;
struct mdev_device *mdev, *tmp;
- struct mdev_parent *parent;
- struct mdev_type *type = to_mdev_type(kobj);
-
- parent = mdev_get_parent(type->parent);
- if (!parent)
- return -EINVAL;
+ struct mdev_parent *parent = type->parent;
mutex_lock(&mdev_list_lock);
@@ -291,50 +263,50 @@ int mdev_device_create(struct kobject *kobj,
list_for_each_entry(tmp, &mdev_list, next) {
if (guid_equal(&tmp->uuid, uuid)) {
mutex_unlock(&mdev_list_lock);
- ret = -EEXIST;
- goto mdev_fail;
+ return -EEXIST;
}
}
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) {
mutex_unlock(&mdev_list_lock);
- ret = -ENOMEM;
- goto mdev_fail;
+ return -ENOMEM;
}
+ device_initialize(&mdev->dev);
+ mdev->dev.parent = parent->dev;
+ mdev->dev.bus = &mdev_bus_type;
+ mdev->dev.release = mdev_device_release;
+ mdev->dev.groups = parent->ops->mdev_attr_groups;
+ mdev->type = type;
+ /* Pairs with the put in mdev_device_release() */
+ kobject_get(&type->kobj);
+
guid_copy(&mdev->uuid, uuid);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
- mdev->parent = parent;
+ ret = dev_set_name(&mdev->dev, "%pUl", uuid);
+ if (ret)
+ goto out_put_device;
/* Check if parent unregistration has started */
if (!down_read_trylock(&parent->unreg_sem)) {
- mdev_device_free(mdev);
ret = -ENODEV;
- goto mdev_fail;
+ goto out_put_device;
}
- device_initialize(&mdev->dev);
- mdev->dev.parent = dev;
- mdev->dev.bus = &mdev_bus_type;
- mdev->dev.release = mdev_device_release;
- dev_set_name(&mdev->dev, "%pUl", uuid);
- mdev->dev.groups = parent->ops->mdev_attr_groups;
- mdev->type_kobj = kobj;
-
- ret = parent->ops->create(kobj, mdev);
+ ret = parent->ops->create(mdev);
if (ret)
- goto ops_create_fail;
+ goto out_unlock;
ret = device_add(&mdev->dev);
if (ret)
- goto add_fail;
+ goto out_remove;
- ret = mdev_create_sysfs_files(&mdev->dev, type);
+ ret = mdev_create_sysfs_files(mdev);
if (ret)
- goto sysfs_fail;
+ goto out_del;
mdev->active = true;
dev_dbg(&mdev->dev, "MDEV: created\n");
@@ -342,24 +314,21 @@ int mdev_device_create(struct kobject *kobj,
return 0;
-sysfs_fail:
+out_del:
device_del(&mdev->dev);
-add_fail:
+out_remove:
parent->ops->remove(mdev);
-ops_create_fail:
+out_unlock:
up_read(&parent->unreg_sem);
+out_put_device:
put_device(&mdev->dev);
-mdev_fail:
- mdev_put_parent(parent);
return ret;
}
-int mdev_device_remove(struct device *dev)
+int mdev_device_remove(struct mdev_device *mdev)
{
- struct mdev_device *mdev, *tmp;
- struct mdev_parent *parent;
-
- mdev = to_mdev_device(dev);
+ struct mdev_device *tmp;
+ struct mdev_parent *parent = mdev->type->parent;
mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) {
@@ -380,7 +349,6 @@ int mdev_device_remove(struct device *dev)
mdev->active = false;
mutex_unlock(&mdev_list_lock);
- parent = mdev->parent;
/* Check if parent unregistration has started */
if (!down_read_trylock(&parent->unreg_sem))
return -ENODEV;
@@ -390,24 +358,6 @@ int mdev_device_remove(struct device *dev)
return 0;
}
-int mdev_set_iommu_device(struct device *dev, struct device *iommu_device)
-{
- struct mdev_device *mdev = to_mdev_device(dev);
-
- mdev->iommu_device = iommu_device;
-
- return 0;
-}
-EXPORT_SYMBOL(mdev_set_iommu_device);
-
-struct device *mdev_get_iommu_device(struct device *dev)
-{
- struct mdev_device *mdev = to_mdev_device(dev);
-
- return mdev->iommu_device;
-}
-EXPORT_SYMBOL(mdev_get_iommu_device);
-
static int __init mdev_init(void)
{
return mdev_bus_register();
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index 0d3223aee20b..041699571b7e 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -39,7 +39,8 @@ static void mdev_detach_iommu(struct mdev_device *mdev)
static int mdev_probe(struct device *dev)
{
- struct mdev_driver *drv = to_mdev_driver(dev->driver);
+ struct mdev_driver *drv =
+ container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
int ret;
@@ -47,8 +48,8 @@ static int mdev_probe(struct device *dev)
if (ret)
return ret;
- if (drv && drv->probe) {
- ret = drv->probe(dev);
+ if (drv->probe) {
+ ret = drv->probe(mdev);
if (ret)
mdev_detach_iommu(mdev);
}
@@ -58,11 +59,12 @@ static int mdev_probe(struct device *dev)
static int mdev_remove(struct device *dev)
{
- struct mdev_driver *drv = to_mdev_driver(dev->driver);
+ struct mdev_driver *drv =
+ container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
- if (drv && drv->remove)
- drv->remove(dev);
+ if (drv->remove)
+ drv->remove(mdev);
mdev_detach_iommu(mdev);
@@ -79,16 +81,13 @@ EXPORT_SYMBOL_GPL(mdev_bus_type);
/**
* mdev_register_driver - register a new MDEV driver
* @drv: the driver to register
- * @owner: module owner of driver to be registered
*
* Returns a negative value on error, otherwise 0.
**/
-int mdev_register_driver(struct mdev_driver *drv, struct module *owner)
+int mdev_register_driver(struct mdev_driver *drv)
{
/* initialize common driver fields */
- drv->driver.name = drv->name;
drv->driver.bus = &mdev_bus_type;
- drv->driver.owner = owner;
/* register with core */
return driver_register(&drv->driver);
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 7d922950caaf..6999c89db7b1 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Mediated device interal definitions
+ * Mediated device internal definitions
*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Author: Neo Jia <cjia@nvidia.com>
@@ -24,26 +24,12 @@ struct mdev_parent {
struct rw_semaphore unreg_sem;
};
-struct mdev_device {
- struct device dev;
- struct mdev_parent *parent;
- guid_t uuid;
- void *driver_data;
- struct list_head next;
- struct kobject *type_kobj;
- struct device *iommu_device;
- bool active;
-};
-
-#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
-#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
-
struct mdev_type {
struct kobject kobj;
struct kobject *devices_kobj;
struct mdev_parent *parent;
struct list_head next;
- struct attribute_group *group;
+ unsigned int type_group_id;
};
#define to_mdev_type_attr(_attr) \
@@ -54,11 +40,22 @@ struct mdev_type {
int parent_create_sysfs_files(struct mdev_parent *parent);
void parent_remove_sysfs_files(struct mdev_parent *parent);
-int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
-void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
+int mdev_create_sysfs_files(struct mdev_device *mdev);
+void mdev_remove_sysfs_files(struct mdev_device *mdev);
+
+int mdev_device_create(struct mdev_type *kobj, const guid_t *uuid);
+int mdev_device_remove(struct mdev_device *dev);
+
+void mdev_release_parent(struct kref *kref);
+
+static inline void mdev_get_parent(struct mdev_parent *parent)
+{
+ kref_get(&parent->ref);
+}
-int mdev_device_create(struct kobject *kobj,
- struct device *dev, const guid_t *uuid);
-int mdev_device_remove(struct device *dev);
+static inline void mdev_put_parent(struct mdev_parent *parent)
+{
+ kref_put(&parent->ref, mdev_release_parent);
+}
#endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 917fd84c1c6f..f5cf1931c54e 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -26,7 +26,7 @@ static ssize_t mdev_type_attr_show(struct kobject *kobj,
ssize_t ret = -EIO;
if (attr->show)
- ret = attr->show(kobj, type->parent->dev, buf);
+ ret = attr->show(type, attr, buf);
return ret;
}
@@ -39,7 +39,7 @@ static ssize_t mdev_type_attr_store(struct kobject *kobj,
ssize_t ret = -EIO;
if (attr->store)
- ret = attr->store(&type->kobj, type->parent->dev, buf, count);
+ ret = attr->store(type, attr, buf, count);
return ret;
}
@@ -48,8 +48,9 @@ static const struct sysfs_ops mdev_type_sysfs_ops = {
.store = mdev_type_attr_store,
};
-static ssize_t create_store(struct kobject *kobj, struct device *dev,
- const char *buf, size_t count)
+static ssize_t create_store(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr, const char *buf,
+ size_t count)
{
char *str;
guid_t uuid;
@@ -67,7 +68,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev,
if (ret)
return ret;
- ret = mdev_device_create(kobj, dev, &uuid);
+ ret = mdev_device_create(mtype, &uuid);
if (ret)
return ret;
@@ -81,6 +82,8 @@ static void mdev_type_release(struct kobject *kobj)
struct mdev_type *type = to_mdev_type(kobj);
pr_debug("Releasing group %s\n", kobj->name);
+ /* Pairs with the get in add_mdev_supported_type() */
+ mdev_put_parent(type->parent);
kfree(type);
}
@@ -90,9 +93,11 @@ static struct kobj_type mdev_type_ktype = {
};
static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
- struct attribute_group *group)
+ unsigned int type_group_id)
{
struct mdev_type *type;
+ struct attribute_group *group =
+ parent->ops->supported_type_groups[type_group_id];
int ret;
if (!group->name) {
@@ -105,6 +110,10 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
return ERR_PTR(-ENOMEM);
type->kobj.kset = parent->mdev_types_kset;
+ type->parent = parent;
+ /* Pairs with the put in mdev_type_release() */
+ mdev_get_parent(parent);
+ type->type_group_id = type_group_id;
ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
"%s-%s", dev_driver_string(parent->dev),
@@ -130,9 +139,6 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
ret = -ENOMEM;
goto attrs_failed;
}
-
- type->group = group;
- type->parent = parent;
return type;
attrs_failed:
@@ -147,8 +153,11 @@ attr_create_failed:
static void remove_mdev_supported_type(struct mdev_type *type)
{
+ struct attribute_group *group =
+ type->parent->ops->supported_type_groups[type->type_group_id];
+
sysfs_remove_files(&type->kobj,
- (const struct attribute **)type->group->attrs);
+ (const struct attribute **)group->attrs);
kobject_put(type->devices_kobj);
sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
kobject_del(&type->kobj);
@@ -162,8 +171,7 @@ static int add_mdev_supported_type_groups(struct mdev_parent *parent)
for (i = 0; parent->ops->supported_type_groups[i]; i++) {
struct mdev_type *type;
- type = add_mdev_supported_type(parent,
- parent->ops->supported_type_groups[i]);
+ type = add_mdev_supported_type(parent, i);
if (IS_ERR(type)) {
struct mdev_type *ltype, *tmp;
@@ -225,6 +233,7 @@ create_err:
static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
+ struct mdev_device *mdev = to_mdev_device(dev);
unsigned long val;
if (kstrtoul(buf, 0, &val) < 0)
@@ -233,7 +242,7 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
if (val && device_remove_file_self(dev, attr)) {
int ret;
- ret = mdev_device_remove(dev);
+ ret = mdev_device_remove(mdev);
if (ret)
return ret;
}
@@ -248,34 +257,38 @@ static const struct attribute *mdev_device_attrs[] = {
NULL,
};
-int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
+int mdev_create_sysfs_files(struct mdev_device *mdev)
{
+ struct mdev_type *type = mdev->type;
+ struct kobject *kobj = &mdev->dev.kobj;
int ret;
- ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
+ ret = sysfs_create_link(type->devices_kobj, kobj, dev_name(&mdev->dev));
if (ret)
return ret;
- ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
+ ret = sysfs_create_link(kobj, &type->kobj, "mdev_type");
if (ret)
goto type_link_failed;
- ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
+ ret = sysfs_create_files(kobj, mdev_device_attrs);
if (ret)
goto create_files_failed;
return ret;
create_files_failed:
- sysfs_remove_link(&dev->kobj, "mdev_type");
+ sysfs_remove_link(kobj, "mdev_type");
type_link_failed:
- sysfs_remove_link(type->devices_kobj, dev_name(dev));
+ sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev));
return ret;
}
-void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type)
+void mdev_remove_sysfs_files(struct mdev_device *mdev)
{
- sysfs_remove_files(&dev->kobj, mdev_device_attrs);
- sysfs_remove_link(&dev->kobj, "mdev_type");
- sysfs_remove_link(type->devices_kobj, dev_name(dev));
+ struct kobject *kobj = &mdev->dev.kobj;
+
+ sysfs_remove_files(kobj, mdev_device_attrs);
+ sysfs_remove_link(kobj, "mdev_type");
+ sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev));
}
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index b52eea128549..922729071c5a 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -21,10 +21,11 @@
#define DRIVER_AUTHOR "NVIDIA Corporation"
#define DRIVER_DESC "VFIO based driver for Mediated device"
-static int vfio_mdev_open(void *device_data)
+static int vfio_mdev_open(struct vfio_device *core_vdev)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
+
int ret;
if (unlikely(!parent->ops->open))
@@ -40,10 +41,10 @@ static int vfio_mdev_open(void *device_data)
return ret;
}
-static void vfio_mdev_release(void *device_data)
+static void vfio_mdev_release(struct vfio_device *core_vdev)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (likely(parent->ops->release))
parent->ops->release(mdev);
@@ -51,11 +52,11 @@ static void vfio_mdev_release(void *device_data)
module_put(THIS_MODULE);
}
-static long vfio_mdev_unlocked_ioctl(void *device_data,
+static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->ioctl))
return -EINVAL;
@@ -63,11 +64,11 @@ static long vfio_mdev_unlocked_ioctl(void *device_data,
return parent->ops->ioctl(mdev, cmd, arg);
}
-static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
+static ssize_t vfio_mdev_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->read))
return -EINVAL;
@@ -75,11 +76,12 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
return parent->ops->read(mdev, buf, count, ppos);
}
-static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_mdev_write(struct vfio_device *core_vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->write))
return -EINVAL;
@@ -87,10 +89,11 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
return parent->ops->write(mdev, buf, count, ppos);
}
-static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_mdev_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (unlikely(!parent->ops->mmap))
return -EINVAL;
@@ -98,10 +101,10 @@ static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
return parent->ops->mmap(mdev, vma);
}
-static void vfio_mdev_request(void *device_data, unsigned int count)
+static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count)
{
- struct mdev_device *mdev = device_data;
- struct mdev_parent *parent = mdev->parent;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
+ struct mdev_parent *parent = mdev->type->parent;
if (parent->ops->request)
parent->ops->request(mdev, count);
@@ -121,27 +124,46 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = {
.request = vfio_mdev_request,
};
-static int vfio_mdev_probe(struct device *dev)
+static int vfio_mdev_probe(struct mdev_device *mdev)
{
- struct mdev_device *mdev = to_mdev_device(dev);
+ struct vfio_device *vdev;
+ int ret;
- return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev);
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+
+ vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
+ ret = vfio_register_group_dev(vdev);
+ if (ret) {
+ kfree(vdev);
+ return ret;
+ }
+ dev_set_drvdata(&mdev->dev, vdev);
+ return 0;
}
-static void vfio_mdev_remove(struct device *dev)
+static void vfio_mdev_remove(struct mdev_device *mdev)
{
- vfio_del_group_dev(dev);
+ struct vfio_device *vdev = dev_get_drvdata(&mdev->dev);
+
+ vfio_unregister_group_dev(vdev);
+ kfree(vdev);
}
static struct mdev_driver vfio_mdev_driver = {
- .name = "vfio_mdev",
+ .driver = {
+ .name = "vfio_mdev",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ },
.probe = vfio_mdev_probe,
.remove = vfio_mdev_remove,
};
static int __init vfio_mdev_init(void)
{
- return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE);
+ return mdev_register_driver(&vfio_mdev_driver);
}
static void __exit vfio_mdev_exit(void)
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 4abddbebd4b2..53ce78d7d07b 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -39,9 +39,3 @@ config VFIO_PCI_IGD
and LPC bridge config space.
To enable Intel IGD assignment through vfio-pci, say Y.
-
-config VFIO_PCI_NVLINK2
- def_bool y
- depends on VFIO_PCI && PPC_POWERNV && SPAPR_TCE_IOMMU
- help
- VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index eff97a7cd9f1..3ff42093962f 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,7 +2,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
-vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 65e7e6b44578..bd7c482c948a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -378,7 +378,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
-
if (vfio_pci_is_vga(pdev) &&
pdev->vendor == PCI_VENDOR_ID_INTEL &&
IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
@@ -389,24 +388,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
- if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
- IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
- ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
- if (ret && ret != -ENODEV) {
- pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
- goto disable_exit;
- }
- }
-
- if (pdev->vendor == PCI_VENDOR_ID_IBM &&
- IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
- ret = vfio_pci_ibm_npu2_init(vdev);
- if (ret && ret != -ENODEV) {
- pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
- goto disable_exit;
- }
- }
-
vfio_pci_probe_mmaps(vdev);
return 0;
@@ -517,30 +498,29 @@ out:
static struct pci_driver vfio_pci_driver;
-static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
- struct vfio_device **pf_dev)
+static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev)
{
struct pci_dev *physfn = pci_physfn(vdev->pdev);
+ struct vfio_device *pf_dev;
if (!vdev->pdev->is_virtfn)
return NULL;
- *pf_dev = vfio_device_get_from_dev(&physfn->dev);
- if (!*pf_dev)
+ pf_dev = vfio_device_get_from_dev(&physfn->dev);
+ if (!pf_dev)
return NULL;
if (pci_dev_driver(physfn) != &vfio_pci_driver) {
- vfio_device_put(*pf_dev);
+ vfio_device_put(pf_dev);
return NULL;
}
- return vfio_device_data(*pf_dev);
+ return container_of(pf_dev, struct vfio_pci_device, vdev);
}
static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
- struct vfio_device *pf_dev;
- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+ struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
if (!pf_vdev)
return;
@@ -550,12 +530,13 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
WARN_ON(pf_vdev->vf_token->users < 0);
mutex_unlock(&pf_vdev->vf_token->lock);
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
}
-static void vfio_pci_release(void *device_data)
+static void vfio_pci_release(struct vfio_device *core_vdev)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
mutex_lock(&vdev->reflck->lock);
@@ -581,9 +562,10 @@ static void vfio_pci_release(void *device_data)
module_put(THIS_MODULE);
}
-static int vfio_pci_open(void *device_data)
+static int vfio_pci_open(struct vfio_device *core_vdev)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -792,15 +774,16 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
}
struct vfio_devices {
- struct vfio_device **devices;
+ struct vfio_pci_device **devices;
int cur_index;
int max_index;
};
-static long vfio_pci_ioctl(void *device_data,
+static long vfio_pci_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@@ -1280,9 +1263,7 @@ reset_info_exit:
goto hot_reset_release;
for (; mem_idx < devs.cur_index; mem_idx++) {
- struct vfio_pci_device *tmp;
-
- tmp = vfio_device_data(devs.devices[mem_idx]);
+ struct vfio_pci_device *tmp = devs.devices[mem_idx];
ret = down_write_trylock(&tmp->memory_lock);
if (!ret) {
@@ -1297,17 +1278,13 @@ reset_info_exit:
hot_reset_release:
for (i = 0; i < devs.cur_index; i++) {
- struct vfio_device *device;
- struct vfio_pci_device *tmp;
-
- device = devs.devices[i];
- tmp = vfio_device_data(device);
+ struct vfio_pci_device *tmp = devs.devices[i];
if (i < mem_idx)
up_write(&tmp->memory_lock);
else
mutex_unlock(&tmp->vma_lock);
- vfio_device_put(device);
+ vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
@@ -1402,11 +1379,10 @@ hot_reset_release:
return -ENOTTY;
}
-static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
+static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- struct vfio_pci_device *vdev = device_data;
if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
@@ -1434,22 +1410,28 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
return -EINVAL;
}
-static ssize_t vfio_pci_read(void *device_data, char __user *buf,
+static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
+
if (!count)
return 0;
- return vfio_pci_rw(device_data, buf, count, ppos, false);
+ return vfio_pci_rw(vdev, buf, count, ppos, false);
}
-static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
+static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
+
if (!count)
return 0;
- return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
+ return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
}
/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
@@ -1646,9 +1628,10 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = {
.fault = vfio_pci_mmap_fault,
};
-static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
unsigned int index;
u64 phys_len, req_len, pgoff, req_start;
@@ -1656,6 +1639,8 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ return -EINVAL;
if (vma->vm_end < vma->vm_start)
return -EINVAL;
if ((vma->vm_flags & VM_SHARED) == 0)
@@ -1664,7 +1649,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
int regnum = index - VFIO_PCI_NUM_REGIONS;
struct vfio_pci_region *region = vdev->region + regnum;
- if (region && region->ops && region->ops->mmap &&
+ if (region->ops && region->ops->mmap &&
(region->flags & VFIO_REGION_INFO_FLAG_MMAP))
return region->ops->mmap(vdev, region, vma);
return -EINVAL;
@@ -1714,9 +1699,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return 0;
}
-static void vfio_pci_request(void *device_data, unsigned int count)
+static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
mutex_lock(&vdev->igate);
@@ -1767,8 +1753,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
return 0; /* No VF token provided or required */
if (vdev->pdev->is_virtfn) {
- struct vfio_device *pf_dev;
- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+ struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
bool match;
if (!pf_vdev) {
@@ -1781,7 +1766,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
}
if (!vf_token) {
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
pci_info_ratelimited(vdev->pdev,
"VF token required to access device\n");
return -EACCES;
@@ -1791,7 +1776,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
mutex_unlock(&pf_vdev->vf_token->lock);
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
if (!match) {
pci_info_ratelimited(vdev->pdev,
@@ -1830,9 +1815,10 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
#define VF_TOKEN_ARG "vf_token="
-static int vfio_pci_match(void *device_data, char *buf)
+static int vfio_pci_match(struct vfio_device *core_vdev, char *buf)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
bool vf_token = false;
uuid_t uuid;
int ret;
@@ -1922,6 +1908,68 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
return 0;
}
+static int vfio_pci_vf_init(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ if (!pdev->is_physfn)
+ return 0;
+
+ vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
+ if (!vdev->vf_token)
+ return -ENOMEM;
+
+ mutex_init(&vdev->vf_token->lock);
+ uuid_gen(&vdev->vf_token->uuid);
+
+ vdev->nb.notifier_call = vfio_pci_bus_notifier;
+ ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
+ if (ret) {
+ kfree(vdev->vf_token);
+ return ret;
+ }
+ return 0;
+}
+
+static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
+{
+ if (!vdev->vf_token)
+ return;
+
+ bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+ WARN_ON(vdev->vf_token->users);
+ mutex_destroy(&vdev->vf_token->lock);
+ kfree(vdev->vf_token);
+}
+
+static int vfio_pci_vga_init(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ if (!vfio_pci_is_vga(pdev))
+ return 0;
+
+ ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
+ if (ret)
+ return ret;
+ vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
+ return 0;
+}
+
+static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+
+ if (!vfio_pci_is_vga(pdev))
+ return;
+ vga_client_register(pdev, NULL, NULL, NULL);
+ vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+ VGA_RSRC_LEGACY_IO |
+ VGA_RSRC_LEGACY_MEM);
+}
+
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_device *vdev;
@@ -1957,6 +2005,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_group_put;
}
+ vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops);
vdev->pdev = pdev;
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
@@ -1968,35 +2017,15 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&vdev->vma_list);
init_rwsem(&vdev->memory_lock);
- ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
+ ret = vfio_pci_reflck_attach(vdev);
if (ret)
goto out_free;
-
- ret = vfio_pci_reflck_attach(vdev);
+ ret = vfio_pci_vf_init(vdev);
if (ret)
- goto out_del_group_dev;
-
- if (pdev->is_physfn) {
- vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
- if (!vdev->vf_token) {
- ret = -ENOMEM;
- goto out_reflck;
- }
-
- mutex_init(&vdev->vf_token->lock);
- uuid_gen(&vdev->vf_token->uuid);
-
- vdev->nb.notifier_call = vfio_pci_bus_notifier;
- ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
- if (ret)
- goto out_vf_token;
- }
-
- if (vfio_pci_is_vga(pdev)) {
- vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
- vga_set_legacy_decoding(pdev,
- vfio_pci_set_vga_decode(vdev, false));
- }
+ goto out_reflck;
+ ret = vfio_pci_vga_init(vdev);
+ if (ret)
+ goto out_vf;
vfio_pci_probe_power_state(vdev);
@@ -2014,15 +2043,21 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vfio_pci_set_power_state(vdev, PCI_D3hot);
}
- return ret;
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret)
+ goto out_power;
+ dev_set_drvdata(&pdev->dev, vdev);
+ return 0;
-out_vf_token:
- kfree(vdev->vf_token);
+out_power:
+ if (!disable_idle_d3)
+ vfio_pci_set_power_state(vdev, PCI_D0);
+out_vf:
+ vfio_pci_vf_uninit(vdev);
out_reflck:
vfio_pci_reflck_put(vdev->reflck);
-out_del_group_dev:
- vfio_del_group_dev(&pdev->dev);
out_free:
+ kfree(vdev->pm_save);
kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, &pdev->dev);
@@ -2031,41 +2066,25 @@ out_group_put:
static void vfio_pci_remove(struct pci_dev *pdev)
{
- struct vfio_pci_device *vdev;
+ struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
pci_disable_sriov(pdev);
- vdev = vfio_del_group_dev(&pdev->dev);
- if (!vdev)
- return;
-
- if (vdev->vf_token) {
- WARN_ON(vdev->vf_token->users);
- mutex_destroy(&vdev->vf_token->lock);
- kfree(vdev->vf_token);
- }
-
- if (vdev->nb.notifier_call)
- bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+ vfio_unregister_group_dev(&vdev->vdev);
+ vfio_pci_vf_uninit(vdev);
vfio_pci_reflck_put(vdev->reflck);
+ vfio_pci_vga_uninit(vdev);
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
- kfree(vdev->region);
- mutex_destroy(&vdev->ioeventfds_lock);
if (!disable_idle_d3)
vfio_pci_set_power_state(vdev, PCI_D0);
+ mutex_destroy(&vdev->ioeventfds_lock);
+ kfree(vdev->region);
kfree(vdev->pm_save);
kfree(vdev);
-
- if (vfio_pci_is_vga(pdev)) {
- vga_client_register(pdev, NULL, NULL, NULL);
- vga_set_legacy_decoding(pdev,
- VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
- VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
- }
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -2078,11 +2097,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
if (device == NULL)
return PCI_ERS_RESULT_DISCONNECT;
- vdev = vfio_device_data(device);
- if (vdev == NULL) {
- vfio_device_put(device);
- return PCI_ERS_RESULT_DISCONNECT;
- }
+ vdev = container_of(device, struct vfio_pci_device, vdev);
mutex_lock(&vdev->igate);
@@ -2098,7 +2113,6 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
- struct vfio_pci_device *vdev;
struct vfio_device *device;
int ret = 0;
@@ -2111,12 +2125,6 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
if (!device)
return -ENODEV;
- vdev = vfio_device_data(device);
- if (!vdev) {
- vfio_device_put(device);
- return -ENODEV;
- }
-
if (nr_virtfn == 0)
pci_disable_sriov(pdev);
else
@@ -2176,7 +2184,7 @@ static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
return 0;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
if (vdev->reflck) {
vfio_pci_reflck_get(vdev->reflck);
@@ -2238,7 +2246,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
/* Fault if the device is not unused */
if (vdev->refcnt) {
@@ -2246,7 +2254,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- devs->devices[devs->cur_index++] = device;
+ devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2268,7 +2276,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
/*
* Locking multiple devices is prone to deadlock, runaway and
@@ -2279,7 +2287,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- devs->devices[devs->cur_index++] = device;
+ devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2327,7 +2335,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
/* Does at least one need a reset? */
for (i = 0; i < devs.cur_index; i++) {
- tmp = vfio_device_data(devs.devices[i]);
+ tmp = devs.devices[i];
if (tmp->needs_reset) {
ret = pci_reset_bus(vdev->pdev);
break;
@@ -2336,7 +2344,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
put_devs:
for (i = 0; i < devs.cur_index; i++) {
- tmp = vfio_device_data(devs.devices[i]);
+ tmp = devs.devices[i];
/*
* If reset was successful, affected devices no longer need
@@ -2352,7 +2360,7 @@ put_devs:
vfio_pci_set_power_state(tmp, PCI_D3hot);
}
- vfio_device_put(devs.devices[i]);
+ vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
@@ -2409,7 +2417,7 @@ static int __init vfio_pci_init(void)
{
int ret;
- /* Allocate shared config space permision data used by all devices */
+ /* Allocate shared config space permission data used by all devices */
ret = vfio_pci_init_perm_bits();
if (ret)
return ret;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index a402adee8a21..d57f037f65b8 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -101,7 +101,7 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = {
/*
* Read/Write Permission Bits - one bit for each bit in capability
* Any field can be read if it exists, but what is read depends on
- * whether the field is 'virtualized', or just pass thru to the
+ * whether the field is 'virtualized', or just pass through to the
* hardware. Any virtualized field is also virtualized for writes.
* Writes are only permitted if they have a 1 bit here.
*/
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index e66dfb0178ed..228df565e9bc 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -21,6 +21,10 @@
#define OPREGION_SIZE (8 * 1024)
#define OPREGION_PCI_ADDR 0xfc
+#define OPREGION_RVDA 0x3ba
+#define OPREGION_RVDS 0x3c2
+#define OPREGION_VERSION 0x16
+
static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
@@ -58,6 +62,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
u32 addr, size;
void *base;
int ret;
+ u16 version;
ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
if (ret)
@@ -83,6 +88,54 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
size *= 1024; /* In KB */
+ /*
+ * Support opregion v2.1+
+ * When VBT data exceeds 6KB size and cannot be within mailbox #4, then
+ * the Extended VBT region next to opregion is used to hold the VBT data.
+ * RVDA (Relative Address of VBT Data from Opregion Base) and RVDS
+ * (Raw VBT Data Size) from opregion structure member are used to hold the
+ * address from region base and size of VBT data. RVDA/RVDS are not
+ * defined before opregion 2.0.
+ *
+ * opregion 2.1+: RVDA is unsigned, relative offset from
+ * opregion base, and should point to the end of opregion.
+ * otherwise, exposing to userspace to allow read access to everything between
+ * the OpRegion and VBT is not safe.
+ * RVDS is defined as size in bytes.
+ *
+ * opregion 2.0: rvda is the physical VBT address.
+ * Since rvda is HPA it cannot be directly used in guest.
+ * And it should not be practically available for end user,so it is not supported.
+ */
+ version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
+ if (version >= 0x0200) {
+ u64 rvda;
+ u32 rvds;
+
+ rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
+ rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));
+ if (rvda && rvds) {
+ /* no support for opregion v2.0 with physical VBT address */
+ if (version == 0x0200) {
+ memunmap(base);
+ pci_err(vdev->pdev,
+ "IGD assignment does not support opregion v2.0 with an extended VBT region\n");
+ return -EINVAL;
+ }
+
+ if (rvda != size) {
+ memunmap(base);
+ pci_err(vdev->pdev,
+ "Extended VBT does not follow opregion on version 0x%04x\n",
+ version);
+ return -EINVAL;
+ }
+
+ /* region size for opregion v2.0+: opregion and VBT size. */
+ size += rvds;
+ }
+ }
+
if (size != OPREGION_SIZE) {
memunmap(base);
base = memremap(addr, size, MEMREMAP_WB);
diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
deleted file mode 100644
index 9adcf6a8f888..000000000000
--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2.
- *
- * Copyright (C) 2018 IBM Corp. All rights reserved.
- * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * Register an on-GPU RAM region for cacheable access.
- *
- * Derived from original vfio_pci_igd.c:
- * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
- * Author: Alex Williamson <alex.williamson@redhat.com>
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include <linux/sched/mm.h>
-#include <linux/mmu_context.h>
-#include <asm/kvm_ppc.h>
-#include "vfio_pci_private.h"
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap);
-
-struct vfio_pci_nvgpu_data {
- unsigned long gpu_hpa; /* GPU RAM physical address */
- unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
- unsigned long useraddr; /* GPU RAM userspace address */
- unsigned long size; /* Size of the GPU RAM window (usually 128GB) */
- struct mm_struct *mm;
- struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */
- struct pci_dev *gpdev;
- struct notifier_block group_notifier;
-};
-
-static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev,
- char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
- unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
- struct vfio_pci_nvgpu_data *data = vdev->region[i].data;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
- loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK;
- size_t sizealigned;
- void __iomem *ptr;
-
- if (pos >= vdev->region[i].size)
- return -EINVAL;
-
- count = min(count, (size_t)(vdev->region[i].size - pos));
-
- /*
- * We map only a bit of GPU RAM for a short time instead of mapping it
- * for the guest lifetime as:
- *
- * 1) we do not know GPU RAM size, only aperture which is 4-8 times
- * bigger than actual RAM size (16/32GB RAM vs. 128GB aperture);
- * 2) mapping GPU RAM allows CPU to prefetch and if this happens
- * before NVLink bridge is reset (which fences GPU RAM),
- * hardware management interrupts (HMI) might happen, this
- * will freeze NVLink bridge.
- *
- * This is not fast path anyway.
- */
- sizealigned = ALIGN(posoff + count, PAGE_SIZE);
- ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned);
- if (!ptr)
- return -EFAULT;
-
- if (iswrite) {
- if (copy_from_user(ptr + posoff, buf, count))
- count = -EFAULT;
- else
- *ppos += count;
- } else {
- if (copy_to_user(buf, ptr + posoff, count))
- count = -EFAULT;
- else
- *ppos += count;
- }
-
- iounmap(ptr);
-
- return count;
-}
-
-static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region)
-{
- struct vfio_pci_nvgpu_data *data = region->data;
- long ret;
-
- /* If there were any mappings at all... */
- if (data->mm) {
- if (data->mem) {
- ret = mm_iommu_put(data->mm, data->mem);
- WARN_ON(ret);
- }
-
- mmdrop(data->mm);
- }
-
- vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &data->group_notifier);
-
- pnv_npu2_unmap_lpar_dev(data->gpdev);
-
- kfree(data);
-}
-
-static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf)
-{
- vm_fault_t ret;
- struct vm_area_struct *vma = vmf->vma;
- struct vfio_pci_region *region = vma->vm_private_data;
- struct vfio_pci_nvgpu_data *data = region->data;
- unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT;
- unsigned long vm_pgoff = vma->vm_pgoff &
- ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
- unsigned long pfn = nv2pg + vm_pgoff + vmf_off;
-
- ret = vmf_insert_pfn(vma, vmf->address, pfn);
- trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT,
- vmf->address, ret);
-
- return ret;
-}
-
-static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = {
- .fault = vfio_pci_nvgpu_mmap_fault,
-};
-
-static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
- int ret;
- struct vfio_pci_nvgpu_data *data = region->data;
-
- if (data->useraddr)
- return -EPERM;
-
- if (vma->vm_end - vma->vm_start > data->size)
- return -EINVAL;
-
- vma->vm_private_data = region;
- vma->vm_flags |= VM_PFNMAP;
- vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops;
-
- /*
- * Calling mm_iommu_newdev() here once as the region is not
- * registered yet and therefore right initialization will happen now.
- * Other places will use mm_iommu_find() which returns
- * registered @mem and does not go gup().
- */
- data->useraddr = vma->vm_start;
- data->mm = current->mm;
-
- mmgrab(data->mm);
- ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
- vma_pages(vma), data->gpu_hpa, &data->mem);
-
- trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,
- vma->vm_end - vma->vm_start, ret);
-
- return ret;
-}
-
-static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
- struct vfio_pci_nvgpu_data *data = region->data;
- struct vfio_region_info_cap_nvlink2_ssatgt cap = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
- .header.version = 1,
- .tgt = data->gpu_tgt
- };
-
- return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
-}
-
-static const struct vfio_pci_regops vfio_pci_nvgpu_regops = {
- .rw = vfio_pci_nvgpu_rw,
- .release = vfio_pci_nvgpu_release,
- .mmap = vfio_pci_nvgpu_mmap,
- .add_capability = vfio_pci_nvgpu_add_capability,
-};
-
-static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb,
- unsigned long action, void *opaque)
-{
- struct kvm *kvm = opaque;
- struct vfio_pci_nvgpu_data *data = container_of(nb,
- struct vfio_pci_nvgpu_data,
- group_notifier);
-
- if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm &&
- pnv_npu2_map_lpar_dev(data->gpdev,
- kvm->arch.lpid, MSR_DR | MSR_PR))
- return NOTIFY_BAD;
-
- return NOTIFY_OK;
-}
-
-int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
- int ret;
- u64 reg[2];
- u64 tgt = 0;
- struct device_node *npu_node, *mem_node;
- struct pci_dev *npu_dev;
- struct vfio_pci_nvgpu_data *data;
- uint32_t mem_phandle = 0;
- unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
-
- /*
- * PCI config space does not tell us about NVLink presense but
- * platform does, use this.
- */
- npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0);
- if (!npu_dev)
- return -ENODEV;
-
- npu_node = pci_device_to_OF_node(npu_dev);
- if (!npu_node)
- return -EINVAL;
-
- if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
- return -ENODEV;
-
- mem_node = of_find_node_by_phandle(mem_phandle);
- if (!mem_node)
- return -EINVAL;
-
- if (of_property_read_variable_u64_array(mem_node, "reg", reg,
- ARRAY_SIZE(reg), ARRAY_SIZE(reg)) !=
- ARRAY_SIZE(reg))
- return -EINVAL;
-
- if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
- dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
- return -EFAULT;
- }
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->gpu_hpa = reg[0];
- data->gpu_tgt = tgt;
- data->size = reg[1];
-
- dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa,
- data->gpu_hpa + data->size - 1);
-
- data->gpdev = vdev->pdev;
- data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier;
-
- ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &events, &data->group_notifier);
- if (ret)
- goto free_exit;
-
- /*
- * We have just set KVM, we do not need the listener anymore.
- * Also, keeping it registered means that if more than one GPU is
- * assigned, we will get several similar notifiers notifying about
- * the same device again which does not help with anything.
- */
- vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &data->group_notifier);
-
- ret = vfio_pci_register_dev_region(vdev,
- PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
- VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
- &vfio_pci_nvgpu_regops,
- data->size,
- VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE |
- VFIO_REGION_INFO_FLAG_MMAP,
- data);
- if (ret)
- goto free_exit;
-
- return 0;
-free_exit:
- kfree(data);
-
- return ret;
-}
-
-/*
- * IBM NPU2 bridge
- */
-struct vfio_pci_npu2_data {
- void *base; /* ATSD register virtual address, for emulated access */
- unsigned long mmio_atsd; /* ATSD physical address */
- unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
- unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */
-};
-
-static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev,
- char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
- unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
- struct vfio_pci_npu2_data *data = vdev->region[i].data;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
-
- if (pos >= vdev->region[i].size)
- return -EINVAL;
-
- count = min(count, (size_t)(vdev->region[i].size - pos));
-
- if (iswrite) {
- if (copy_from_user(data->base + pos, buf, count))
- return -EFAULT;
- } else {
- if (copy_to_user(buf, data->base + pos, count))
- return -EFAULT;
- }
- *ppos += count;
-
- return count;
-}
-
-static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
- int ret;
- struct vfio_pci_npu2_data *data = region->data;
- unsigned long req_len = vma->vm_end - vma->vm_start;
-
- if (req_len != PAGE_SIZE)
- return -EINVAL;
-
- vma->vm_flags |= VM_PFNMAP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT,
- req_len, vma->vm_page_prot);
- trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start,
- vma->vm_end - vma->vm_start, ret);
-
- return ret;
-}
-
-static void vfio_pci_npu2_release(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region)
-{
- struct vfio_pci_npu2_data *data = region->data;
-
- memunmap(data->base);
- kfree(data);
-}
-
-static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
- struct vfio_pci_npu2_data *data = region->data;
- struct vfio_region_info_cap_nvlink2_ssatgt captgt = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
- .header.version = 1,
- .tgt = data->gpu_tgt
- };
- struct vfio_region_info_cap_nvlink2_lnkspd capspd = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD,
- .header.version = 1,
- .link_speed = data->link_speed
- };
- int ret;
-
- ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt));
- if (ret)
- return ret;
-
- return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd));
-}
-
-static const struct vfio_pci_regops vfio_pci_npu2_regops = {
- .rw = vfio_pci_npu2_rw,
- .mmap = vfio_pci_npu2_mmap,
- .release = vfio_pci_npu2_release,
- .add_capability = vfio_pci_npu2_add_capability,
-};
-
-int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
- int ret;
- struct vfio_pci_npu2_data *data;
- struct device_node *nvlink_dn;
- u32 nvlink_index = 0, mem_phandle = 0;
- struct pci_dev *npdev = vdev->pdev;
- struct device_node *npu_node = pci_device_to_OF_node(npdev);
- struct pci_controller *hose = pci_bus_to_host(npdev->bus);
- u64 mmio_atsd = 0;
- u64 tgt = 0;
- u32 link_speed = 0xff;
-
- /*
- * PCI config space does not tell us about NVLink presense but
- * platform does, use this.
- */
- if (!pnv_pci_get_gpu_dev(vdev->pdev))
- return -ENODEV;
-
- if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
- return -ENODEV;
-
- /*
- * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
- * so we can allocate one register per link, using nvlink index as
- * a key.
- * There is always at least one ATSD register so as long as at least
- * NVLink bridge #0 is passed to the guest, ATSD will be available.
- */
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return -ENODEV;
-
- if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
- &mmio_atsd)) {
- if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
- &mmio_atsd)) {
- dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
- mmio_atsd = 0;
- } else {
- dev_warn(&vdev->pdev->dev,
- "Using fallback ibm,mmio-atsd[0] for ATSD.\n");
- }
- }
-
- if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
- dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
- return -EFAULT;
- }
-
- if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) {
- dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n");
- return -EFAULT;
- }
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->mmio_atsd = mmio_atsd;
- data->gpu_tgt = tgt;
- data->link_speed = link_speed;
- if (data->mmio_atsd) {
- data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT);
- if (!data->base) {
- ret = -ENOMEM;
- goto free_exit;
- }
- }
-
- /*
- * We want to expose the capability even if this specific NVLink
- * did not get its own ATSD register because capabilities
- * belong to VFIO regions and normally there will be ATSD register
- * assigned to the NVLink bridge.
- */
- ret = vfio_pci_register_dev_region(vdev,
- PCI_VENDOR_ID_IBM |
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
- VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
- &vfio_pci_npu2_regops,
- data->mmio_atsd ? PAGE_SIZE : 0,
- VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE |
- VFIO_REGION_INFO_FLAG_MMAP,
- data);
- if (ret)
- goto free_exit;
-
- return 0;
-
-free_exit:
- if (data->base)
- memunmap(data->base);
- kfree(data);
-
- return ret;
-}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9cd1882a05af..5a36272cecbf 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -100,6 +100,7 @@ struct vfio_pci_mmap_vma {
};
struct vfio_pci_device {
+ struct vfio_device vdev;
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
bool bar_mmap_supported[PCI_STD_NUM_BARS];
@@ -199,20 +200,6 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
-#ifdef CONFIG_VFIO_PCI_NVLINK2
-extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
-extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
-#else
-static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
- return -ENODEV;
-}
-
-static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
- return -ENODEV;
-}
-#endif
#ifdef CONFIG_S390
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
index 09a9453b75c5..63cc7f0b2e4a 100644
--- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
+++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c
@@ -26,7 +26,7 @@
#define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */
#define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */
-/* DMA Control registe defines */
+/* DMA Control register defines */
#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */
#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index 3626c2150101..f970eb2a999f 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -66,16 +66,18 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
if (ret) {
kfree(vdev->name);
kfree(vdev);
+ return ret;
}
- return ret;
+ dev_set_drvdata(&adev->dev, vdev);
+ return 0;
}
static void vfio_amba_remove(struct amba_device *adev)
{
- struct vfio_platform_device *vdev =
- vfio_platform_remove_common(&adev->dev);
+ struct vfio_platform_device *vdev = dev_get_drvdata(&adev->dev);
+ vfio_platform_remove_common(vdev);
kfree(vdev->name);
kfree(vdev);
}
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 9fb6818cea12..e4027799a154 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -54,23 +54,21 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->reset_required = reset_required;
ret = vfio_platform_probe_common(vdev, &pdev->dev);
- if (ret)
+ if (ret) {
kfree(vdev);
-
- return ret;
+ return ret;
+ }
+ dev_set_drvdata(&pdev->dev, vdev);
+ return 0;
}
static int vfio_platform_remove(struct platform_device *pdev)
{
- struct vfio_platform_device *vdev;
-
- vdev = vfio_platform_remove_common(&pdev->dev);
- if (vdev) {
- kfree(vdev);
- return 0;
- }
+ struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev);
- return -EINVAL;
+ vfio_platform_remove_common(vdev);
+ kfree(vdev);
+ return 0;
}
static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index fb4b385191f2..361e5b57e369 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -218,9 +218,10 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
return -EINVAL;
}
-static void vfio_platform_release(void *device_data)
+static void vfio_platform_release(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
mutex_lock(&driver_lock);
@@ -244,9 +245,10 @@ static void vfio_platform_release(void *device_data)
module_put(vdev->parent_module);
}
-static int vfio_platform_open(void *device_data)
+static int vfio_platform_open(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
int ret;
if (!try_module_get(vdev->parent_module))
@@ -293,10 +295,12 @@ err_reg:
return ret;
}
-static long vfio_platform_ioctl(void *device_data,
+static long vfio_platform_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@@ -455,10 +459,11 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_read(void *device_data, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_platform_read(struct vfio_device *core_vdev,
+ char __user *buf, size_t count, loff_t *ppos)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@@ -531,10 +536,11 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_write(void *device_data, const char __user *buf,
+static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@@ -573,9 +579,10 @@ static int vfio_platform_mmap_mmio(struct vfio_platform_region region,
req_len, vma->vm_page_prot);
}
-static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index;
index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT);
@@ -659,8 +666,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct iommu_group *group;
int ret;
- if (!vdev)
- return -EINVAL;
+ vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
ret = vfio_platform_acpi_probe(vdev, dev);
if (ret)
@@ -685,13 +691,13 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
goto put_reset;
}
- ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
+ ret = vfio_register_group_dev(&vdev->vdev);
if (ret)
goto put_iommu;
mutex_init(&vdev->igate);
- pm_runtime_enable(vdev->device);
+ pm_runtime_enable(dev);
return 0;
put_iommu:
@@ -702,19 +708,13 @@ put_reset:
}
EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
-struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
+void vfio_platform_remove_common(struct vfio_platform_device *vdev)
{
- struct vfio_platform_device *vdev;
-
- vdev = vfio_del_group_dev(dev);
+ vfio_unregister_group_dev(&vdev->vdev);
- if (vdev) {
- pm_runtime_disable(vdev->device);
- vfio_platform_put_reset(vdev);
- vfio_iommu_group_put(dev->iommu_group, dev);
- }
-
- return vdev;
+ pm_runtime_disable(vdev->device);
+ vfio_platform_put_reset(vdev);
+ vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
}
EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index 289089910643..a5ba82c8cbc3 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <linux/vfio.h>
#define VFIO_PLATFORM_OFFSET_SHIFT 40
#define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1)
@@ -42,6 +43,7 @@ struct vfio_platform_region {
};
struct vfio_platform_device {
+ struct vfio_device vdev;
struct vfio_platform_region *regions;
u32 num_regions;
struct vfio_platform_irq *irqs;
@@ -80,8 +82,7 @@ struct vfio_platform_reset_node {
extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct device *dev);
-extern struct vfio_platform_device *vfio_platform_remove_common
- (struct device *dev);
+void vfio_platform_remove_common(struct vfio_platform_device *vdev);
extern int vfio_platform_irq_init(struct vfio_platform_device *vdev);
extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..5e631c359ef2 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -46,7 +46,6 @@ static struct vfio {
struct mutex group_lock;
struct cdev group_cdev;
dev_t group_devt;
- wait_queue_head_t release_q;
} vfio;
struct vfio_iommu_driver {
@@ -90,15 +89,6 @@ struct vfio_group {
struct blocking_notifier_head notifier;
};
-struct vfio_device {
- struct kref kref;
- struct device *dev;
- const struct vfio_device_ops *ops;
- struct vfio_group *group;
- struct list_head group_next;
- void *device_data;
-};
-
#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
@@ -109,8 +99,8 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. Thi
/*
* vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
* and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_add_group_dev() or removing
- * that symmetric reference after vfio_del_group_dev() should use the raw
+ * reference for the purpose of calling vfio_register_group_dev() or removing
+ * that symmetric reference after vfio_unregister_group_dev() should use the raw
* iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
* removes the device from the dummy group and cannot be nested.
*/
@@ -532,67 +522,17 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
/**
* Device objects - create, release, get, put, search
*/
-static
-struct vfio_device *vfio_group_create_device(struct vfio_group *group,
- struct device *dev,
- const struct vfio_device_ops *ops,
- void *device_data)
-{
- struct vfio_device *device;
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&device->kref);
- device->dev = dev;
- device->group = group;
- device->ops = ops;
- device->device_data = device_data;
- dev_set_drvdata(dev, device);
-
- /* No need to get group_lock, caller has group reference */
- vfio_group_get(group);
-
- mutex_lock(&group->device_lock);
- list_add(&device->group_next, &group->device_list);
- group->dev_counter++;
- mutex_unlock(&group->device_lock);
-
- return device;
-}
-
-static void vfio_device_release(struct kref *kref)
-{
- struct vfio_device *device = container_of(kref,
- struct vfio_device, kref);
- struct vfio_group *group = device->group;
-
- list_del(&device->group_next);
- group->dev_counter--;
- mutex_unlock(&group->device_lock);
-
- dev_set_drvdata(device->dev, NULL);
-
- kfree(device);
-
- /* vfio_del_group_dev may be waiting for this device */
- wake_up(&vfio.release_q);
-}
-
/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
- struct vfio_group *group = device->group;
- kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
- vfio_group_put(group);
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->comp);
}
EXPORT_SYMBOL_GPL(vfio_device_put);
-static void vfio_device_get(struct vfio_device *device)
+static bool vfio_device_try_get(struct vfio_device *device)
{
- vfio_group_get(device->group);
- kref_get(&device->kref);
+ return refcount_inc_not_zero(&device->refcount);
}
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
@@ -602,8 +542,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_for_each_entry(device, &group->device_list, group_next) {
- if (device->dev == dev) {
- vfio_device_get(device);
+ if (device->dev == dev && vfio_device_try_get(device)) {
mutex_unlock(&group->device_lock);
return device;
}
@@ -801,14 +740,22 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
/**
* VFIO driver API
*/
-int vfio_add_group_dev(struct device *dev,
- const struct vfio_device_ops *ops, void *device_data)
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops)
+{
+ init_completion(&device->comp);
+ device->dev = dev;
+ device->ops = ops;
+}
+EXPORT_SYMBOL_GPL(vfio_init_group_dev);
+
+int vfio_register_group_dev(struct vfio_device *device)
{
+ struct vfio_device *existing_device;
struct iommu_group *iommu_group;
struct vfio_group *group;
- struct vfio_device *device;
- iommu_group = iommu_group_get(dev);
+ iommu_group = iommu_group_get(device->dev);
if (!iommu_group)
return -EINVAL;
@@ -827,31 +774,29 @@ int vfio_add_group_dev(struct device *dev,
iommu_group_put(iommu_group);
}
- device = vfio_group_get_device(group, dev);
- if (device) {
- dev_WARN(dev, "Device already exists on group %d\n",
+ existing_device = vfio_group_get_device(group, device->dev);
+ if (existing_device) {
+ dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(iommu_group));
- vfio_device_put(device);
+ vfio_device_put(existing_device);
vfio_group_put(group);
return -EBUSY;
}
- device = vfio_group_create_device(group, dev, ops, device_data);
- if (IS_ERR(device)) {
- vfio_group_put(group);
- return PTR_ERR(device);
- }
+ /* Our reference on group is moved to the device */
+ device->group = group;
- /*
- * Drop all but the vfio_device reference. The vfio_device holds
- * a reference to the vfio_group, which holds a reference to the
- * iommu_group.
- */
- vfio_group_put(group);
+ /* Refcounting can't start until the driver calls register */
+ refcount_set(&device->refcount, 1);
+
+ mutex_lock(&group->device_lock);
+ list_add(&device->group_next, &group->device_list);
+ group->dev_counter++;
+ mutex_unlock(&group->device_lock);
return 0;
}
-EXPORT_SYMBOL_GPL(vfio_add_group_dev);
+EXPORT_SYMBOL_GPL(vfio_register_group_dev);
/**
* Get a reference to the vfio_device for a device. Even if the
@@ -886,7 +831,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
int ret;
if (it->ops->match) {
- ret = it->ops->match(it->device_data, buf);
+ ret = it->ops->match(it, buf);
if (ret < 0) {
device = ERR_PTR(ret);
break;
@@ -895,9 +840,8 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
ret = !strcmp(dev_name(it->dev), buf);
}
- if (ret) {
+ if (ret && vfio_device_try_get(it)) {
device = it;
- vfio_device_get(device);
break;
}
}
@@ -907,32 +851,15 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
}
/*
- * Caller must hold a reference to the vfio_device
- */
-void *vfio_device_data(struct vfio_device *device)
-{
- return device->device_data;
-}
-EXPORT_SYMBOL_GPL(vfio_device_data);
-
-/*
* Decrement the device reference count and wait for the device to be
* removed. Open file descriptors for the device... */
-void *vfio_del_group_dev(struct device *dev)
+void vfio_unregister_group_dev(struct vfio_device *device)
{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct vfio_device *device = dev_get_drvdata(dev);
struct vfio_group *group = device->group;
- void *device_data = device->device_data;
struct vfio_unbound_dev *unbound;
unsigned int i = 0;
bool interrupted = false;
-
- /*
- * The group exists so long as we have a device reference. Get
- * a group reference and use it to scan for the device going away.
- */
- vfio_group_get(group);
+ long rc;
/*
* When the device is removed from the group, the group suddenly
@@ -945,7 +872,7 @@ void *vfio_del_group_dev(struct device *dev)
*/
unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
if (unbound) {
- unbound->dev = dev;
+ unbound->dev = device->dev;
mutex_lock(&group->unbound_lock);
list_add(&unbound->unbound_next, &group->unbound_list);
mutex_unlock(&group->unbound_lock);
@@ -953,44 +880,33 @@ void *vfio_del_group_dev(struct device *dev)
WARN_ON(!unbound);
vfio_device_put(device);
-
- /*
- * If the device is still present in the group after the above
- * 'put', then it is in use and we need to request it from the
- * bus driver. The driver may in turn need to request the
- * device from the user. We send the request on an arbitrary
- * interval with counter to allow the driver to take escalating
- * measures to release the device if it has the ability to do so.
- */
- add_wait_queue(&vfio.release_q, &wait);
-
- do {
- device = vfio_group_get_device(group, dev);
- if (!device)
- break;
-
+ rc = try_wait_for_completion(&device->comp);
+ while (rc <= 0) {
if (device->ops->request)
- device->ops->request(device_data, i++);
-
- vfio_device_put(device);
+ device->ops->request(device, i++);
if (interrupted) {
- wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
+ rc = wait_for_completion_timeout(&device->comp,
+ HZ * 10);
} else {
- wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
- if (signal_pending(current)) {
+ rc = wait_for_completion_interruptible_timeout(
+ &device->comp, HZ * 10);
+ if (rc < 0) {
interrupted = true;
- dev_warn(dev,
+ dev_warn(device->dev,
"Device is currently in use, task"
" \"%s\" (%d) "
"blocked until device is released",
current->comm, task_pid_nr(current));
}
}
+ }
- } while (1);
+ mutex_lock(&group->device_lock);
+ list_del(&device->group_next);
+ group->dev_counter--;
+ mutex_unlock(&group->device_lock);
- remove_wait_queue(&vfio.release_q, &wait);
/*
* In order to support multiple devices per group, devices can be
* plucked from the group while other devices in the group are still
@@ -1008,11 +924,10 @@ void *vfio_del_group_dev(struct device *dev)
if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container);
+ /* Matches the get in vfio_register_group_dev() */
vfio_group_put(group);
-
- return device_data;
}
-EXPORT_SYMBOL_GPL(vfio_del_group_dev);
+EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/**
* VFIO base fd, /dev/vfio/vfio
@@ -1454,7 +1369,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(device))
return PTR_ERR(device);
- ret = device->ops->open(device->device_data);
+ ret = device->ops->open(device);
if (ret) {
vfio_device_put(device);
return ret;
@@ -1466,7 +1381,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
*/
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0) {
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_device_put(device);
return ret;
}
@@ -1476,7 +1391,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(filep)) {
put_unused_fd(ret);
ret = PTR_ERR(filep);
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_device_put(device);
return ret;
}
@@ -1633,7 +1548,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_group_try_dissolve_container(device->group);
@@ -1650,7 +1565,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
if (unlikely(!device->ops->ioctl))
return -EINVAL;
- return device->ops->ioctl(device->device_data, cmd, arg);
+ return device->ops->ioctl(device, cmd, arg);
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
@@ -1661,7 +1576,7 @@ static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
if (unlikely(!device->ops->read))
return -EINVAL;
- return device->ops->read(device->device_data, buf, count, ppos);
+ return device->ops->read(device, buf, count, ppos);
}
static ssize_t vfio_device_fops_write(struct file *filep,
@@ -1673,7 +1588,7 @@ static ssize_t vfio_device_fops_write(struct file *filep,
if (unlikely(!device->ops->write))
return -EINVAL;
- return device->ops->write(device->device_data, buf, count, ppos);
+ return device->ops->write(device, buf, count, ppos);
}
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
@@ -1683,7 +1598,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
if (unlikely(!device->ops->mmap))
return -EINVAL;
- return device->ops->mmap(device->device_data, vma);
+ return device->ops->mmap(device, vma);
}
static const struct file_operations vfio_device_fops = {
@@ -2379,7 +2294,6 @@ static int __init vfio_init(void)
mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
INIT_LIST_HEAD(&vfio.iommu_drivers_list);
- init_waitqueue_head(&vfio.release_q);
ret = misc_register(&vfio_dev);
if (ret) {
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 45cbfd4879a5..a0747c35a778 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -16,7 +16,7 @@
* IOMMU to support the IOMMU API and have few to no restrictions around
* the IOVA range that can be mapped. The Type1 IOMMU is currently
* optimized for relatively static mappings of a userspace process with
- * userpsace pages pinned into memory. We also assume devices and IOMMU
+ * userspace pages pinned into memory. We also assume devices and IOMMU
* domains are PCI based as the IOMMU API is still centered around a
* device/bus interface rather than a group interface.
*/
@@ -77,7 +77,6 @@ struct vfio_iommu {
bool v2;
bool nesting;
bool dirty_page_tracking;
- bool pinned_page_dirty_scope;
bool container_open;
};
@@ -877,7 +876,7 @@ again:
/*
* If iommu capable domain exist in the container then all pages are
- * already pinned and accounted. Accouting should be done if there is no
+ * already pinned and accounted. Accounting should be done if there is no
* iommu capable domain in the container.
*/
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
@@ -960,7 +959,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
bool do_accounting;
int i;
- if (!iommu || !user_pfn)
+ if (!iommu || !user_pfn || npage <= 0)
return -EINVAL;
/* Supported for v2 version only */
@@ -977,13 +976,13 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
iova = user_pfn[i] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma)
- goto unpin_exit;
+ break;
+
vfio_unpin_page_external(dma, iova, do_accounting);
}
-unpin_exit:
mutex_unlock(&iommu->lock);
- return i > npage ? npage : (i > 0 ? i : -EINVAL);
+ return i > 0 ? i : -EINVAL;
}
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
@@ -1933,28 +1932,13 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
return ret;
}
-static struct device *vfio_mdev_get_iommu_device(struct device *dev)
-{
- struct device *(*fn)(struct device *dev);
- struct device *iommu_device;
-
- fn = symbol_get(mdev_get_iommu_device);
- if (fn) {
- iommu_device = fn(dev);
- symbol_put(mdev_get_iommu_device);
-
- return iommu_device;
- }
-
- return NULL;
-}
-
static int vfio_mdev_attach_domain(struct device *dev, void *data)
{
+ struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
- iommu_device = vfio_mdev_get_iommu_device(dev);
+ iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
return iommu_aux_attach_device(domain, iommu_device);
@@ -1967,10 +1951,11 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
static int vfio_mdev_detach_domain(struct device *dev, void *data)
{
+ struct mdev_device *mdev = to_mdev_device(dev);
struct iommu_domain *domain = data;
struct device *iommu_device;
- iommu_device = vfio_mdev_get_iommu_device(dev);
+ iommu_device = mdev_get_iommu_device(mdev);
if (iommu_device) {
if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
iommu_aux_detach_device(domain, iommu_device);
@@ -2018,9 +2003,10 @@ static bool vfio_bus_is_mdev(struct bus_type *bus)
static int vfio_mdev_iommu_device(struct device *dev, void *data)
{
+ struct mdev_device *mdev = to_mdev_device(dev);
struct device **old = data, *new;
- new = vfio_mdev_get_iommu_device(dev);
+ new = mdev_get_iommu_device(mdev);
if (!new || (*old && *old != new))
return -EINVAL;
@@ -2177,7 +2163,7 @@ static int vfio_iommu_resv_exclude(struct list_head *iova,
continue;
/*
* Insert a new node if current node overlaps with the
- * reserve region to exlude that from valid iova range.
+ * reserve region to exclude that from valid iova range.
* Note that, new node is inserted before the current
* node and finally the current node is deleted keeping
* the list updated and sorted.
@@ -2262,7 +2248,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
int ret;
bool resv_msi, msi_remap;
phys_addr_t resv_msi_base = 0;
- struct iommu_domain_geometry geo;
+ struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions);
@@ -2330,10 +2316,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
if (iommu->nesting) {
- int attr = 1;
-
- ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
- &attr);
+ ret = iommu_enable_nesting(domain->domain);
if (ret)
goto out_domain;
}
@@ -2343,10 +2326,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_domain;
/* Get aperture info */
- iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
-
- if (vfio_iommu_aper_conflict(iommu, geo.aperture_start,
- geo.aperture_end)) {
+ geo = &domain->domain->geometry;
+ if (vfio_iommu_aper_conflict(iommu, geo->aperture_start,
+ geo->aperture_end)) {
ret = -EINVAL;
goto out_detach;
}
@@ -2369,8 +2351,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_detach;
- ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start,
- geo.aperture_end);
+ ret = vfio_iommu_aper_resize(&iova_copy, geo->aperture_start,
+ geo->aperture_end);
if (ret)
goto out_detach;
@@ -2503,7 +2485,6 @@ static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
struct list_head *iova_copy)
{
struct vfio_domain *domain;
- struct iommu_domain_geometry geo;
struct vfio_iova *node;
dma_addr_t start = 0;
dma_addr_t end = (dma_addr_t)~0;
@@ -2512,12 +2493,12 @@ static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
return;
list_for_each_entry(domain, &iommu->domain_list, next) {
- iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
- &geo);
- if (geo.aperture_start > start)
- start = geo.aperture_start;
- if (geo.aperture_end < end)
- end = geo.aperture_end;
+ struct iommu_domain_geometry *geo = &domain->domain->geometry;
+
+ if (geo->aperture_start > start)
+ start = geo->aperture_start;
+ if (geo->aperture_end < end)
+ end = geo->aperture_end;
}
/* Modify aperture limits. The new aper is either same or bigger */