summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/driver-api/vfio.rst48
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc.c127
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_private.h1
-rw-r--r--drivers/vfio/mdev/mdev_private.h5
-rw-r--r--drivers/vfio/mdev/vfio_mdev.c53
-rw-r--r--drivers/vfio/pci/Kconfig6
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c271
-rw-r--r--drivers/vfio/pci/vfio_pci_nvlink2.c490
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h15
-rw-r--r--drivers/vfio/platform/vfio_amba.c8
-rw-r--r--drivers/vfio/platform/vfio_platform.c20
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c56
-rw-r--r--drivers/vfio/platform/vfio_platform_private.h5
-rw-r--r--drivers/vfio/vfio.c210
-rw-r--r--include/linux/vfio.h37
-rw-r--r--include/uapi/linux/vfio.h38
17 files changed, 421 insertions, 970 deletions
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index f1a4d3c3ba0b..decc68cb8114 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -249,35 +249,41 @@ VFIO bus driver API
VFIO bus drivers, such as vfio-pci make use of only a few interfaces
into VFIO core. When devices are bound and unbound to the driver,
-the driver should call vfio_add_group_dev() and vfio_del_group_dev()
-respectively::
-
- extern int vfio_add_group_dev(struct device *dev,
- const struct vfio_device_ops *ops,
- void *device_data);
-
- extern void *vfio_del_group_dev(struct device *dev);
-
-vfio_add_group_dev() indicates to the core to begin tracking the
-iommu_group of the specified dev and register the dev as owned by
-a VFIO bus driver. The driver provides an ops structure for callbacks
+the driver should call vfio_register_group_dev() and
+vfio_unregister_group_dev() respectively::
+
+ void vfio_init_group_dev(struct vfio_device *device,
+ struct device *dev,
+ const struct vfio_device_ops *ops);
+ int vfio_register_group_dev(struct vfio_device *device);
+ void vfio_unregister_group_dev(struct vfio_device *device);
+
+The driver should embed the vfio_device in its own structure and call
+vfio_init_group_dev() to pre-configure it before going to registration.
+vfio_register_group_dev() indicates to the core to begin tracking the
+iommu_group of the specified dev and register the dev as owned by a VFIO bus
+driver. Once vfio_register_group_dev() returns it is possible for userspace to
+start accessing the driver, thus the driver should ensure it is completely
+ready before calling it. The driver provides an ops structure for callbacks
similar to a file operations structure::
struct vfio_device_ops {
- int (*open)(void *device_data);
- void (*release)(void *device_data);
- ssize_t (*read)(void *device_data, char __user *buf,
+ int (*open)(struct vfio_device *vdev);
+ void (*release)(struct vfio_device *vdev);
+ ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
- ssize_t (*write)(void *device_data, const char __user *buf,
+ ssize_t (*write)(struct vfio_device *vdev,
+ const char __user *buf,
size_t size, loff_t *ppos);
- long (*ioctl)(void *device_data, unsigned int cmd,
+ long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
- int (*mmap)(void *device_data, struct vm_area_struct *vma);
+ int (*mmap)(struct vfio_device *vdev,
+ struct vm_area_struct *vma);
};
-Each function is passed the device_data that was originally registered
-in the vfio_add_group_dev() call above. This allows the bus driver
-an easy place to store its opaque, private data. The open/release
+Each function is passed the vdev that was originally registered
+in the vfio_register_group_dev() call above. This allows the bus driver
+to obtain its private data using container_of(). The open/release
callbacks are issued when a new file descriptor is created for a
device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides
a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index f27e25112c40..980e59551301 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -75,7 +75,8 @@ static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
goto unlock;
}
- cont_vdev = vfio_device_data(device);
+ cont_vdev =
+ container_of(device, struct vfio_fsl_mc_device, vdev);
if (!cont_vdev || !cont_vdev->reflck) {
vfio_device_put(device);
ret = -ENODEV;
@@ -135,9 +136,10 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
kfree(vdev->regions);
}
-static int vfio_fsl_mc_open(void *device_data)
+static int vfio_fsl_mc_open(struct vfio_device *core_vdev)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
if (!try_module_get(THIS_MODULE))
@@ -161,9 +163,10 @@ err_reg_init:
return ret;
}
-static void vfio_fsl_mc_release(void *device_data)
+static void vfio_fsl_mc_release(struct vfio_device *core_vdev)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
int ret;
mutex_lock(&vdev->reflck->lock);
@@ -197,11 +200,12 @@ static void vfio_fsl_mc_release(void *device_data)
module_put(THIS_MODULE);
}
-static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
- unsigned long arg)
+static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev,
+ unsigned int cmd, unsigned long arg)
{
unsigned long minsz;
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
switch (cmd) {
@@ -327,10 +331,11 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
}
}
-static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+static ssize_t vfio_fsl_mc_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -404,10 +409,12 @@ static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
return 0;
}
-static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_fsl_mc_write(struct vfio_device *core_vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
@@ -468,9 +475,11 @@ static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
size, vma->vm_page_prot);
}
-static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
{
- struct vfio_fsl_mc_device *vdev = device_data;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
struct fsl_mc_device *mc_dev = vdev->mc_dev;
unsigned int index;
@@ -568,23 +577,39 @@ static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret);
goto out_nc_unreg;
}
+ return 0;
+
+out_nc_unreg:
+ bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+ return ret;
+}
+static int vfio_fsl_mc_scan_container(struct fsl_mc_device *mc_dev)
+{
+ int ret;
+
+ /* non dprc devices do not scan for other devices */
+ if (!is_fsl_mc_bus_dprc(mc_dev))
+ return 0;
ret = dprc_scan_container(mc_dev, false);
if (ret) {
- dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
- goto out_dprc_cleanup;
+ dev_err(&mc_dev->dev,
+ "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
+ dprc_remove_devices(mc_dev, NULL, 0);
+ return ret;
}
-
return 0;
+}
+
+static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+ if (!is_fsl_mc_bus_dprc(mc_dev))
+ return;
-out_dprc_cleanup:
- dprc_remove_devices(mc_dev, NULL, 0);
dprc_cleanup(mc_dev);
-out_nc_unreg:
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
- vdev->nb.notifier_call = NULL;
-
- return ret;
}
static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
@@ -600,36 +625,50 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
return -EINVAL;
}
- vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev) {
ret = -ENOMEM;
goto out_group_put;
}
+ vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
vdev->mc_dev = mc_dev;
-
- ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
- if (ret) {
- dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
- goto out_group_put;
- }
+ mutex_init(&vdev->igate);
ret = vfio_fsl_mc_reflck_attach(vdev);
if (ret)
- goto out_group_dev;
+ goto out_kfree;
ret = vfio_fsl_mc_init_device(vdev);
if (ret)
goto out_reflck;
- mutex_init(&vdev->igate);
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret) {
+ dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
+ goto out_device;
+ }
+ /*
+ * This triggers recursion into vfio_fsl_mc_probe() on another device
+ * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the
+ * vfio_add_group_dev() above. It has no impact on this vdev, so it is
+ * safe to be after the vfio device is made live.
+ */
+ ret = vfio_fsl_mc_scan_container(mc_dev);
+ if (ret)
+ goto out_group_dev;
+ dev_set_drvdata(dev, vdev);
return 0;
+out_group_dev:
+ vfio_unregister_group_dev(&vdev->vdev);
+out_device:
+ vfio_fsl_uninit_device(vdev);
out_reflck:
vfio_fsl_mc_reflck_put(vdev->reflck);
-out_group_dev:
- vfio_del_group_dev(dev);
+out_kfree:
+ kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, dev);
return ret;
@@ -637,25 +676,17 @@ out_group_put:
static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
{
- struct vfio_fsl_mc_device *vdev;
struct device *dev = &mc_dev->dev;
+ struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev);
- vdev = vfio_del_group_dev(dev);
- if (!vdev)
- return -EINVAL;
-
+ vfio_unregister_group_dev(&vdev->vdev);
mutex_destroy(&vdev->igate);
+ dprc_remove_devices(mc_dev, NULL, 0);
+ vfio_fsl_uninit_device(vdev);
vfio_fsl_mc_reflck_put(vdev->reflck);
- if (is_fsl_mc_bus_dprc(mc_dev)) {
- dprc_remove_devices(mc_dev, NULL, 0);
- dprc_cleanup(mc_dev);
- }
-
- if (vdev->nb.notifier_call)
- bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
-
+ kfree(vdev);
vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
return 0;
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index a97ee691ed47..89700e00e77d 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -36,6 +36,7 @@ struct vfio_fsl_mc_region {
};
struct vfio_fsl_mc_device {
+ struct vfio_device vdev;
struct fsl_mc_device *mc_dev;
struct notifier_block nb;
int refcnt;
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 4d62b76c4734..fdbb3bee99a9 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -35,7 +35,10 @@ struct mdev_device {
bool active;
};
-#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
+static inline struct mdev_device *to_mdev_device(struct device *dev)
+{
+ return container_of(dev, struct mdev_device, dev);
+}
#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
struct mdev_type {
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index b52eea128549..ae7e322fbe3c 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -21,10 +21,11 @@
#define DRIVER_AUTHOR "NVIDIA Corporation"
#define DRIVER_DESC "VFIO based driver for Mediated device"
-static int vfio_mdev_open(void *device_data)
+static int vfio_mdev_open(struct vfio_device *core_vdev)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
+
int ret;
if (unlikely(!parent->ops->open))
@@ -40,9 +41,9 @@ static int vfio_mdev_open(void *device_data)
return ret;
}
-static void vfio_mdev_release(void *device_data)
+static void vfio_mdev_release(struct vfio_device *core_vdev)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (likely(parent->ops->release))
@@ -51,10 +52,10 @@ static void vfio_mdev_release(void *device_data)
module_put(THIS_MODULE);
}
-static long vfio_mdev_unlocked_ioctl(void *device_data,
+static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (unlikely(!parent->ops->ioctl))
@@ -63,10 +64,10 @@ static long vfio_mdev_unlocked_ioctl(void *device_data,
return parent->ops->ioctl(mdev, cmd, arg);
}
-static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
+static ssize_t vfio_mdev_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (unlikely(!parent->ops->read))
@@ -75,10 +76,11 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
return parent->ops->read(mdev, buf, count, ppos);
}
-static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_mdev_write(struct vfio_device *core_vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (unlikely(!parent->ops->write))
@@ -87,9 +89,10 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
return parent->ops->write(mdev, buf, count, ppos);
}
-static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_mdev_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (unlikely(!parent->ops->mmap))
@@ -98,9 +101,9 @@ static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
return parent->ops->mmap(mdev, vma);
}
-static void vfio_mdev_request(void *device_data, unsigned int count)
+static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count)
{
- struct mdev_device *mdev = device_data;
+ struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
struct mdev_parent *parent = mdev->parent;
if (parent->ops->request)
@@ -124,13 +127,29 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = {
static int vfio_mdev_probe(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
+ struct vfio_device *vdev;
+ int ret;
- return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev);
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+
+ vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
+ ret = vfio_register_group_dev(vdev);
+ if (ret) {
+ kfree(vdev);
+ return ret;
+ }
+ dev_set_drvdata(&mdev->dev, vdev);
+ return 0;
}
static void vfio_mdev_remove(struct device *dev)
{
- vfio_del_group_dev(dev);
+ struct vfio_device *vdev = dev_get_drvdata(dev);
+
+ vfio_unregister_group_dev(vdev);
+ kfree(vdev);
}
static struct mdev_driver vfio_mdev_driver = {
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 4abddbebd4b2..53ce78d7d07b 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -39,9 +39,3 @@ config VFIO_PCI_IGD
and LPC bridge config space.
To enable Intel IGD assignment through vfio-pci, say Y.
-
-config VFIO_PCI_NVLINK2
- def_bool y
- depends on VFIO_PCI && PPC_POWERNV && SPAPR_TCE_IOMMU
- help
- VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index eff97a7cd9f1..3ff42093962f 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,7 +2,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
-vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 37a4a4824e30..882e890dca54 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -388,24 +388,6 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
- if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
- IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
- ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
- if (ret && ret != -ENODEV) {
- pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
- goto disable_exit;
- }
- }
-
- if (pdev->vendor == PCI_VENDOR_ID_IBM &&
- IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
- ret = vfio_pci_ibm_npu2_init(vdev);
- if (ret && ret != -ENODEV) {
- pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
- goto disable_exit;
- }
- }
-
vfio_pci_probe_mmaps(vdev);
return 0;
@@ -516,30 +498,29 @@ out:
static struct pci_driver vfio_pci_driver;
-static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
- struct vfio_device **pf_dev)
+static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev)
{
struct pci_dev *physfn = pci_physfn(vdev->pdev);
+ struct vfio_device *pf_dev;
if (!vdev->pdev->is_virtfn)
return NULL;
- *pf_dev = vfio_device_get_from_dev(&physfn->dev);
- if (!*pf_dev)
+ pf_dev = vfio_device_get_from_dev(&physfn->dev);
+ if (!pf_dev)
return NULL;
if (pci_dev_driver(physfn) != &vfio_pci_driver) {
- vfio_device_put(*pf_dev);
+ vfio_device_put(pf_dev);
return NULL;
}
- return vfio_device_data(*pf_dev);
+ return container_of(pf_dev, struct vfio_pci_device, vdev);
}
static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
- struct vfio_device *pf_dev;
- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+ struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
if (!pf_vdev)
return;
@@ -549,12 +530,13 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
WARN_ON(pf_vdev->vf_token->users < 0);
mutex_unlock(&pf_vdev->vf_token->lock);
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
}
-static void vfio_pci_release(void *device_data)
+static void vfio_pci_release(struct vfio_device *core_vdev)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
mutex_lock(&vdev->reflck->lock);
@@ -580,9 +562,10 @@ static void vfio_pci_release(void *device_data)
module_put(THIS_MODULE);
}
-static int vfio_pci_open(void *device_data)
+static int vfio_pci_open(struct vfio_device *core_vdev)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -791,15 +774,16 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
}
struct vfio_devices {
- struct vfio_device **devices;
+ struct vfio_pci_device **devices;
int cur_index;
int max_index;
};
-static long vfio_pci_ioctl(void *device_data,
+static long vfio_pci_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@@ -1279,9 +1263,7 @@ reset_info_exit:
goto hot_reset_release;
for (; mem_idx < devs.cur_index; mem_idx++) {
- struct vfio_pci_device *tmp;
-
- tmp = vfio_device_data(devs.devices[mem_idx]);
+ struct vfio_pci_device *tmp = devs.devices[mem_idx];
ret = down_write_trylock(&tmp->memory_lock);
if (!ret) {
@@ -1296,17 +1278,13 @@ reset_info_exit:
hot_reset_release:
for (i = 0; i < devs.cur_index; i++) {
- struct vfio_device *device;
- struct vfio_pci_device *tmp;
-
- device = devs.devices[i];
- tmp = vfio_device_data(device);
+ struct vfio_pci_device *tmp = devs.devices[i];
if (i < mem_idx)
up_write(&tmp->memory_lock);
else
mutex_unlock(&tmp->vma_lock);
- vfio_device_put(device);
+ vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
@@ -1401,11 +1379,10 @@ hot_reset_release:
return -ENOTTY;
}
-static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
+static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- struct vfio_pci_device *vdev = device_data;
if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
@@ -1433,22 +1410,28 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
return -EINVAL;
}
-static ssize_t vfio_pci_read(void *device_data, char __user *buf,
+static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
+
if (!count)
return 0;
- return vfio_pci_rw(device_data, buf, count, ppos, false);
+ return vfio_pci_rw(vdev, buf, count, ppos, false);
}
-static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
+static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
+
if (!count)
return 0;
- return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
+ return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
}
/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
@@ -1645,9 +1628,10 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = {
.fault = vfio_pci_mmap_fault,
};
-static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
unsigned int index;
u64 phys_len, req_len, pgoff, req_start;
@@ -1713,9 +1697,10 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return 0;
}
-static void vfio_pci_request(void *device_data, unsigned int count)
+static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
struct pci_dev *pdev = vdev->pdev;
mutex_lock(&vdev->igate);
@@ -1766,8 +1751,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
return 0; /* No VF token provided or required */
if (vdev->pdev->is_virtfn) {
- struct vfio_device *pf_dev;
- struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
+ struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
bool match;
if (!pf_vdev) {
@@ -1780,7 +1764,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
}
if (!vf_token) {
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
pci_info_ratelimited(vdev->pdev,
"VF token required to access device\n");
return -EACCES;
@@ -1790,7 +1774,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
mutex_unlock(&pf_vdev->vf_token->lock);
- vfio_device_put(pf_dev);
+ vfio_device_put(&pf_vdev->vdev);
if (!match) {
pci_info_ratelimited(vdev->pdev,
@@ -1829,9 +1813,10 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
#define VF_TOKEN_ARG "vf_token="
-static int vfio_pci_match(void *device_data, char *buf)
+static int vfio_pci_match(struct vfio_device *core_vdev, char *buf)
{
- struct vfio_pci_device *vdev = device_data;
+ struct vfio_pci_device *vdev =
+ container_of(core_vdev, struct vfio_pci_device, vdev);
bool vf_token = false;
uuid_t uuid;
int ret;
@@ -1921,6 +1906,68 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
return 0;
}
+static int vfio_pci_vf_init(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ if (!pdev->is_physfn)
+ return 0;
+
+ vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
+ if (!vdev->vf_token)
+ return -ENOMEM;
+
+ mutex_init(&vdev->vf_token->lock);
+ uuid_gen(&vdev->vf_token->uuid);
+
+ vdev->nb.notifier_call = vfio_pci_bus_notifier;
+ ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
+ if (ret) {
+ kfree(vdev->vf_token);
+ return ret;
+ }
+ return 0;
+}
+
+static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
+{
+ if (!vdev->vf_token)
+ return;
+
+ bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+ WARN_ON(vdev->vf_token->users);
+ mutex_destroy(&vdev->vf_token->lock);
+ kfree(vdev->vf_token);
+}
+
+static int vfio_pci_vga_init(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ if (!vfio_pci_is_vga(pdev))
+ return 0;
+
+ ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
+ if (ret)
+ return ret;
+ vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
+ return 0;
+}
+
+static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+
+ if (!vfio_pci_is_vga(pdev))
+ return;
+ vga_client_register(pdev, NULL, NULL, NULL);
+ vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+ VGA_RSRC_LEGACY_IO |
+ VGA_RSRC_LEGACY_MEM);
+}
+
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_device *vdev;
@@ -1956,6 +2003,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_group_put;
}
+ vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops);
vdev->pdev = pdev;
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
@@ -1967,35 +2015,15 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&vdev->vma_list);
init_rwsem(&vdev->memory_lock);
- ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
+ ret = vfio_pci_reflck_attach(vdev);
if (ret)
goto out_free;
-
- ret = vfio_pci_reflck_attach(vdev);
+ ret = vfio_pci_vf_init(vdev);
if (ret)
- goto out_del_group_dev;
-
- if (pdev->is_physfn) {
- vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
- if (!vdev->vf_token) {
- ret = -ENOMEM;
- goto out_reflck;
- }
-
- mutex_init(&vdev->vf_token->lock);
- uuid_gen(&vdev->vf_token->uuid);
-
- vdev->nb.notifier_call = vfio_pci_bus_notifier;
- ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
- if (ret)
- goto out_vf_token;
- }
-
- if (vfio_pci_is_vga(pdev)) {
- vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
- vga_set_legacy_decoding(pdev,
- vfio_pci_set_vga_decode(vdev, false));
- }
+ goto out_reflck;
+ ret = vfio_pci_vga_init(vdev);
+ if (ret)
+ goto out_vf;
vfio_pci_probe_power_state(vdev);
@@ -2013,15 +2041,21 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vfio_pci_set_power_state(vdev, PCI_D3hot);
}
- return ret;
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret)
+ goto out_power;
+ dev_set_drvdata(&pdev->dev, vdev);
+ return 0;
-out_vf_token:
- kfree(vdev->vf_token);
+out_power:
+ if (!disable_idle_d3)
+ vfio_pci_set_power_state(vdev, PCI_D0);
+out_vf:
+ vfio_pci_vf_uninit(vdev);
out_reflck:
vfio_pci_reflck_put(vdev->reflck);
-out_del_group_dev:
- vfio_del_group_dev(&pdev->dev);
out_free:
+ kfree(vdev->pm_save);
kfree(vdev);
out_group_put:
vfio_iommu_group_put(group, &pdev->dev);
@@ -2030,41 +2064,25 @@ out_group_put:
static void vfio_pci_remove(struct pci_dev *pdev)
{
- struct vfio_pci_device *vdev;
+ struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
pci_disable_sriov(pdev);
- vdev = vfio_del_group_dev(&pdev->dev);
- if (!vdev)
- return;
-
- if (vdev->vf_token) {
- WARN_ON(vdev->vf_token->users);
- mutex_destroy(&vdev->vf_token->lock);
- kfree(vdev->vf_token);
- }
-
- if (vdev->nb.notifier_call)
- bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+ vfio_unregister_group_dev(&vdev->vdev);
+ vfio_pci_vf_uninit(vdev);
vfio_pci_reflck_put(vdev->reflck);
+ vfio_pci_vga_uninit(vdev);
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
- kfree(vdev->region);
- mutex_destroy(&vdev->ioeventfds_lock);
if (!disable_idle_d3)
vfio_pci_set_power_state(vdev, PCI_D0);
+ mutex_destroy(&vdev->ioeventfds_lock);
+ kfree(vdev->region);
kfree(vdev->pm_save);
kfree(vdev);
-
- if (vfio_pci_is_vga(pdev)) {
- vga_client_register(pdev, NULL, NULL, NULL);
- vga_set_legacy_decoding(pdev,
- VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
- VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
- }
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -2077,11 +2095,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
if (device == NULL)
return PCI_ERS_RESULT_DISCONNECT;
- vdev = vfio_device_data(device);
- if (vdev == NULL) {
- vfio_device_put(device);
- return PCI_ERS_RESULT_DISCONNECT;
- }
+ vdev = container_of(device, struct vfio_pci_device, vdev);
mutex_lock(&vdev->igate);
@@ -2097,7 +2111,6 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
- struct vfio_pci_device *vdev;
struct vfio_device *device;
int ret = 0;
@@ -2110,12 +2123,6 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
if (!device)
return -ENODEV;
- vdev = vfio_device_data(device);
- if (!vdev) {
- vfio_device_put(device);
- return -ENODEV;
- }
-
if (nr_virtfn == 0)
pci_disable_sriov(pdev);
else
@@ -2175,7 +2182,7 @@ static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
return 0;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
if (vdev->reflck) {
vfio_pci_reflck_get(vdev->reflck);
@@ -2237,7 +2244,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
/* Fault if the device is not unused */
if (vdev->refcnt) {
@@ -2245,7 +2252,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- devs->devices[devs->cur_index++] = device;
+ devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2267,7 +2274,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- vdev = vfio_device_data(device);
+ vdev = container_of(device, struct vfio_pci_device, vdev);
/*
* Locking multiple devices is prone to deadlock, runaway and
@@ -2278,7 +2285,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
return -EBUSY;
}
- devs->devices[devs->cur_index++] = device;
+ devs->devices[devs->cur_index++] = vdev;
return 0;
}
@@ -2326,7 +2333,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
/* Does at least one need a reset? */
for (i = 0; i < devs.cur_index; i++) {
- tmp = vfio_device_data(devs.devices[i]);
+ tmp = devs.devices[i];
if (tmp->needs_reset) {
ret = pci_reset_bus(vdev->pdev);
break;
@@ -2335,7 +2342,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
put_devs:
for (i = 0; i < devs.cur_index; i++) {
- tmp = vfio_device_data(devs.devices[i]);
+ tmp = devs.devices[i];
/*
* If reset was successful, affected devices no longer need
@@ -2351,7 +2358,7 @@ put_devs:
vfio_pci_set_power_state(tmp, PCI_D3hot);
}
- vfio_device_put(devs.devices[i]);
+ vfio_device_put(&tmp->vdev);
}
kfree(devs.devices);
diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
deleted file mode 100644
index f276624fec79..000000000000
--- a/drivers/vfio/pci/vfio_pci_nvlink2.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2.
- *
- * Copyright (C) 2018 IBM Corp. All rights reserved.
- * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * Register an on-GPU RAM region for cacheable access.
- *
- * Derived from original vfio_pci_igd.c:
- * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
- * Author: Alex Williamson <alex.williamson@redhat.com>
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include <linux/sched/mm.h>
-#include <linux/mmu_context.h>
-#include <asm/kvm_ppc.h>
-#include "vfio_pci_private.h"
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap);
-EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap);
-
-struct vfio_pci_nvgpu_data {
- unsigned long gpu_hpa; /* GPU RAM physical address */
- unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
- unsigned long useraddr; /* GPU RAM userspace address */
- unsigned long size; /* Size of the GPU RAM window (usually 128GB) */
- struct mm_struct *mm;
- struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */
- struct pci_dev *gpdev;
- struct notifier_block group_notifier;
-};
-
-static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev,
- char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
- unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
- struct vfio_pci_nvgpu_data *data = vdev->region[i].data;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
- loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK;
- size_t sizealigned;
- void __iomem *ptr;
-
- if (pos >= vdev->region[i].size)
- return -EINVAL;
-
- count = min(count, (size_t)(vdev->region[i].size - pos));
-
- /*
- * We map only a bit of GPU RAM for a short time instead of mapping it
- * for the guest lifetime as:
- *
- * 1) we do not know GPU RAM size, only aperture which is 4-8 times
- * bigger than actual RAM size (16/32GB RAM vs. 128GB aperture);
- * 2) mapping GPU RAM allows CPU to prefetch and if this happens
- * before NVLink bridge is reset (which fences GPU RAM),
- * hardware management interrupts (HMI) might happen, this
- * will freeze NVLink bridge.
- *
- * This is not fast path anyway.
- */
- sizealigned = ALIGN(posoff + count, PAGE_SIZE);
- ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned);
- if (!ptr)
- return -EFAULT;
-
- if (iswrite) {
- if (copy_from_user(ptr + posoff, buf, count))
- count = -EFAULT;
- else
- *ppos += count;
- } else {
- if (copy_to_user(buf, ptr + posoff, count))
- count = -EFAULT;
- else
- *ppos += count;
- }
-
- iounmap(ptr);
-
- return count;
-}
-
-static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region)
-{
- struct vfio_pci_nvgpu_data *data = region->data;
- long ret;
-
- /* If there were any mappings at all... */
- if (data->mm) {
- if (data->mem) {
- ret = mm_iommu_put(data->mm, data->mem);
- WARN_ON(ret);
- }
-
- mmdrop(data->mm);
- }
-
- vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &data->group_notifier);
-
- pnv_npu2_unmap_lpar_dev(data->gpdev);
-
- kfree(data);
-}
-
-static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf)
-{
- vm_fault_t ret;
- struct vm_area_struct *vma = vmf->vma;
- struct vfio_pci_region *region = vma->vm_private_data;
- struct vfio_pci_nvgpu_data *data = region->data;
- unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT;
- unsigned long vm_pgoff = vma->vm_pgoff &
- ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
- unsigned long pfn = nv2pg + vm_pgoff + vmf_off;
-
- ret = vmf_insert_pfn(vma, vmf->address, pfn);
- trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT,
- vmf->address, ret);
-
- return ret;
-}
-
-static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = {
- .fault = vfio_pci_nvgpu_mmap_fault,
-};
-
-static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
- int ret;
- struct vfio_pci_nvgpu_data *data = region->data;
-
- if (data->useraddr)
- return -EPERM;
-
- if (vma->vm_end - vma->vm_start > data->size)
- return -EINVAL;
-
- vma->vm_private_data = region;
- vma->vm_flags |= VM_PFNMAP;
- vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops;
-
- /*
- * Calling mm_iommu_newdev() here once as the region is not
- * registered yet and therefore right initialization will happen now.
- * Other places will use mm_iommu_find() which returns
- * registered @mem and does not go gup().
- */
- data->useraddr = vma->vm_start;
- data->mm = current->mm;
-
- mmgrab(data->mm);
- ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
- vma_pages(vma), data->gpu_hpa, &data->mem);
-
- trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,
- vma->vm_end - vma->vm_start, ret);
-
- return ret;
-}
-
-static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
- struct vfio_pci_nvgpu_data *data = region->data;
- struct vfio_region_info_cap_nvlink2_ssatgt cap = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
- .header.version = 1,
- .tgt = data->gpu_tgt
- };
-
- return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
-}
-
-static const struct vfio_pci_regops vfio_pci_nvgpu_regops = {
- .rw = vfio_pci_nvgpu_rw,
- .release = vfio_pci_nvgpu_release,
- .mmap = vfio_pci_nvgpu_mmap,
- .add_capability = vfio_pci_nvgpu_add_capability,
-};
-
-static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb,
- unsigned long action, void *opaque)
-{
- struct kvm *kvm = opaque;
- struct vfio_pci_nvgpu_data *data = container_of(nb,
- struct vfio_pci_nvgpu_data,
- group_notifier);
-
- if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm &&
- pnv_npu2_map_lpar_dev(data->gpdev,
- kvm->arch.lpid, MSR_DR | MSR_PR))
- return NOTIFY_BAD;
-
- return NOTIFY_OK;
-}
-
-int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
- int ret;
- u64 reg[2];
- u64 tgt = 0;
- struct device_node *npu_node, *mem_node;
- struct pci_dev *npu_dev;
- struct vfio_pci_nvgpu_data *data;
- uint32_t mem_phandle = 0;
- unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
-
- /*
- * PCI config space does not tell us about NVLink presence but
- * platform does, use this.
- */
- npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0);
- if (!npu_dev)
- return -ENODEV;
-
- npu_node = pci_device_to_OF_node(npu_dev);
- if (!npu_node)
- return -EINVAL;
-
- if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
- return -ENODEV;
-
- mem_node = of_find_node_by_phandle(mem_phandle);
- if (!mem_node)
- return -EINVAL;
-
- if (of_property_read_variable_u64_array(mem_node, "reg", reg,
- ARRAY_SIZE(reg), ARRAY_SIZE(reg)) !=
- ARRAY_SIZE(reg))
- return -EINVAL;
-
- if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
- dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
- return -EFAULT;
- }
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->gpu_hpa = reg[0];
- data->gpu_tgt = tgt;
- data->size = reg[1];
-
- dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa,
- data->gpu_hpa + data->size - 1);
-
- data->gpdev = vdev->pdev;
- data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier;
-
- ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &events, &data->group_notifier);
- if (ret)
- goto free_exit;
-
- /*
- * We have just set KVM, we do not need the listener anymore.
- * Also, keeping it registered means that if more than one GPU is
- * assigned, we will get several similar notifiers notifying about
- * the same device again which does not help with anything.
- */
- vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
- &data->group_notifier);
-
- ret = vfio_pci_register_dev_region(vdev,
- PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
- VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
- &vfio_pci_nvgpu_regops,
- data->size,
- VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE |
- VFIO_REGION_INFO_FLAG_MMAP,
- data);
- if (ret)
- goto free_exit;
-
- return 0;
-free_exit:
- kfree(data);
-
- return ret;
-}
-
-/*
- * IBM NPU2 bridge
- */
-struct vfio_pci_npu2_data {
- void *base; /* ATSD register virtual address, for emulated access */
- unsigned long mmio_atsd; /* ATSD physical address */
- unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
- unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */
-};
-
-static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev,
- char __user *buf, size_t count, loff_t *ppos, bool iswrite)
-{
- unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
- struct vfio_pci_npu2_data *data = vdev->region[i].data;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
-
- if (pos >= vdev->region[i].size)
- return -EINVAL;
-
- count = min(count, (size_t)(vdev->region[i].size - pos));
-
- if (iswrite) {
- if (copy_from_user(data->base + pos, buf, count))
- return -EFAULT;
- } else {
- if (copy_to_user(buf, data->base + pos, count))
- return -EFAULT;
- }
- *ppos += count;
-
- return count;
-}
-
-static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vm_area_struct *vma)
-{
- int ret;
- struct vfio_pci_npu2_data *data = region->data;
- unsigned long req_len = vma->vm_end - vma->vm_start;
-
- if (req_len != PAGE_SIZE)
- return -EINVAL;
-
- vma->vm_flags |= VM_PFNMAP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT,
- req_len, vma->vm_page_prot);
- trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start,
- vma->vm_end - vma->vm_start, ret);
-
- return ret;
-}
-
-static void vfio_pci_npu2_release(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region)
-{
- struct vfio_pci_npu2_data *data = region->data;
-
- memunmap(data->base);
- kfree(data);
-}
-
-static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev,
- struct vfio_pci_region *region, struct vfio_info_cap *caps)
-{
- struct vfio_pci_npu2_data *data = region->data;
- struct vfio_region_info_cap_nvlink2_ssatgt captgt = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT,
- .header.version = 1,
- .tgt = data->gpu_tgt
- };
- struct vfio_region_info_cap_nvlink2_lnkspd capspd = {
- .header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD,
- .header.version = 1,
- .link_speed = data->link_speed
- };
- int ret;
-
- ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt));
- if (ret)
- return ret;
-
- return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd));
-}
-
-static const struct vfio_pci_regops vfio_pci_npu2_regops = {
- .rw = vfio_pci_npu2_rw,
- .mmap = vfio_pci_npu2_mmap,
- .release = vfio_pci_npu2_release,
- .add_capability = vfio_pci_npu2_add_capability,
-};
-
-int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
- int ret;
- struct vfio_pci_npu2_data *data;
- struct device_node *nvlink_dn;
- u32 nvlink_index = 0, mem_phandle = 0;
- struct pci_dev *npdev = vdev->pdev;
- struct device_node *npu_node = pci_device_to_OF_node(npdev);
- struct pci_controller *hose = pci_bus_to_host(npdev->bus);
- u64 mmio_atsd = 0;
- u64 tgt = 0;
- u32 link_speed = 0xff;
-
- /*
- * PCI config space does not tell us about NVLink presence but
- * platform does, use this.
- */
- if (!pnv_pci_get_gpu_dev(vdev->pdev))
- return -ENODEV;
-
- if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
- return -ENODEV;
-
- /*
- * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
- * so we can allocate one register per link, using nvlink index as
- * a key.
- * There is always at least one ATSD register so as long as at least
- * NVLink bridge #0 is passed to the guest, ATSD will be available.
- */
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return -ENODEV;
-
- if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
- &mmio_atsd)) {
- if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
- &mmio_atsd)) {
- dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
- mmio_atsd = 0;
- } else {
- dev_warn(&vdev->pdev->dev,
- "Using fallback ibm,mmio-atsd[0] for ATSD.\n");
- }
- }
-
- if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
- dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
- return -EFAULT;
- }
-
- if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) {
- dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n");
- return -EFAULT;
- }
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->mmio_atsd = mmio_atsd;
- data->gpu_tgt = tgt;
- data->link_speed = link_speed;
- if (data->mmio_atsd) {
- data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT);
- if (!data->base) {
- ret = -ENOMEM;
- goto free_exit;
- }
- }
-
- /*
- * We want to expose the capability even if this specific NVLink
- * did not get its own ATSD register because capabilities
- * belong to VFIO regions and normally there will be ATSD register
- * assigned to the NVLink bridge.
- */
- ret = vfio_pci_register_dev_region(vdev,
- PCI_VENDOR_ID_IBM |
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
- VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
- &vfio_pci_npu2_regops,
- data->mmio_atsd ? PAGE_SIZE : 0,
- VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE |
- VFIO_REGION_INFO_FLAG_MMAP,
- data);
- if (ret)
- goto free_exit;
-
- return 0;
-
-free_exit:
- if (data->base)
- memunmap(data->base);
- kfree(data);
-
- return ret;
-}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9cd1882a05af..5a36272cecbf 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -100,6 +100,7 @@ struct vfio_pci_mmap_vma {
};
struct vfio_pci_device {
+ struct vfio_device vdev;
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
bool bar_mmap_supported[PCI_STD_NUM_BARS];
@@ -199,20 +200,6 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
-#ifdef CONFIG_VFIO_PCI_NVLINK2
-extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
-extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
-#else
-static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
-{
- return -ENODEV;
-}
-
-static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
-{
- return -ENODEV;
-}
-#endif
#ifdef CONFIG_S390
extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index 3626c2150101..f970eb2a999f 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -66,16 +66,18 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
if (ret) {
kfree(vdev->name);
kfree(vdev);
+ return ret;
}
- return ret;
+ dev_set_drvdata(&adev->dev, vdev);
+ return 0;
}
static void vfio_amba_remove(struct amba_device *adev)
{
- struct vfio_platform_device *vdev =
- vfio_platform_remove_common(&adev->dev);
+ struct vfio_platform_device *vdev = dev_get_drvdata(&adev->dev);
+ vfio_platform_remove_common(vdev);
kfree(vdev->name);
kfree(vdev);
}
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 9fb6818cea12..e4027799a154 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -54,23 +54,21 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->reset_required = reset_required;
ret = vfio_platform_probe_common(vdev, &pdev->dev);
- if (ret)
+ if (ret) {
kfree(vdev);
-
- return ret;
+ return ret;
+ }
+ dev_set_drvdata(&pdev->dev, vdev);
+ return 0;
}
static int vfio_platform_remove(struct platform_device *pdev)
{
- struct vfio_platform_device *vdev;
-
- vdev = vfio_platform_remove_common(&pdev->dev);
- if (vdev) {
- kfree(vdev);
- return 0;
- }
+ struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev);
- return -EINVAL;
+ vfio_platform_remove_common(vdev);
+ kfree(vdev);
+ return 0;
}
static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index fb4b385191f2..361e5b57e369 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -218,9 +218,10 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
return -EINVAL;
}
-static void vfio_platform_release(void *device_data)
+static void vfio_platform_release(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
mutex_lock(&driver_lock);
@@ -244,9 +245,10 @@ static void vfio_platform_release(void *device_data)
module_put(vdev->parent_module);
}
-static int vfio_platform_open(void *device_data)
+static int vfio_platform_open(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
int ret;
if (!try_module_get(vdev->parent_module))
@@ -293,10 +295,12 @@ err_reg:
return ret;
}
-static long vfio_platform_ioctl(void *device_data,
+static long vfio_platform_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+
unsigned long minsz;
if (cmd == VFIO_DEVICE_GET_INFO) {
@@ -455,10 +459,11 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_read(void *device_data, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_platform_read(struct vfio_device *core_vdev,
+ char __user *buf, size_t count, loff_t *ppos)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@@ -531,10 +536,11 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_write(void *device_data, const char __user *buf,
+static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;
@@ -573,9 +579,10 @@ static int vfio_platform_mmap_mmio(struct vfio_platform_region region,
req_len, vma->vm_page_prot);
}
-static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma)
+static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
- struct vfio_platform_device *vdev = device_data;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
unsigned int index;
index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT);
@@ -659,8 +666,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct iommu_group *group;
int ret;
- if (!vdev)
- return -EINVAL;
+ vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
ret = vfio_platform_acpi_probe(vdev, dev);
if (ret)
@@ -685,13 +691,13 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
goto put_reset;
}
- ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
+ ret = vfio_register_group_dev(&vdev->vdev);
if (ret)
goto put_iommu;
mutex_init(&vdev->igate);
- pm_runtime_enable(vdev->device);
+ pm_runtime_enable(dev);
return 0;
put_iommu:
@@ -702,19 +708,13 @@ put_reset:
}
EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
-struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
+void vfio_platform_remove_common(struct vfio_platform_device *vdev)
{
- struct vfio_platform_device *vdev;
-
- vdev = vfio_del_group_dev(dev);
+ vfio_unregister_group_dev(&vdev->vdev);
- if (vdev) {
- pm_runtime_disable(vdev->device);
- vfio_platform_put_reset(vdev);
- vfio_iommu_group_put(dev->iommu_group, dev);
- }
-
- return vdev;
+ pm_runtime_disable(vdev->device);
+ vfio_platform_put_reset(vdev);
+ vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
}
EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index 289089910643..a5ba82c8cbc3 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <linux/vfio.h>
#define VFIO_PLATFORM_OFFSET_SHIFT 40
#define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1)
@@ -42,6 +43,7 @@ struct vfio_platform_region {
};
struct vfio_platform_device {
+ struct vfio_device vdev;
struct vfio_platform_region *regions;
u32 num_regions;
struct vfio_platform_irq *irqs;
@@ -80,8 +82,7 @@ struct vfio_platform_reset_node {
extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
struct device *dev);
-extern struct vfio_platform_device *vfio_platform_remove_common
- (struct device *dev);
+void vfio_platform_remove_common(struct vfio_platform_device *vdev);
extern int vfio_platform_irq_init(struct vfio_platform_device *vdev);
extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..5e631c359ef2 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -46,7 +46,6 @@ static struct vfio {
struct mutex group_lock;
struct cdev group_cdev;
dev_t group_devt;
- wait_queue_head_t release_q;
} vfio;
struct vfio_iommu_driver {
@@ -90,15 +89,6 @@ struct vfio_group {
struct blocking_notifier_head notifier;
};
-struct vfio_device {
- struct kref kref;
- struct device *dev;
- const struct vfio_device_ops *ops;
- struct vfio_group *group;
- struct list_head group_next;
- void *device_data;
-};
-
#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
@@ -109,8 +99,8 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. Thi
/*
* vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
* and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_add_group_dev() or removing
- * that symmetric reference after vfio_del_group_dev() should use the raw
+ * reference for the purpose of calling vfio_register_group_dev() or removing
+ * that symmetric reference after vfio_unregister_group_dev() should use the raw
* iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
* removes the device from the dummy group and cannot be nested.
*/
@@ -532,67 +522,17 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
/**
* Device objects - create, release, get, put, search
*/
-static
-struct vfio_device *vfio_group_create_device(struct vfio_group *group,
- struct device *dev,
- const struct vfio_device_ops *ops,
- void *device_data)
-{
- struct vfio_device *device;
-
- device = kzalloc(sizeof(*device), GFP_KERNEL);
- if (!device)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&device->kref);
- device->dev = dev;
- device->group = group;
- device->ops = ops;
- device->device_data = device_data;
- dev_set_drvdata(dev, device);
-
- /* No need to get group_lock, caller has group reference */
- vfio_group_get(group);
-
- mutex_lock(&group->device_lock);
- list_add(&device->group_next, &group->device_list);
- group->dev_counter++;
- mutex_unlock(&group->device_lock);
-
- return device;
-}
-
-static void vfio_device_release(struct kref *kref)
-{
- struct vfio_device *device = container_of(kref,
- struct vfio_device, kref);
- struct vfio_group *group = device->group;
-
- list_del(&device->group_next);
- group->dev_counter--;
- mutex_unlock(&group->device_lock);
-
- dev_set_drvdata(device->dev, NULL);
-
- kfree(device);
-
- /* vfio_del_group_dev may be waiting for this device */
- wake_up(&vfio.release_q);
-}
-
/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
- struct vfio_group *group = device->group;
- kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
- vfio_group_put(group);
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->comp);
}
EXPORT_SYMBOL_GPL(vfio_device_put);
-static void vfio_device_get(struct vfio_device *device)
+static bool vfio_device_try_get(struct vfio_device *device)
{
- vfio_group_get(device->group);
- kref_get(&device->kref);
+ return refcount_inc_not_zero(&device->refcount);
}
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
@@ -602,8 +542,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_for_each_entry(device, &group->device_list, group_next) {
- if (device->dev == dev) {
- vfio_device_get(device);
+ if (device->dev == dev && vfio_device_try_get(device)) {
mutex_unlock(&group->device_lock);
return device;
}
@@ -801,14 +740,22 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
/**
* VFIO driver API
*/
-int vfio_add_group_dev(struct device *dev,
- const struct vfio_device_ops *ops, void *device_data)
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops)
+{
+ init_completion(&device->comp);
+ device->dev = dev;
+ device->ops = ops;
+}
+EXPORT_SYMBOL_GPL(vfio_init_group_dev);
+
+int vfio_register_group_dev(struct vfio_device *device)
{
+ struct vfio_device *existing_device;
struct iommu_group *iommu_group;
struct vfio_group *group;
- struct vfio_device *device;
- iommu_group = iommu_group_get(dev);
+ iommu_group = iommu_group_get(device->dev);
if (!iommu_group)
return -EINVAL;
@@ -827,31 +774,29 @@ int vfio_add_group_dev(struct device *dev,
iommu_group_put(iommu_group);
}
- device = vfio_group_get_device(group, dev);
- if (device) {
- dev_WARN(dev, "Device already exists on group %d\n",
+ existing_device = vfio_group_get_device(group, device->dev);
+ if (existing_device) {
+ dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(iommu_group));
- vfio_device_put(device);
+ vfio_device_put(existing_device);
vfio_group_put(group);
return -EBUSY;
}
- device = vfio_group_create_device(group, dev, ops, device_data);
- if (IS_ERR(device)) {
- vfio_group_put(group);
- return PTR_ERR(device);
- }
+ /* Our reference on group is moved to the device */
+ device->group = group;
- /*
- * Drop all but the vfio_device reference. The vfio_device holds
- * a reference to the vfio_group, which holds a reference to the
- * iommu_group.
- */
- vfio_group_put(group);
+ /* Refcounting can't start until the driver calls register */
+ refcount_set(&device->refcount, 1);
+
+ mutex_lock(&group->device_lock);
+ list_add(&device->group_next, &group->device_list);
+ group->dev_counter++;
+ mutex_unlock(&group->device_lock);
return 0;
}
-EXPORT_SYMBOL_GPL(vfio_add_group_dev);
+EXPORT_SYMBOL_GPL(vfio_register_group_dev);
/**
* Get a reference to the vfio_device for a device. Even if the
@@ -886,7 +831,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
int ret;
if (it->ops->match) {
- ret = it->ops->match(it->device_data, buf);
+ ret = it->ops->match(it, buf);
if (ret < 0) {
device = ERR_PTR(ret);
break;
@@ -895,9 +840,8 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
ret = !strcmp(dev_name(it->dev), buf);
}
- if (ret) {
+ if (ret && vfio_device_try_get(it)) {
device = it;
- vfio_device_get(device);
break;
}
}
@@ -907,32 +851,15 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
}
/*
- * Caller must hold a reference to the vfio_device
- */
-void *vfio_device_data(struct vfio_device *device)
-{
- return device->device_data;
-}
-EXPORT_SYMBOL_GPL(vfio_device_data);
-
-/*
* Decrement the device reference count and wait for the device to be
* removed. Open file descriptors for the device... */
-void *vfio_del_group_dev(struct device *dev)
+void vfio_unregister_group_dev(struct vfio_device *device)
{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct vfio_device *device = dev_get_drvdata(dev);
struct vfio_group *group = device->group;
- void *device_data = device->device_data;
struct vfio_unbound_dev *unbound;
unsigned int i = 0;
bool interrupted = false;
-
- /*
- * The group exists so long as we have a device reference. Get
- * a group reference and use it to scan for the device going away.
- */
- vfio_group_get(group);
+ long rc;
/*
* When the device is removed from the group, the group suddenly
@@ -945,7 +872,7 @@ void *vfio_del_group_dev(struct device *dev)
*/
unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
if (unbound) {
- unbound->dev = dev;
+ unbound->dev = device->dev;
mutex_lock(&group->unbound_lock);
list_add(&unbound->unbound_next, &group->unbound_list);
mutex_unlock(&group->unbound_lock);
@@ -953,44 +880,33 @@ void *vfio_del_group_dev(struct device *dev)
WARN_ON(!unbound);
vfio_device_put(device);
-
- /*
- * If the device is still present in the group after the above
- * 'put', then it is in use and we need to request it from the
- * bus driver. The driver may in turn need to request the
- * device from the user. We send the request on an arbitrary
- * interval with counter to allow the driver to take escalating
- * measures to release the device if it has the ability to do so.
- */
- add_wait_queue(&vfio.release_q, &wait);
-
- do {
- device = vfio_group_get_device(group, dev);
- if (!device)
- break;
-
+ rc = try_wait_for_completion(&device->comp);
+ while (rc <= 0) {
if (device->ops->request)
- device->ops->request(device_data, i++);
-
- vfio_device_put(device);
+ device->ops->request(device, i++);
if (interrupted) {
- wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
+ rc = wait_for_completion_timeout(&device->comp,
+ HZ * 10);
} else {
- wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
- if (signal_pending(current)) {
+ rc = wait_for_completion_interruptible_timeout(
+ &device->comp, HZ * 10);
+ if (rc < 0) {
interrupted = true;
- dev_warn(dev,
+ dev_warn(device->dev,
"Device is currently in use, task"
" \"%s\" (%d) "
"blocked until device is released",
current->comm, task_pid_nr(current));
}
}
+ }
- } while (1);
+ mutex_lock(&group->device_lock);
+ list_del(&device->group_next);
+ group->dev_counter--;
+ mutex_unlock(&group->device_lock);
- remove_wait_queue(&vfio.release_q, &wait);
/*
* In order to support multiple devices per group, devices can be
* plucked from the group while other devices in the group are still
@@ -1008,11 +924,10 @@ void *vfio_del_group_dev(struct device *dev)
if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container);
+ /* Matches the get in vfio_register_group_dev() */
vfio_group_put(group);
-
- return device_data;
}
-EXPORT_SYMBOL_GPL(vfio_del_group_dev);
+EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/**
* VFIO base fd, /dev/vfio/vfio
@@ -1454,7 +1369,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(device))
return PTR_ERR(device);
- ret = device->ops->open(device->device_data);
+ ret = device->ops->open(device);
if (ret) {
vfio_device_put(device);
return ret;
@@ -1466,7 +1381,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
*/
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0) {
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_device_put(device);
return ret;
}
@@ -1476,7 +1391,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
if (IS_ERR(filep)) {
put_unused_fd(ret);
ret = PTR_ERR(filep);
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_device_put(device);
return ret;
}
@@ -1633,7 +1548,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
- device->ops->release(device->device_data);
+ device->ops->release(device);
vfio_group_try_dissolve_container(device->group);
@@ -1650,7 +1565,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
if (unlikely(!device->ops->ioctl))
return -EINVAL;
- return device->ops->ioctl(device->device_data, cmd, arg);
+ return device->ops->ioctl(device, cmd, arg);
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
@@ -1661,7 +1576,7 @@ static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
if (unlikely(!device->ops->read))
return -EINVAL;
- return device->ops->read(device->device_data, buf, count, ppos);
+ return device->ops->read(device, buf, count, ppos);
}
static ssize_t vfio_device_fops_write(struct file *filep,
@@ -1673,7 +1588,7 @@ static ssize_t vfio_device_fops_write(struct file *filep,
if (unlikely(!device->ops->write))
return -EINVAL;
- return device->ops->write(device->device_data, buf, count, ppos);
+ return device->ops->write(device, buf, count, ppos);
}
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
@@ -1683,7 +1598,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
if (unlikely(!device->ops->mmap))
return -EINVAL;
- return device->ops->mmap(device->device_data, vma);
+ return device->ops->mmap(device, vma);
}
static const struct file_operations vfio_device_fops = {
@@ -2379,7 +2294,6 @@ static int __init vfio_init(void)
mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
INIT_LIST_HEAD(&vfio.iommu_drivers_list);
- init_waitqueue_head(&vfio.release_q);
ret = misc_register(&vfio_dev);
if (ret) {
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index b7e18bde5aa8..a2c5b30e1763 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -15,6 +15,17 @@
#include <linux/poll.h>
#include <uapi/linux/vfio.h>
+struct vfio_device {
+ struct device *dev;
+ const struct vfio_device_ops *ops;
+ struct vfio_group *group;
+
+ /* Members below here are private, not for driver use */
+ refcount_t refcount;
+ struct completion comp;
+ struct list_head group_next;
+};
+
/**
* struct vfio_device_ops - VFIO bus driver device callbacks
*
@@ -32,30 +43,28 @@
*/
struct vfio_device_ops {
char *name;
- int (*open)(void *device_data);
- void (*release)(void *device_data);
- ssize_t (*read)(void *device_data, char __user *buf,
+ int (*open)(struct vfio_device *vdev);
+ void (*release)(struct vfio_device *vdev);
+ ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos);
- ssize_t (*write)(void *device_data, const char __user *buf,
+ ssize_t (*write)(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *size);
- long (*ioctl)(void *device_data, unsigned int cmd,
+ long (*ioctl)(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg);
- int (*mmap)(void *device_data, struct vm_area_struct *vma);
- void (*request)(void *device_data, unsigned int count);
- int (*match)(void *device_data, char *buf);
+ int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
+ void (*request)(struct vfio_device *vdev, unsigned int count);
+ int (*match)(struct vfio_device *vdev, char *buf);
};
extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
-extern int vfio_add_group_dev(struct device *dev,
- const struct vfio_device_ops *ops,
- void *device_data);
-
-extern void *vfio_del_group_dev(struct device *dev);
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops);
+int vfio_register_group_dev(struct vfio_device *device);
+void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device);
-extern void *vfio_device_data(struct vfio_device *device);
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8ce36c1d53ca..34b1f53a3901 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -333,17 +333,10 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
/* 10de vendor PCI sub-types */
-/*
- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
- */
-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
+/* subtype 1 was VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM, don't use */
/* 1014 vendor PCI sub-types */
-/*
- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
- * to do TLB invalidation on a GPU.
- */
-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
+/* subtype 1 was VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD, don't use */
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
@@ -637,32 +630,9 @@ struct vfio_device_migration_info {
*/
#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
-/*
- * Capability with compressed real address (aka SSA - small system address)
- * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
- * and by the userspace to associate a NVLink bridge with a GPU.
- */
-#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
-
-struct vfio_region_info_cap_nvlink2_ssatgt {
- struct vfio_info_cap_header header;
- __u64 tgt;
-};
+/* subtype 4 was VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, don't use */
-/*
- * Capability with an NVLink link speed. The value is read by
- * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
- * property in the device tree. The value is fixed in the hardware
- * and failing to provide the correct value results in the link
- * not working with no indication from the driver why.
- */
-#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
-
-struct vfio_region_info_cap_nvlink2_lnkspd {
- struct vfio_info_cap_header header;
- __u32 link_speed;
- __u32 __pad;
-};
+/* subtype 5 was VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, don't use */
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,