From 6eb4da8cf54537992fc9843be8b2af4f83f717e0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:39 -0300 Subject: iommu: Have __iommu_probe_device() check for already probed devices This is a step toward making __iommu_probe_device() self contained. It should, under proper locking, check if the device is already associated with an iommu driver and resolve parallel probes. All but one of the callers open code this test using two different means, but they all rely on dev->iommu_group. Currently the bus_iommu_probe()/probe_iommu_group() and probe_acpi_namespace_devices() rejects already probed devices with an unlocked read of dev->iommu_group. The OF and ACPI "replay" functions use device_iommu_mapped() which is the same read without the pointless refcount. Move this test into __iommu_probe_device() and put it under the iommu_probe_device_lock. The store to dev->iommu_group is in iommu_group_add_device() which is also called under this lock for iommu driver devices, making it properly locked. The only path that didn't have this check is the hotplug path triggered by BUS_NOTIFY_ADD_DEVICE. The only way to get dev->iommu_group assigned outside the probe path is via iommu_group_add_device(). Today the only caller is VFIO no-iommu which never associates with an iommu driver. Thus adding this additional check is safe. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Acked-by: Rafael J. Wysocki Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index da340f11c5f5..bdc5ce884b7b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -351,9 +351,16 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * but for now enforcing a simple global ordering is fine. */ mutex_lock(&iommu_probe_device_lock); + + /* Device is probed already if in a group */ + if (dev->iommu_group) { + ret = 0; + goto out_unlock; + } + if (!dev_iommu_get(dev)) { ret = -ENOMEM; - goto err_unlock; + goto out_unlock; } if (!try_module_get(ops->owner)) { @@ -399,7 +406,7 @@ out_module_put: err_free: dev_iommu_free(dev); -err_unlock: +out_unlock: mutex_unlock(&iommu_probe_device_lock); return ret; @@ -1711,16 +1718,8 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) static int probe_iommu_group(struct device *dev, void *data) { struct list_head *group_list = data; - struct iommu_group *group; int ret; - /* Device is probed already if in a group */ - group = iommu_group_get(dev); - if (group) { - iommu_group_put(group); - return 0; - } - ret = __iommu_probe_device(dev, group_list); if (ret == -ENODEV) ret = 0; -- cgit v1.2.3 From 5665d15d3cb796d363a2dc0d2ed17855a3cb5942 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:40 -0300 Subject: iommu: Use iommu_group_ref_get/put() for dev->iommu_group No reason to open code this, use the proper helper functions. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bdc5ce884b7b..2f16f988ab36 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -500,7 +500,7 @@ static void __iommu_group_release_device(struct iommu_group *group, kfree(grp_dev->name); kfree(grp_dev); dev->iommu_group = NULL; - kobject_put(group->devices_kobj); + iommu_group_put(group); } static void iommu_release_device(struct device *dev) @@ -1067,8 +1067,7 @@ rename: goto err_free_name; } - kobject_get(group->devices_kobj); - + iommu_group_ref_get(group); dev->iommu_group = group; mutex_lock(&group->mutex); -- cgit v1.2.3 From 7bdb99622f7e7dcaa58bfc2fa98caf23cfc40994 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:41 -0300 Subject: iommu: Inline iommu_group_get_for_dev() into __iommu_probe_device() This is the only caller, and it doesn't need the generality of the function. We already know there is no iommu_group, so it is simply two function calls. Moving it here allows the following patches to split the logic in these functions. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 50 +++++++++----------------------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2f16f988ab36..bd56cf02bc99 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -127,7 +127,6 @@ static int iommu_setup_default_domain(struct iommu_group *group, int target_type); static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev); -static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); @@ -379,12 +378,18 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); - group = iommu_group_get_for_dev(dev); + group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + group = ERR_PTR(-EINVAL); if (IS_ERR(group)) { ret = PTR_ERR(group); goto out_release; } + ret = iommu_group_add_device(group, dev); + if (ret) + goto err_put_group; + mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); @@ -396,6 +401,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list return 0; +err_put_group: + iommu_group_put(group); out_release: if (ops->release_device) ops->release_device(dev); @@ -1670,45 +1677,6 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) return dom; } -/** - * iommu_group_get_for_dev - Find or create the IOMMU group for a device - * @dev: target device - * - * This function is intended to be called by IOMMU drivers and extended to - * support common, bus-defined algorithms when determining or creating the - * IOMMU group for a device. On success, the caller will hold a reference - * to the returned IOMMU group, which will already include the provided - * device. The reference should be released with iommu_group_put(). - */ -static struct iommu_group *iommu_group_get_for_dev(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - struct iommu_group *group; - int ret; - - group = iommu_group_get(dev); - if (group) - return group; - - group = ops->device_group(dev); - if (WARN_ON_ONCE(group == NULL)) - return ERR_PTR(-EINVAL); - - if (IS_ERR(group)) - return group; - - ret = iommu_group_add_device(group, dev); - if (ret) - goto out_put_group; - - return group; - -out_put_group: - iommu_group_put(group); - - return ERR_PTR(ret); -} - struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; -- cgit v1.2.3 From df15d76dcacac2126a4d20ba06c9d7e4b18bad8e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:42 -0300 Subject: iommu: Simplify the __iommu_group_remove_device() flow Instead of returning the struct group_device and then later freeing it, do the entire free under the group->mutex and defer only putting the iommu_group. It is safe to remove the sysfs_links and free memory while holding that mutex. Move the sanity assert of the group status into __iommu_group_free_device(). The next patch will improve upon this and consolidate the group put and the mutex into __iommu_group_remove_device(). __iommu_group_free_device() is close to being the paired undo of iommu_group_add_device(), following patches will improve on that. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 83 ++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 44 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bd56cf02bc99..6cf4da060e94 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -470,32 +470,8 @@ err_out: } -/* - * Remove a device from a group's device list and return the group device - * if successful. - */ -static struct group_device * -__iommu_group_remove_device(struct iommu_group *group, struct device *dev) -{ - struct group_device *device; - - lockdep_assert_held(&group->mutex); - for_each_group_device(group, device) { - if (device->dev == dev) { - list_del(&device->list); - return device; - } - } - - return NULL; -} - -/* - * Release a device from its group and decrements the iommu group reference - * count. - */ -static void __iommu_group_release_device(struct iommu_group *group, - struct group_device *grp_dev) +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev) { struct device *dev = grp_dev->dev; @@ -504,16 +480,45 @@ static void __iommu_group_release_device(struct iommu_group *group, trace_remove_device_from_group(group->id, dev); + /* + * If the group has become empty then ownership must have been + * released, and the current domain must be set back to NULL or + * the default domain. + */ + if (list_empty(&group->devices)) + WARN_ON(group->owner_cnt || + group->domain != group->default_domain); + kfree(grp_dev->name); kfree(grp_dev); dev->iommu_group = NULL; - iommu_group_put(group); } -static void iommu_release_device(struct device *dev) +/* + * Remove the iommu_group from the struct device. The attached group must be put + * by the caller after releaseing the group->mutex. + */ +static void __iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; + + lockdep_assert_held(&group->mutex); + for_each_group_device(group, device) { + if (device->dev != dev) + continue; + + list_del(&device->list); + __iommu_group_free_device(group, device); + /* Caller must put iommu_group */ + return; + } + WARN(true, "Corrupted iommu_group device_list"); +} + +static void iommu_release_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; const struct iommu_ops *ops; if (!dev->iommu || !group) @@ -522,16 +527,7 @@ static void iommu_release_device(struct device *dev) iommu_device_unlink(dev->iommu->iommu_dev, dev); mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); - - /* - * If the group has become empty then ownership must have been released, - * and the current domain must be set back to NULL or the default - * domain. - */ - if (list_empty(&group->devices)) - WARN_ON(group->owner_cnt || - group->domain != group->default_domain); + __iommu_group_remove_device(dev); /* * release_device() must stop using any attached domain on the device. @@ -547,8 +543,8 @@ static void iommu_release_device(struct device *dev) ops->release_device(dev); mutex_unlock(&group->mutex); - if (device) - __iommu_group_release_device(group, device); + /* Pairs with the get in iommu_group_add_device() */ + iommu_group_put(group); module_put(ops->owner); dev_iommu_free(dev); @@ -1107,7 +1103,6 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device); void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - struct group_device *device; if (!group) return; @@ -1115,11 +1110,11 @@ void iommu_group_remove_device(struct device *dev) dev_info(dev, "Removing from iommu group %d\n", group->id); mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); + __iommu_group_remove_device(dev); mutex_unlock(&group->mutex); - if (device) - __iommu_group_release_device(group, device); + /* Pairs with the get in iommu_group_add_device() */ + iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); -- cgit v1.2.3 From aa0958570f24f562422afa41fefd1b3a1fe0f6d0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:43 -0300 Subject: iommu: Add iommu_init/deinit_device() paired functions Move the driver init and destruction code into two logically paired functions. There is a subtle ordering dependency in how the group's domains are freed, the current code does the kobject_put() on the group which will hopefully trigger the free of the domains before the module_put() that protects the domain->ops. Reorganize this to be explicit and documented. The domains are cleaned up by iommu_deinit_device() if it is the last device to be deinit'd from the group. This must be done in a specific order - after ops->release_device() and before the module_put(). Make it very clear and obvious by putting the order directly in one function. Leave WARN_ON's in case the refcounting gets messed up somehow. This also moves the module_put() and dev_iommu_free() under the group->mutex to keep the code simple. Building paired functions like this helps ensure that error cleanup flows in __iommu_probe_device() are correct because they share the same code that handles the normal flow. These details become relavent as following patches add more error unwind into __iommu_probe_device(), and ultimately a following series adds fine-grained locking to __iommu_probe_device(). Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 191 +++++++++++++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 79 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6cf4da060e94..f052f52958fd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -332,10 +332,99 @@ static u32 dev_iommu_get_max_pasids(struct device *dev) return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } +/* + * Init the dev->iommu and dev->iommu_group in the struct device and get the + * driver probed + */ +static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) +{ + struct iommu_device *iommu_dev; + struct iommu_group *group; + int ret; + + if (!dev_iommu_get(dev)) + return -ENOMEM; + + if (!try_module_get(ops->owner)) { + ret = -EINVAL; + goto err_free; + } + + iommu_dev = ops->probe_device(dev); + if (IS_ERR(iommu_dev)) { + ret = PTR_ERR(iommu_dev); + goto err_module_put; + } + + group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + group = ERR_PTR(-EINVAL); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto err_release; + } + dev->iommu_group = group; + + dev->iommu->iommu_dev = iommu_dev; + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); + if (ops->is_attach_deferred) + dev->iommu->attach_deferred = ops->is_attach_deferred(dev); + return 0; + +err_release: + if (ops->release_device) + ops->release_device(dev); +err_module_put: + module_put(ops->owner); +err_free: + dev_iommu_free(dev); + return ret; +} + +static void iommu_deinit_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + const struct iommu_ops *ops = dev_iommu_ops(dev); + + lockdep_assert_held(&group->mutex); + + /* + * release_device() must stop using any attached domain on the device. + * If there are still other devices in the group they are not effected + * by this callback. + * + * The IOMMU driver must set the device to either an identity or + * blocking translation and stop using any domain pointer, as it is + * going to be freed. + */ + if (ops->release_device) + ops->release_device(dev); + + /* + * If this is the last driver to use the group then we must free the + * domains before we do the module_put(). + */ + if (list_empty(&group->devices)) { + if (group->default_domain) { + iommu_domain_free(group->default_domain); + group->default_domain = NULL; + } + if (group->blocking_domain) { + iommu_domain_free(group->blocking_domain); + group->blocking_domain = NULL; + } + group->domain = NULL; + } + + /* Caller must put iommu_group */ + dev->iommu_group = NULL; + module_put(ops->owner); + dev_iommu_free(dev); +} + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; - struct iommu_device *iommu_dev; struct iommu_group *group; static DEFINE_MUTEX(iommu_probe_device_lock); int ret; @@ -357,62 +446,30 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list goto out_unlock; } - if (!dev_iommu_get(dev)) { - ret = -ENOMEM; + ret = iommu_init_device(dev, ops); + if (ret) goto out_unlock; - } - - if (!try_module_get(ops->owner)) { - ret = -EINVAL; - goto err_free; - } - - iommu_dev = ops->probe_device(dev); - if (IS_ERR(iommu_dev)) { - ret = PTR_ERR(iommu_dev); - goto out_module_put; - } - - dev->iommu->iommu_dev = iommu_dev; - dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); - if (ops->is_attach_deferred) - dev->iommu->attach_deferred = ops->is_attach_deferred(dev); - - group = ops->device_group(dev); - if (WARN_ON_ONCE(group == NULL)) - group = ERR_PTR(-EINVAL); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_release; - } + group = dev->iommu_group; ret = iommu_group_add_device(group, dev); + mutex_lock(&group->mutex); if (ret) goto err_put_group; - mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); mutex_unlock(&group->mutex); iommu_group_put(group); mutex_unlock(&iommu_probe_device_lock); - iommu_device_link(iommu_dev, dev); + iommu_device_link(dev->iommu->iommu_dev, dev); return 0; err_put_group: + iommu_deinit_device(dev); + mutex_unlock(&group->mutex); iommu_group_put(group); -out_release: - if (ops->release_device) - ops->release_device(dev); - -out_module_put: - module_put(ops->owner); - -err_free: - dev_iommu_free(dev); - out_unlock: mutex_unlock(&iommu_probe_device_lock); @@ -491,63 +548,45 @@ static void __iommu_group_free_device(struct iommu_group *group, kfree(grp_dev->name); kfree(grp_dev); - dev->iommu_group = NULL; } -/* - * Remove the iommu_group from the struct device. The attached group must be put - * by the caller after releaseing the group->mutex. - */ +/* Remove the iommu_group from the struct device. */ static void __iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; - lockdep_assert_held(&group->mutex); + mutex_lock(&group->mutex); for_each_group_device(group, device) { if (device->dev != dev) continue; list_del(&device->list); __iommu_group_free_device(group, device); - /* Caller must put iommu_group */ - return; + if (dev->iommu && dev->iommu->iommu_dev) + iommu_deinit_device(dev); + else + dev->iommu_group = NULL; + goto out; } WARN(true, "Corrupted iommu_group device_list"); +out: + mutex_unlock(&group->mutex); + + /* Pairs with the get in iommu_group_add_device() */ + iommu_group_put(group); } static void iommu_release_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - const struct iommu_ops *ops; if (!dev->iommu || !group) return; iommu_device_unlink(dev->iommu->iommu_dev, dev); - mutex_lock(&group->mutex); __iommu_group_remove_device(dev); - - /* - * release_device() must stop using any attached domain on the device. - * If there are still other devices in the group they are not effected - * by this callback. - * - * The IOMMU driver must set the device to either an identity or - * blocking translation and stop using any domain pointer, as it is - * going to be freed. - */ - ops = dev_iommu_ops(dev); - if (ops->release_device) - ops->release_device(dev); - mutex_unlock(&group->mutex); - - /* Pairs with the get in iommu_group_add_device() */ - iommu_group_put(group); - - module_put(ops->owner); - dev_iommu_free(dev); } static int __init iommu_set_def_domain_type(char *str) @@ -808,10 +847,9 @@ static void iommu_group_release(struct kobject *kobj) ida_free(&iommu_group_ida, group->id); - if (group->default_domain) - iommu_domain_free(group->default_domain); - if (group->blocking_domain) - iommu_domain_free(group->blocking_domain); + /* Domains are free'd by iommu_deinit_device() */ + WARN_ON(group->default_domain); + WARN_ON(group->blocking_domain); kfree(group->name); kfree(group); @@ -1109,12 +1147,7 @@ void iommu_group_remove_device(struct device *dev) dev_info(dev, "Removing from iommu group %d\n", group->id); - mutex_lock(&group->mutex); __iommu_group_remove_device(dev); - mutex_unlock(&group->mutex); - - /* Pairs with the get in iommu_group_add_device() */ - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); -- cgit v1.2.3 From 14891af3799e8cd24dee14f78c31fa663cfb5ba9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:44 -0300 Subject: iommu: Move the iommu driver sysfs setup into iommu_init/deinit_device() It makes logical sense that once the driver is attached to the device the sysfs links appear, even if we haven't fully created the group_device or attached the device to a domain. Fix the missing error handling on sysfs creation since iommu_init_device() can trivially handle this. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sysfs.c | 6 ------ drivers/iommu/iommu.c | 13 +++++++++---- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index 99869217fbec..c8aba0e2a30d 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -107,9 +107,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) { int ret; - if (!iommu || IS_ERR(iommu)) - return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) @@ -126,9 +123,6 @@ EXPORT_SYMBOL_GPL(iommu_device_link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link) { - if (!iommu || IS_ERR(iommu)) - return; - sysfs_remove_link(&link->kobj, "iommu"); sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f052f52958fd..4688f61b7a4f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -356,12 +356,16 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) goto err_module_put; } + ret = iommu_device_link(iommu_dev, dev); + if (ret) + goto err_release; + group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) group = ERR_PTR(-EINVAL); if (IS_ERR(group)) { ret = PTR_ERR(group); - goto err_release; + goto err_unlink; } dev->iommu_group = group; @@ -371,6 +375,8 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); return 0; +err_unlink: + iommu_device_unlink(iommu_dev, dev); err_release: if (ops->release_device) ops->release_device(dev); @@ -388,6 +394,8 @@ static void iommu_deinit_device(struct device *dev) lockdep_assert_held(&group->mutex); + iommu_device_unlink(dev->iommu->iommu_dev, dev); + /* * release_device() must stop using any attached domain on the device. * If there are still other devices in the group they are not effected @@ -462,7 +470,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list iommu_group_put(group); mutex_unlock(&iommu_probe_device_lock); - iommu_device_link(dev->iommu->iommu_dev, dev); return 0; @@ -584,8 +591,6 @@ static void iommu_release_device(struct device *dev) if (!dev->iommu || !group) return; - iommu_device_unlink(dev->iommu->iommu_dev, dev); - __iommu_group_remove_device(dev); } -- cgit v1.2.3 From cfb6ee65f7603a0605fa8f5fe5b0782f0731c81c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:46 -0300 Subject: iommu: Always destroy the iommu_group during iommu_release_device() Have release fully clean up the iommu related parts of the struct device, no matter what state they are in. Split the logic so that the three things owned by the iommu core are always cleaned up: - Any attached iommu_group - Any allocated dev->iommu and its contents including a fwsepc - Any attached driver via a struct group_device This fixes a minor bug where a fwspec created without an iommu_group being probed would not be freed. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4688f61b7a4f..55b916e8da3b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -574,10 +574,8 @@ static void __iommu_group_remove_device(struct device *dev) iommu_deinit_device(dev); else dev->iommu_group = NULL; - goto out; + break; } - WARN(true, "Corrupted iommu_group device_list"); -out: mutex_unlock(&group->mutex); /* Pairs with the get in iommu_group_add_device() */ @@ -588,10 +586,12 @@ static void iommu_release_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - if (!dev->iommu || !group) - return; + if (group) + __iommu_group_remove_device(dev); - __iommu_group_remove_device(dev); + /* Free any fwspec if no iommu_driver was ever attached */ + if (dev->iommu) + dev_iommu_free(dev); } static int __init iommu_set_def_domain_type(char *str) -- cgit v1.2.3 From fa08280364882c42993dc5d8394c467d76803fdb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:47 -0300 Subject: iommu: Split iommu_group_add_device() Move the list_add_tail() for the group_device into the critical region that immediately follows in __iommu_probe_device(). This avoids one case of unlocking and immediately re-locking the group->mutex. Consistently make the caller responsible for setting dev->iommu_group, prior patches moved this into iommu_init_device(), make the no-driver path do this in iommu_group_add_device(). This completes making __iommu_group_free_device() and iommu_group_alloc_device() into pair'd functions. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 66 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 23 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 55b916e8da3b..3129e5c50c87 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -129,6 +129,8 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -435,6 +437,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; static DEFINE_MUTEX(iommu_probe_device_lock); + struct group_device *gdev; int ret; if (!ops) @@ -459,16 +462,17 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list goto out_unlock; group = dev->iommu_group; - ret = iommu_group_add_device(group, dev); + gdev = iommu_group_alloc_device(group, dev); mutex_lock(&group->mutex); - if (ret) + if (IS_ERR(gdev)) { + ret = PTR_ERR(gdev); goto err_put_group; + } + list_add_tail(&gdev->list, &group->devices); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); mutex_unlock(&group->mutex); - iommu_group_put(group); - mutex_unlock(&iommu_probe_device_lock); return 0; @@ -578,7 +582,10 @@ static void __iommu_group_remove_device(struct device *dev) } mutex_unlock(&group->mutex); - /* Pairs with the get in iommu_group_add_device() */ + /* + * Pairs with the get in iommu_init_device() or + * iommu_group_add_device() + */ iommu_group_put(group); } @@ -1067,22 +1074,16 @@ out: return ret; } -/** - * iommu_group_add_device - add a device to an iommu group - * @group: the group into which to add the device (reference should be held) - * @dev: the device - * - * This function is called by an iommu driver to add a device into a - * group. Adding a device increments the group reference count. - */ -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +/* This is undone by __iommu_group_free_device() */ +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev) { int ret, i = 0; struct group_device *device; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) - return -ENOMEM; + return ERR_PTR(-ENOMEM); device->dev = dev; @@ -1113,17 +1114,11 @@ rename: goto err_free_name; } - iommu_group_ref_get(group); - dev->iommu_group = group; - - mutex_lock(&group->mutex); - list_add_tail(&device->list, &group->devices); - mutex_unlock(&group->mutex); trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); - return 0; + return device; err_free_name: kfree(device->name); @@ -1132,7 +1127,32 @@ err_remove_link: err_free_device: kfree(device); dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); - return ret; + return ERR_PTR(ret); +} + +/** + * iommu_group_add_device - add a device to an iommu group + * @group: the group into which to add the device (reference should be held) + * @dev: the device + * + * This function is called by an iommu driver to add a device into a + * group. Adding a device increments the group reference count. + */ +int iommu_group_add_device(struct iommu_group *group, struct device *dev) +{ + struct group_device *gdev; + + gdev = iommu_group_alloc_device(group, dev); + if (IS_ERR(gdev)) + return PTR_ERR(gdev); + + iommu_group_ref_get(group); + dev->iommu_group = group; + + mutex_lock(&group->mutex); + list_add_tail(&gdev->list, &group->devices); + mutex_unlock(&group->mutex); + return 0; } EXPORT_SYMBOL_GPL(iommu_group_add_device); -- cgit v1.2.3 From f188056352bcdcd4bec31c22baa00ba6568f1416 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 5 Jun 2023 21:59:48 -0300 Subject: iommu: Avoid locking/unlocking for iommu_probe_device() Remove the race where a hotplug of a device into an existing group will have the device installed in the group->devices, but not yet attached to the group's current domain. Move the group attachment logic from iommu_probe_device() and put it under the same mutex that updates the group->devices list so everything is atomic under the lock. We retain the two step setup of the default domain for the bus_iommu_probe() case solely so that we have a more complete view of the group when creating the default domain for boot time devices. This is not generally necessary with the current code structure but seems to be supporting some odd corner cases like alias RID's and IOMMU_RESV_DIRECT or driver bugs returning different default_domain types for the same group. During bus_iommu_probe() the group will have a device list but both group->default_domain and group->domain will be NULL. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v3-328044aa278c+45e49-iommu_probe_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 78 +++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 43 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3129e5c50c87..42377f49ab87 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -131,6 +131,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); static struct group_device *iommu_group_alloc_device(struct iommu_group *group, struct device *dev); +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -469,14 +471,39 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list goto err_put_group; } + /* + * The gdev must be in the list before calling + * iommu_setup_default_domain() + */ list_add_tail(&gdev->list, &group->devices); - if (group_list && !group->default_domain && list_empty(&group->entry)) - list_add_tail(&group->entry, group_list); + WARN_ON(group->default_domain && !group->domain); + if (group->default_domain) + iommu_create_device_direct_mappings(group->default_domain, dev); + if (group->domain) { + ret = __iommu_device_set_domain(group, dev, group->domain, 0); + if (ret) + goto err_remove_gdev; + } else if (!group->default_domain && !group_list) { + ret = iommu_setup_default_domain(group, 0); + if (ret) + goto err_remove_gdev; + } else if (!group->default_domain) { + /* + * With a group_list argument we defer the default_domain setup + * to the caller by providing a de-duplicated list of groups + * that need further setup. + */ + if (list_empty(&group->entry)) + list_add_tail(&group->entry, group_list); + } mutex_unlock(&group->mutex); mutex_unlock(&iommu_probe_device_lock); return 0; +err_remove_gdev: + list_del(&gdev->list); + __iommu_group_free_device(group, gdev); err_put_group: iommu_deinit_device(dev); mutex_unlock(&group->mutex); @@ -490,52 +517,17 @@ out_unlock: int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops; - struct iommu_group *group; int ret; ret = __iommu_probe_device(dev, NULL); if (ret) - goto err_out; - - group = iommu_group_get(dev); - if (!group) { - ret = -ENODEV; - goto err_release; - } - - mutex_lock(&group->mutex); - - if (group->default_domain) - iommu_create_device_direct_mappings(group->default_domain, dev); - - if (group->domain) { - ret = __iommu_device_set_domain(group, dev, group->domain, 0); - if (ret) - goto err_unlock; - } else if (!group->default_domain) { - ret = iommu_setup_default_domain(group, 0); - if (ret) - goto err_unlock; - } - - mutex_unlock(&group->mutex); - iommu_group_put(group); + return ret; ops = dev_iommu_ops(dev); if (ops->probe_finalize) ops->probe_finalize(dev); return 0; - -err_unlock: - mutex_unlock(&group->mutex); - iommu_group_put(group); -err_release: - iommu_release_device(dev); - -err_out: - return ret; - } static void __iommu_group_free_device(struct iommu_group *group, @@ -1815,11 +1807,6 @@ int bus_iommu_probe(const struct bus_type *bus) LIST_HEAD(group_list); int ret; - /* - * This code-path does not allocate the default domain when - * creating the iommu group, so do it after the groups are - * created. - */ ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); if (ret) return ret; @@ -1832,6 +1819,11 @@ int bus_iommu_probe(const struct bus_type *bus) /* Remove item from the list */ list_del_init(&group->entry); + /* + * We go to the trouble of deferred default domain creation so + * that the cross-group default domain type and the setup of the + * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. + */ ret = iommu_setup_default_domain(group, 0); if (ret) { mutex_unlock(&group->mutex); -- cgit v1.2.3 From 791c2b17fb4023f21c3cbf5f268af01d9b8cb7cc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 13 Apr 2023 14:40:25 +0100 Subject: iommu: Optimise PCI SAC address trick Per the reasoning in commit 4bf7fda4dce2 ("iommu/dma: Add config for PCI SAC address trick") and its subsequent revert, this mechanism no longer serves its original purpose, but now only works around broken hardware/drivers in a way that is unfortunately too impactful to remove. This does not, however, prevent us from solving the performance impact which that workaround has on large-scale systems that don't need it. Once the 32-bit IOVA space fills up and a workload starts allocating and freeing on both sides of the boundary, the opportunistic SAC allocation can then end up spending significant time hunting down scattered fragments of free 32-bit space, or just reestablishing max32_alloc_size. This can easily be exacerbated by a change in allocation pattern, such as by changing the network MTU, which can increase pressure on the 32-bit space by leaving a large quantity of cached IOVAs which are now the wrong size to be recycled, but also won't be freed since the non-opportunistic allocations can still be satisfied from the whole 64-bit space without triggering the reclaim path. However, in the context of a workaround where smaller DMA addresses aren't simply a preference but a necessity, if we get to that point at all then in fact it's already the endgame. The nature of the allocator is currently such that the first IOVA we give to a device after the 32-bit space runs out will be the highest possible address for that device, ever. If that works, then great, we know we can optimise for speed by always allocating from the full range. And if it doesn't, then the worst has already happened and any brokenness is now showing, so there's little point in continuing to try to hide it. To that end, implement a flag to refine the SAC business into a per-device policy that can automatically get itself out of the way if and when it stops being useful. CC: Linus Torvalds CC: Jakub Kicinski Reviewed-by: John Garry Signed-off-by: Robin Murphy Tested-by: Vasant Hegde Tested-by: Jakub Kicinski Link: https://lore.kernel.org/r/b8502b115b915d2a3fabde367e099e39106686c8.1681392791.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 26 ++++++++++++++++++++------ drivers/iommu/dma-iommu.h | 8 ++++++++ drivers/iommu/iommu.c | 3 +++ include/linux/iommu.h | 2 ++ 4 files changed, 33 insertions(+), 6 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e57724163835..4b1a88f514c9 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -660,7 +660,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len, iova = 0; + unsigned long shift, iova_len, iova; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -675,15 +675,29 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, if (domain->geometry.force_aperture) dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); - /* Try to get PCI devices a SAC address */ - if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev)) + /* + * Try to use all the 32-bit PCI addresses first. The original SAC vs. + * DAC reasoning loses relevance with PCIe, but enough hardware and + * firmware bugs are still lurking out there that it's safest not to + * venture into the 64-bit space until necessary. + * + * If your device goes wrong after seeing the notice then likely either + * its driver is not setting DMA masks accurately, the hardware has + * some inherent bug in handling >32-bit addresses, or not all the + * expected address bits are wired up between the device and the IOMMU. + */ + if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift, false); + if (iova) + goto done; - if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, - true); + dev->iommu->pci_32bit_workaround = false; + dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); + } + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +done: return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index 942790009292..c829f1f82a99 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -17,6 +17,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); extern bool iommu_dma_forcedac; +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; +} #else /* CONFIG_IOMMU_DMA */ @@ -38,5 +42,9 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 42377f49ab87..e67f6562da73 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -499,6 +499,9 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list mutex_unlock(&group->mutex); mutex_unlock(&iommu_probe_device_lock); + if (dev_is_pci(dev)) + iommu_dma_set_pci_32bit_workaround(dev); + return 0; err_remove_gdev: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..b1dcb1b9b170 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -409,6 +409,7 @@ struct iommu_fault_param { * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume * @attach_deferred: the dma domain attachment is deferred + * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -422,6 +423,7 @@ struct dev_iommu { void *priv; u32 max_pasids; u32 attach_deferred:1; + u32 pci_32bit_workaround:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 6b7867b5b8a6b14c487bf04a693ab424c7a8718d Mon Sep 17 00:00:00 2001 From: Zhu Wang Date: Mon, 31 Jul 2023 19:27:58 +0800 Subject: iommu: Remove kernel-doc warnings Remove kernel-doc warnings: drivers/iommu/iommu.c:3261: warning: Function parameter or member 'group' not described in 'iommu_group_release_dma_owner' drivers/iommu/iommu.c:3261: warning: Excess function parameter 'dev' description in 'iommu_group_release_dma_owner' drivers/iommu/iommu.c:3275: warning: Function parameter or member 'dev' not described in 'iommu_device_release_dma_owner' drivers/iommu/iommu.c:3275: warning: Excess function parameter 'group' description in 'iommu_device_release_dma_owner' Signed-off-by: Zhu Wang Fixes: 89395ccedbc1 ("iommu: Add device-centric DMA ownership interfaces") Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230731112758.214775-1-wangzhu9@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e67f6562da73..c53a4942f745 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3216,7 +3216,7 @@ static void __iommu_release_dma_ownership(struct iommu_group *group) /** * iommu_group_release_dma_owner() - Release DMA ownership of a group - * @dev: The device + * @group: The group * * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). */ @@ -3230,7 +3230,7 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); /** * iommu_device_release_dma_owner() - Release DMA ownership of a device - * @group: The device. + * @dev: The device. * * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). */ -- cgit v1.2.3 From 2dcebc7ddce7ffd4015824227c7623558b89d721 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 9 Aug 2023 20:47:55 +0800 Subject: iommu: Move global PASID allocation from SVA to core Intel ENQCMD requires a single PASID to be shared between multiple devices, as the PASID is stored in a single MSR register per-process and userspace can use only that one PASID. This means that the PASID allocation for any ENQCMD using device driver must always come from a shared global pool, regardless of what kind of domain the PASID will be used with. Split the code for the global PASID allocator into iommu_alloc/free_global_pasid() so that drivers can attach non-SVA domains to PASIDs as well. This patch moves global PASID allocation APIs from SVA to IOMMU APIs. Reserved PASIDs, currently only RID_PASID, are excluded from the global PASID allocation. It is expected that device drivers will use the allocated PASIDs to attach to appropriate IOMMU domains for use. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230802212427.1497170-3-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 29 ++++++++++------------------- drivers/iommu/iommu.c | 28 ++++++++++++++++++++++++++++ include/linux/iommu.h | 10 ++++++++++ 3 files changed, 48 insertions(+), 19 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 05c0fb2acbc4..b78671a8a914 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,34 +10,30 @@ #include "iommu-sva.h" static DEFINE_MUTEX(iommu_sva_lock); -static DEFINE_IDA(iommu_global_pasid_ida); /* Allocate a PASID for the mm within range (inclusive) */ -static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev) { + ioasid_t pasid; int ret = 0; - if (min == IOMMU_PASID_INVALID || - max == IOMMU_PASID_INVALID || - min == 0 || max < min) - return -EINVAL; - if (!arch_pgtable_dma_compat(mm)) return -EBUSY; mutex_lock(&iommu_sva_lock); /* Is a PASID already associated with this mm? */ if (mm_valid_pasid(mm)) { - if (mm->pasid < min || mm->pasid > max) + if (mm->pasid >= dev->iommu->max_pasids) ret = -EOVERFLOW; goto out; } - ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL); - if (ret < 0) + pasid = iommu_alloc_global_pasid(dev); + if (pasid == IOMMU_PASID_INVALID) { + ret = -ENOSPC; goto out; - - mm->pasid = ret; + } + mm->pasid = pasid; ret = 0; out: mutex_unlock(&iommu_sva_lock); @@ -64,15 +60,10 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm { struct iommu_domain *domain; struct iommu_sva *handle; - ioasid_t max_pasids; int ret; - max_pasids = dev->iommu->max_pasids; - if (!max_pasids) - return ERR_PTR(-EOPNOTSUPP); - /* Allocate mm->pasid if necessary. */ - ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + ret = iommu_sva_alloc_pasid(mm, dev); if (ret) return ERR_PTR(ret); @@ -217,5 +208,5 @@ void mm_pasid_drop(struct mm_struct *mm) if (likely(!mm_valid_pasid(mm))) return; - ida_free(&iommu_global_pasid_ida, mm->pasid); + iommu_free_global_pasid(mm->pasid); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index caaf563d38ae..d199d144460c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -39,6 +39,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static DEFINE_IDA(iommu_global_pasid_ida); static unsigned int iommu_def_domain_type __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); @@ -3400,3 +3401,30 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, return domain; } + +ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + int ret; + + /* max_pasids == 0 means that the device does not support PASID */ + if (!dev->iommu->max_pasids) + return IOMMU_PASID_INVALID; + + /* + * max_pasids is set up by vendor driver based on number of PASID bits + * supported but the IDA allocation is inclusive. + */ + ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, + dev->iommu->max_pasids - 1, GFP_KERNEL); + return ret < 0 ? IOMMU_PASID_INVALID : ret; +} +EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); + +void iommu_free_global_pasid(ioasid_t pasid) +{ + if (WARN_ON(pasid == IOMMU_PASID_INVALID)) + return; + + ida_free(&iommu_global_pasid_ida, pasid); +} +EXPORT_SYMBOL_GPL(iommu_free_global_pasid); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2870bc29d456..6b821cb715d5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -197,6 +197,7 @@ enum iommu_dev_features { }; #define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ +#define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; @@ -728,6 +729,8 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct iommu_domain * iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type); +ioasid_t iommu_alloc_global_pasid(struct device *dev); +void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -1089,6 +1092,13 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, { return NULL; } + +static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + return IOMMU_PASID_INVALID; +} + +static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ /** -- cgit v1.2.3 From a48ce36e2786f8bb33e9f42ea2d0c23ea4af3e0d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Aug 2023 20:48:02 +0800 Subject: iommu: Prevent RESV_DIRECT devices from blocking domains The IOMMU_RESV_DIRECT flag indicates that a memory region must be mapped 1:1 at all times. This means that the region must always be accessible to the device, even if the device is attached to a blocking domain. This is equal to saying that IOMMU_RESV_DIRECT flag prevents devices from being attached to blocking domains. This also implies that devices that implement RESV_DIRECT regions will be prevented from being assigned to user space since taking the DMA ownership immediately switches to a blocking domain. The rule of preventing devices with the IOMMU_RESV_DIRECT regions from being assigned to user space has existed in the Intel IOMMU driver for a long time. Now, this rule is being lifted up to a general core rule, as other architectures like AMD and ARM also have RMRR-like reserved regions. This has been discussed in the community mailing list and refer to below link for more details. Other places using unmanaged domains for kernel DMA must follow the iommu_get_resv_regions() and setup IOMMU_RESV_DIRECT - we do not restrict them in the core code. Cc: Robin Murphy Cc: Alex Williamson Cc: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/linux-iommu/BN9PR11MB5276E84229B5BD952D78E9598C639@BN9PR11MB5276.namprd11.prod.outlook.com Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20230724060352.113458-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 37 +++++++++++++++++++++++++++---------- include/linux/iommu.h | 2 ++ 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'drivers/iommu/iommu.c') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d199d144460c..e385a99e25e1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -960,14 +960,12 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, unsigned long pg_size; int ret = 0; - if (!iommu_is_dma_domain(domain)) - return 0; - - BUG_ON(!domain->pgsize_bitmap); - - pg_size = 1UL << __ffs(domain->pgsize_bitmap); + pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0; INIT_LIST_HEAD(&mappings); + if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) + return -EINVAL; + iommu_get_resv_regions(dev, &mappings); /* We need to consider overlapping regions for different devices */ @@ -975,13 +973,17 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, dma_addr_t start, end, addr; size_t map_size = 0; - start = ALIGN(entry->start, pg_size); - end = ALIGN(entry->start + entry->length, pg_size); + if (entry->type == IOMMU_RESV_DIRECT) + dev->iommu->require_direct = 1; - if (entry->type != IOMMU_RESV_DIRECT && - entry->type != IOMMU_RESV_DIRECT_RELAXABLE) + if ((entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || + !iommu_is_dma_domain(domain)) continue; + start = ALIGN(entry->start, pg_size); + end = ALIGN(entry->start + entry->length, pg_size); + for (addr = start; addr <= end; addr += pg_size) { phys_addr_t phys_addr; @@ -2122,6 +2124,21 @@ static int __iommu_device_set_domain(struct iommu_group *group, { int ret; + /* + * If the device requires IOMMU_RESV_DIRECT then we cannot allow + * the blocking domain to be attached as it does not contain the + * required 1:1 mapping. This test effectively excludes the device + * being used with iommu_group_claim_dma_owner() which will block + * vfio and iommufd as well. + */ + if (dev->iommu->require_direct && + (new_domain->type == IOMMU_DOMAIN_BLOCKED || + new_domain == group->blocking_domain)) { + dev_warn(dev, + "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); + return -EINVAL; + } + if (dev->iommu->attach_deferred) { if (new_domain == group->default_domain) return 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6b821cb715d5..54bae452975f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -411,6 +411,7 @@ struct iommu_fault_param { * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume * @attach_deferred: the dma domain attachment is deferred + * @require_direct: device requires IOMMU_RESV_DIRECT regions * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -424,6 +425,7 @@ struct dev_iommu { void *priv; u32 max_pasids; u32 attach_deferred:1; + u32 require_direct:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3