From d87beb749281404b4b4919930b1cc6352e3746f2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 31 May 2017 18:52:29 +0100 Subject: iommu/of: Handle PCI aliases properly When a PCI device has DMA quirks, we need to ensure that an upstream IOMMU knows about all possible aliases, since the presence of a DMA quirk does not preclude the device still also emitting transactions (e.g. MSIs) on its 'real' RID. Similarly, the rules for bridge aliasing are relatively complex, and some bridges may only take ownership of transactions under particular transient circumstances, leading again to multiple RIDs potentially being seen at the IOMMU for the given device. Take all this into account in the OF code by translating every RID produced by the alias walk, not just whichever one comes out last. Happily, this also makes things tidy enough that we can reduce the number of both total lines of code, and confusing levels of indirection, by pulling the "iommus"/"iommu-map" parsing helpers back in-line again. 
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 57 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 8cb60829a7a1..be8ac1ddec06 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -140,75 +140,39 @@ static const struct iommu_ops return ops; } -static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) -{ - struct of_phandle_args *iommu_spec = data; - - iommu_spec->args[0] = alias; - return iommu_spec->np == pdev->bus->dev.of_node; -} +struct of_pci_iommu_alias_info { + struct device *dev; + struct device_node *np; +}; -static const struct iommu_ops -*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np) +static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) { + struct of_pci_iommu_alias_info *info = data; const struct iommu_ops *ops; - struct of_phandle_args iommu_spec; + struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - /* - * Start by tracing the RID alias down the PCI topology as - * far as the host bridge whose OF node we have... - * (we're not even attempting to handle multi-alias devices yet) - */ - iommu_spec.args_count = 1; - iommu_spec.np = bridge_np; - pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec); - /* - * ...then find out what that becomes once it escapes the PCI - * bus into the system beyond, and which IOMMU it ends up at. - */ - iommu_spec.np = NULL; - err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", + err = of_pci_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? NULL : ERR_PTR(err); - - ops = of_iommu_xlate(&pdev->dev, &iommu_spec); + return err == -ENODEV ? 
1 : err; + ops = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - return ops; -} - -static const struct iommu_ops -*of_platform_iommu_init(struct device *dev, struct device_node *np) -{ - struct of_phandle_args iommu_spec; - const struct iommu_ops *ops = NULL; - int idx = 0; - /* - * We don't currently walk up the tree looking for a parent IOMMU. - * See the `Notes:' section of - * Documentation/devicetree/bindings/iommu/iommu.txt - */ - while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", - idx, &iommu_spec)) { - ops = of_iommu_xlate(dev, &iommu_spec); - of_node_put(iommu_spec.np); - idx++; - if (IS_ERR_OR_NULL(ops)) - break; - } + if (IS_ERR(ops)) + return PTR_ERR(ops); - return ops; + return info->np == pdev->bus->dev.of_node; } const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { - const struct iommu_ops *ops; + const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec = dev->iommu_fwspec; + int err; if (!master_np) return NULL; @@ -221,18 +185,44 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, iommu_fwspec_free(dev); } - if (dev_is_pci(dev)) - ops = of_pci_iommu_init(to_pci_dev(dev), master_np); - else - ops = of_platform_iommu_init(dev, master_np); + /* + * We don't currently walk up the tree looking for a parent IOMMU. 
+ * See the `Notes:' section of + * Documentation/devicetree/bindings/iommu/iommu.txt + */ + if (dev_is_pci(dev)) { + struct of_pci_iommu_alias_info info = { + .dev = dev, + .np = master_np, + }; + + err = pci_for_each_dma_alias(to_pci_dev(dev), + of_pci_iommu_init, &info); + if (err) /* err > 0 means the walk stopped, but non-fatally */ + ops = ERR_PTR(min(err, 0)); + else /* success implies both fwspec and ops are now valid */ + ops = dev->iommu_fwspec->ops; + } else { + struct of_phandle_args iommu_spec; + int idx = 0; + + while (!of_parse_phandle_with_args(master_np, "iommus", + "#iommu-cells", + idx, &iommu_spec)) { + ops = of_iommu_xlate(dev, &iommu_spec); + of_node_put(iommu_spec.np); + idx++; + if (IS_ERR_OR_NULL(ops)) + break; + } + } /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus && !dev->iommu_group) { - int err = ops->add_device(dev); - + err = ops->add_device(dev); if (err) ops = ERR_PTR(err); } -- cgit v1.2.3 From ebae3e830a991116526646f09dd67f68f332b330 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 5 Jul 2017 20:27:53 +0300 Subject: iommu: Correct iommu_map / iommu_unmap prototypes Commit 7d3002cc8c16 ("iommu/core: split mapping to page sizes as supported by the hardware") replaced 'int gfp_order' with a 'size_t size' of iommu_map / iommu_unmap function arguments, but missed the function prototypes for the disabled CONFIG_IOMMU_API case, let's correct them for consistency. 
Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..f1ce8e517d8d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -425,13 +425,13 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot) + phys_addr_t paddr, size_t size, int prot) { return -ENODEV; } static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, - int gfp_order) + size_t size) { return -ENODEV; } -- cgit v1.2.3 From 6bd4f1c754b2fafac403073b0d8469bed1d37e2d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2017 16:43:09 -0500 Subject: iommu: Convert to using %pOF instead of full_name Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. 
Signed-off-by: Rob Herring Cc: Joerg Roedel Cc: Heiko Stuebner Cc: iommu@lists.linux-foundation.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Reviewed-by: Heiko Stuebner Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 20 ++++++++------------ drivers/iommu/fsl_pamu_domain.c | 10 ++++------ drivers/iommu/of_iommu.c | 3 +-- drivers/iommu/rockchip-iommu.c | 10 +++++----- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index a34355fca37a..919ad9045ac4 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -530,8 +530,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) if (node) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -557,8 +557,8 @@ found_cpu_node: if (stash_dest_hint == cache_level) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -568,8 +568,7 @@ found_cpu_node: prop = of_get_property(node, "next-level-cache", NULL); if (!prop) { - pr_debug("can't find next-level-cache at %s\n", - node->full_name); + pr_debug("can't find next-level-cache at %pOF\n", node); of_node_put(node); return ~(u32)0; /* can't traverse any further */ } @@ -1063,8 +1062,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) guts_node = of_find_matching_node(NULL, guts_device_ids); if (!guts_node) { - dev_err(dev, "could not find GUTS node %s\n", - dev->of_node->full_name); + dev_err(dev, "could not find GUTS node %pOF\n", dev->of_node); ret = -ENODEV; goto error; } @@ -1246,8 +1244,7 @@ static __init int fsl_pamu_init(void) pdev = platform_device_alloc("fsl-of-pamu", 0); if (!pdev) { - 
pr_err("could not allocate device %s\n", - np->full_name); + pr_err("could not allocate device %pOF\n", np); ret = -ENOMEM; goto error_device_alloc; } @@ -1259,8 +1256,7 @@ static __init int fsl_pamu_init(void) ret = platform_device_add(pdev); if (ret) { - pr_err("could not add device %s (err=%i)\n", - np->full_name, ret); + pr_err("could not add device %pOF (err=%i)\n", np, ret); goto error_device_add; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index da0e1e30ef37..01c73479345d 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -619,8 +619,8 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain, for (i = 0; i < num; i++) { /* Ensure that LIODN value is valid */ if (liodn[i] >= PAACE_NUMBER_ENTRIES) { - pr_debug("Invalid liodn %d, attach device failed for %s\n", - liodn[i], dev->of_node->full_name); + pr_debug("Invalid liodn %d, attach device failed for %pOF\n", + liodn[i], dev->of_node); ret = -EINVAL; break; } @@ -684,8 +684,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn_cnt = len / sizeof(u32); ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); } else { - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); ret = -EINVAL; } @@ -720,8 +719,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, if (prop) detach_device(dev, dma_domain); else - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); } static int configure_domain_geometry(struct iommu_domain *domain, void *data) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index be8ac1ddec06..34160e7a8dd7 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -245,8 +245,7 @@ static int __init of_iommu_init(void) const of_iommu_init_fn init_fn = match->data; if (init_fn && 
init_fn(np)) - pr_err("Failed to initialise IOMMU %s\n", - of_node_full_name(np)); + pr_err("Failed to initialise IOMMU %pOF\n", np); } return 0; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4ba48a26b389..1b8155dada26 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1008,20 +1008,20 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group, ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0, &args); if (ret) { - dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n", - np->full_name, ret); + dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n", + np, ret); return ret; } if (args.args_count != 0) { - dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n", - args.np->full_name, args.args_count); + dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n", + args.np, args.args_count); return -EINVAL; } pd = of_find_device_by_node(args.np); of_node_put(args.np); if (!pd) { - dev_err(dev, "iommu %s not found\n", args.np->full_name); + dev_err(dev, "iommu %pOF not found\n", args.np); return -EPROBE_DEFER; } -- cgit v1.2.3 From ce2eb8f44e60c748fac56ede46b526fdac773e1b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:35 +0100 Subject: iommu/msm: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. Whilst it may not perfectly reflect the isolation capabilities of the hardware, using generic_device_group() should at least maintain existing behaviour with respect to the API. 
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index d0448353d501..04f4d51ffacb 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -393,6 +393,7 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev) static int msm_iommu_add_device(struct device *dev) { struct msm_iommu_dev *iommu; + struct iommu_group *group; unsigned long flags; int ret = 0; @@ -406,7 +407,16 @@ static int msm_iommu_add_device(struct device *dev) spin_unlock_irqrestore(&msm_iommu_lock, flags); - return ret; + if (ret) + return ret; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + + return 0; } static void msm_iommu_remove_device(struct device *dev) @@ -421,6 +431,8 @@ static void msm_iommu_remove_device(struct device *dev) iommu_device_unlink(&iommu->iommu, dev); spin_unlock_irqrestore(&msm_iommu_lock, flags); + + iommu_group_remove_device(dev); } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -700,6 +712,7 @@ static struct iommu_ops msm_iommu_ops = { .iova_to_phys = msm_iommu_iova_to_phys, .add_device = msm_iommu_add_device, .remove_device = msm_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, }; -- cgit v1.2.3 From d92e1f849830fc78c50a00b953361fc1449aa1e2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:36 +0100 Subject: iommu/tegra-smmu: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. 
Whilst it may not perfectly reflect the isolation capabilities of the hardware (tegra_smmu_swgroup sounds suspiciously like something that might warrant representing at the iommu_group level), using generic_device_group() should at least maintain existing behaviour with respect to the API. Signed-off-by: Robin Murphy Tested-by: Mikko Perttunen Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index eeb19f560a05..faa9c1e70482 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -704,6 +704,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np) static int tegra_smmu_add_device(struct device *dev) { struct device_node *np = dev->of_node; + struct iommu_group *group; struct of_phandle_args args; unsigned int index = 0; @@ -725,12 +726,19 @@ static int tegra_smmu_add_device(struct device *dev) index++; } + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; } static void tegra_smmu_remove_device(struct device *dev) { dev->archdata.iommu = NULL; + iommu_group_remove_device(dev); } static const struct iommu_ops tegra_smmu_ops = { @@ -741,6 +749,7 @@ static const struct iommu_ops tegra_smmu_ops = { .detach_dev = tegra_smmu_detach_dev, .add_device = tegra_smmu_add_device, .remove_device = tegra_smmu_remove_device, + .device_group = generic_device_group, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, .map_sg = default_iommu_map_sg, -- cgit v1.2.3 From 15f9a3104b80a83e33ec04609aa61ac7e045fa2c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:37 +0100 Subject: iommu/tegra-gart: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. 
Whilst it may not perfectly reflect the isolation capabilities of the hardware, using generic_device_group() should at least maintain existing behaviour with respect to the API. Signed-off-by: Robin Murphy Tested-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 37e708fdbb5a..29bafc6e82ae 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -334,12 +334,31 @@ static bool gart_iommu_capable(enum iommu_cap cap) return false; } +static int gart_iommu_add_device(struct device *dev) +{ + struct iommu_group *group = iommu_group_get_for_dev(dev); + + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; +} + +static void gart_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, .domain_free = gart_iommu_domain_free, .attach_dev = gart_iommu_attach_dev, .detach_dev = gart_iommu_detach_dev, + .add_device = gart_iommu_add_device, + .remove_device = gart_iommu_remove_device, + .device_group = generic_device_group, .map = gart_iommu_map, .map_sg = default_iommu_map_sg, .unmap = gart_iommu_unmap, -- cgit v1.2.3 From 05f80300dc8bcfe8566b36256d01482cae5afa02 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:38 +0100 Subject: iommu: Finish making iommu_group support mandatory Now that all the drivers properly implementing the IOMMU API support groups (I'm ignoring the etnaviv GPU MMUs which seemingly only do just enough to convince the ARM DMA mapping ops), we can remove the FIXME workarounds from the core code. 
In the process, it also seems logical to make the .device_group callback non-optional for drivers calling iommu_group_get_for_dev() - the current callers all implement it anyway, and it doesn't make sense for any future callers not to either. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f6ea160afed..af69bf7e035a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1005,11 +1005,10 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - group = ERR_PTR(-EINVAL); - - if (ops && ops->device_group) - group = ops->device_group(dev); + if (!ops) + return ERR_PTR(-EINVAL); + group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) return ERR_PTR(-EINVAL); @@ -1298,12 +1297,8 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) int ret; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_attach_device(domain, dev); - /* - * We have a group - lock it to make sure the device-count doesn't + * Lock the group to make sure the device-count doesn't * change while we are attaching */ mutex_lock(&group->mutex); @@ -1336,9 +1331,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_detach_device(domain, dev); mutex_lock(&group->mutex); if (iommu_group_device_count(group) != 1) { @@ -1360,9 +1352,6 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return NULL; domain = group->domain; -- cgit 
v1.2.3 From da4b02750a9fe1d1c4d047d14e69ec7542dddeb3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Aug 2017 17:29:06 +0100 Subject: iommu/of: Fix of_iommu_configure() for disabled IOMMUs Sudeep reports that the logic got slightly broken when a PCI iommu-map entry targets an IOMMU marked as disabled in DT, since of_pci_map_rid() succeeds in following a phandle, and of_iommu_xlate() doesn't return an error value, but we miss checking whether ops was actually non-NULL. Whilst this could be solved with a point fix in of_pci_iommu_init(), it suggests that all the juggling of ERR_PTR values through the ops pointer is proving rather too complicated for its own good, so let's instead simplify the whole flow (with a side-effect of eliminating the cause of the bug). The fact that we now rely on iommu_fwspec means that we no longer need to pass around an iommu_ops pointer at all - we can simply propagate a regular int return value until we know whether we have a viable IOMMU, then retrieve the ops from the fwspec if and when we actually need them. This makes everything a bit more uniform and certainly easier to follow. 
Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly") Reported-by: Sudeep Holla Tested-by: Sudeep Holla Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 59 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 34160e7a8dd7..e60e3dba85a0 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -25,6 +25,8 @@ #include #include +#define NO_IOMMU 1 + static const struct of_device_id __iommu_of_table_sentinel __used __section(__iommu_of_table_end); @@ -109,8 +111,8 @@ static bool of_iommu_driver_present(struct device_node *np) return of_match_node(&__iommu_of_table, np); } -static const struct iommu_ops -*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) +static int of_iommu_xlate(struct device *dev, + struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; @@ -120,24 +122,20 @@ static const struct iommu_ops if ((ops && !ops->of_xlate) || !of_device_is_available(iommu_spec->np) || (!ops && !of_iommu_driver_present(iommu_spec->np))) - return NULL; + return NO_IOMMU; err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); if (err) - return ERR_PTR(err); + return err; /* * The otherwise-empty fwspec handily serves to indicate the specific * IOMMU device we're waiting for, which will be useful if we ever get * a proper probe-ordering dependency mechanism in future. 
*/ if (!ops) - return ERR_PTR(-EPROBE_DEFER); - - err = ops->of_xlate(dev, iommu_spec); - if (err) - return ERR_PTR(err); + return -EPROBE_DEFER; - return ops; + return ops->of_xlate(dev, iommu_spec); } struct of_pci_iommu_alias_info { @@ -148,7 +146,6 @@ struct of_pci_iommu_alias_info { static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) { struct of_pci_iommu_alias_info *info = data; - const struct iommu_ops *ops; struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; @@ -156,13 +153,12 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? 1 : err; + return err == -ENODEV ? NO_IOMMU : err; - ops = of_iommu_xlate(info->dev, &iommu_spec); + err = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - - if (IS_ERR(ops)) - return PTR_ERR(ops); + if (err) + return err; return info->np == pdev->bus->dev.of_node; } @@ -172,7 +168,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, { const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec = dev->iommu_fwspec; - int err; + int err = NO_IOMMU; if (!master_np) return NULL; @@ -198,10 +194,6 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); - if (err) /* err > 0 means the walk stopped, but non-fatally */ - ops = ERR_PTR(min(err, 0)); - else /* success implies both fwspec and ops are now valid */ - ops = dev->iommu_fwspec->ops; } else { struct of_phandle_args iommu_spec; int idx = 0; @@ -209,27 +201,34 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, while (!of_parse_phandle_with_args(master_np, "iommus", "#iommu-cells", idx, &iommu_spec)) { - ops = of_iommu_xlate(dev, &iommu_spec); + err = of_iommu_xlate(dev, &iommu_spec); of_node_put(iommu_spec.np); idx++; - if (IS_ERR_OR_NULL(ops)) + if (err) break; } } + + /* + * Two success conditions can 
be represented by non-negative err here: + * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons + * 0 : we found an IOMMU, and dev->iommu_fwspec is initialised appropriately + * <0 : any actual error + */ + if (!err) + ops = dev->iommu_fwspec->ops; /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ - if (!IS_ERR_OR_NULL(ops) && ops->add_device && - dev->bus && !dev->iommu_group) { + if (ops && ops->add_device && dev->bus && !dev->iommu_group) err = ops->add_device(dev); - if (err) - ops = ERR_PTR(err); - } /* Ignore all other errors apart from EPROBE_DEFER */ - if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) { - dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops)); + if (err == -EPROBE_DEFER) { + ops = ERR_PTR(err); + } else if (err < 0) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); ops = NULL; } -- cgit v1.2.3 From 42f87e71c3df12d8f29ec1bb7b47772ffaeaf1ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 14:44:28 +0200 Subject: iommu/iova: Add flush-queue data structures This patch adds the basic data-structures to implement flush-queues in the generic IOVA code. It also adds the initialization and destroy routines for these data structures. The initialization routine is designed so that the use of this feature is optional for the users of IOVA code. 
Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 246f14c83944..b9f6ce02a1e1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -50,10 +50,48 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->flush_cb = NULL; + iovad->fq = NULL; init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); +static void free_iova_flush_queue(struct iova_domain *iovad) +{ + if (!iovad->fq) + return; + + free_percpu(iovad->fq); + + iovad->fq = NULL; + iovad->flush_cb = NULL; + iovad->entry_dtor = NULL; +} + +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) +{ + int cpu; + + iovad->fq = alloc_percpu(struct iova_fq); + if (!iovad->fq) + return -ENOMEM; + + iovad->flush_cb = flush_cb; + iovad->entry_dtor = entry_dtor; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + fq->head = 0; + fq->tail = 0; + } + + return 0; +} +EXPORT_SYMBOL_GPL(init_iova_flush_queue); + static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { @@ -433,6 +471,7 @@ void put_iova_domain(struct iova_domain *iovad) struct rb_node *node; unsigned long flags; + free_iova_flush_queue(iovad); free_iova_rcaches(iovad); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); node = rb_first(&iovad->rbroot); diff --git a/include/linux/iova.h b/include/linux/iova.h index e0a892ae45c0..8aa10896150e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -36,6 +36,30 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; +struct iova_domain; + +/* Call-Back from IOVA code into IOMMU drivers */ +typedef void (* 
iova_flush_cb)(struct iova_domain *domain); + +/* Destructor for per-entry data */ +typedef void (* iova_entry_dtor)(unsigned long data); + +/* Number of entries per Flush Queue */ +#define IOVA_FQ_SIZE 256 + +/* Flush Queue entry for deferred flushing */ +struct iova_fq_entry { + unsigned long iova_pfn; + unsigned long pages; + unsigned long data; +}; + +/* Per-CPU Flush Queue structure */ +struct iova_fq { + struct iova_fq_entry entries[IOVA_FQ_SIZE]; + unsigned head, tail; +}; + /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -45,6 +69,14 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + + iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU + TLBs */ + + iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for + iova entry */ + + struct iova_fq __percpu *fq; /* Flush Queue */ }; static inline unsigned long iova_size(struct iova *iova) @@ -102,6 +134,8 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit); +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, @@ -174,6 +208,13 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +static inline int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, + iova_entry_dtor entry_dtor) +{ + return -ENODEV; +} + static inline struct iova *find_iova(struct iova_domain *iovad, 
unsigned long pfn) { -- cgit v1.2.3 From 1928210107edd4fa786199fef6b875d3af3bef88 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 15:49:44 +0200 Subject: iommu/iova: Implement Flush-Queue ring buffer Add a function to add entries to the Flush-Queue ring buffer. If the buffer is full, call the flush-callback and free the entries. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 9 ++++++ 2 files changed, 89 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b9f6ce02a1e1..e5c9a7ae6088 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -32,6 +32,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long limit_pfn); static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); +static void fq_destroy_all_entries(struct iova_domain *iovad); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -61,6 +62,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!iovad->fq) return; + fq_destroy_all_entries(iovad); free_percpu(iovad->fq); iovad->fq = NULL; @@ -461,6 +463,84 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +#define fq_ring_for_each(i, fq) \ + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + +static inline bool fq_full(struct iova_fq *fq) +{ + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); +} + +static inline unsigned fq_ring_add(struct iova_fq *fq) +{ + unsigned idx = fq->tail; + + fq->tail = (idx + 1) % IOVA_FQ_SIZE; + + return idx; +} + +static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) +{ + unsigned idx; + + fq_ring_for_each(idx, fq) { + + if (iovad->entry_dtor) + iovad->entry_dtor(fq->entries[idx].data); + + free_iova_fast(iovad, + fq->entries[idx].iova_pfn, + 
fq->entries[idx].pages); + } + + fq->head = 0; + fq->tail = 0; +} + +static void fq_destroy_all_entries(struct iova_domain *iovad) +{ + int cpu; + + /* + * This code runs when the iova_domain is being detroyed, so don't + * bother to free iovas, just call the entry_dtor on all remaining + * entries. + */ + if (!iovad->entry_dtor) + return; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); + int idx; + + fq_ring_for_each(idx, fq) + iovad->entry_dtor(fq->entries[idx].data); + } +} + +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ + struct iova_fq *fq = get_cpu_ptr(iovad->fq); + unsigned idx; + + if (fq_full(fq)) { + iovad->flush_cb(iovad); + fq_ring_free(iovad, fq); + } + + idx = fq_ring_add(fq); + + fq->entries[idx].iova_pfn = pfn; + fq->entries[idx].pages = pages; + fq->entries[idx].data = data; + + put_cpu_ptr(iovad->fq); +} +EXPORT_SYMBOL_GPL(queue_iova); + /** * put_iova_domain - destroys the iova doamin * @iovad: - iova domain in question. 
diff --git a/include/linux/iova.h b/include/linux/iova.h index 8aa10896150e..1ae85248ec50 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -127,6 +127,9 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, @@ -182,6 +185,12 @@ static inline void free_iova_fast(struct iova_domain *iovad, { } +static inline void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ +} + static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn) -- cgit v1.2.3 From fb418dab8a4f01dde0c025d15145c589ec02796b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:14:59 +0200 Subject: iommu/iova: Add flush counters to Flush-Queue implementation There are two counters: * fq_flush_start_cnt - Increased when a TLB flush is started. * fq_flush_finish_cnt - Increased when a TLB flush is finished. The fq_flush_start_cnt is assigned to every Flush-Queue entry on its creation. When freeing entries from the Flush-Queue, the value in the entry is compared to the fq_flush_finish_cnt. The entry can only be freed when its value is less than the value of fq_flush_finish_cnt. The reason for these counters is to take advantage of IOMMU TLB flushes that happened on other CPUs. These already flushed the TLB for Flush-Queue entries on other CPUs so that they can already be freed without flushing the TLB again. This makes it less likely that the Flush-Queue is full and saves IOMMU TLB flushes.
Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 27 ++++++++++++++++++++++++--- include/linux/iova.h | 8 ++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index e5c9a7ae6088..47b144e417ad 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -75,6 +75,9 @@ int init_iova_flush_queue(struct iova_domain *iovad, { int cpu; + atomic64_set(&iovad->fq_flush_start_cnt, 0); + atomic64_set(&iovad->fq_flush_finish_cnt, 0); + iovad->fq = alloc_percpu(struct iova_fq); if (!iovad->fq) return -ENOMEM; @@ -482,20 +485,30 @@ static inline unsigned fq_ring_add(struct iova_fq *fq) static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) { + u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); unsigned idx; fq_ring_for_each(idx, fq) { + if (fq->entries[idx].counter >= counter) + break; + if (iovad->entry_dtor) iovad->entry_dtor(fq->entries[idx].data); free_iova_fast(iovad, fq->entries[idx].iova_pfn, fq->entries[idx].pages); + + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; } +} - fq->head = 0; - fq->tail = 0; +static void iova_domain_flush(struct iova_domain *iovad) +{ + atomic64_inc(&iovad->fq_flush_start_cnt); + iovad->flush_cb(iovad); + atomic64_inc(&iovad->fq_flush_finish_cnt); } static void fq_destroy_all_entries(struct iova_domain *iovad) @@ -526,8 +539,15 @@ void queue_iova(struct iova_domain *iovad, struct iova_fq *fq = get_cpu_ptr(iovad->fq); unsigned idx; + /* + * First remove all entries from the flush queue that have already been + * flushed out on another CPU. This makes the fq_full() check below less + * likely to be true. 
+ */ + fq_ring_free(iovad, fq); + if (fq_full(fq)) { - iovad->flush_cb(iovad); + iova_domain_flush(iovad); fq_ring_free(iovad, fq); } @@ -536,6 +556,7 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].iova_pfn = pfn; fq->entries[idx].pages = pages; fq->entries[idx].data = data; + fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); put_cpu_ptr(iovad->fq); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 1ae85248ec50..985b8008999e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -14,6 +14,7 @@ #include #include #include +#include #include /* iova structure */ @@ -52,6 +53,7 @@ struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; unsigned long data; + u64 counter; /* Flush counter when this entrie was added */ }; /* Per-CPU Flush Queue structure */ @@ -77,6 +79,12 @@ struct iova_domain { iova entry */ struct iova_fq __percpu *fq; /* Flush Queue */ + + atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that + have been started */ + + atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that + have been finished */ }; static inline unsigned long iova_size(struct iova *iova) -- cgit v1.2.3 From 8109c2a2f8463852dddd6a1c3fcf262047c0c124 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:31:17 +0200 Subject: iommu/iova: Add locking to Flush-Queues The lock is taken from the same CPU most of the time. But having it allows the queue to also be flushed from another CPU if necessary. This will be used by a timer to regularly flush any pending IOVAs from the Flush-Queues.
Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 11 +++++++++++ include/linux/iova.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 47b144e417ad..749d39533e0b 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -91,6 +91,8 @@ int init_iova_flush_queue(struct iova_domain *iovad, fq = per_cpu_ptr(iovad->fq, cpu); fq->head = 0; fq->tail = 0; + + spin_lock_init(&fq->lock); } return 0; @@ -471,6 +473,7 @@ EXPORT_SYMBOL_GPL(free_iova_fast); static inline bool fq_full(struct iova_fq *fq) { + assert_spin_locked(&fq->lock); return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); } @@ -478,6 +481,8 @@ static inline unsigned fq_ring_add(struct iova_fq *fq) { unsigned idx = fq->tail; + assert_spin_locked(&fq->lock); + fq->tail = (idx + 1) % IOVA_FQ_SIZE; return idx; @@ -488,6 +493,8 @@ static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); unsigned idx; + assert_spin_locked(&fq->lock); + fq_ring_for_each(idx, fq) { if (fq->entries[idx].counter >= counter) @@ -537,8 +544,11 @@ void queue_iova(struct iova_domain *iovad, unsigned long data) { struct iova_fq *fq = get_cpu_ptr(iovad->fq); + unsigned long flags; unsigned idx; + spin_lock_irqsave(&fq->lock, flags); + /* * First remove all entries from the flush queue that have already been * flushed out on another CPU. 
This makes the fq_full() check below less @@ -558,6 +568,7 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].data = data; fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); + spin_unlock_irqrestore(&fq->lock, flags); put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova); diff --git a/include/linux/iova.h b/include/linux/iova.h index 985b8008999e..913a690cd4b0 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -60,6 +60,7 @@ struct iova_fq_entry { struct iova_fq { struct iova_fq_entry entries[IOVA_FQ_SIZE]; unsigned head, tail; + spinlock_t lock; }; /* holds all the iova translations for a domain */ -- cgit v1.2.3 From 9a005a800ae817c2c90ef117d7cd77614d866777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:58:18 +0200 Subject: iommu/iova: Add flush timer Add a timer to flush entries from the Flush-Queues every 10ms. This makes sure that no stale TLB entries remain for too long after an IOVA has been unmapped. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 32 ++++++++++++++++++++++++++++++++ include/linux/iova.h | 8 ++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 749d39533e0b..33edfa794ae9 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -33,6 +33,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); static void fq_destroy_all_entries(struct iova_domain *iovad); +static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -62,7 +63,11 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!iovad->fq) return; + if (timer_pending(&iovad->fq_timer)) + del_timer(&iovad->fq_timer); + fq_destroy_all_entries(iovad); + free_percpu(iovad->fq); iovad->fq = NULL; @@ -95,6 +100,9 @@ int init_iova_flush_queue(struct iova_domain 
*iovad, spin_lock_init(&fq->lock); } + setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad); + atomic_set(&iovad->fq_timer_on, 0); + return 0; } EXPORT_SYMBOL_GPL(init_iova_flush_queue); @@ -539,6 +547,25 @@ static void fq_destroy_all_entries(struct iova_domain *iovad) } } +static void fq_flush_timeout(unsigned long data) +{ + struct iova_domain *iovad = (struct iova_domain *)data; + int cpu; + + atomic_set(&iovad->fq_timer_on, 0); + iova_domain_flush(iovad); + + for_each_possible_cpu(cpu) { + unsigned long flags; + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free(iovad, fq); + spin_unlock_irqrestore(&fq->lock, flags); + } +} + void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) @@ -569,6 +596,11 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); spin_unlock_irqrestore(&fq->lock, flags); + + if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) + mod_timer(&iovad->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova); diff --git a/include/linux/iova.h b/include/linux/iova.h index 913a690cd4b0..d179b9bf7814 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -48,6 +48,9 @@ typedef void (* iova_entry_dtor)(unsigned long data); /* Number of entries per Flush Queue */ #define IOVA_FQ_SIZE 256 +/* Timeout (in ms) after which entries are flushed from the Flush-Queue */ +#define IOVA_FQ_TIMEOUT 10 + /* Flush Queue entry for defered flushing */ struct iova_fq_entry { unsigned long iova_pfn; @@ -86,6 +89,11 @@ struct iova_domain { atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that have been finished */ + + struct timer_list fq_timer; /* Timer to regularily empty the + flush-queues */ + atomic_t fq_timer_on; /* 1 when timer is active, 0 + when not */ }; static inline unsigned long iova_size(struct 
iova *iova) -- cgit v1.2.3 From 9003d6186321e22b19125721b6fb2aa390ff8be6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 17:19:13 +0200 Subject: iommu/amd: Make use of iova queue flushing Rip out the implementation in the AMD IOMMU driver and use the one in the common iova code instead. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 229 ++-------------------------------------------- 1 file changed, 9 insertions(+), 220 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 688e77576e5a..cabcaa506ed6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); - -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - u64 counter; /* Flush counter when this entry was added to the queue */ -}; - -struct flush_queue { - struct flush_queue_entry *entries; - unsigned head, tail; - spinlock_t lock; -}; +static void iova_domain_flush_tlb(struct iova_domain *iovad); /* * Data container for a dma_ops specific protection domain @@ -161,36 +148,6 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; - - struct flush_queue __percpu *flush_queue; - - /* - * We need two counter here to be race-free wrt. IOTLB flushing and - * adding entries to the flush queue. - * - * The flush_start_cnt is incremented _before_ the IOTLB flush starts. - * New entries added to the flush ring-buffer get their 'counter' value - * from here. This way we can make sure that entries added to the queue - * (or other per-cpu queues of the same domain) while the TLB is about - * to be flushed are not considered to be flushed already. 
- */ - atomic64_t flush_start_cnt; - - /* - * The flush_finish_cnt is incremented when an IOTLB flush is complete. - * This value is always smaller than flush_start_cnt. The queue_add - * function frees all IOVAs that have a counter value smaller than - * flush_finish_cnt. This makes sure that we only free IOVAs that are - * flushed out of the IOTLB of the domain. - */ - atomic64_t flush_finish_cnt; - - /* - * Timer to make sure we don't keep IOVAs around unflushed - * for too long - */ - struct timer_list flush_timer; - atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - kfree(queue->entries); - } - - free_percpu(dom->flush_queue); - - dom->flush_queue = NULL; -} - -static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - atomic64_set(&dom->flush_start_cnt, 0); - atomic64_set(&dom->flush_finish_cnt, 0); - - dom->flush_queue = alloc_percpu(struct flush_queue); - if (!dom->flush_queue) - return -ENOMEM; - - /* First make sure everything is cleared */ - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->head = 0; - queue->tail = 0; - queue->entries = NULL; - } - - /* Now start doing the allocation */ - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) { - dma_ops_domain_free_flush_queue(dom); - return -ENOMEM; - } - - spin_lock_init(&queue->lock); - } - - return 0; -} - static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { - 
atomic64_inc(&dom->flush_start_cnt); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); - atomic64_inc(&dom->flush_finish_cnt); } -static inline bool queue_ring_full(struct flush_queue *queue) +static void iova_domain_flush_tlb(struct iova_domain *iovad) { - assert_spin_locked(&queue->lock); - - return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); -} + struct dma_ops_domain *dom; -#define queue_ring_for_each(i, q) \ - for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) - -static inline unsigned queue_ring_add(struct flush_queue *queue) -{ - unsigned idx = queue->tail; - - assert_spin_locked(&queue->lock); - queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; - - return idx; -} - -static inline void queue_ring_remove_head(struct flush_queue *queue) -{ - assert_spin_locked(&queue->lock); - queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; -} - -static void queue_ring_free_flushed(struct dma_ops_domain *dom, - struct flush_queue *queue) -{ - u64 counter = atomic64_read(&dom->flush_finish_cnt); - int idx; - - queue_ring_for_each(idx, queue) { - /* - * This assumes that counter values in the ring-buffer are - * monotonously rising. 
- */ - if (queue->entries[idx].counter >= counter) - break; - - free_iova_fast(&dom->iovad, - queue->entries[idx].iova_pfn, - queue->entries[idx].pages); - - queue_ring_remove_head(queue); - } -} - -static void queue_add(struct dma_ops_domain *dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(dom->flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - /* - * First remove the enries from the ring-buffer that are already - * flushed to make the below queue_ring_full() check less likely - */ - queue_ring_free_flushed(dom, queue); - - /* - * When ring-queue is full, flush the entries from the IOTLB so - * that we can free all entries with queue_ring_free_flushed() - * below. - */ - if (queue_ring_full(queue)) { - dma_ops_domain_flush_tlb(dom); - queue_ring_free_flushed(dom, queue); - } - - idx = queue_ring_add(queue); - - queue->entries[idx].iova_pfn = address; - queue->entries[idx].pages = pages; - queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) - mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(dom->flush_queue); -} - -static void queue_flush_timeout(unsigned long data) -{ - struct dma_ops_domain *dom = (struct dma_ops_domain *)data; - int cpu; - - atomic_set(&dom->flush_timer_on, 0); + dom = container_of(iovad, struct dma_ops_domain, iovad); dma_ops_domain_flush_tlb(dom); - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - queue_ring_free_flushed(dom, queue); - spin_unlock_irqrestore(&queue->lock, flags); - } } /* @@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); 
- if (timer_pending(&dom->flush_timer)) - del_timer(&dom->flush_timer); - - dma_ops_domain_free_flush_queue(dom); - put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); - /* Initialize reserved ranges */ - copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); - - if (dma_ops_domain_alloc_flush_queue(dma_dom)) + if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; - setup_timer(&dma_dom->flush_timer, queue_flush_timeout, - (unsigned long)dma_dom); - - atomic_set(&dma_dom->flush_timer_on, 0); + /* Initialize reserved ranges */ + copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); add_domain_to_list(&dma_dom->domain); @@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - queue_add(dma_dom, dma_addr, pages); + pages = __roundup_pow_of_two(pages); + queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); } } -- cgit v1.2.3 From c8acb28b331364b32a5c81dbfbdfc8475b2f1f27 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Aug 2017 11:42:46 +0200 Subject: iommu/vt-d: Allow to flush more than 4GB of device TLBs The shift in qi_flush_dev_iotlb() is done on an int, which limits the mask to 32 bits. Make the mask 64 bits wide so that more than 4GB of address range can be flushed at once.
Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c8b0329c85d2..ca5ebaeafd6a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, if (mask) { BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); - addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else desc.high = QI_DEV_IOTLB_ADDR(addr); -- cgit v1.2.3 From 13cf01744608e1dc3f13dd316c95cb7a1fdaf740 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Aug 2017 11:40:10 +0200 Subject: iommu/vt-d: Make use of iova deferred flushing Remove the deferred flushing implementation in the Intel VT-d driver and use the one from the common iova code instead. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 197 +++++++++----------------------------------- 1 file changed, 38 insertions(+), 159 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 687f18f65cea..d5e8b8628a1a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -static void flush_unmaps_timeout(unsigned long data); - -struct deferred_flush_entry { - unsigned long iova_pfn; - unsigned long nrpages; - struct dmar_domain *domain; - struct page *freelist; -}; - -#define HIGH_WATER_MARK 250 -struct deferred_flush_table { - int next; - struct deferred_flush_entry entries[HIGH_WATER_MARK]; -}; - -struct deferred_flush_data { - spinlock_t lock; - int timer_on; - struct timer_list timer; - long size; - struct deferred_flush_table *tables; -}; - -static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); - /* bitmap for indexing 
intel_iommus */ static int g_num_of_iommus; @@ -1309,6 +1284,13 @@ static void dma_free_pagelist(struct page *freelist) } } +static void iova_entry_free(unsigned long data) +{ + struct page *freelist = (struct page *)data; + + dma_free_pagelist(freelist); +} + /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1622,6 +1604,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, addr, mask); } +static void iommu_flush_iova(struct iova_domain *iovad) +{ + struct dmar_domain *domain; + int idx; + + domain = container_of(iovad, struct dmar_domain, iovad); + + for_each_domain_iommu(idx, domain) { + struct intel_iommu *iommu = g_iommus[idx]; + u16 did = domain->iommu_did[iommu->seq_id]; + + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + + if (!cap_caching_mode(iommu->cap)) + iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), + 0, MAX_AGAW_PFN_WIDTH); + } +} + static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) { u32 pmen; @@ -1932,9 +1933,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, { int adjust_width, agaw; unsigned long sagaw; + int err; init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); + + err = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (err) + return err; + domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -1986,14 +1994,6 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; - /* Flush any lazy unmaps that may reference this domain */ - if (!intel_iommu_strict) { - int cpu; - - for_each_possible_cpu(cpu) - flush_unmaps_timeout(cpu); - } - /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); domain_remove_dev_info(domain); @@ -3206,7 +3206,7 @@ static int __init init_dmars(void) bool copied_tables = false; struct device *dev; struct intel_iommu *iommu; - int i, ret, cpu; + int i, ret; /* * for each drhd @@ 
-3239,22 +3239,6 @@ static int __init init_dmars(void) goto error; } - for_each_possible_cpu(cpu) { - struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush, - cpu); - - dfd->tables = kzalloc(g_num_of_iommus * - sizeof(struct deferred_flush_table), - GFP_KERNEL); - if (!dfd->tables) { - ret = -ENOMEM; - goto free_g_iommus; - } - - spin_lock_init(&dfd->lock); - setup_timer(&dfd->timer, flush_unmaps_timeout, cpu); - } - for_each_active_iommu(iommu, drhd) { g_iommus[iommu->seq_id] = iommu; @@ -3437,10 +3421,9 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } -free_g_iommus: - for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(&deferred_flush, cpu)->tables); + kfree(g_iommus); + error: return ret; } @@ -3645,110 +3628,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, dir, *dev->dma_mask); } -static void flush_unmaps(struct deferred_flush_data *flush_data) -{ - int i, j; - - flush_data->timer_on = 0; - - /* just flush them all */ - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct deferred_flush_table *flush_table = - &flush_data->tables[i]; - if (!iommu) - continue; - - if (!flush_table->next) - continue; - - /* In caching mode, global flushes turn emulation expensive */ - if (!cap_caching_mode(iommu->cap)) - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - for (j = 0; j < flush_table->next; j++) { - unsigned long mask; - struct deferred_flush_entry *entry = - &flush_table->entries[j]; - unsigned long iova_pfn = entry->iova_pfn; - unsigned long nrpages = entry->nrpages; - struct dmar_domain *domain = entry->domain; - struct page *freelist = entry->freelist; - - /* On real hardware multiple invalidations are expensive */ - if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain, - mm_to_dma_pfn(iova_pfn), - nrpages, !freelist, 0); - else { - mask = ilog2(nrpages); - iommu_flush_dev_iotlb(domain, - (uint64_t)iova_pfn << PAGE_SHIFT, mask); - } - 
free_iova_fast(&domain->iovad, iova_pfn, nrpages); - if (freelist) - dma_free_pagelist(freelist); - } - flush_table->next = 0; - } - - flush_data->size = 0; -} - -static void flush_unmaps_timeout(unsigned long cpuid) -{ - struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid); - unsigned long flags; - - spin_lock_irqsave(&flush_data->lock, flags); - flush_unmaps(flush_data); - spin_unlock_irqrestore(&flush_data->lock, flags); -} - -static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, - unsigned long nrpages, struct page *freelist) -{ - unsigned long flags; - int entry_id, iommu_id; - struct intel_iommu *iommu; - struct deferred_flush_entry *entry; - struct deferred_flush_data *flush_data; - - flush_data = raw_cpu_ptr(&deferred_flush); - - /* Flush all CPUs' entries to avoid deferring too much. If - * this becomes a bottleneck, can just flush us, and rely on - * flush timer for the rest. - */ - if (flush_data->size == HIGH_WATER_MARK) { - int cpu; - - for_each_online_cpu(cpu) - flush_unmaps_timeout(cpu); - } - - spin_lock_irqsave(&flush_data->lock, flags); - - iommu = domain_get_iommu(dom); - iommu_id = iommu->seq_id; - - entry_id = flush_data->tables[iommu_id].next; - ++(flush_data->tables[iommu_id].next); - - entry = &flush_data->tables[iommu_id].entries[entry_id]; - entry->domain = dom; - entry->iova_pfn = iova_pfn; - entry->nrpages = nrpages; - entry->freelist = freelist; - - if (!flush_data->timer_on) { - mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10)); - flush_data->timer_on = 1; - } - flush_data->size++; - spin_unlock_irqrestore(&flush_data->lock, flags); -} - static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) { struct dmar_domain *domain; @@ -3784,7 +3663,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); dma_free_pagelist(freelist); } else { - add_unmap(domain, iova_pfn, nrpages, 
freelist); + queue_iova(&domain->iovad, iova_pfn, nrpages, + (unsigned long)freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -4721,7 +4601,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) static int intel_iommu_cpu_dead(unsigned int cpu) { free_all_cpu_cached_iovas(cpu); - flush_unmaps_timeout(cpu); return 0; } -- cgit v1.2.3