diff options
Diffstat (limited to 'drivers/iommu/arm-smmu.c')
-rw-r--r-- | drivers/iommu/arm-smmu.c | 213 |
1 files changed, 123 insertions, 90 deletions
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7c503a6bc585..a180665ea002 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/amba/bus.h> @@ -122,7 +123,7 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) { if (pm_runtime_enabled(smmu->dev)) - pm_runtime_put(smmu->dev); + pm_runtime_put_autosuspend(smmu->dev); } static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -244,6 +245,9 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, unsigned int spin_cnt, delay; u32 reg; + if (smmu->impl && unlikely(smmu->impl->tlb_sync)) + return smmu->impl->tlb_sync(smmu, page, sync, status); + arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL); for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { @@ -268,9 +272,8 @@ static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) spin_unlock_irqrestore(&smmu->global_sync_lock, flags); } -static void arm_smmu_tlb_sync_context(void *cookie) +static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; unsigned long flags; @@ -280,13 +283,6 @@ static void arm_smmu_tlb_sync_context(void *cookie) spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); } -static void arm_smmu_tlb_sync_vmid(void *cookie) -{ - struct arm_smmu_domain *smmu_domain = cookie; - - arm_smmu_tlb_sync_global(smmu_domain->smmu); -} - static void arm_smmu_tlb_inv_context_s1(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; @@ -297,7 +293,7 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie) wmb(); arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx, ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid); - arm_smmu_tlb_sync_context(cookie); + arm_smmu_tlb_sync_context(smmu_domain); } static void arm_smmu_tlb_inv_context_s2(void *cookie) @@ -312,18 +308,16 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) } static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) + size_t granule, void *cookie, int reg) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - int reg, idx = cfg->cbndx; + int idx = cfg->cbndx; if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) wmb(); - reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova = (iova >> 12) << 12; iova |= cfg->asid; @@ -342,16 +336,15 @@ static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, } static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) + size_t granule, void *cookie, int reg) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; - int reg, idx = smmu_domain->cfg.cbndx; + int idx = smmu_domain->cfg.cbndx; if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) wmb(); - reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; do { if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -362,85 +355,98 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, } while (size -= granule); } -/* - * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears - * almost negligible, but the benefit of getting the first one in as far ahead - * of the sync as possible is significant, hence we don't just make this a - * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think. - */ -static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size, + size_t granule, void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - struct arm_smmu_device *smmu = smmu_domain->smmu; - - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - wmb(); + arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, + ARM_SMMU_CB_S1_TLBIVA); + arm_smmu_tlb_sync_context(cookie); +} - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid); +static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, + ARM_SMMU_CB_S1_TLBIVAL); + arm_smmu_tlb_sync_context(cookie); } -static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, - size_t granule, void *cookie) +static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie, + ARM_SMMU_CB_S1_TLBIVAL); +} - ops->tlb_inv_range(iova, size, granule, false, cookie); - ops->tlb_sync(cookie); +static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2); + arm_smmu_tlb_sync_context(cookie); } -static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, - size_t granule, void *cookie) +static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size, + size_t granule, void *cookie) { - struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2L); + arm_smmu_tlb_sync_context(cookie); +} - ops->tlb_inv_range(iova, size, granule, true, cookie); - ops->tlb_sync(cookie); +static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ + arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie, + ARM_SMMU_CB_S2_TLBIIPAS2L); } -static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) +static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + arm_smmu_tlb_inv_context_s2(cookie); +} +/* + * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears + * almost negligible, but the benefit of getting the first one in as far ahead + * of the sync as possible is significant, hence we don't just make this a + * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might + * think. + */ +static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + struct arm_smmu_device *smmu = smmu_domain->smmu; - ops->tlb_inv_range(iova, granule, granule, true, cookie); + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid); } -static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s1, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_range_s1, - .tlb_sync = arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s1, + .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1, + .tlb_add_page = arm_smmu_tlb_add_page_s1, }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_range_s2, - .tlb_sync = arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2, + .tlb_add_page = arm_smmu_tlb_add_page_s2, }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { - .tlb = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_page = arm_smmu_tlb_add_page, - }, - .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync, - .tlb_sync = arm_smmu_tlb_sync_vmid, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1, + .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1, + .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -472,6 +478,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) { u32 gfsr, gfsynr0, gfsynr1, gfsynr2; struct arm_smmu_device *smmu = dev; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR); gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0); @@ -481,11 +489,19 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) if (!gfsr) return IRQ_NONE; - dev_err_ratelimited(smmu->dev, - "Unexpected global fault, this could be serious\n"); - dev_err_ratelimited(smmu->dev, - "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", - gfsr, gfsynr0, gfsynr1, gfsynr2); + if (__ratelimit(&rs)) { + if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && + (gfsr & sGFSR_USF)) + dev_err(smmu->dev, + "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", + (u16)gfsynr1); + else + dev_err(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + } arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr); return IRQ_HANDLED; @@ -770,7 +786,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .ias = ias, .oas = oas, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, - .tlb = &smmu_domain->flush_ops->tlb, + .tlb = smmu_domain->flush_ops, .iommu_dev = smmu->dev, }; @@ -1039,8 +1055,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev) } group = iommu_group_get_for_dev(dev); - if (!group) - group = ERR_PTR(-ENOMEM); if (IS_ERR(group)) { ret = PTR_ERR(group); goto out_err; @@ -1154,6 +1168,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Looks ok, so add the device to the domain */ ret = arm_smmu_domain_add_master(smmu_domain, fwspec); + /* + * Setup an autosuspend delay to avoid bouncing runpm state. + * Otherwise, if a driver for a suspended consumer device + * unmaps buffers, it will runpm resume/suspend for each one. + * + * For example, when used by a GPU device, when an application + * or game exits, it can trigger unmapping 100s or 1000s of + * buffers. With a runpm cycle for each buffer, that adds up + * to 5-10sec worth of reprogramming the context bank, while + * the system appears to be locked up to the user. + */ + pm_runtime_set_autosuspend_delay(smmu->dev, 20); + pm_runtime_use_autosuspend(smmu->dev); + rpm_put: arm_smmu_rpm_put(smmu); return ret; @@ -1200,7 +1228,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) if (smmu_domain->flush_ops) { arm_smmu_rpm_get(smmu); - smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain); + smmu_domain->flush_ops->tlb_flush_all(smmu_domain); arm_smmu_rpm_put(smmu); } } @@ -1211,11 +1239,16 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->flush_ops) { - arm_smmu_rpm_get(smmu); - smmu_domain->flush_ops->tlb_sync(smmu_domain); - arm_smmu_rpm_put(smmu); - } + if (!smmu) + return; + + arm_smmu_rpm_get(smmu); + if (smmu->version == ARM_SMMU_V2 || + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + arm_smmu_tlb_sync_context(smmu_domain); + else + arm_smmu_tlb_sync_global(smmu); + arm_smmu_rpm_put(smmu); } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, |