diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 6 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 6 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 229 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 64 | ||||
-rw-r--r-- | drivers/iommu/dma-iommu.c | 15 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 31 | ||||
-rw-r--r-- | drivers/iommu/intel-svm.c | 30 | ||||
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 4 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm-v7s.c | 183 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 189 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable.h | 6 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 17 | ||||
-rw-r--r-- | drivers/iommu/iova.c | 30 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 353 | ||||
-rw-r--r-- | drivers/iommu/mtk_iommu_v1.c | 1 | ||||
-rw-r--r-- | drivers/iommu/of_iommu.c | 7 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/s390-iommu.c | 15 |
18 files changed, 826 insertions, 362 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6ee3a25ae731..f73ff28f77e2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) + depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG config ROCKCHIP_IOMMU bool "Rockchip IOMMU Support" - depends on ARM + depends on ARM || ARM64 depends on ARCH_ROCKCHIP || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM_LPAE + depends on ARM || IOMMU_DMA depends on ARCH_RENESAS || COMPILE_TEST select IOMMU_API select IOMMU_IO_PGTABLE_LPAE diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c618c26a3b33..83a55df17dfe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4005,11 +4005,9 @@ static void irte_ga_prepare(void *entry, u8 vector, u32 dest_apicid, int devid) { struct irte_ga *irte = (struct irte_ga *) entry; - struct iommu_dev_data *dev_data = search_dev_data(devid); irte->lo.val = 0; irte->hi.val = 0; - irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0; irte->lo.fields_remap.int_type = delivery_mode; irte->lo.fields_remap.dm = dest_mode; irte->hi.fields.vector = vector; @@ -4065,10 +4063,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, struct irte_ga *irte = (struct irte_ga *) entry; struct iommu_dev_data *dev_data = search_dev_data(devid); - if (!dev_data || !dev_data->use_vapic) { + if (!dev_data || !dev_data->use_vapic || + !irte->lo.fields_remap.guest_mode) { irte->hi.fields.vector = vector; irte->lo.fields_remap.destination = dest_apicid; - irte->lo.fields_remap.guest_mode = 0; modify_irte_ga(devid, index, irte, NULL); } } diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 380969aa60d5..568c400eeaed 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -408,10 +408,20 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 +#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_HISILICON_HI161X +#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 +#endif + +#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 +#endif + static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -597,6 +607,7 @@ struct arm_smmu_device { u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) +#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) u32 options; struct arm_smmu_cmdq cmdq; @@ -604,6 +615,7 @@ struct arm_smmu_device { struct arm_smmu_priq priq; int gerr_irq; + int combined_irq; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -645,7 +657,6 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; enum arm_smmu_domain_stage stage; union { @@ -663,9 +674,20 @@ struct arm_smmu_option_prop { static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, + { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, { 0, NULL}, }; +static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, + struct arm_smmu_device *smmu) +{ + if ((offset > SZ_64K) && + (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)) + offset -= SZ_64K; + + return smmu->base + offset; +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -737,7 +759,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q) */ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) { - ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); + ktime_t timeout; + unsigned int delay = 1; + + /* Wait longer if it's queue drain */ + timeout = ktime_add_us(ktime_get(), drain ? + ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : + ARM_SMMU_POLL_TIMEOUT_US); while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) @@ -747,7 +775,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) wfe(); } else { cpu_relax(); - udelay(1); + udelay(delay); + delay *= 2; } } @@ -1302,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) return IRQ_HANDLED; } +static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) +{ + struct arm_smmu_device *smmu = dev; + + arm_smmu_evtq_thread(irq, dev); + if (smmu->features & ARM_SMMU_FEAT_PRI) + arm_smmu_priq_thread(irq, dev); + + return IRQ_HANDLED; +} + +static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) +{ + arm_smmu_gerror_handler(irq, dev); + arm_smmu_cmdq_sync_handler(irq, dev); + return IRQ_WAKE_THREAD; +} + /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { @@ -1406,7 +1453,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); return &smmu_domain->domain; } @@ -1555,6 +1601,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENCY) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -1675,44 +1724,29 @@ out_unlock: static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1720,11 +1754,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->iova_to_phys(ops, iova); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - - return ret; + return ops->iova_to_phys(ops, iova); } static struct platform_driver arm_smmu_driver; @@ -1961,8 +1991,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return -ENOMEM; } - q->prod_reg = smmu->base + prod_off; - q->cons_reg = smmu->base + cons_off; + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); + q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); q->ent_dwords = dwords; q->q_base = Q_BASE_RWA; @@ -2218,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) devm_add_action(dev, arm_smmu_free_msis, dev); } -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) { - int ret, irq; - u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; - - /* Disable IRQs first */ - ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, - ARM_SMMU_IRQ_CTRLACK); - if (ret) { - dev_err(smmu->dev, "failed to disable irqs\n"); - return ret; - } + int irq, ret; arm_smmu_setup_msis(smmu); @@ -2272,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) if (ret < 0) dev_warn(smmu->dev, "failed to enable priq irq\n"); - else - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; } } +} + +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +{ + int ret, irq; + u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; + + /* Disable IRQs first */ + ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, + ARM_SMMU_IRQ_CTRLACK); + if (ret) { + dev_err(smmu->dev, "failed to disable irqs\n"); + return ret; + } + + irq = smmu->combined_irq; + if (irq) { + /* + * Cavium ThunderX2 implementation doesn't not support unique + * irq lines. Use single irq line for all the SMMUv3 interrupts. + */ + ret = devm_request_threaded_irq(smmu->dev, irq, + arm_smmu_combined_irq_handler, + arm_smmu_combined_irq_thread, + IRQF_ONESHOT, + "arm-smmu-v3-combined-irq", smmu); + if (ret < 0) + dev_warn(smmu->dev, "failed to enable combined irq\n"); + } else + arm_smmu_setup_unique_irqs(smmu); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; /* Enable interrupt generation on the SMMU */ ret = arm_smmu_write_reg_sync(smmu, irqen_flags, @@ -2363,8 +2415,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); - writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD); - writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS); + writel_relaxed(smmu->evtq.q.prod, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); + writel_relaxed(smmu->evtq.q.cons, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); enables |= CR0_EVTQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2379,9 +2433,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writeq_relaxed(smmu->priq.q.q_base, smmu->base + ARM_SMMU_PRIQ_BASE); writel_relaxed(smmu->priq.q.prod, - smmu->base + ARM_SMMU_PRIQ_PROD); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); writel_relaxed(smmu->priq.q.cons, - smmu->base + ARM_SMMU_PRIQ_CONS); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); enables |= CR0_PRIQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2605,6 +2659,20 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI +static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +{ + switch (model) { + case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: + smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; + break; + case ACPI_IORT_SMMU_HISILICON_HI161X: + smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; + break; + } + + dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); +} + static int arm_smmu_device_acpi_probe(struct platform_device *pdev, struct arm_smmu_device *smmu) { @@ -2617,6 +2685,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; + acpi_smmu_get_options(iort_smmu->model, smmu); + if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; @@ -2652,6 +2722,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return ret; } +static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) +{ + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) + return SZ_64K; + else + return SZ_128K; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -2668,9 +2746,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } smmu->dev = dev; + if (dev->of_node) { + ret = arm_smmu_device_dt_probe(pdev, smmu); + } else { + ret = arm_smmu_device_acpi_probe(pdev, smmu); + if (ret == -ENODEV) + return ret; + } + + /* Set bypass mode according to firmware probing result */ + bypass = !!ret; + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < SZ_128K) { + if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } @@ -2681,33 +2770,27 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return PTR_ERR(smmu->base); /* Interrupt lines */ - irq = platform_get_irq_byname(pdev, "eventq"); - if (irq > 0) - smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "priq"); + irq = platform_get_irq_byname(pdev, "combined"); if (irq > 0) - smmu->priq.q.irq = irq; + smmu->combined_irq = irq; + else { + irq = platform_get_irq_byname(pdev, "eventq"); + if (irq > 0) + smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; + irq = platform_get_irq_byname(pdev, "priq"); + if (irq > 0) + smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); - if (irq > 0) - smmu->gerr_irq = irq; + irq = platform_get_irq_byname(pdev, "cmdq-sync"); + if (irq > 0) + smmu->cmdq.q.irq = irq; - if (dev->of_node) { - ret = arm_smmu_device_dt_probe(pdev, smmu); - } else { - ret = arm_smmu_device_acpi_probe(pdev, smmu); - if (ret == -ENODEV) - return ret; + irq = platform_get_irq_byname(pdev, "gerror"); + if (irq > 0) + smmu->gerr_irq = irq; } - - /* Set bypass mode according to firmware probing result */ - bypass = !!ret; - /* Probe the h/w */ ret = arm_smmu_device_hw_probe(smmu); if (ret) @@ -2736,6 +2819,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) iommu_device_set_fwnode(&smmu->iommu, dev->fwnode); ret = iommu_device_register(&smmu->iommu); + if (ret) { + dev_err(dev, "Failed to register iommu\n"); + return ret; + } #ifdef CONFIG_PCI if (pci_bus_type.iommu_ops != &arm_smmu_ops) { @@ -2768,7 +2855,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return 0; } -static struct of_device_id arm_smmu_of_match[] = { +static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7ec30b08b3bd..bc89b4d6c043 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -312,6 +312,14 @@ enum arm_smmu_implementation { CAVIUM_SMMUV2, }; +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_CORELINK_MMU401 +#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 +#endif +#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX +#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 +#endif + struct arm_smmu_s2cr { struct iommu_group *group; int count; @@ -425,10 +433,10 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ + spinlock_t cb_lock; /* Serialises ATS1* ops */ struct iommu_domain domain; }; @@ -1010,6 +1018,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1102,7 +1113,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); + spin_lock_init(&smmu_domain->cb_lock); return &smmu_domain->domain; } @@ -1380,35 +1391,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -1422,10 +1421,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, void __iomem *cb_base; u32 tmp; u64 phys; - unsigned long va; + unsigned long va, flags; cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); + spin_lock_irqsave(&smmu_domain->cb_lock, flags); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; if (smmu->version == ARM_SMMU_V2) @@ -1435,6 +1435,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); @@ -1442,6 +1443,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, } phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); if (phys & CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); @@ -1454,10 +1456,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1465,17 +1465,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && - smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - ret = arm_smmu_iova_to_phys_hard(domain, iova); - } else { - ret = ops->iova_to_phys(ops, iova); - } - - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + return arm_smmu_iova_to_phys_hard(domain, iova); - return ret; + return ops->iova_to_phys(ops, iova); } static bool arm_smmu_capable(enum iommu_cap cap) @@ -2073,6 +2067,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V1; smmu->model = GENERIC_SMMU; break; + case ACPI_IORT_SMMU_CORELINK_MMU401: + smmu->version = ARM_SMMU_V1_64K; + smmu->model = GENERIC_SMMU; + break; case ACPI_IORT_SMMU_V2: smmu->version = ARM_SMMU_V2; smmu->model = GENERIC_SMMU; @@ -2081,6 +2079,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V2; smmu->model = ARM_MMU500; break; + case ACPI_IORT_SMMU_CAVIUM_THUNDERX: + smmu->version = ARM_SMMU_V2; + smmu->model = CAVIUM_SMMUV2; + break; default: ret = -ENODEV; } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8348f366ddd1..f1db86939031 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -314,7 +314,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * If we have devices with different DMA masks, move the free * area cache limit down for the benefit of the smaller one. */ - iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); + iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } @@ -396,13 +396,13 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, dma_addr_t iova, size_t size) { struct iova_domain *iovad = &cookie->iovad; - unsigned long shift = iova_shift(iovad); /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; else - free_iova_fast(iovad, iova >> shift, size >> shift); + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); } static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, @@ -617,11 +617,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - size_t iova_off = iova_offset(iovad, phys); + size_t iova_off = 0; dma_addr_t iova; - size = iova_align(iovad, size + iova_off); + if (cookie->type == IOMMU_DMA_IOVA_COOKIE) { + iova_off = iova_offset(&cookie->iovad, phys); + size = iova_align(&cookie->iovad, size + iova_off); + } + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) return DMA_ERROR_CODE; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 90ab0115d78e..c9bd5c8b162e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -481,7 +481,7 @@ struct deferred_flush_data { struct deferred_flush_table *tables; }; -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; @@ -2055,11 +2055,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } } pgd = domain->pgd; @@ -2387,7 +2390,7 @@ static struct dmar_domain *find_domain(struct device *dev) /* No lock here, assumes no domain exit in normal case */ info = dev->archdata.iommu; - if (info) + if (likely(info)) return info->domain; return NULL; } @@ -3475,7 +3478,7 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; @@ -3522,18 +3525,6 @@ out: return domain; } -static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) -{ - struct device_domain_info *info; - - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; - if (likely(info)) - return info->domain; - - return __get_valid_domain_for_dev(dev); -} - /* Check if the dev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { @@ -3722,10 +3713,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, struct intel_iommu *iommu; struct deferred_flush_entry *entry; struct deferred_flush_data *flush_data; - unsigned int cpuid; - cpuid = get_cpu(); - flush_data = per_cpu_ptr(&deferred_flush, cpuid); + flush_data = raw_cpu_ptr(&deferred_flush); /* Flush all CPUs' entries to avoid deferring too much. If * this becomes a bottleneck, can just flush us, and rely on @@ -3758,8 +3747,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, } flush_data->size++; spin_unlock_irqrestore(&flush_data->lock, flags); - - put_cpu(); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3970,7 +3957,7 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) return !dma_addr; } -struct dma_map_ops intel_dma_ops = { +const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, .map_sg = intel_map_sg, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 23c427602c55..f167c0d84ebf 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) } EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); +int intel_svm_is_pasid_valid(struct device *dev, int pasid) +{ + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + /* init_mm is used in this case */ + if (!svm->mm) + ret = 1; + else if (atomic_read(&svm->mm->mm_users) > 0) + ret = 1; + else + ret = 0; + + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); + /* Page request queue descriptor */ struct page_req_dsc { u64 srr:1; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a190cbd76ef7..f7ef4a5d4785 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; * the dmar_global_lock. */ static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); -static struct irq_domain_ops intel_ir_domain_ops; +static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); static int __init parse_ioapics_under_ir(void); @@ -1396,7 +1396,7 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, modify_irte(&data->irq_2_iommu, &entry); } -static struct irq_domain_ops intel_ir_domain_ops = { +static const struct irq_domain_ops intel_ir_domain_ops = { .alloc = intel_irq_remapping_alloc, .free = intel_irq_remapping_free, .activate = intel_irq_remapping_activate, diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8d6ca28c3e1f..af330f513653 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt +#include <linux/atomic.h> #include <linux/dma-mapping.h> #include <linux/gfp.h> #include <linux/iommu.h> @@ -39,6 +40,7 @@ #include <linux/kmemleak.h> #include <linux/sizes.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/types.h> #include <asm/barrier.h> @@ -92,7 +94,8 @@ #define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1 #define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0) -#define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE)) +#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \ + ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE)) /* Page table bits */ #define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl))) @@ -167,6 +170,7 @@ struct arm_v7s_io_pgtable { arm_v7s_iopte *pgd; struct kmem_cache *l2_tables; + spinlock_t split_lock; }; static dma_addr_t __arm_v7s_dma_addr(void *pages) @@ -186,7 +190,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl) static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl); void *table = NULL; @@ -195,7 +200,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); - if (table && !selftest_running) { + if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -224,10 +229,11 @@ out_free: static void __arm_v7s_free_table(void *table, int lvl, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -239,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -280,6 +286,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, else if (prot & IOMMU_CACHE) pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; + pte |= ARM_V7S_PTE_TYPE_PAGE; + if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) + pte |= ARM_V7S_ATTR_NS_SECTION; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) + pte |= ARM_V7S_ATTR_MTK_4GB; + return pte; } @@ -352,7 +365,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, int lvl, int num_entries, arm_v7s_iopte *ptep) { struct io_pgtable_cfg *cfg = &data->iop.cfg; - arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg); + arm_v7s_iopte pte; int i; for (i = 0; i < num_entries; i++) @@ -374,13 +387,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return -EEXIST; } - pte |= ARM_V7S_PTE_TYPE_PAGE; - if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) - pte |= ARM_V7S_ATTR_NS_SECTION; - - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) - pte |= ARM_V7S_ATTR_MTK_4GB; - + pte = arm_v7s_prot_to_pte(prot, lvl, cfg); if (num_entries > 1) pte = arm_v7s_pte_to_cont(pte, lvl); @@ -390,6 +397,30 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return 0; } +static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, + arm_v7s_iopte *ptep, + arm_v7s_iopte curr, + struct io_pgtable_cfg *cfg) +{ + arm_v7s_iopte old, new; + + new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) + new |= ARM_V7S_ATTR_NS_TABLE; + + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); + + old = cmpxchg_relaxed(ptep, curr, new); + __arm_v7s_pte_sync(ptep, 1, cfg); + + return old; +} + static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, phys_addr_t paddr, size_t size, int prot, int lvl, arm_v7s_iopte *ptep) @@ -411,20 +442,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data); if (!cptep) return -ENOMEM; - pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_V7S_ATTR_NS_TABLE; + pte = arm_v7s_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_v7s_free_table(cptep, lvl + 1, data); + } else { + /* We've no easy way of knowing if it's synced yet, so... */ + __arm_v7s_pte_sync(ptep, 1, cfg); + } - __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { + if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); - } else { + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -477,66 +511,73 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) kfree(data); } -static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, - unsigned long iova, int idx, int lvl, - arm_v7s_iopte *ptep) +static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, + unsigned long iova, int idx, int lvl, + arm_v7s_iopte *ptep) { struct io_pgtable *iop = &data->iop; arm_v7s_iopte pte; size_t size = ARM_V7S_BLOCK_SIZE(lvl); int i; + /* Check that we didn't lose a race to get the lock */ + pte = *ptep; + if (!arm_v7s_pte_is_cont(pte, lvl)) + return pte; + ptep -= idx & (ARM_V7S_CONT_PAGES - 1); - pte = arm_v7s_cont_to_pte(*ptep, lvl); - for (i = 0; i < ARM_V7S_CONT_PAGES; i++) { - ptep[i] = pte; - pte += size; - } + pte = arm_v7s_cont_to_pte(pte, lvl); + for (i = 0; i < ARM_V7S_CONT_PAGES; i++) + ptep[i] = pte + i * size; __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); size *= ARM_V7S_CONT_PAGES; io_pgtable_tlb_add_flush(iop, iova, size, size, true); io_pgtable_tlb_sync(iop); + return pte; } static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, unsigned long iova, size_t size, - arm_v7s_iopte *ptep) + arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep) { - unsigned long blk_start, blk_end, blk_size; - phys_addr_t blk_paddr; - arm_v7s_iopte table = 0; - int prot = arm_v7s_pte_to_prot(*ptep, 1); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_v7s_iopte pte, *tablep; + int i, unmap_idx, num_entries, num_ptes; - blk_size = ARM_V7S_BLOCK_SIZE(1); - blk_start = iova & ARM_V7S_LVL_MASK(1); - blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1); - blk_paddr = *ptep & ARM_V7S_LVL_MASK(1); + tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data); + if (!tablep) + return 0; /* Bytes unmapped */ - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_v7s_iopte *tablep; + num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_entries = size >> ARM_V7S_LVL_SHIFT(2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); + if (num_entries > 1) + pte = arm_v7s_pte_to_cont(pte, 2); + + for (i = 0; i < num_ptes; i += num_entries, pte += size) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_v7s_map expects a pointer to the start of the table */ - tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1); - if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, 1); - __arm_v7s_free_table(tablep, 2, data); - } - return 0; /* Bytes unmapped */ - } + __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); } - __arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_v7s_free_table(tablep, 2, data); + + if (!ARM_V7S_PTE_IS_TABLE(pte, 1)) + return 0; + + tablep = iopte_deref(pte, 1); + return __arm_v7s_unmap(data, iova, size, 2, tablep); + } + + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -555,17 +596,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, idx = ARM_V7S_LVL_IDX(iova, lvl); ptep += idx; do { - if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i]))) + pte[i] = READ_ONCE(ptep[i]); + if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i]))) return 0; - pte[i] = ptep[i]; } while (++i < num_entries); /* * If we've hit a contiguous 'large page' entry at this level, it * needs splitting first, unless we're unmapping the whole lot. + * + * For splitting, we can't rewrite 16 PTEs atomically, and since we + * can't necessarily assume TEX remap we don't have a software bit to + * mark live entries being split. In practice (i.e. DMA API code), we + * will never be splitting large pages anyway, so just wrap this edge + * case in a lock for the sake of correctness and be done with it. */ - if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) - arm_v7s_split_cont(data, iova, idx, lvl, ptep); + if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) { + unsigned long flags; + + spin_lock_irqsave(&data->split_lock, flags); + pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep); + spin_unlock_irqrestore(&data->split_lock, flags); + } /* If the size matches this level, we're in the right place */ if (num_entries) { @@ -593,7 +645,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_v7s_split_blk_unmap(data, iova, size, ptep); + return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); } /* Keep on walkin' */ @@ -623,7 +675,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)]; + ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -651,7 +704,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | - IO_PGTABLE_QUIRK_ARM_MTK_4GB)) + IO_PGTABLE_QUIRK_ARM_MTK_4GB | + IO_PGTABLE_QUIRK_NO_DMA)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ @@ -663,6 +717,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (!data) return NULL; + spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), @@ -749,7 +804,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); } -static struct iommu_gather_ops dummy_tlb_ops = { +static const struct iommu_gather_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, @@ -768,7 +823,7 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS, + .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6e5df5e0a3bd..b182039862c5 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt +#include <linux/atomic.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/sizes.h> @@ -99,6 +100,8 @@ #define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) +/* Software bit for solving coherency races */ +#define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) @@ -217,7 +220,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, if (!pages) return NULL; - if (!selftest_running) { + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -243,40 +246,64 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages_exact(pages, size); } +static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, + struct io_pgtable_cfg *cfg) +{ + dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), + sizeof(*ptep), DMA_TO_DEVICE); +} + static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, struct io_pgtable_cfg *cfg) { *ptep = pte; - if (!selftest_running) - dma_sync_single_for_device(cfg->iommu_dev, - __arm_lpae_dma_addr(ptep), - sizeof(pte), DMA_TO_DEVICE); + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + __arm_lpae_sync_pte(ptep, cfg); } static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, int lvl, arm_lpae_iopte *ptep); +static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + phys_addr_t paddr, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); +} + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep) { - arm_lpae_iopte pte = prot; - struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte = *ptep; - if (iopte_leaf(*ptep, lvl)) { + if (iopte_leaf(pte, lvl)) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; - } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { + } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { /* * We need to unmap and free the old table before * overwriting it with a block entry. @@ -289,19 +316,40 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, return -EINVAL; } + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); + return 0; +} + +static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, + arm_lpae_iopte *ptep, + arm_lpae_iopte curr, + struct io_pgtable_cfg *cfg) +{ + arm_lpae_iopte old, new; + + new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; + new |= ARM_LPAE_PTE_NSTABLE; - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - pte |= ARM_LPAE_PTE_TYPE_PAGE; - else - pte |= ARM_LPAE_PTE_TYPE_BLOCK; + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; - pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + old = cmpxchg64_relaxed(ptep, curr, new); - __arm_lpae_set_pte(ptep, pte, cfg); - return 0; + if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || + (old & ARM_LPAE_PTE_SW_SYNC)) + return old; + + /* Even if it's not ours, there's no point waiting; just kick it */ + __arm_lpae_sync_pte(ptep, cfg); + if (old == curr) + WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); + + return old; } static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, @@ -310,6 +358,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, { arm_lpae_iopte *cptep, pte; size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + size_t tblsz = ARM_LPAE_GRANULE(data); struct io_pgtable_cfg *cfg = &data->iop.cfg; /* Find our entry at the current level */ @@ -324,20 +373,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { - cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), - GFP_ATOMIC, cfg); + cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg); if (!cptep) return -ENOMEM; - pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NSTABLE; - __arm_lpae_set_pte(ptep, pte, cfg); - } else if (!iopte_leaf(pte, lvl)) { + pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_lpae_free_pages(cptep, tblsz, cfg); + } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && + !(pte & ARM_LPAE_PTE_SW_SYNC)) { + __arm_lpae_sync_pte(ptep, cfg); + } + + if (pte && !iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); - } else { + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -452,40 +504,55 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop) static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, - arm_lpae_iopte prot, int lvl, - arm_lpae_iopte *ptep, size_t blk_size) + arm_lpae_iopte blk_pte, int lvl, + arm_lpae_iopte *ptep) { - unsigned long blk_start, blk_end; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte, *tablep; phys_addr_t blk_paddr; - arm_lpae_iopte table = 0; + size_t tablesz = ARM_LPAE_GRANULE(data); + size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + int i, unmap_idx = -1; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; - blk_start = iova & ~(blk_size - 1); - blk_end = blk_start + blk_size; - blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); + if (!tablep) + return 0; /* Bytes unmapped */ - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_lpae_iopte *tablep; + if (size == split_sz) + unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data); + blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift; + pte = iopte_prot(blk_pte); + + for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_lpae_map expects a pointer to the start of the table */ - tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); - if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, data); - __arm_lpae_free_pgtable(data, lvl + 1, tablep); - } - return 0; /* Bytes unmapped */ - } + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); } - __arm_lpae_set_pte(ptep, table, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_lpae_free_pages(tablep, tablesz, cfg); + /* + * We may race against someone unmapping another part of this + * block, but anything else is invalid. We can't misinterpret + * a page entry here since we're never at the last level. + */ + if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE) + return 0; + + tablep = iopte_deref(pte, data); + } + + if (unmap_idx < 0) + return __arm_lpae_unmap(data, iova, size, lvl, tablep); + + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -495,19 +562,18 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; struct io_pgtable *iop = &data->iop; - size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); /* Something went horribly wrong and we ran out of page table */ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) return 0; ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = *ptep; + pte = READ_ONCE(*ptep); if (WARN_ON(!pte)) return 0; /* If the size matches this level, we're in the right place */ - if (size == blk_size) { + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { __arm_lpae_set_pte(ptep, 0, &iop->cfg); if (!iopte_leaf(pte, lvl)) { @@ -527,9 +593,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_lpae_split_blk_unmap(data, iova, size, - iopte_prot(pte), lvl, ptep, - blk_size); + return arm_lpae_split_blk_unmap(data, iova, size, pte, + lvl + 1, ptep); } /* Keep on walkin' */ @@ -565,7 +630,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; /* Grab the IOPTE we're interested in */ - pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); /* Valid entry? */ if (!pte) @@ -673,7 +739,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -762,7 +828,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1066,6 +1132,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, + .quirks = IO_PGTABLE_QUIRK_NO_DMA, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 969d82cc92ca..524263a7ae6f 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -65,11 +65,17 @@ struct io_pgtable_cfg { * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). + * + * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever + * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a + * software-emulated IOMMU), such that pagetable updates need not + * be treated as explicit DMA data. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) + #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index cf7ca7e70777..3f6ea160afed 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -915,13 +915,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) */ struct iommu_group *generic_device_group(struct device *dev) { - struct iommu_group *group; - - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /* @@ -988,11 +982,7 @@ struct iommu_group *pci_device_group(struct device *dev) return group; /* No shared group found, allocate new */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /** @@ -1020,6 +1010,9 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (ops && ops->device_group) group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + return ERR_PTR(-EINVAL); + if (IS_ERR(group)) return group; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 5c88ba70e4e0..246f14c83944 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/bitops.h> +#include <linux/cpu.h> static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, @@ -48,7 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit; + iovad->dma_32bit_pfn = pfn_32bit + 1; init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -63,7 +64,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) struct rb_node *prev_node = rb_prev(iovad->cached32_node); struct iova *curr_iova = rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo - 1; + *limit_pfn = curr_iova->pfn_lo; return prev_node; } } @@ -135,7 +136,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, static unsigned int iova_get_pad_size(unsigned int size, unsigned int limit_pfn) { - return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); + return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); } static int __alloc_and_insert_iova_range(struct iova_domain *iovad, @@ -155,18 +156,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn < curr_iova->pfn_lo) + if (limit_pfn <= curr_iova->pfn_lo) { goto move_left; - else if (limit_pfn < curr_iova->pfn_hi) - goto adjust_limit_pfn; - else { + } else if (limit_pfn > curr_iova->pfn_hi) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) break; /* found a free slot */ } -adjust_limit_pfn: - limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0; + limit_pfn = curr_iova->pfn_lo; move_left: prev = curr; curr = rb_prev(curr); @@ -182,7 +180,7 @@ move_left: } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_lo = limit_pfn - (size + pad_size); new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ @@ -269,7 +267,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, new_iova, size_aligned); if (ret) { @@ -398,10 +396,8 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; - preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); - preempt_enable(); goto retry; } @@ -729,7 +725,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -759,7 +755,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -793,7 +788,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -815,7 +810,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b7e14ee863f9..2a38aa15be17 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -8,7 +8,9 @@ * the Free Software Foundation; version 2 of the License. */ +#include <linux/bitmap.h> #include <linux/delay.h> +#include <linux/dma-iommu.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/export.h> @@ -21,17 +23,24 @@ #include <linux/sizes.h> #include <linux/slab.h> +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include <asm/dma-iommu.h> #include <asm/pgalloc.h> +#endif #include "io-pgtable.h" +#define IPMMU_CTX_MAX 1 + struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct list_head list; unsigned int num_utlbs; + spinlock_t lock; /* Protects ctx and domains[] */ + DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); + struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; struct dma_iommu_mapping *mapping; }; @@ -47,10 +56,12 @@ struct ipmmu_vmsa_domain { spinlock_t lock; /* Protects mappings */ }; -struct ipmmu_vmsa_archdata { +struct ipmmu_vmsa_iommu_priv { struct ipmmu_vmsa_device *mmu; unsigned int *utlbs; unsigned int num_utlbs; + struct device *dev; + struct list_head list; }; static DEFINE_SPINLOCK(ipmmu_devices_lock); @@ -61,6 +72,24 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } + +static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) +{ +#if defined(CONFIG_ARM) + return dev->archdata.iommu; +#else + return dev->iommu_fwspec->iommu_priv; +#endif +} +static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p) +{ +#if defined(CONFIG_ARM) + dev->archdata.iommu = p; +#else + dev->iommu_fwspec->iommu_priv = p; +#endif +} + #define TLB_LOOP_TIMEOUT 100 /* 100us */ /* ----------------------------------------------------------------------------- @@ -293,9 +322,29 @@ static struct iommu_gather_ops ipmmu_gather_ops = { * Domain/Context Management */ +static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, + struct ipmmu_vmsa_domain *domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&mmu->lock, flags); + + ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); + if (ret != IPMMU_CTX_MAX) { + mmu->domains[ret] = domain; + set_bit(ret, mmu->ctx); + } + + spin_unlock_irqrestore(&mmu->lock, flags); + + return ret; +} + static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; + int ret; /* * Allocate the page table operations. @@ -309,7 +358,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * non-secure mode. */ domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; domain->cfg.ias = 32; domain->cfg.oas = 40; domain->cfg.tlb = &ipmmu_gather_ops; @@ -327,10 +376,15 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return -EINVAL; /* - * TODO: When adding support for multiple contexts, find an unused - * context. + * Find an unused context. */ - domain->context_id = 0; + ret = ipmmu_domain_allocate_context(domain->mmu, domain); + if (ret == IPMMU_CTX_MAX) { + free_io_pgtable_ops(domain->iop); + return -EBUSY; + } + + domain->context_id = ret; /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; @@ -372,6 +426,19 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return 0; } +static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, + unsigned int context_id) +{ + unsigned long flags; + + spin_lock_irqsave(&mmu->lock, flags); + + clear_bit(context_id, mmu->ctx); + mmu->domains[context_id] = NULL; + + spin_unlock_irqrestore(&mmu->lock, flags); +} + static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { /* @@ -382,6 +449,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) */ ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); + ipmmu_domain_free_context(domain->mmu, domain->context_id); } /* ----------------------------------------------------------------------------- @@ -439,29 +507,35 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) static irqreturn_t ipmmu_irq(int irq, void *dev) { struct ipmmu_vmsa_device *mmu = dev; - struct iommu_domain *io_domain; - struct ipmmu_vmsa_domain *domain; + irqreturn_t status = IRQ_NONE; + unsigned int i; + unsigned long flags; - if (!mmu->mapping) - return IRQ_NONE; + spin_lock_irqsave(&mmu->lock, flags); + + /* + * Check interrupts for all active contexts. + */ + for (i = 0; i < IPMMU_CTX_MAX; i++) { + if (!mmu->domains[i]) + continue; + if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) + status = IRQ_HANDLED; + } - io_domain = mmu->mapping->domain; - domain = to_vmsa_domain(io_domain); + spin_unlock_irqrestore(&mmu->lock, flags); - return ipmmu_domain_irq(domain); + return status; } /* ----------------------------------------------------------------------------- * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -487,8 +561,8 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; - struct ipmmu_vmsa_device *mmu = archdata->mmu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; @@ -513,15 +587,16 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; - } + } else + dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); spin_unlock_irqrestore(&domain->lock, flags); if (ret < 0) return ret; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_enable(domain, archdata->utlbs[i]); + for (i = 0; i < priv->num_utlbs; ++i) + ipmmu_utlb_enable(domain, priv->utlbs[i]); return 0; } @@ -529,12 +604,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, static void ipmmu_detach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_disable(domain, archdata->utlbs[i]); + for (i = 0; i < priv->num_utlbs; ++i) + ipmmu_utlb_disable(domain, priv->utlbs[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -595,22 +670,15 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, return 0; } -static int ipmmu_add_device(struct device *dev) +static int ipmmu_init_platform_device(struct device *dev) { - struct ipmmu_vmsa_archdata *archdata; + struct ipmmu_vmsa_iommu_priv *priv; struct ipmmu_vmsa_device *mmu; - struct iommu_group *group = NULL; unsigned int *utlbs; unsigned int i; int num_utlbs; int ret = -ENODEV; - if (dev->archdata.iommu) { - dev_warn(dev, "IOMMU driver already assigned to device %s\n", - dev_name(dev)); - return -EINVAL; - } - /* Find the master corresponding to the device. */ num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", @@ -647,6 +715,46 @@ static int ipmmu_add_device(struct device *dev) } } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto error; + } + + priv->mmu = mmu; + priv->utlbs = utlbs; + priv->num_utlbs = num_utlbs; + priv->dev = dev; + set_priv(dev, priv); + return 0; + +error: + kfree(utlbs); + return ret; +} + +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) + +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + return __ipmmu_domain_alloc(type); +} + +static int ipmmu_add_device(struct device *dev) +{ + struct ipmmu_vmsa_device *mmu = NULL; + struct iommu_group *group; + int ret; + + if (to_priv(dev)) { + dev_warn(dev, "IOMMU driver already assigned to device %s\n", + dev_name(dev)); + return -EINVAL; + } + /* Create a device group and add the device to it. */ group = iommu_group_alloc(); if (IS_ERR(group)) { @@ -664,16 +772,9 @@ static int ipmmu_add_device(struct device *dev) goto error; } - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); - if (!archdata) { - ret = -ENOMEM; + ret = ipmmu_init_platform_device(dev); + if (ret < 0) goto error; - } - - archdata->mmu = mmu; - archdata->utlbs = utlbs; - archdata->num_utlbs = num_utlbs; - dev->archdata.iommu = archdata; /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate @@ -684,6 +785,7 @@ static int ipmmu_add_device(struct device *dev) * - Make the mapping size configurable ? We currently use a 2GB mapping * at a 1GB offset to ensure that NULL VAs will fault. */ + mmu = to_priv(dev)->mmu; if (!mmu->mapping) { struct dma_iommu_mapping *mapping; @@ -708,30 +810,30 @@ static int ipmmu_add_device(struct device *dev) return 0; error: - arm_iommu_release_mapping(mmu->mapping); - - kfree(dev->archdata.iommu); - kfree(utlbs); - - dev->archdata.iommu = NULL; + if (mmu) + arm_iommu_release_mapping(mmu->mapping); if (!IS_ERR_OR_NULL(group)) iommu_group_remove_device(dev); + kfree(to_priv(dev)->utlbs); + kfree(to_priv(dev)); + set_priv(dev, NULL); + return ret; } static void ipmmu_remove_device(struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - kfree(archdata->utlbs); - kfree(archdata); + kfree(priv->utlbs); + kfree(priv); - dev->archdata.iommu = NULL; + set_priv(dev, NULL); } static const struct iommu_ops ipmmu_ops = { @@ -748,6 +850,144 @@ static const struct iommu_ops ipmmu_ops = { .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, }; +#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ + +#ifdef CONFIG_IOMMU_DMA + +static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); +static LIST_HEAD(ipmmu_slave_devices); + +static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) +{ + struct iommu_domain *io_domain = NULL; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + io_domain = __ipmmu_domain_alloc(type); + break; + + case IOMMU_DOMAIN_DMA: + io_domain = __ipmmu_domain_alloc(type); + if (io_domain) + iommu_get_dma_cookie(io_domain); + break; + } + + return io_domain; +} + +static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) +{ + switch (io_domain->type) { + case IOMMU_DOMAIN_DMA: + iommu_put_dma_cookie(io_domain); + /* fall-through */ + default: + ipmmu_domain_free(io_domain); + break; + } +} + +static int ipmmu_add_device_dma(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_group *group; + + /* + * Only let through devices that have been verified in xlate() + * We may get called with dev->iommu_fwspec set to NULL. + */ + if (!fwspec || !fwspec->iommu_priv) + return -ENODEV; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + spin_lock(&ipmmu_slave_devices_lock); + list_add(&to_priv(dev)->list, &ipmmu_slave_devices); + spin_unlock(&ipmmu_slave_devices_lock); + return 0; +} + +static void ipmmu_remove_device_dma(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + + spin_lock(&ipmmu_slave_devices_lock); + list_del(&priv->list); + spin_unlock(&ipmmu_slave_devices_lock); + + iommu_group_remove_device(dev); +} + +static struct device *ipmmu_find_sibling_device(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; + bool found = false; + + spin_lock(&ipmmu_slave_devices_lock); + + list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { + if (priv == sibling_priv) + continue; + if (sibling_priv->mmu == priv->mmu) { + found = true; + break; + } + } + + spin_unlock(&ipmmu_slave_devices_lock); + + return found ? sibling_priv->dev : NULL; +} + +static struct iommu_group *ipmmu_find_group_dma(struct device *dev) +{ + struct iommu_group *group; + struct device *sibling; + + sibling = ipmmu_find_sibling_device(dev); + if (sibling) + group = iommu_group_get(sibling); + if (!sibling || IS_ERR(group)) + group = generic_device_group(dev); + + return group; +} + +static int ipmmu_of_xlate_dma(struct device *dev, + struct of_phandle_args *spec) +{ + /* If the IPMMU device is disabled in DT then return error + * to make sure the of_iommu code does not install ops + * even though the iommu device is disabled + */ + if (!of_device_is_available(spec->np)) + return -ENODEV; + + return ipmmu_init_platform_device(dev); +} + +static const struct iommu_ops ipmmu_ops = { + .domain_alloc = ipmmu_domain_alloc_dma, + .domain_free = ipmmu_domain_free_dma, + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = ipmmu_iova_to_phys, + .add_device = ipmmu_add_device_dma, + .remove_device = ipmmu_remove_device_dma, + .device_group = ipmmu_find_group_dma, + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + .of_xlate = ipmmu_of_xlate_dma, +}; + +#endif /* CONFIG_IOMMU_DMA */ + /* ----------------------------------------------------------------------------- * Probe/remove and init */ @@ -768,11 +1008,6 @@ static int ipmmu_probe(struct platform_device *pdev) int irq; int ret; - if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { - dev_err(&pdev->dev, "missing platform data\n"); - return -EINVAL; - } - mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); if (!mmu) { dev_err(&pdev->dev, "cannot allocate device data\n"); @@ -781,6 +1016,8 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->dev = &pdev->dev; mmu->num_utlbs = 32; + spin_lock_init(&mmu->lock); + bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -840,7 +1077,9 @@ static int ipmmu_remove(struct platform_device *pdev) list_del(&mmu->list); spin_unlock(&ipmmu_devices_lock); +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); +#endif ipmmu_device_reset(mmu); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a27ef570c328..bc1efbfb9ddf 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -18,6 +18,7 @@ #include <linux/clk.h> #include <linux/component.h> #include <linux/device.h> +#include <linux/dma-mapping.h> #include <linux/dma-iommu.h> #include <linux/err.h> #include <linux/interrupt.h> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 9f44ee8ea1bc..19779b88a479 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -118,6 +118,7 @@ static const struct iommu_ops ops = iommu_ops_from_fwnode(fwnode); if ((ops && !ops->of_xlate) || + !of_device_is_available(iommu_spec->np) || (!ops && !of_iommu_driver_present(iommu_spec->np))) return NULL; @@ -236,6 +237,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, ops = ERR_PTR(err); } + /* Ignore all other errors apart from EPROBE_DEFER */ + if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) { + dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops)); + ops = NULL; + } + return ops; } diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 95dfca36ccb9..641e035cf866 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1309,7 +1309,7 @@ static void omap_iommu_remove_device(struct device *dev) static struct iommu_group *omap_iommu_device_group(struct device *dev) { struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-EINVAL); if (arch_data->iommu_dev) group = arch_data->iommu_dev->group; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 179e636a4d91..8788640756a7 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -165,20 +165,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, static int s390_iommu_add_device(struct device *dev) { - struct iommu_group *group; - int rc; + struct iommu_group *group = iommu_group_get_for_dev(dev); - group = iommu_group_get(dev); - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - } + if (IS_ERR(group)) + return PTR_ERR(group); - rc = iommu_group_add_device(group, dev); iommu_group_put(group); - return rc; + return 0; } static void s390_iommu_remove_device(struct device *dev) @@ -344,6 +338,7 @@ static struct iommu_ops s390_iommu_ops = { .iova_to_phys = s390_iommu_iova_to_phys, .add_device = s390_iommu_add_device, .remove_device = s390_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, }; |