diff options
Diffstat (limited to 'drivers/iommu')
30 files changed, 1317 insertions, 1779 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 2ef0b3918440..1f111b399bca 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -349,7 +349,7 @@ config S390_AP_IOMMU is not implemented as it is not necessary for VFIO. config MTK_IOMMU - bool "MTK IOMMU Support" + tristate "MediaTek IOMMU Support" depends on ARCH_MEDIATEK || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API @@ -364,7 +364,7 @@ config MTK_IOMMU If unsure, say N here. config MTK_IOMMU_V1 - bool "MTK IOMMU Version 1 (M4U gen1) Support" + tristate "MediaTek IOMMU Version 1 (M4U gen1) Support" depends on ARM depends on ARCH_MEDIATEK || COMPILE_TEST select ARM_DMA_USE_IOMMU diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 5925b6af2123..c0fb0ba88143 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,5 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o +obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 026ce7f8d993..55dd38d814d9 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,7 +11,6 @@ #include "amd_iommu_types.h" -extern int amd_iommu_get_num_iommus(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); @@ -65,7 +64,6 @@ extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); -extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); #ifdef CONFIG_IRQ_REMAP extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 6937e3674a16..94c1a7a9876d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -693,7 +693,6 @@ struct iommu_dev_data { } ats; /* ATS state */ bool pri_tlp; /* PASID TLB required for PPR completions */ - u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ bool defer_attach; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9126efcbaf2c..54f6e99528f3 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include <linux/acpi.h> #include <linux/list.h> #include <linux/bitmap.h> -#include <linux/delay.h> #include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/interrupt.h> @@ -208,7 +207,6 @@ u16 *amd_iommu_alias_table; * for a specific device. It is also indexed by the PCI device id. */ struct amd_iommu **amd_iommu_rlookup_table; -EXPORT_SYMBOL(amd_iommu_rlookup_table); /* * This table is used to find the irq remapping table for a given device id @@ -257,8 +255,6 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -268,7 +264,6 @@ bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); } -EXPORT_SYMBOL(translation_pre_enabled); static void clear_translation_pre_enabled(struct amd_iommu *iommu) { @@ -1717,53 +1712,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) +static void init_iommu_perf_ctr(struct amd_iommu *iommu) { - int retry; + u64 val; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg, save_src; if (!iommu_feature(iommu, FEATURE_PC)) return; amd_iommu_pc_present = true; - /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || - iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) - goto pc_false; - - /* - * Disable power gating by programing the performance counter - * source to 20 (i.e. counts the reads and writes from/to IOMMU - * Reserved Register [MMIO Offset 1FF8h] that are ignored.), - * which never get incremented during this init phase. - * (Note: The event is also deprecated.) - */ - val = 20; - if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) - goto pc_false; - - /* Check if the performance counters can be written to */ - val = 0xabcd; - for (retry = 5; retry; retry--) { - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || - iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || - val2) - break; - - /* Wait about 20 msec for power gating to disable and retry. */ - msleep(20); - } - - /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || - iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) - goto pc_false; - - if (val != val2) - goto pc_false; - pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); @@ -1771,11 +1729,6 @@ static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) iommu->max_counters = (u8) ((val >> 7) & 0xf); return; - -pc_false: - pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n"); - amd_iommu_pc_present = false; - return; } static ssize_t amd_iommu_show_cap(struct device *dev, @@ -1837,7 +1790,7 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * IVHD and MMIO conflict. */ if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", features, iommu->features); } @@ -2714,7 +2667,6 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; int i, remap_cache_sz, ret; acpi_status status; - u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2804,16 +2756,6 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; - /* Disable IOMMU if there's Stoney Ridge graphics */ - for (i = 0; i < 32; i++) { - pci_id = read_pci_config(0, i, 0, 0); - if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { - pr_info("Disable IOMMU on Stoney Ridge\n"); - amd_iommu_disabled = true; - break; - } - } - /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2880,6 +2822,7 @@ static bool detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; + int i; status = acpi_get_table("IVRS", 0, &ivrs_base); if (status == AE_NOT_FOUND) @@ -2892,6 +2835,17 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + /* Don't use IOMMU if there is Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + u32 pci_id; + + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + return false; + } + } + /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); @@ -2918,12 +2872,12 @@ static int __init state_next(void) } break; case IOMMU_IVRS_DETECTED: - ret = early_amd_iommu_init(); - init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; - if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled\n"); + if (amd_iommu_disabled) { init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; + } else { + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; } break; case IOMMU_ACPI_FINISHED: @@ -3001,8 +2955,11 @@ int __init amd_iommu_prepare(void) amd_iommu_irq_remap = true; ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); - if (ret) + if (ret) { + amd_iommu_irq_remap = false; return ret; + } + return amd_iommu_irq_remap ? 0 : -ENODEV; } @@ -3273,7 +3230,6 @@ struct amd_iommu *get_amd_iommu(unsigned int idx) return iommu; return NULL; } -EXPORT_SYMBOL(get_amd_iommu); /**************************************************************************** * @@ -3355,7 +3311,6 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); } -EXPORT_SYMBOL(amd_iommu_pc_get_reg); int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) { @@ -3364,4 +3319,3 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } -EXPORT_SYMBOL(amd_iommu_pc_set_reg); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a69a8b573e40..80e8e1916dd1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -290,15 +290,6 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev) return true; } -static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) -{ - struct iommu_dev_data *dev_data; - - dev_data = dev_iommu_priv_get(&pdev->dev); - - return dev_data->errata & (1 << erratum) ? true : false; -} - /* * This function checks if the driver got a valid device from the caller to * avoid dereferencing invalid pointers. @@ -861,33 +852,58 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); } -static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - size_t size, u16 domid, int pde) +/* + * Builds an invalidation address which is suitable for one page or multiple + * pages. Sets the size bit (S) as needed is more than one page is flushed. + */ +static inline u64 build_inv_address(u64 address, size_t size) { - u64 pages; - bool s; + u64 pages, end, msb_diff; pages = iommu_num_pages(address, size, PAGE_SIZE); - s = false; - if (pages > 1) { + if (pages == 1) + return address & PAGE_MASK; + + end = address + size - 1; + + /* + * msb_diff would hold the index of the most significant bit that + * flipped between the start and end. + */ + msb_diff = fls64(end ^ address) - 1; + + /* + * Bits 63:52 are sign extended. If for some reason bit 51 is different + * between the start and the end, invalidate everything. + */ + if (unlikely(msb_diff > 51)) { + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + } else { /* - * If we have to flush more than one page, flush all - * TLB entries for this domain + * The msb-bit must be clear on the address. Just set all the + * lower bits. */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = true; + address |= 1ull << (msb_diff - 1); } + /* Clear bits 11:0 */ address &= PAGE_MASK; + /* Set the size bit - we flush more than one 4kb page */ + return address | CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 inv_address = build_inv_address(address, size); + memset(cmd, 0, sizeof(*cmd)); cmd->data[1] |= domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); + cmd->data[2] = lower_32_bits(inv_address); + cmd->data[3] = upper_32_bits(inv_address); CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - if (s) /* size bit - we flush more than one 4kb page */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -895,32 +911,15 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, u64 address, size_t size) { - u64 pages; - bool s; - - pages = iommu_num_pages(address, size, PAGE_SIZE); - s = false; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = true; - } - - address &= PAGE_MASK; + u64 inv_address = build_inv_address(address, size); memset(cmd, 0, sizeof(*cmd)); cmd->data[0] = devid; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); + cmd->data[2] = lower_32_bits(inv_address); + cmd->data[3] = upper_32_bits(inv_address); CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); - if (s) - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; } static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid, @@ -1531,33 +1530,9 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev) pci_disable_pasid(pdev); } -/* FIXME: Change generic reset-function to do the same */ -static int pri_reset_while_enabled(struct pci_dev *pdev) -{ - u16 control; - int pos; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) - return -EINVAL; - - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); - control |= PCI_PRI_CTRL_RESET; - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); - - return 0; -} - static int pdev_iommuv2_enable(struct pci_dev *pdev) { - bool reset_enable; - int reqs, ret; - - /* FIXME: Hardcode number of outstanding requests for now */ - reqs = 32; - if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) - reqs = 1; - reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); + int ret; /* Only allow access to user-accessible pages */ ret = pci_enable_pasid(pdev, 0); @@ -1570,16 +1545,11 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) goto out_err; /* Enable PRI */ - ret = pci_enable_pri(pdev, reqs); + /* FIXME: Hardcode number of outstanding requests for now */ + ret = pci_enable_pri(pdev, 32); if (ret) goto out_err; - if (reset_enable) { - ret = pri_reset_while_enabled(pdev); - if (ret) - goto out_err; - } - ret = pci_enable_ats(pdev, PAGE_SHIFT); if (ret) goto out_err; @@ -1715,9 +1685,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) return ERR_PTR(-ENODEV); devid = get_device_id(dev); - if (devid < 0) - return ERR_PTR(devid); - iommu = amd_iommu_rlookup_table[devid]; if (dev_iommu_priv_get(dev)) @@ -1771,26 +1738,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) return acpihid_device_group(dev); } -static int amd_iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - return -ENODEV; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = !amd_iommu_unmap_flush; - return 0; - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } -} - /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. @@ -1855,7 +1802,7 @@ int __init amd_iommu_init_dma_ops(void) pr_info("IO/TLB flush on unmap enabled\n"); else pr_info("Lazy IO/TLB flushing enabled\n"); - + iommu_set_dma_strict(amd_iommu_unmap_flush); return 0; } @@ -2019,16 +1966,12 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + int devid = get_device_id(dev); struct amd_iommu *iommu; - int devid; if (!check_device(dev)) return; - devid = get_device_id(dev); - if (devid < 0) - return; - if (dev_data->domain != NULL) detach_device(dev); @@ -2257,7 +2200,6 @@ const struct iommu_ops amd_iommu_ops = { .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, - .domain_get_attr = amd_iommu_domain_get_attr, .get_resv_regions = amd_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, @@ -2310,9 +2252,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) unsigned long flags; int levels, ret; - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - /* Number of GCR3 table levels required */ for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) levels += 1; @@ -2563,52 +2502,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, } EXPORT_SYMBOL(amd_iommu_complete_ppr); -struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) -{ - struct protection_domain *pdomain; - struct iommu_dev_data *dev_data; - struct device *dev = &pdev->dev; - struct iommu_domain *io_domain; - - if (!check_device(dev)) - return NULL; - - dev_data = dev_iommu_priv_get(&pdev->dev); - pdomain = dev_data->domain; - io_domain = iommu_get_domain_for_dev(dev); - - if (pdomain == NULL && dev_data->defer_attach) { - dev_data->defer_attach = false; - pdomain = to_pdomain(io_domain); - attach_device(dev, pdomain); - } - - if (pdomain == NULL) - return NULL; - - if (io_domain->type != IOMMU_DOMAIN_DMA) - return NULL; - - /* Only return IOMMUv2 domains */ - if (!(pdomain->flags & PD_IOMMUV2_MASK)) - return NULL; - - return &pdomain->domain; -} -EXPORT_SYMBOL(amd_iommu_get_v2_domain); - -void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) -{ - struct iommu_dev_data *dev_data; - - if (!amd_iommu_v2_supported()) - return; - - dev_data = dev_iommu_priv_get(&pdev->dev); - dev_data->errata |= (1 << erratum); -} -EXPORT_SYMBOL(amd_iommu_enable_device_erratum); - int amd_iommu_device_info(struct pci_dev *pdev, struct amd_iommu_device_info *info) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8594b4a83043..6f8ffc86a115 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -245,8 +245,6 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) break; case CMDQ_OP_PREFETCH_CFG: cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid); - cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); - cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; break; case CMDQ_OP_CFGI_CD: cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); @@ -909,8 +907,8 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { - for (i = 0; i < master->num_sids; i++) { - cmd.cfgi.sid = master->sids[i]; + for (i = 0; i < master->num_streams; i++) { + cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } } @@ -1355,6 +1353,29 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } +__maybe_unused +static struct arm_smmu_master * +arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) +{ + struct rb_node *node; + struct arm_smmu_stream *stream; + + lockdep_assert_held(&smmu->streams_mutex); + + node = smmu->streams.rb_node; + while (node) { + stream = rb_entry(node, struct arm_smmu_stream, node); + if (stream->id < sid) + node = node->rb_right; + else if (stream->id > sid) + node = node->rb_left; + else + return stream->master; + } + + return NULL; +} + /* IRQ and event handlers */ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { @@ -1588,8 +1609,8 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); - for (i = 0; i < master->num_sids; i++) { - cmd.atc.sid = master->sids[i]; + for (i = 0; i < master->num_streams; i++) { + cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); } @@ -1632,8 +1653,8 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!master->ats_enabled) continue; - for (i = 0; i < master->num_sids; i++) { - cmd.atc.sid = master->sids[i]; + for (i = 0; i < master->num_streams; i++) { + cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); } } @@ -2017,7 +2038,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; - if (smmu_domain->non_strict) + if (!iommu_get_dma_strict(domain)) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); @@ -2065,13 +2086,13 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) int i, j; struct arm_smmu_device *smmu = master->smmu; - for (i = 0; i < master->num_sids; ++i) { - u32 sid = master->sids[i]; + for (i = 0; i < master->num_streams; ++i) { + u32 sid = master->streams[i].id; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); /* Bridged PCI devices may end up with duplicated IDs */ for (j = 0; j < i; j++) - if (master->sids[j] == sid) + if (master->streams[j].id == sid) break; if (j < i) continue; @@ -2305,6 +2326,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (!gather->pgsize) + return; + arm_smmu_tlb_inv_range_domain(gather->start, gather->end - gather->start + 1, gather->pgsize, true, smmu_domain); @@ -2345,11 +2369,101 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) return sid < limit; } +static int arm_smmu_insert_master(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + int i; + int ret = 0; + struct arm_smmu_stream *new_stream, *cur_stream; + struct rb_node **new_node, *parent_node = NULL; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); + + master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), + GFP_KERNEL); + if (!master->streams) + return -ENOMEM; + master->num_streams = fwspec->num_ids; + + mutex_lock(&smmu->streams_mutex); + for (i = 0; i < fwspec->num_ids; i++) { + u32 sid = fwspec->ids[i]; + + new_stream = &master->streams[i]; + new_stream->id = sid; + new_stream->master = master; + + /* + * Check the SIDs are in range of the SMMU and our stream table + */ + if (!arm_smmu_sid_in_range(smmu, sid)) { + ret = -ERANGE; + break; + } + + /* Ensure l2 strtab is initialised */ + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { + ret = arm_smmu_init_l2_strtab(smmu, sid); + if (ret) + break; + } + + /* Insert into SID tree */ + new_node = &(smmu->streams.rb_node); + while (*new_node) { + cur_stream = rb_entry(*new_node, struct arm_smmu_stream, + node); + parent_node = *new_node; + if (cur_stream->id > new_stream->id) { + new_node = &((*new_node)->rb_left); + } else if (cur_stream->id < new_stream->id) { + new_node = &((*new_node)->rb_right); + } else { + dev_warn(master->dev, + "stream %u already in tree\n", + cur_stream->id); + ret = -EINVAL; + break; + } + } + if (ret) + break; + + rb_link_node(&new_stream->node, parent_node, new_node); + rb_insert_color(&new_stream->node, &smmu->streams); + } + + if (ret) { + for (i--; i >= 0; i--) + rb_erase(&master->streams[i].node, &smmu->streams); + kfree(master->streams); + } + mutex_unlock(&smmu->streams_mutex); + + return ret; +} + +static void arm_smmu_remove_master(struct arm_smmu_master *master) +{ + int i; + struct arm_smmu_device *smmu = master->smmu; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); + + if (!smmu || !master->streams) + return; + + mutex_lock(&smmu->streams_mutex); + for (i = 0; i < fwspec->num_ids; i++) + rb_erase(&master->streams[i].node, &smmu->streams); + mutex_unlock(&smmu->streams_mutex); + + kfree(master->streams); +} + static struct iommu_ops arm_smmu_ops; static struct iommu_device *arm_smmu_probe_device(struct device *dev) { - int i, ret; + int ret; struct arm_smmu_device *smmu; struct arm_smmu_master *master; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -2370,29 +2484,15 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; - master->sids = fwspec->ids; - master->num_sids = fwspec->num_ids; INIT_LIST_HEAD(&master->bonds); dev_iommu_priv_set(dev, master); - /* Check the SIDs are in range of the SMMU and our stream table */ - for (i = 0; i < master->num_sids; i++) { - u32 sid = master->sids[i]; - - if (!arm_smmu_sid_in_range(smmu, sid)) { - ret = -ERANGE; - goto err_free_master; - } - - /* Ensure l2 strtab is initialised */ - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - ret = arm_smmu_init_l2_strtab(smmu, sid); - if (ret) - goto err_free_master; - } - } + ret = arm_smmu_insert_master(smmu, master); + if (ret) + goto err_free_master; - master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); + device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits); + master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits); /* * Note that PASID must be enabled before, and disabled after ATS: @@ -2428,6 +2528,7 @@ static void arm_smmu_release_device(struct device *dev) WARN_ON(arm_smmu_master_sva_enabled(master)); arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); + arm_smmu_remove_master(master); kfree(master); iommu_fwspec_free(dev); } @@ -2449,76 +2550,18 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +static int arm_smmu_enable_nesting(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; - default: - return -ENODEV; - } - break; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = smmu_domain->non_strict; - return 0; - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } -} - -static int arm_smmu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ int ret = 0; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); mutex_lock(&smmu_domain->init_mutex); - - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; - } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; - default: - ret = -ENODEV; - } - break; - case IOMMU_DOMAIN_DMA: - switch(attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - smmu_domain->non_strict = *(int *)data; - break; - default: - ret = -ENODEV; - } - break; - default: - ret = -EINVAL; - } - -out_unlock: + if (smmu_domain->smmu) + ret = -EPERM; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; mutex_unlock(&smmu_domain->init_mutex); + return ret; } @@ -2619,8 +2662,7 @@ static struct iommu_ops arm_smmu_ops = { .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, - .domain_get_attr = arm_smmu_domain_get_attr, - .domain_set_attr = arm_smmu_domain_set_attr, + .enable_nesting = arm_smmu_enable_nesting, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, @@ -2851,6 +2893,9 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; + mutex_init(&smmu->streams_mutex); + smmu->streams = RB_ROOT; + ret = arm_smmu_init_queues(smmu); if (ret) return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f985817c967a..46e8c49214a8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -115,7 +115,7 @@ #define GERROR_PRIQ_ABT_ERR (1 << 3) #define GERROR_EVTQ_ABT_ERR (1 << 2) #define GERROR_CMDQ_ERR (1 << 0) -#define GERROR_ERR_MASK 0xfd +#define GERROR_ERR_MASK 0x1fd #define ARM_SMMU_GERRORN 0x64 @@ -410,8 +410,6 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_PREFETCH_CFG 0x1 struct { u32 sid; - u8 size; - u64 addr; } prefetch; #define CMDQ_OP_CFGI_STE 0x3 @@ -639,6 +637,15 @@ struct arm_smmu_device { /* IOMMU core code handle */ struct iommu_device iommu; + + struct rb_root streams; + struct mutex streams_mutex; +}; + +struct arm_smmu_stream { + u32 id; + struct arm_smmu_master *master; + struct rb_node node; }; /* SMMU private data for each master */ @@ -647,8 +654,8 @@ struct arm_smmu_master { struct device *dev; struct arm_smmu_domain *domain; struct list_head domain_head; - u32 *sids; - unsigned int num_sids; + struct arm_smmu_stream *streams; + unsigned int num_streams; bool ats_enabled; bool sva_enabled; struct list_head bonds; @@ -668,7 +675,6 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - bool non_strict; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index d8c6bfde6a61..3c6adcdb201b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -761,14 +761,17 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; + if (!iommu_get_dma_strict(domain)) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + if (smmu->impl && smmu->impl->init_context) { ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev); if (ret) goto out_clear_smmu; } - if (smmu_domain->pgtbl_cfg.quirks) - pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks; + if (smmu_domain->pgtbl_quirks) + pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1481,98 +1484,34 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +static int arm_smmu_enable_nesting(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + int ret = 0; - switch(domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; - case DOMAIN_ATTR_IO_PGTABLE_CFG: { - struct io_pgtable_domain_attr *pgtbl_cfg = data; - *pgtbl_cfg = smmu_domain->pgtbl_cfg; - - return 0; - } - default: - return -ENODEV; - } - break; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: { - bool non_strict = smmu_domain->pgtbl_cfg.quirks & - IO_PGTABLE_QUIRK_NON_STRICT; - *(int *)data = non_strict; - return 0; - } - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } + mutex_lock(&smmu_domain->init_mutex); + if (smmu_domain->smmu) + ret = -EPERM; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + mutex_unlock(&smmu_domain->init_mutex); + + return ret; } -static int arm_smmu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirks) { - int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + int ret = 0; mutex_lock(&smmu_domain->init_mutex); - - switch(domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; - } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; - case DOMAIN_ATTR_IO_PGTABLE_CFG: { - struct io_pgtable_domain_attr *pgtbl_cfg = data; - - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; - } - - smmu_domain->pgtbl_cfg = *pgtbl_cfg; - break; - } - default: - ret = -ENODEV; - } - break; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - if (*(int *)data) - smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; - else - smmu_domain->pgtbl_cfg.quirks &= ~IO_PGTABLE_QUIRK_NON_STRICT; - break; - default: - ret = -ENODEV; - } - break; - default: - ret = -EINVAL; - } -out_unlock: + if (smmu_domain->smmu) + ret = -EPERM; + else + smmu_domain->pgtbl_quirks = quirks; mutex_unlock(&smmu_domain->init_mutex); + return ret; } @@ -1631,8 +1570,8 @@ static struct iommu_ops arm_smmu_ops = { .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, - .domain_get_attr = arm_smmu_domain_get_attr, - .domain_set_attr = arm_smmu_domain_set_attr, + .enable_nesting = arm_smmu_enable_nesting, + .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index d2a2d1bc58ba..c31a59d35c64 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -364,7 +364,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - struct io_pgtable_domain_attr pgtbl_cfg; + unsigned long pgtbl_quirks; const struct iommu_flush_ops *flush_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index af765c813cc8..efaf5eab40a1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -52,15 +52,17 @@ struct iommu_dma_cookie { }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); +bool iommu_dma_forcedac __read_mostly; -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain) +static int __init iommu_dma_forcedac_setup(char *str) { - struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; + int ret = kstrtobool(str, &iommu_dma_forcedac); - free_cpu_cached_iovas(cpu, iovad); + if (!ret && iommu_dma_forcedac) + pr_info("Forcing DAC for PCI devices\n"); + return ret; } +early_param("iommu.forcedac", iommu_dma_forcedac_setup); static void iommu_dma_entry_dtor(unsigned long data) { @@ -304,10 +306,7 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) cookie = container_of(iovad, struct iommu_dma_cookie, iovad); domain = cookie->fq_domain; - /* - * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE - * implies that ops->flush_iotlb_all must be non-NULL. - */ + domain->ops->flush_iotlb_all(domain); } @@ -334,7 +333,6 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; struct iova_domain *iovad; - int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -371,8 +369,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) && - !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && - attr) { + domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, iommu_dma_entry_dtor)) pr_warn("iova flush queue initialization failed\n"); @@ -444,7 +441,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); /* Try to get PCI devices a SAC address */ - if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) + if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift, false); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index de324b4eedfe..8fa9a591fb96 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -407,7 +407,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) struct sysmmu_drvdata *data = dev_id; const struct sysmmu_fault_info *finfo; unsigned int i, n, itype; - sysmmu_iova_t fault_addr = -1; + sysmmu_iova_t fault_addr; unsigned short reg_status, reg_clear; int ret = -ENOSYS; diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index b9a974d97831..fc38b1fba7cf 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -63,19 +63,6 @@ static const struct of_device_id l3_device_ids[] = { /* maximum subwindows permitted per liodn */ static u32 max_subwindow_count; -/* Pool for fspi allocation */ -static struct gen_pool *spaace_pool; - -/** - * pamu_get_max_subwin_cnt() - Return the maximum supported - * subwindow count per liodn. - * - */ -u32 pamu_get_max_subwin_cnt(void) -{ - return max_subwindow_count; -} - /** * pamu_get_ppaace() - Return the primary PACCE * @liodn: liodn PAACT index for desired PAACE @@ -155,13 +142,6 @@ static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) return fls64(addrspace_size) - 2; } -/* Derive the PAACE window count encoding for the subwindow count */ -static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) -{ - /* window count is 2^(WCE+1) bytes */ - return __ffs(subwindow_cnt) - 1; -} - /* * Set the PAACE type as primary and set the coherency required domain * attribute @@ -175,88 +155,10 @@ static void pamu_init_ppaace(struct paace *ppaace) } /* - * Set the PAACE type as secondary and set the coherency required domain - * attribute. - */ -static void pamu_init_spaace(struct paace *spaace) -{ - set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY); - set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, - PAACE_M_COHERENCE_REQ); -} - -/* - * Return the spaace (corresponding to the secondary window index) - * for a particular ppaace. - */ -static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) -{ - u32 subwin_cnt; - struct paace *spaace = NULL; - - subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1); - - if (wnum < subwin_cnt) - spaace = &spaact[paace->fspi + wnum]; - else - pr_debug("secondary paace out of bounds\n"); - - return spaace; -} - -/** - * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows - * required for primary PAACE in the secondary - * PAACE table. - * @subwin_cnt: Number of subwindows to be reserved. - * - * A PPAACE entry may have a number of associated subwindows. A subwindow - * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores - * the index (fspi) of the first SPAACE entry in the SPAACT table. This - * function returns the index of the first SPAACE entry. The remaining - * SPAACE entries are reserved contiguously from that index. - * - * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success. - * If no SPAACE entry is available or the allocator can not reserve the required - * number of contiguous entries function returns ULONG_MAX indicating a failure. - * - */ -static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) -{ - unsigned long spaace_addr; - - spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace)); - if (!spaace_addr) - return ULONG_MAX; - - return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); -} - -/* Release the subwindows reserved for a particular LIODN */ -void pamu_free_subwins(int liodn) -{ - struct paace *ppaace; - u32 subwin_cnt, size; - - ppaace = pamu_get_ppaace(liodn); - if (!ppaace) { - pr_debug("Invalid liodn entry\n"); - return; - } - - if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) { - subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1); - size = (subwin_cnt - 1) * sizeof(struct paace); - gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size); - set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); - } -} - -/* * Function used for updating stash destination for the coressponding * LIODN. */ -int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) +int pamu_update_paace_stash(int liodn, u32 value) { struct paace *paace; @@ -265,11 +167,6 @@ int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) pr_debug("Invalid liodn entry\n"); return -ENOENT; } - if (subwin) { - paace = pamu_get_spaace(paace, subwin - 1); - if (!paace) - return -ENOENT; - } set_bf(paace->impl_attr, PAACE_IA_CID, value); mb(); @@ -277,65 +174,20 @@ int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) return 0; } -/* Disable a subwindow corresponding to the LIODN */ -int pamu_disable_spaace(int liodn, u32 subwin) -{ - struct paace *paace; - - paace = pamu_get_ppaace(liodn); - if (!paace) { - pr_debug("Invalid liodn entry\n"); - return -ENOENT; - } - if (subwin) { - paace = pamu_get_spaace(paace, subwin - 1); - if (!paace) - return -ENOENT; - set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); - } else { - set_bf(paace->addr_bitfields, PAACE_AF_AP, - PAACE_AP_PERMS_DENIED); - } - - mb(); - - return 0; -} - /** * pamu_config_paace() - Sets up PPAACE entry for specified liodn * * @liodn: Logical IO device number - * @win_addr: starting address of DSA window - * @win-size: size of DSA window * @omi: Operation mapping index -- if ~omi == 0 then omi not defined - * @rpn: real (true physical) page number * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then * stashid not defined - * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then - * snoopid not defined - * @subwin_cnt: number of sub-windows * @prot: window permissions * * Returns 0 upon success else error code < 0 returned */ -int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, - u32 omi, unsigned long rpn, u32 snoopid, u32 stashid, - u32 subwin_cnt, int prot) +int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot) { struct paace *ppaace; - unsigned long fspi; - - if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { - pr_debug("window size too small or not a power of two %pa\n", - &win_size); - return -EINVAL; - } - - if (win_addr & (win_size - 1)) { - pr_debug("window address is not aligned with window size\n"); - return -EINVAL; - } ppaace = pamu_get_ppaace(liodn); if (!ppaace) @@ -343,13 +195,12 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, /* window size is 2^(WSE+1) bytes */ set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, - map_addrspace_size_to_wse(win_size)); + map_addrspace_size_to_wse(1ULL << 36)); pamu_init_ppaace(ppaace); - ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20); - set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, - (win_addr >> PAMU_PAGE_SHIFT)); + ppaace->wbah = 0; + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); /* set up operation mapping if it's configured */ if (omi < OME_NUMBER_ENTRIES) { @@ -364,120 +215,12 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, if (~stashid != 0) set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid); - /* configure snoop id */ - if (~snoopid != 0) - ppaace->domain_attr.to_host.snpid = snoopid; - - if (subwin_cnt) { - /* The first entry is in the primary PAACE instead */ - fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1); - if (fspi == ULONG_MAX) { - pr_debug("spaace indexes exhausted\n"); - return -EINVAL; - } - - /* window count is 2^(WCE+1) bytes */ - set_bf(ppaace->impl_attr, PAACE_IA_WCE, - map_subwindow_cnt_to_wce(subwin_cnt)); - set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1); - ppaace->fspi = fspi; - } else { - set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); - ppaace->twbah = rpn >> 20; - set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn); - set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); - set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); - set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); - } - mb(); - - return 0; -} - -/** - * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow - * - * @liodn: Logical IO device number - * @subwin_cnt: number of sub-windows associated with dma-window - * @subwin: subwindow index - * @subwin_size: size of subwindow - * @omi: Operation mapping index - * @rpn: real (true physical) page number - * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then - * snoopid not defined - * @stashid: cache stash id for associated cpu - * @enable: enable/disable subwindow after reconfiguration - * @prot: sub window permissions - * - * Returns 0 upon success else error code < 0 returned - */ -int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, - phys_addr_t subwin_size, u32 omi, unsigned long rpn, - u32 snoopid, u32 stashid, int enable, int prot) -{ - struct paace *paace; - - /* setup sub-windows */ - if (!subwin_cnt) { - pr_debug("Invalid subwindow count\n"); - return -EINVAL; - } - - paace = pamu_get_ppaace(liodn); - if (subwin > 0 && subwin < subwin_cnt && paace) { - paace = pamu_get_spaace(paace, subwin - 1); - - if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) { - pamu_init_spaace(paace); - set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn); - } - } - - if (!paace) { - pr_debug("Invalid liodn entry\n"); - return -ENOENT; - } - - if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { - pr_debug("subwindow size out of range, or not a power of 2\n"); - return -EINVAL; - } - - if (rpn == ULONG_MAX) { - pr_debug("real page number out of range\n"); - return -EINVAL; - } - - /* window size is 2^(WSE+1) bytes */ - set_bf(paace->win_bitfields, PAACE_WIN_SWSE, - map_addrspace_size_to_wse(subwin_size)); - - set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); - paace->twbah = rpn >> 20; - set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn); - set_bf(paace->addr_bitfields, PAACE_AF_AP, prot); - - /* configure snoop id */ - if (~snoopid != 0) - paace->domain_attr.to_host.snpid = snoopid; - - /* set up operation mapping if it's configured */ - if (omi < OME_NUMBER_ENTRIES) { - set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); - paace->op_encode.index_ot.omi = omi; - } else if (~omi != 0) { - pr_debug("bad operation mapping index: %d\n", omi); - return -EINVAL; - } - - if (~stashid != 0) - set_bf(paace->impl_attr, PAACE_IA_CID, stashid); - - smp_wmb(); - - if (enable) - set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); - + set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + ppaace->twbah = 0; + set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, 0); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); + set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); mb(); return 0; @@ -1129,17 +872,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) spaact_phys = virt_to_phys(spaact); omt_phys = virt_to_phys(omt); - spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); - if (!spaace_pool) { - ret = -ENOMEM; - dev_err(dev, "Failed to allocate spaace gen pool\n"); - goto error; - } - - ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1); - if (ret) - goto error_genpool; - pamubypenr = in_be32(&guts_regs->pamubypenr); for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; @@ -1167,9 +899,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; -error_genpool: - gen_pool_destroy(spaace_pool); - error: if (irq != NO_IRQ) free_irq(irq, data); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index e1496ba96160..36df7975ff64 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -383,18 +383,10 @@ struct ome { int pamu_domain_init(void); int pamu_enable_liodn(int liodn); int pamu_disable_liodn(int liodn); -void pamu_free_subwins(int liodn); -int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, - u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid, - u32 subwin_cnt, int prot); -int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr, - phys_addr_t subwin_size, u32 omi, unsigned long rpn, - uint32_t snoopid, u32 stashid, int enable, int prot); +int pamu_config_ppaace(int liodn, u32 omi, uint32_t stashid, int prot); u32 get_stash_id(u32 stash_dest_hint, u32 vcpu); void get_ome_index(u32 *omi_index, struct device *dev); -int pamu_update_paace_stash(int liodn, u32 subwin, u32 value); -int pamu_disable_spaace(int liodn, u32 subwin); -u32 pamu_get_max_subwin_cnt(void); +int pamu_update_paace_stash(int liodn, u32 value); #endif /* __FSL_PAMU_H */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index b2110767caf4..0ac781186dbd 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -54,159 +54,18 @@ static int __init iommu_init_mempool(void) return 0; } -static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) -{ - u32 win_cnt = dma_domain->win_cnt; - struct dma_window *win_ptr = &dma_domain->win_arr[0]; - struct iommu_domain_geometry *geom; - - geom = &dma_domain->iommu_domain.geometry; - - if (!win_cnt || !dma_domain->geom_size) { - pr_debug("Number of windows/geometry not configured for the domain\n"); - return 0; - } - - if (win_cnt > 1) { - u64 subwin_size; - dma_addr_t subwin_iova; - u32 wnd; - - subwin_size = dma_domain->geom_size >> ilog2(win_cnt); - subwin_iova = iova & ~(subwin_size - 1); - wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size); - win_ptr = &dma_domain->win_arr[wnd]; - } - - if (win_ptr->valid) - return win_ptr->paddr + (iova & (win_ptr->size - 1)); - - return 0; -} - -static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) -{ - struct dma_window *sub_win_ptr = &dma_domain->win_arr[0]; - int i, ret; - unsigned long rpn, flags; - - for (i = 0; i < dma_domain->win_cnt; i++) { - if (sub_win_ptr[i].valid) { - rpn = sub_win_ptr[i].paddr >> PAMU_PAGE_SHIFT; - spin_lock_irqsave(&iommu_lock, flags); - ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, - sub_win_ptr[i].size, - ~(u32)0, - rpn, - dma_domain->snoop_id, - dma_domain->stash_id, - (i > 0) ? 1 : 0, - sub_win_ptr[i].prot); - spin_unlock_irqrestore(&iommu_lock, flags); - if (ret) { - pr_debug("SPAACE configuration failed for liodn %d\n", - liodn); - return ret; - } - } - } - - return ret; -} - -static int map_win(int liodn, struct fsl_dma_domain *dma_domain) -{ - int ret; - struct dma_window *wnd = &dma_domain->win_arr[0]; - phys_addr_t wnd_addr = dma_domain->iommu_domain.geometry.aperture_start; - unsigned long flags; - - spin_lock_irqsave(&iommu_lock, flags); - ret = pamu_config_ppaace(liodn, wnd_addr, - wnd->size, - ~(u32)0, - wnd->paddr >> PAMU_PAGE_SHIFT, - dma_domain->snoop_id, dma_domain->stash_id, - 0, wnd->prot); - spin_unlock_irqrestore(&iommu_lock, flags); - if (ret) - pr_debug("PAACE configuration failed for liodn %d\n", liodn); - - return ret; -} - -/* Map the DMA window corresponding to the LIODN */ -static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) -{ - if (dma_domain->win_cnt > 1) - return map_subwins(liodn, dma_domain); - else - return map_win(liodn, dma_domain); -} - -/* Update window/subwindow mapping for the LIODN */ -static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr) -{ - int ret; - struct dma_window *wnd = &dma_domain->win_arr[wnd_nr]; - unsigned long flags; - - spin_lock_irqsave(&iommu_lock, flags); - if (dma_domain->win_cnt > 1) { - ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr, - wnd->size, - ~(u32)0, - wnd->paddr >> PAMU_PAGE_SHIFT, - dma_domain->snoop_id, - dma_domain->stash_id, - (wnd_nr > 0) ? 1 : 0, - wnd->prot); - if (ret) - pr_debug("Subwindow reconfiguration failed for liodn %d\n", - liodn); - } else { - phys_addr_t wnd_addr; - - wnd_addr = dma_domain->iommu_domain.geometry.aperture_start; - - ret = pamu_config_ppaace(liodn, wnd_addr, - wnd->size, - ~(u32)0, - wnd->paddr >> PAMU_PAGE_SHIFT, - dma_domain->snoop_id, dma_domain->stash_id, - 0, wnd->prot); - if (ret) - pr_debug("Window reconfiguration failed for liodn %d\n", - liodn); - } - - spin_unlock_irqrestore(&iommu_lock, flags); - - return ret; -} - static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, u32 val) { - int ret = 0, i; + int ret = 0; unsigned long flags; spin_lock_irqsave(&iommu_lock, flags); - if (!dma_domain->win_arr) { - pr_debug("Windows not configured, stash destination update failed for liodn %d\n", - liodn); + ret = pamu_update_paace_stash(liodn, val); + if (ret) { + pr_debug("Failed to update SPAACE for liodn %d\n ", liodn); spin_unlock_irqrestore(&iommu_lock, flags); - return -EINVAL; - } - - for (i = 0; i < dma_domain->win_cnt; i++) { - ret = pamu_update_paace_stash(liodn, i, val); - if (ret) { - pr_debug("Failed to update SPAACE %d field for liodn %d\n ", - i, liodn); - spin_unlock_irqrestore(&iommu_lock, flags); - return ret; - } + return ret; } spin_unlock_irqrestore(&iommu_lock, flags); @@ -215,16 +74,12 @@ static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, } /* Set the geometry parameters for a LIODN */ -static int pamu_set_liodn(int liodn, struct device *dev, - struct fsl_dma_domain *dma_domain, - struct iommu_domain_geometry *geom_attr, - u32 win_cnt) +static int pamu_set_liodn(struct fsl_dma_domain *dma_domain, struct device *dev, + int liodn) { - phys_addr_t window_addr, window_size; - phys_addr_t subwin_size; - int ret = 0, i; u32 omi_index = ~(u32)0; unsigned long flags; + int ret; /* * Configure the omi_index at the geometry setup time. @@ -233,93 +88,30 @@ static int pamu_set_liodn(int liodn, struct device *dev, */ get_ome_index(&omi_index, dev); - window_addr = geom_attr->aperture_start; - window_size = dma_domain->geom_size; - spin_lock_irqsave(&iommu_lock, flags); ret = pamu_disable_liodn(liodn); - if (!ret) - ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index, - 0, dma_domain->snoop_id, - dma_domain->stash_id, win_cnt, 0); + if (ret) + goto out_unlock; + ret = pamu_config_ppaace(liodn, omi_index, dma_domain->stash_id, 0); + if (ret) + goto out_unlock; + ret = pamu_config_ppaace(liodn, ~(u32)0, dma_domain->stash_id, + PAACE_AP_PERMS_QUERY | PAACE_AP_PERMS_UPDATE); +out_unlock: spin_unlock_irqrestore(&iommu_lock, flags); if (ret) { - pr_debug("PAACE configuration failed for liodn %d, win_cnt =%d\n", - liodn, win_cnt); - return ret; - } - - if (win_cnt > 1) { - subwin_size = window_size >> ilog2(win_cnt); - for (i = 0; i < win_cnt; i++) { - spin_lock_irqsave(&iommu_lock, flags); - ret = pamu_disable_spaace(liodn, i); - if (!ret) - ret = pamu_config_spaace(liodn, win_cnt, i, - subwin_size, omi_index, - 0, dma_domain->snoop_id, - dma_domain->stash_id, - 0, 0); - spin_unlock_irqrestore(&iommu_lock, flags); - if (ret) { - pr_debug("SPAACE configuration failed for liodn %d\n", - liodn); - return ret; - } - } + pr_debug("PAACE configuration failed for liodn %d\n", + liodn); } - return ret; } -static int check_size(u64 size, dma_addr_t iova) -{ - /* - * Size must be a power of two and at least be equal - * to PAMU page size. - */ - if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { - pr_debug("Size too small or not a power of two\n"); - return -EINVAL; - } - - /* iova must be page size aligned */ - if (iova & (size - 1)) { - pr_debug("Address is not aligned with window size\n"); - return -EINVAL; - } - - return 0; -} - -static struct fsl_dma_domain *iommu_alloc_dma_domain(void) -{ - struct fsl_dma_domain *domain; - - domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); - if (!domain) - return NULL; - - domain->stash_id = ~(u32)0; - domain->snoop_id = ~(u32)0; - domain->win_cnt = pamu_get_max_subwin_cnt(); - domain->geom_size = 0; - - INIT_LIST_HEAD(&domain->devices); - - spin_lock_init(&domain->domain_lock); - - return domain; -} - -static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) +static void remove_device_ref(struct device_domain_info *info) { unsigned long flags; list_del(&info->link); spin_lock_irqsave(&iommu_lock, flags); - if (win_cnt > 1) - pamu_free_subwins(info->liodn); pamu_disable_liodn(info->liodn); spin_unlock_irqrestore(&iommu_lock, flags); spin_lock_irqsave(&device_domain_lock, flags); @@ -337,7 +129,7 @@ static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain) /* Remove the device from the domain device list */ list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) { if (!dev || (info->dev == dev)) - remove_device_ref(info, dma_domain->win_cnt); + remove_device_ref(info); } spin_unlock_irqrestore(&dma_domain->domain_lock, flags); } @@ -379,13 +171,10 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - if (iova < domain->geometry.aperture_start || iova > domain->geometry.aperture_end) return 0; - - return get_phys_addr(dma_domain, iova); + return iova; } static bool fsl_pamu_capable(enum iommu_cap cap) @@ -399,10 +188,6 @@ static void fsl_pamu_domain_free(struct iommu_domain *domain) /* remove all the devices from the device list */ detach_device(NULL, dma_domain); - - dma_domain->enabled = 0; - dma_domain->mapped = 0; - kmem_cache_free(fsl_pamu_domain_cache, dma_domain); } @@ -413,12 +198,15 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; - dma_domain = iommu_alloc_dma_domain(); - if (!dma_domain) { - pr_debug("dma_domain allocation failed\n"); + dma_domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); + if (!dma_domain) return NULL; - } - /* defaul geometry 64 GB i.e. maximum system address */ + + dma_domain->stash_id = ~(u32)0; + INIT_LIST_HEAD(&dma_domain->devices); + spin_lock_init(&dma_domain->domain_lock); + + /* default geometry 64 GB i.e. maximum system address */ dma_domain->iommu_domain. geometry.aperture_start = 0; dma_domain->iommu_domain.geometry.aperture_end = (1ULL << 36) - 1; dma_domain->iommu_domain.geometry.force_aperture = true; @@ -426,24 +214,6 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) return &dma_domain->iommu_domain; } -/* Configure geometry settings for all LIODNs associated with domain */ -static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, - struct iommu_domain_geometry *geom_attr, - u32 win_cnt) -{ - struct device_domain_info *info; - int ret = 0; - - list_for_each_entry(info, &dma_domain->devices, link) { - ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, - geom_attr, win_cnt); - if (ret) - break; - } - - return ret; -} - /* Update stash destination for all LIODNs associated with the domain */ static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) { @@ -459,198 +229,13 @@ static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) return ret; } -/* Update domain mappings for all LIODNs associated with the domain */ -static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr) -{ - struct device_domain_info *info; - int ret = 0; - - list_for_each_entry(info, &dma_domain->devices, link) { - ret = update_liodn(info->liodn, dma_domain, wnd_nr); - if (ret) - break; - } - return ret; -} - -static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr) -{ - struct device_domain_info *info; - int ret = 0; - - list_for_each_entry(info, &dma_domain->devices, link) { - if (dma_domain->win_cnt == 1 && dma_domain->enabled) { - ret = pamu_disable_liodn(info->liodn); - if (!ret) - dma_domain->enabled = 0; - } else { - ret = pamu_disable_spaace(info->liodn, wnd_nr); - } - } - - return ret; -} - -static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - unsigned long flags; - int ret; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - if (!dma_domain->win_arr) { - pr_debug("Number of windows not configured\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return; - } - - if (wnd_nr >= dma_domain->win_cnt) { - pr_debug("Invalid window index\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return; - } - - if (dma_domain->win_arr[wnd_nr].valid) { - ret = disable_domain_win(dma_domain, wnd_nr); - if (!ret) { - dma_domain->win_arr[wnd_nr].valid = 0; - dma_domain->mapped--; - } - } - - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); -} - -static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size, int prot) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - struct dma_window *wnd; - int pamu_prot = 0; - int ret; - unsigned long flags; - u64 win_size; - - if (prot & IOMMU_READ) - pamu_prot |= PAACE_AP_PERMS_QUERY; - if (prot & IOMMU_WRITE) - pamu_prot |= PAACE_AP_PERMS_UPDATE; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - if (!dma_domain->win_arr) { - pr_debug("Number of windows not configured\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -ENODEV; - } - - if (wnd_nr >= dma_domain->win_cnt) { - pr_debug("Invalid window index\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); - if (size > win_size) { - pr_debug("Invalid window size\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - if (dma_domain->win_cnt == 1) { - if (dma_domain->enabled) { - pr_debug("Disable the window before updating the mapping\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EBUSY; - } - - ret = check_size(size, domain->geometry.aperture_start); - if (ret) { - pr_debug("Aperture start not aligned to the size\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - } - - wnd = &dma_domain->win_arr[wnd_nr]; - if (!wnd->valid) { - wnd->paddr = paddr; - wnd->size = size; - wnd->prot = pamu_prot; - - ret = update_domain_mapping(dma_domain, wnd_nr); - if (!ret) { - wnd->valid = 1; - dma_domain->mapped++; - } - } else { - pr_debug("Disable the window before updating the mapping\n"); - ret = -EBUSY; - } - - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return ret; -} - -/* - * Attach the LIODN to the DMA domain and configure the geometry - * and window mappings. - */ -static int handle_attach_device(struct fsl_dma_domain *dma_domain, - struct device *dev, const u32 *liodn, - int num) -{ - unsigned long flags; - struct iommu_domain *domain = &dma_domain->iommu_domain; - int ret = 0; - int i; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - for (i = 0; i < num; i++) { - /* Ensure that LIODN value is valid */ - if (liodn[i] >= PAACE_NUMBER_ENTRIES) { - pr_debug("Invalid liodn %d, attach device failed for %pOF\n", - liodn[i], dev->of_node); - ret = -EINVAL; - break; - } - - attach_device(dma_domain, liodn[i], dev); - /* - * Check if geometry has already been configured - * for the domain. If yes, set the geometry for - * the LIODN. - */ - if (dma_domain->win_arr) { - u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; - - ret = pamu_set_liodn(liodn[i], dev, dma_domain, - &domain->geometry, win_cnt); - if (ret) - break; - if (dma_domain->mapped) { - /* - * Create window/subwindow mapping for - * the LIODN. - */ - ret = map_liodn(liodn[i], dma_domain); - if (ret) - break; - } - } - } - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return ret; -} - static int fsl_pamu_attach_device(struct iommu_domain *domain, struct device *dev) { struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + unsigned long flags; + int len, ret = 0, i; const u32 *liodn; - u32 liodn_cnt; - int len, ret = 0; struct pci_dev *pdev = NULL; struct pci_controller *pci_ctl; @@ -670,14 +255,30 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, } liodn = of_get_property(dev->of_node, "fsl,liodn", &len); - if (liodn) { - liodn_cnt = len / sizeof(u32); - ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); - } else { + if (!liodn) { pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); - ret = -EINVAL; + return -EINVAL; } + spin_lock_irqsave(&dma_domain->domain_lock, flags); + for (i = 0; i < len / sizeof(u32); i++) { + /* Ensure that LIODN value is valid */ + if (liodn[i] >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid liodn %d, attach device failed for %pOF\n", + liodn[i], dev->of_node); + ret = -EINVAL; + break; + } + + attach_device(dma_domain, liodn[i], dev); + ret = pamu_set_liodn(dma_domain, dev, liodn[i]); + if (ret) + break; + ret = pamu_enable_liodn(liodn[i]); + if (ret) + break; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); return ret; } @@ -712,202 +313,26 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); } -static int configure_domain_geometry(struct iommu_domain *domain, void *data) -{ - struct iommu_domain_geometry *geom_attr = data; - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - dma_addr_t geom_size; - unsigned long flags; - - geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1; - /* - * Sanity check the geometry size. Also, we do not support - * DMA outside of the geometry. - */ - if (check_size(geom_size, geom_attr->aperture_start) || - !geom_attr->force_aperture) { - pr_debug("Invalid PAMU geometry attributes\n"); - return -EINVAL; - } - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - if (dma_domain->enabled) { - pr_debug("Can't set geometry attributes as domain is active\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EBUSY; - } - - /* Copy the domain geometry information */ - memcpy(&domain->geometry, geom_attr, - sizeof(struct iommu_domain_geometry)); - dma_domain->geom_size = geom_size; - - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return 0; -} - /* Set the domain stash attribute */ -static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) +int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu) { - struct pamu_stash_attribute *stash_attr = data; + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); unsigned long flags; int ret; spin_lock_irqsave(&dma_domain->domain_lock, flags); - - memcpy(&dma_domain->dma_stash, stash_attr, - sizeof(struct pamu_stash_attribute)); - - dma_domain->stash_id = get_stash_id(stash_attr->cache, - stash_attr->cpu); + dma_domain->stash_id = get_stash_id(PAMU_ATTR_CACHE_L1, cpu); if (dma_domain->stash_id == ~(u32)0) { pr_debug("Invalid stash attributes\n"); spin_unlock_irqrestore(&dma_domain->domain_lock, flags); return -EINVAL; } - ret = update_domain_stash(dma_domain, dma_domain->stash_id); - - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return ret; -} - -/* Configure domain dma state i.e. enable/disable DMA */ -static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) -{ - struct device_domain_info *info; - unsigned long flags; - int ret; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - - if (enable && !dma_domain->mapped) { - pr_debug("Can't enable DMA domain without valid mapping\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -ENODEV; - } - - dma_domain->enabled = enable; - list_for_each_entry(info, &dma_domain->devices, link) { - ret = (enable) ? pamu_enable_liodn(info->liodn) : - pamu_disable_liodn(info->liodn); - if (ret) - pr_debug("Unable to set dma state for liodn %d", - info->liodn); - } - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return 0; -} - -static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - unsigned long flags; - int ret; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ - if (dma_domain->enabled) { - pr_debug("Can't set geometry attributes as domain is active\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EBUSY; - } - - /* Ensure that the geometry has been set for the domain */ - if (!dma_domain->geom_size) { - pr_debug("Please configure geometry before setting the number of windows\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - /* - * Ensure we have valid window count i.e. it should be less than - * maximum permissible limit and should be a power of two. - */ - if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { - pr_debug("Invalid window count\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, - w_count > 1 ? w_count : 0); - if (!ret) { - kfree(dma_domain->win_arr); - dma_domain->win_arr = kcalloc(w_count, - sizeof(*dma_domain->win_arr), - GFP_ATOMIC); - if (!dma_domain->win_arr) { - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -ENOMEM; - } - dma_domain->win_cnt = w_count; - } spin_unlock_irqrestore(&dma_domain->domain_lock, flags); return ret; } -static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, - enum iommu_attr attr_type, void *data) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - int ret = 0; - - switch (attr_type) { - case DOMAIN_ATTR_GEOMETRY: - ret = configure_domain_geometry(domain, data); - break; - case DOMAIN_ATTR_FSL_PAMU_STASH: - ret = configure_domain_stash(dma_domain, data); - break; - case DOMAIN_ATTR_FSL_PAMU_ENABLE: - ret = configure_domain_dma_state(dma_domain, *(int *)data); - break; - case DOMAIN_ATTR_WINDOWS: - ret = fsl_pamu_set_windows(domain, *(u32 *)data); - break; - default: - pr_debug("Unsupported attribute type\n"); - ret = -EINVAL; - break; - } - - return ret; -} - -static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, - enum iommu_attr attr_type, void *data) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - int ret = 0; - - switch (attr_type) { - case DOMAIN_ATTR_FSL_PAMU_STASH: - memcpy(data, &dma_domain->dma_stash, - sizeof(struct pamu_stash_attribute)); - break; - case DOMAIN_ATTR_FSL_PAMU_ENABLE: - *(int *)data = dma_domain->enabled; - break; - case DOMAIN_ATTR_FSL_PAMUV1: - *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; - break; - case DOMAIN_ATTR_WINDOWS: - *(u32 *)data = dma_domain->win_cnt; - break; - default: - pr_debug("Unsupported attribute type\n"); - ret = -EINVAL; - break; - } - - return ret; -} - static struct iommu_group *get_device_iommu_group(struct device *dev) { struct iommu_group *group; @@ -1031,11 +456,7 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_free = fsl_pamu_domain_free, .attach_dev = fsl_pamu_attach_device, .detach_dev = fsl_pamu_detach_device, - .domain_window_enable = fsl_pamu_window_enable, - .domain_window_disable = fsl_pamu_window_disable, .iova_to_phys = fsl_pamu_iova_to_phys, - .domain_set_attr = fsl_pamu_set_domain_attr, - .domain_get_attr = fsl_pamu_get_domain_attr, .probe_device = fsl_pamu_probe_device, .release_device = fsl_pamu_release_device, .device_group = fsl_pamu_device_group, diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h index 2865d42782e8..95ac1b3cab3b 100644 --- a/drivers/iommu/fsl_pamu_domain.h +++ b/drivers/iommu/fsl_pamu_domain.h @@ -9,56 +9,10 @@ #include "fsl_pamu.h" -struct dma_window { - phys_addr_t paddr; - u64 size; - int valid; - int prot; -}; - struct fsl_dma_domain { - /* - * Indicates the geometry size for the domain. - * This would be set when the geometry is - * configured for the domain. - */ - dma_addr_t geom_size; - /* - * Number of windows assocaited with this domain. - * During domain initialization, it is set to the - * the maximum number of subwindows allowed for a LIODN. - * Minimum value for this is 1 indicating a single PAMU - * window, without any sub windows. Value can be set/ - * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS. - * Value can only be set once the geometry has been configured. - */ - u32 win_cnt; - /* - * win_arr contains information of the configured - * windows for a domain. This is allocated only - * when the number of windows for the domain are - * set. - */ - struct dma_window *win_arr; /* list of devices associated with the domain */ struct list_head devices; - /* dma_domain states: - * mapped - A particular mapping has been created - * within the configured geometry. - * enabled - DMA has been enabled for the given - * domain. This translates to setting of the - * valid bit for the primary PAACE in the PAMU - * PAACT table. Domain geometry should be set and - * it must have a valid mapping before DMA can be - * enabled for it. - * - */ - int mapped; - int enabled; - /* stash_id obtained from the stash attribute details */ u32 stash_id; - struct pamu_stash_attribute dma_stash; - u32 snoop_id; struct iommu_domain iommu_domain; spinlock_t domain_lock; }; diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index d5c51b5c20af..6971397805f3 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1205,6 +1205,63 @@ static inline void reclaim_free_desc(struct q_inval *qi) } } +static const char *qi_type_string(u8 type) +{ + switch (type) { + case QI_CC_TYPE: + return "Context-cache Invalidation"; + case QI_IOTLB_TYPE: + return "IOTLB Invalidation"; + case QI_DIOTLB_TYPE: + return "Device-TLB Invalidation"; + case QI_IEC_TYPE: + return "Interrupt Entry Cache Invalidation"; + case QI_IWD_TYPE: + return "Invalidation Wait"; + case QI_EIOTLB_TYPE: + return "PASID-based IOTLB Invalidation"; + case QI_PC_TYPE: + return "PASID-cache Invalidation"; + case QI_DEIOTLB_TYPE: + return "PASID-based Device-TLB Invalidation"; + case QI_PGRP_RESP_TYPE: + return "Page Group Response"; + default: + return "UNKNOWN"; + } +} + +static void qi_dump_fault(struct intel_iommu *iommu, u32 fault) +{ + unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG); + u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + struct qi_desc *desc = iommu->qi->desc + head; + + if (fault & DMA_FSTS_IQE) + pr_err("VT-d detected Invalidation Queue Error: Reason %llx", + DMAR_IQER_REG_IQEI(iqe_err)); + if (fault & DMA_FSTS_ITE) + pr_err("VT-d detected Invalidation Time-out Error: SID %llx", + DMAR_IQER_REG_ITESID(iqe_err)); + if (fault & DMA_FSTS_ICE) + pr_err("VT-d detected Invalidation Completion Error: SID %llx", + DMAR_IQER_REG_ICESID(iqe_err)); + + pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n", + qi_type_string(desc->qw0 & 0xf), + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); + + head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH; + head <<= qi_shift(iommu); + desc = iommu->qi->desc + head; + + pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n", + qi_type_string(desc->qw0 & 0xf), + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); +} + static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; @@ -1216,6 +1273,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) return -EAGAIN; fault = readl(iommu->reg + DMAR_FSTS_REG); + if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE)) + qi_dump_fault(iommu, fault); /* * If IQE happens, the head points to the descriptor associated @@ -1232,12 +1291,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) * used by software as private data. We won't print * out these two qw's for security consideration. */ - pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n", - (unsigned long long)desc->qw0, - (unsigned long long)desc->qw1); memcpy(desc, qi->desc + (wait_index << shift), 1 << shift); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Queue Error (IQE) cleared\n"); return -EINVAL; } } @@ -1254,6 +1311,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Time-out Error (ITE) cleared\n"); do { if (qi->desc_status[head] == QI_IN_USE) @@ -1265,8 +1323,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) return -EAGAIN; } - if (fault & DMA_FSTS_ICE) + if (fault & DMA_FSTS_ICE) { writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Completion Error (ICE) cleared\n"); + } return 0; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ee0932307d64..0e04d450c38a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -360,7 +360,6 @@ int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); static int dmar_map_gfx = 1; -static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; @@ -451,8 +450,8 @@ static int __init intel_iommu_setup(char *str) dmar_map_gfx = 0; pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - pr_info("Forcing DAC for PCI devices\n"); - dmar_forcedac = 1; + pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); + iommu_dma_forcedac = true; } else if (!strncmp(str, "strict", 6)) { pr_info("Disable batched IOTLB flush\n"); intel_iommu_strict = 1; @@ -658,7 +657,14 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - if (!ecap_sc_support(iommu->ecap)) { + /* + * If the hardware is operating in the scalable mode, + * the snooping control is always supported since we + * always set PASID-table-entry.PGSNP bit if the domain + * is managed outside (UNMANAGED). + */ + if (!sm_supported(iommu) && + !ecap_sc_support(iommu->ecap)) { ret = 0; break; } @@ -1340,6 +1346,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) readl, (sts & DMA_GSTS_RTPS), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + if (sm_supported(iommu)) + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } void iommu_flush_write_buffer(struct intel_iommu *iommu) @@ -2289,6 +2300,41 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain, return level; } +/* + * Ensure that old small page tables are removed to make room for superpage(s). + * We're going to add new large pages, so make sure we don't remove their parent + * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared. + */ +static void switch_to_super_page(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long end_pfn, int level) +{ + unsigned long lvl_pages = lvl_to_nr_pages(level); + struct dma_pte *pte = NULL; + int i; + + while (start_pfn <= end_pfn) { + if (!pte) + pte = pfn_to_dma_pte(domain, start_pfn, &level); + + if (dma_pte_present(pte)) { + dma_pte_free_pagetable(domain, start_pfn, + start_pfn + lvl_pages - 1, + level + 1); + + for_each_domain_iommu(i, domain) + iommu_flush_iotlb_psi(g_iommus[i], domain, + start_pfn, lvl_pages, + 0, 0); + } + + pte++; + start_pfn += lvl_pages; + if (first_pte_in_page(pte)) + pte = NULL; + } +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -2305,8 +2351,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -EINVAL; attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); + attr |= DMA_FL_PTE_PRESENT; if (domain_use_first_level(domain)) { - attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; + attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US; if (domain->domain.type == IOMMU_DOMAIN_DMA) { attr |= DMA_FL_PTE_ACCESS; @@ -2329,22 +2376,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -ENOMEM; /* It is large page*/ if (largepage_lvl > 1) { - unsigned long nr_superpages, end_pfn; + unsigned long end_pfn; pteval |= DMA_PTE_LARGE_PAGE; - lvl_pages = lvl_to_nr_pages(largepage_lvl); - - nr_superpages = nr_pages / lvl_pages; - end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; - - /* - * Ensure that old small page tables are - * removed to make room for superpage(s). - * We're adding new large pages, so make sure - * we don't remove their parent tables. - */ - dma_pte_free_pagetable(domain, iov_pfn, end_pfn, - largepage_lvl + 1); + end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1; + switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -2422,6 +2458,10 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + + if (sm_supported(iommu)) + qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0); + iommu->flush.flush_iotlb(iommu, did_old, 0, @@ -2505,6 +2545,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) + flags |= PASID_FLAG_PAGE_SNOOP; + return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, domain->iommu_did[iommu->seq_id], flags); @@ -3267,8 +3310,6 @@ static int __init init_dmars(void) register_pasid_allocator(iommu); #endif iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA @@ -3458,12 +3499,7 @@ static int init_iommu_hw(void) } iommu_flush_write_buffer(iommu); - iommu_set_root_entry(iommu); - - iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3846,8 +3882,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto disable_iommu; iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); @@ -4065,35 +4099,6 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; -static void free_all_cpu_cached_iovas(unsigned int cpu) -{ - int i; - - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct dmar_domain *domain; - int did; - - if (!iommu) - continue; - - for (did = 0; did < cap_ndoms(iommu->cap); did++) { - domain = get_iommu_domain(iommu, (u16)did); - - if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) - continue; - - iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain); - } - } -} - -static int intel_iommu_cpu_dead(unsigned int cpu) -{ - free_all_cpu_cached_iovas(cpu); - return 0; -} - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -4377,6 +4382,17 @@ int __init intel_iommu_init(void) down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { + /* + * The flush queue implementation does not perform + * page-selective invalidations that are required for efficient + * TLB flushes in virtual environments. The benefit of batching + * is likely to be much lower than the overhead of synchronizing + * the virtual and physical IOMMU page-tables. + */ + if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) { + pr_warn("IOMMU batching is disabled due to virtualization"); + intel_iommu_strict = 1; + } iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); @@ -4385,11 +4401,10 @@ int __init intel_iommu_init(void) } up_read(&dmar_global_lock); + iommu_set_dma_strict(intel_iommu_strict); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); - cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, - intel_iommu_cpu_dead); down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) @@ -5343,6 +5358,8 @@ static int siov_find_pci_dvsec(struct pci_dev *pdev) static bool intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) { + struct device_domain_info *info = get_domain_info(dev); + if (feat == IOMMU_DEV_FEAT_AUX) { int ret; @@ -5357,13 +5374,13 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } - if (feat == IOMMU_DEV_FEAT_SVA) { - struct device_domain_info *info = get_domain_info(dev); + if (feat == IOMMU_DEV_FEAT_IOPF) + return info && info->pri_supported; + if (feat == IOMMU_DEV_FEAT_SVA) return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && info->pasid_supported && info->pri_supported && info->ats_supported; - } return false; } @@ -5374,12 +5391,18 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_IOPF) + return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV; + if (feat == IOMMU_DEV_FEAT_SVA) { struct device_domain_info *info = get_domain_info(dev); if (!info) return -EINVAL; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } @@ -5423,87 +5446,23 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, } static int -intel_iommu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +intel_iommu_enable_nesting(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long flags; - int ret = 0; - - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; + int ret = -ENODEV; - switch (attr) { - case DOMAIN_ATTR_NESTING: - spin_lock_irqsave(&device_domain_lock, flags); - if (nested_mode_support() && - list_empty(&dmar_domain->devices)) { - dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; - dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; - } else { - ret = -ENODEV; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - break; - default: - ret = -EINVAL; - break; + spin_lock_irqsave(&device_domain_lock, flags); + if (nested_mode_support() && list_empty(&dmar_domain->devices)) { + dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; + dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; + ret = 0; } + spin_unlock_irqrestore(&device_domain_lock, flags); return ret; } -static bool domain_use_flush_queue(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool r = true; - - if (intel_iommu_strict) - return false; - - /* - * The flush queue implementation does not perform page-selective - * invalidations that are required for efficient TLB flushes in virtual - * environments. The benefit of batching is likely to be much lower than - * the overhead of synchronizing the virtual and physical IOMMU - * page-tables. - */ - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!cap_caching_mode(iommu->cap)) - continue; - - pr_warn_once("IOMMU batching is disabled due to virtualization"); - r = false; - break; - } - rcu_read_unlock(); - - return r; -} - -static int -intel_iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - return -ENODEV; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = domain_use_flush_queue(); - return 0; - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } -} - /* * Check that the device does not live on an external facing PCI port that is * marked as untrusted. Such devices should not be able to apply quirks and @@ -5576,8 +5535,7 @@ const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, .domain_free = intel_iommu_domain_free, - .domain_get_attr = intel_iommu_domain_get_attr, - .domain_set_attr = intel_iommu_domain_set_attr, + .enable_nesting = intel_iommu_enable_nesting, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 611ef5243cb6..5c16ebe037a1 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -736,7 +736,7 @@ static int __init intel_prepare_irq_remapping(void) return -ENODEV; if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) - goto error; + return -ENODEV; if (!dmar_ir_support()) return -ENODEV; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index f26cb6195b2c..72646bafc52f 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -24,7 +24,6 @@ /* * Intel IOMMU system wide PASID name space: */ -static DEFINE_SPINLOCK(pasid_lock); u32 intel_pasid_max_id = PASID_MAX; int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid) @@ -231,7 +230,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) return info->pasid_table; } -int intel_pasid_get_dev_max_id(struct device *dev) +static int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; @@ -242,7 +241,7 @@ int intel_pasid_get_dev_max_id(struct device *dev) return info->pasid_table->max_pasid; } -struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) +static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) { struct device_domain_info *info; struct pasid_table *pasid_table; @@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; - spin_lock(&pasid_lock); +retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { entries = alloc_pgtable_page(info->iommu->node); - if (!entries) { - spin_unlock(&pasid_lock); + if (!entries) return NULL; - } - WRITE_ONCE(dir[dir_index].val, - (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + /* + * The pasid directory table entry won't be freed after + * allocation. No worry about the race with free and + * clear. However, this entry might be populated by others + * while we are preparing it. Use theirs with a retry. + */ + if (cmpxchg64(&dir[dir_index].val, 0ULL, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { + free_pgtable_page(entries); + goto retry; + } } - spin_unlock(&pasid_lock); return &entries[index]; } @@ -394,6 +399,15 @@ static inline void pasid_set_sre(struct pasid_entry *pe) } /* + * Setup the WPE(Write Protect Enable) field (Bit 132) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_wpe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4); +} + +/* * Setup the P(Present) field (Bit 0) of a scalable mode PASID * entry. */ @@ -412,6 +426,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) } /* + * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode + * PASID entry. + */ +static inline void +pasid_set_pgsnp(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24); +} + +/* * Setup the First Level Page table Pointer field (Bit 140~191) * of a scalable mode PASID entry. */ @@ -493,6 +517,9 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, if (WARN_ON(!pte)) return; + if (!(pte->val[0] & PASID_PTE_PRESENT)) + return; + did = pasid_get_domain_id(pte); intel_pasid_clear_entry(dev, pasid, fault_ignore); @@ -522,6 +549,22 @@ static void pasid_flush_caches(struct intel_iommu *iommu, } } +static inline int pasid_enable_wpe(struct pasid_entry *pte) +{ +#ifdef CONFIG_X86 + unsigned long cr0 = read_cr0(); + + /* CR0.WP is normally set but just to be sure */ + if (unlikely(!(cr0 & X86_CR0_WP))) { + pr_err_ratelimited("No CPU write protect!\n"); + return -EINVAL; + } +#endif + pasid_set_wpe(pte); + + return 0; +}; + /* * Set up the scalable mode pasid table entry for first only * translation type. @@ -553,6 +596,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EINVAL; } pasid_set_sre(pte); + if (pasid_enable_wpe(pte)) + return -EINVAL; + } if (flags & PASID_FLAG_FL5LP) { @@ -565,6 +611,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } } + if (flags & PASID_FLAG_PAGE_SNOOP) + pasid_set_pgsnp(pte); + pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -643,6 +692,9 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) + pasid_set_pgsnp(pte); + /* * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). @@ -706,6 +758,9 @@ intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, return -EINVAL; } pasid_set_sre(pte); + /* Enable write protect WP if guest requested */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_WPE) + pasid_set_wpe(pte); } if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 444c0bec221a..5ff61c3d401f 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -48,6 +48,7 @@ */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) #define PASID_FLAG_NESTED BIT(1) +#define PASID_FLAG_PAGE_SNOOP BIT(2) /* * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first- @@ -99,14 +100,9 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte) } extern unsigned int intel_pasid_max_id; -int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp); -void intel_pasid_free_id(u32 pasid); -void *intel_pasid_lookup_id(u32 pasid); int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); -int intel_pasid_get_dev_max_id(struct device *dev); -struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid); int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, pgd_t *pgd, u32 pasid, u16 did, int flags); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 574a7e657a9a..5165cea90421 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -462,13 +462,12 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) /* Caller must hold pasid_mutex, mm reference */ static int intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct svm_dev_ops *ops, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct intel_svm *svm = NULL, *t; struct device_domain_info *info; struct intel_svm_dev *sdev; - struct intel_svm *svm = NULL; unsigned long iflags; int pasid_max; int ret; @@ -494,34 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - if (!(flags & SVM_FLAG_PRIVATE_PASID)) { - struct intel_svm *t; - - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) - continue; - - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm) + continue; - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } - sdev->users++; - goto success; - } + svm = t; + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } + + break; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); @@ -550,7 +541,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Finish the setup now we know we're keeping it */ sdev->users = 1; - sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -862,7 +852,7 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) /* Fill in event data for device specific processing */ memset(&event, 0, sizeof(struct iommu_fault_event)); event.fault.type = IOMMU_FAULT_PAGE_REQ; - event.fault.prm.addr = desc->addr; + event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; event.fault.prm.pasid = desc->pasid; event.fault.prm.grpid = desc->prg_index; event.fault.prm.perm = prq_to_iommu_prot(desc); @@ -895,6 +885,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) struct intel_iommu *iommu = d; struct intel_svm *svm = NULL; int head, tail, handled = 0; + unsigned int flags = 0; /* Clear PPR bit before reading head/tail registers, to * ensure that we get a new interrupt if needed. */ @@ -920,7 +911,17 @@ static irqreturn_t prq_event_thread(int irq, void *d) ((unsigned long long *)req)[1]); goto no_pasid; } - + /* We shall not receive page request for supervisor SVM */ + if (req->pm_req && (req->rd_req | req->wr_req)) { + pr_err("Unexpected page request in Privilege Mode"); + /* No need to find the matching sdev as for bad_req */ + goto no_pasid; + } + /* DMA read with exec requeset is not supported. */ + if (req->exe_req && req->rd_req) { + pr_err("Execution request not supported\n"); + goto no_pasid; + } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); svm = ioasid_find(NULL, req->pasid, NULL); @@ -982,9 +983,11 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (access_error(vma, req)) goto invalid; - ret = handle_mm_fault(vma, address, - req->wr_req ? FAULT_FLAG_WRITE : 0, - NULL); + flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; + if (req->wr_req) + flags |= FAULT_FLAG_WRITE; + + ret = handle_mm_fault(vma, address, flags, NULL); if (ret & VM_FAULT_ERROR) goto invalid; @@ -993,13 +996,6 @@ invalid: mmap_read_unlock(svm->mm); mmput(svm->mm); bad_req: - WARN_ON(!sdev); - if (sdev && sdev->ops && sdev->ops->fault_cb) { - int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->pm_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, - req->priv_data, rwxp, result); - } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; @@ -1021,12 +1017,12 @@ no_pasid: QI_PGRP_RESP_TYPE; resp.qw1 = QI_PGRP_IDX(req->prg_index) | QI_PGRP_LPIG(req->lpig); + resp.qw2 = 0; + resp.qw3 = 0; if (req->priv_data_present) memcpy(&resp.qw2, req->priv_data, sizeof(req->priv_data)); - resp.qw2 = 0; - resp.qw3 = 0; qi_submit_sync(iommu, &resp, 1, 0); } prq_advance: @@ -1074,7 +1070,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) if (drvdata) flags = *(unsigned int *)drvdata; mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + ret = intel_svm_bind_mm(dev, flags, mm, &sdev); if (ret) sva = ERR_PTR(ret); else if (sdev) diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c new file mode 100644 index 000000000000..1df8c1dcae77 --- /dev/null +++ b/drivers/iommu/io-pgfault.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle device page faults + * + * Copyright (C) 2020 ARM Ltd. + */ + +#include <linux/iommu.h> +#include <linux/list.h> +#include <linux/sched/mm.h> +#include <linux/slab.h> +#include <linux/workqueue.h> + +#include "iommu-sva-lib.h" + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; + +/** + * struct iopf_device_param - IO Page Fault data attached to a device + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. + */ +struct iopf_device_param { + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + struct list_head partial; +}; + +struct iopf_fault { + struct iommu_fault fault; + struct list_head list; +}; + +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + struct work_struct work; + struct device *dev; +}; + +static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, + enum iommu_page_response_code status) +{ + struct iommu_page_response resp = { + .version = IOMMU_PAGE_RESP_VERSION_1, + .pasid = iopf->fault.prm.pasid, + .grpid = iopf->fault.prm.grpid, + .code = status, + }; + + if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) && + (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID)) + resp.flags = IOMMU_PAGE_RESP_PASID_VALID; + + return iommu_page_response(dev, &resp); +} + +static enum iommu_page_response_code +iopf_handle_single(struct iopf_fault *iopf) +{ + vm_fault_t ret; + struct mm_struct *mm; + struct vm_area_struct *vma; + unsigned int access_flags = 0; + unsigned int fault_flags = FAULT_FLAG_REMOTE; + struct iommu_fault_page_request *prm = &iopf->fault.prm; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; + + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) + return status; + + mm = iommu_sva_find(prm->pasid); + if (IS_ERR_OR_NULL(mm)) + return status; + + mmap_read_lock(mm); + + vma = find_extend_vma(mm, prm->addr); + if (!vma) + /* Unmapped area */ + goto out_put_mm; + + if (prm->perm & IOMMU_FAULT_PERM_READ) + access_flags |= VM_READ; + + if (prm->perm & IOMMU_FAULT_PERM_WRITE) { + access_flags |= VM_WRITE; + fault_flags |= FAULT_FLAG_WRITE; + } + + if (prm->perm & IOMMU_FAULT_PERM_EXEC) { + access_flags |= VM_EXEC; + fault_flags |= FAULT_FLAG_INSTRUCTION; + } + + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) + fault_flags |= FAULT_FLAG_USER; + + if (access_flags & ~vma->vm_flags) + /* Access fault */ + goto out_put_mm; + + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : + IOMMU_PAGE_RESP_SUCCESS; + +out_put_mm: + mmap_read_unlock(mm); + mmput(mm); + + return status; +} + +static void iopf_handle_group(struct work_struct *work) +{ + struct iopf_group *group; + struct iopf_fault *iopf, *next; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; + + group = container_of(work, struct iopf_group, work); + + list_for_each_entry_safe(iopf, next, &group->faults, list) { + /* + * For the moment, errors are sticky: don't handle subsequent + * faults in the group if there is an error. + */ + if (status == IOMMU_PAGE_RESP_SUCCESS) + status = iopf_handle_single(iopf); + + if (!(iopf->fault.prm.flags & + IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) + kfree(iopf); + } + + iopf_complete_group(group->dev, &group->last_fault, status); + kfree(group); +} + +/** + * iommu_queue_iopf - IO Page Fault handler + * @fault: fault event + * @cookie: struct device, passed to iommu_register_device_fault_handler. + * + * Add a fault to the device workqueue, to be handled by mm. + * + * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard + * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't + * expect a response. It may be generated when disabling a PASID (issuing a + * PASID stop request) by some PCI devices. + * + * The PASID stop request is issued by the device driver before unbind(). Once + * it completes, no page request is generated for this PASID anymore and + * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1 + * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait + * for all outstanding page requests to come back with a response before + * completing the PASID stop request. Others do not wait for page responses, and + * instead issue this Stop Marker that tells us when the PASID can be + * reallocated. + * + * It is safe to discard the Stop Marker because it is an optimization. + * a. Page requests, which are posted requests, have been flushed to the IOMMU + * when the stop request completes. + * b. The IOMMU driver flushes all fault queues on unbind() before freeing the + * PASID. + * + * So even though the Stop Marker might be issued by the device *after* the stop + * request completes, outstanding faults will have been dealt with by the time + * the PASID is freed. + * + * Return: 0 on success and <0 on error. + */ +int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) +{ + int ret; + struct iopf_group *group; + struct iopf_fault *iopf, *next; + struct iopf_device_param *iopf_param; + + struct device *dev = cookie; + struct dev_iommu *param = dev->iommu; + + lockdep_assert_held(¶m->lock); + + if (fault->type != IOMMU_FAULT_PAGE_REQ) + /* Not a recoverable page fault */ + return -EOPNOTSUPP; + + /* + * As long as we're holding param->lock, the queue can't be unlinked + * from the device and therefore cannot disappear. + */ + iopf_param = param->iopf_param; + if (!iopf_param) + return -ENODEV; + + if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + iopf = kzalloc(sizeof(*iopf), GFP_KERNEL); + if (!iopf) + return -ENOMEM; + + iopf->fault = *fault; + + /* Non-last request of a group. Postpone until the last one */ + list_add(&iopf->list, &iopf_param->partial); + + return 0; + } + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) { + /* + * The caller will send a response to the hardware. But we do + * need to clean up before leaving, otherwise partial faults + * will be stuck. + */ + ret = -ENOMEM; + goto cleanup_partial; + } + + group->dev = dev; + group->last_fault.fault = *fault; + INIT_LIST_HEAD(&group->faults); + list_add(&group->last_fault.list, &group->faults); + INIT_WORK(&group->work, iopf_handle_group); + + /* See if we have partial faults for this group */ + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { + if (iopf->fault.prm.grpid == fault->prm.grpid) + /* Insert *before* the last fault */ + list_move(&iopf->list, &group->faults); + } + + queue_work(iopf_param->queue->wq, &group->work); + return 0; + +cleanup_partial: + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { + if (iopf->fault.prm.grpid == fault->prm.grpid) { + list_del(&iopf->list); + kfree(iopf); + } + } + return ret; +} +EXPORT_SYMBOL_GPL(iommu_queue_iopf); + +/** + * iopf_queue_flush_dev - Ensure that all queued faults have been processed + * @dev: the endpoint whose faults need to be flushed. + * + * The IOMMU driver calls this before releasing a PASID, to ensure that all + * pending faults for this PASID have been handled, and won't hit the address + * space of the next process that uses this PASID. The driver must make sure + * that no new fault is added to the queue. In particular it must flush its + * low-level queue before calling this function. + * + * Return: 0 on success and <0 on error. + */ +int iopf_queue_flush_dev(struct device *dev) +{ + int ret = 0; + struct iopf_device_param *iopf_param; + struct dev_iommu *param = dev->iommu; + + if (!param) + return -ENODEV; + + mutex_lock(¶m->lock); + iopf_param = param->iopf_param; + if (iopf_param) + flush_workqueue(iopf_param->queue->wq); + else + ret = -ENODEV; + mutex_unlock(¶m->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iopf_queue_flush_dev); + +/** + * iopf_queue_discard_partial - Remove all pending partial fault + * @queue: the queue whose partial faults need to be discarded + * + * When the hardware queue overflows, last page faults in a group may have been + * lost and the IOMMU driver calls this to discard all partial faults. The + * driver shouldn't be adding new faults to this queue concurrently. + * + * Return: 0 on success and <0 on error. + */ +int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + struct iopf_fault *iopf, *next; + struct iopf_device_param *iopf_param; + + if (!queue) + return -EINVAL; + + mutex_lock(&queue->lock); + list_for_each_entry(iopf_param, &queue->devices, queue_list) { + list_for_each_entry_safe(iopf, next, &iopf_param->partial, + list) { + list_del(&iopf->list); + kfree(iopf); + } + } + mutex_unlock(&queue->lock); + return 0; +} +EXPORT_SYMBOL_GPL(iopf_queue_discard_partial); + +/** + * iopf_queue_add_device - Add producer to the fault queue + * @queue: IOPF queue + * @dev: device to add + * + * Return: 0 on success and <0 on error. + */ +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + int ret = -EBUSY; + struct iopf_device_param *iopf_param; + struct dev_iommu *param = dev->iommu; + + if (!param) + return -ENODEV; + + iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL); + if (!iopf_param) + return -ENOMEM; + + INIT_LIST_HEAD(&iopf_param->partial); + iopf_param->queue = queue; + iopf_param->dev = dev; + + mutex_lock(&queue->lock); + mutex_lock(¶m->lock); + if (!param->iopf_param) { + list_add(&iopf_param->queue_list, &queue->devices); + param->iopf_param = iopf_param; + ret = 0; + } + mutex_unlock(¶m->lock); + mutex_unlock(&queue->lock); + + if (ret) + kfree(iopf_param); + + return ret; +} +EXPORT_SYMBOL_GPL(iopf_queue_add_device); + +/** + * iopf_queue_remove_device - Remove producer from fault queue + * @queue: IOPF queue + * @dev: device to remove + * + * Caller makes sure that no more faults are reported for this device. + * + * Return: 0 on success and <0 on error. + */ +int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ + int ret = -EINVAL; + struct iopf_fault *iopf, *next; + struct iopf_device_param *iopf_param; + struct dev_iommu *param = dev->iommu; + + if (!param || !queue) + return -EINVAL; + + mutex_lock(&queue->lock); + mutex_lock(¶m->lock); + iopf_param = param->iopf_param; + if (iopf_param && iopf_param->queue == queue) { + list_del(&iopf_param->queue_list); + param->iopf_param = NULL; + ret = 0; + } + mutex_unlock(¶m->lock); + mutex_unlock(&queue->lock); + if (ret) + return ret; + + /* Just in case some faults are still stuck */ + list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) + kfree(iopf); + + kfree(iopf_param); + + return 0; +} +EXPORT_SYMBOL_GPL(iopf_queue_remove_device); + +/** + * iopf_queue_alloc - Allocate and initialize a fault queue + * @name: a unique string identifying the queue (for workqueue) + * + * Return: the queue on success and NULL on error. + */ +struct iopf_queue *iopf_queue_alloc(const char *name) +{ + struct iopf_queue *queue; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return NULL; + + /* + * The WQ is unordered because the low-level handler enqueues faults by + * group. PRI requests within a group have to be ordered, but once + * that's dealt with, the high-level function can handle groups out of + * order. + */ + queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name); + if (!queue->wq) { + kfree(queue); + return NULL; + } + + INIT_LIST_HEAD(&queue->devices); + mutex_init(&queue->lock); + + return queue; +} +EXPORT_SYMBOL_GPL(iopf_queue_alloc); + +/** + * iopf_queue_free - Free IOPF queue + * @queue: queue to free + * + * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or + * adding/removing devices on this queue anymore. + */ +void iopf_queue_free(struct iopf_queue *queue) +{ + struct iopf_device_param *iopf_param, *next; + + if (!queue) + return; + + list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list) + iopf_queue_remove_device(queue, iopf_param->dev); + + destroy_workqueue(queue->wq); + kfree(queue); +} +EXPORT_SYMBOL_GPL(iopf_queue_free); diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index b40990aef3fd..031155010ca8 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -12,4 +12,57 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); +/* I/O Page fault */ +struct device; +struct iommu_fault; +struct iopf_queue; + +#ifdef CONFIG_IOMMU_SVA_LIB +int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); + +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_remove_device(struct iopf_queue *queue, + struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); + +#else /* CONFIG_IOMMU_SVA_LIB */ +static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) +{ + return -ENODEV; +} + +static inline int iopf_queue_add_device(struct iopf_queue *queue, + struct device *dev) +{ + return -ENODEV; +} + +static inline int iopf_queue_remove_device(struct iopf_queue *queue, + struct device *dev) +{ + return -ENODEV; +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} +#endif /* CONFIG_IOMMU_SVA_LIB */ #endif /* _IOMMU_SVA_LIB_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d0b0a15dba84..c4ad9c644802 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -69,16 +69,7 @@ static const char * const iommu_group_resv_type_string[] = { }; #define IOMMU_CMD_LINE_DMA_API BIT(0) - -static void iommu_set_cmd_line_dma_api(void) -{ - iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; -} - -static bool iommu_cmd_line_dma_api(void) -{ - return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API); -} +#define IOMMU_CMD_LINE_STRICT BIT(1) static int iommu_alloc_default_domain(struct iommu_group *group, struct device *dev); @@ -130,9 +121,7 @@ static const char *iommu_domain_type_str(unsigned int t) static int __init iommu_subsys_init(void) { - bool cmd_line = iommu_cmd_line_dma_api(); - - if (!cmd_line) { + if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) iommu_set_default_passthrough(false); else @@ -146,7 +135,8 @@ static int __init iommu_subsys_init(void) pr_info("Default domain type: %s %s\n", iommu_domain_type_str(iommu_def_domain_type), - cmd_line ? "(set via kernel command line)" : ""); + (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? + "(set via kernel command line)" : ""); return 0; } @@ -329,10 +319,29 @@ early_param("iommu.passthrough", iommu_set_def_domain_type); static int __init iommu_dma_setup(char *str) { - return kstrtobool(str, &iommu_dma_strict); + int ret = kstrtobool(str, &iommu_dma_strict); + + if (!ret) + iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; + return ret; } early_param("iommu.strict", iommu_dma_setup); +void iommu_set_dma_strict(bool strict) +{ + if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT)) + iommu_dma_strict = strict; +} + +bool iommu_get_dma_strict(struct iommu_domain *domain) +{ + /* only allow lazy flushing for DMA domains */ + if (domain->type == IOMMU_DOMAIN_DMA) + return iommu_dma_strict; + return true; +} +EXPORT_SYMBOL_GPL(iommu_get_dma_strict); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1511,14 +1520,6 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus, group->default_domain = dom; if (!group->domain) group->domain = dom; - - if (!iommu_dma_strict) { - int attr = 1; - iommu_domain_set_attr(dom, - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, - &attr); - } - return 0; } @@ -2610,17 +2611,6 @@ size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); } -int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size, int prot) -{ - if (unlikely(domain->ops->domain_window_enable == NULL)) - return -ENODEV; - - return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, - prot); -} -EXPORT_SYMBOL_GPL(iommu_domain_window_enable); - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened @@ -2675,50 +2665,26 @@ static int __init iommu_init(void) } core_initcall(iommu_init); -int iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +int iommu_enable_nesting(struct iommu_domain *domain) { - struct iommu_domain_geometry *geometry; - bool *paging; - int ret = 0; - - switch (attr) { - case DOMAIN_ATTR_GEOMETRY: - geometry = data; - *geometry = domain->geometry; - - break; - case DOMAIN_ATTR_PAGING: - paging = data; - *paging = (domain->pgsize_bitmap != 0UL); - break; - default: - if (!domain->ops->domain_get_attr) - return -EINVAL; - - ret = domain->ops->domain_get_attr(domain, attr, data); - } - - return ret; + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + if (!domain->ops->enable_nesting) + return -EINVAL; + return domain->ops->enable_nesting(domain); } -EXPORT_SYMBOL_GPL(iommu_domain_get_attr); +EXPORT_SYMBOL_GPL(iommu_enable_nesting); -int iommu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +int iommu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirk) { - int ret = 0; - - switch (attr) { - default: - if (domain->ops->domain_set_attr == NULL) - return -EINVAL; - - ret = domain->ops->domain_set_attr(domain, attr, data); - } - - return ret; + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + if (!domain->ops->set_pgtable_quirks) + return -EINVAL; + return domain->ops->set_pgtable_quirks(domain, quirk); } -EXPORT_SYMBOL_GPL(iommu_domain_set_attr); +EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); void iommu_get_resv_regions(struct device *dev, struct list_head *list) { @@ -2777,16 +2743,14 @@ EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) - iommu_set_cmd_line_dma_api(); - + iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; } void iommu_set_default_translated(bool cmd_line) { if (cmd_line) - iommu_set_cmd_line_dma_api(); - + iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; iommu_def_domain_type = IOMMU_DOMAIN_DMA; } @@ -2878,10 +2842,12 @@ EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); */ int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; - if (ops && ops->dev_enable_feat) - return ops->dev_enable_feat(dev, feat); + if (ops->dev_enable_feat) + return ops->dev_enable_feat(dev, feat); + } return -ENODEV; } @@ -2894,10 +2860,12 @@ EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); */ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; - if (ops && ops->dev_disable_feat) - return ops->dev_disable_feat(dev, feat); + if (ops->dev_disable_feat) + return ops->dev_disable_feat(dev, feat); + } return -EBUSY; } @@ -2905,10 +2873,12 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + if (dev->iommu && dev->iommu->iommu_dev) { + const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; - if (ops && ops->dev_feat_enabled) - return ops->dev_feat_enabled(dev, feat); + if (ops->dev_feat_enabled) + return ops->dev_feat_enabled(dev, feat); + } return false; } diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index e6e2fa85271c..b7ecd5b08039 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,11 +22,28 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); static void init_iova_rcaches(struct iova_domain *iovad); +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); static void fq_destroy_all_entries(struct iova_domain *iovad); static void fq_flush_timeout(struct timer_list *t); + +static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) +{ + struct iova_domain *iovad; + + iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead); + + free_cpu_cached_iovas(cpu, iovad); + return 0; +} + static void free_global_cached_iovas(struct iova_domain *iovad); +static struct iova *to_iova(struct rb_node *node) +{ + return rb_entry(node, struct iova, node); +} + void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn) @@ -51,6 +68,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); rb_insert_color(&iovad->anchor.node, &iovad->rbroot); + cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, &iovad->cpuhp_dead); init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -136,7 +154,7 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - cached_iova = rb_entry(iovad->cached32_node, struct iova, node); + cached_iova = to_iova(iovad->cached32_node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && free->pfn_lo >= cached_iova->pfn_lo)) { @@ -144,11 +162,48 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) iovad->max32_alloc_size = iovad->dma_32bit_pfn; } - cached_iova = rb_entry(iovad->cached_node, struct iova, node); + cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) iovad->cached_node = rb_next(&free->node); } +static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn) +{ + struct rb_node *node, *next; + /* + * Ideally what we'd like to judge here is whether limit_pfn is close + * enough to the highest-allocated IOVA that starting the allocation + * walk from the anchor node will be quicker than this initial work to + * find an exact starting point (especially if that ends up being the + * anchor node anyway). This is an incredibly crude approximation which + * only really helps the most likely case, but is at least trivially easy. + */ + if (limit_pfn > iovad->dma_32bit_pfn) + return &iovad->anchor.node; + + node = iovad->rbroot.rb_node; + while (to_iova(node)->pfn_hi < limit_pfn) + node = node->rb_right; + +search_left: + while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn) + node = node->rb_left; + + if (!node->rb_left) + return node; + + next = node->rb_left; + while (next->rb_right) { + next = next->rb_right; + if (to_iova(next)->pfn_lo >= limit_pfn) { + node = next; + goto search_left; + } + } + + return node; +} + /* Insert the iova into domain rbtree by holding writer lock */ static void iova_insert_rbtree(struct rb_root *root, struct iova *iova, @@ -159,7 +214,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, new = (start) ? &start : &(root->rb_node); /* Figure out where to put new node */ while (*new) { - struct iova *this = rb_entry(*new, struct iova, node); + struct iova *this = to_iova(*new); parent = *new; @@ -198,7 +253,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, goto iova32_full; curr = __get_cached_rbnode(iovad, limit_pfn); - curr_iova = rb_entry(curr, struct iova, node); + curr_iova = to_iova(curr); retry_pfn = curr_iova->pfn_hi + 1; retry: @@ -207,15 +262,15 @@ retry: new_pfn = (high_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - curr_iova = rb_entry(curr, struct iova, node); + curr_iova = to_iova(curr); } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn); if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { high_pfn = limit_pfn; low_pfn = retry_pfn; - curr = &iovad->anchor.node; - curr_iova = rb_entry(curr, struct iova, node); + curr = iova_find_limit(iovad, limit_pfn); + curr_iova = to_iova(curr); goto retry; } iovad->max32_alloc_size = size; @@ -257,10 +312,21 @@ int iova_cache_get(void) { mutex_lock(&iova_cache_mutex); if (!iova_cache_users) { + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL, + iova_cpuhp_dead); + if (ret) { + mutex_unlock(&iova_cache_mutex); + pr_err("Couldn't register cpuhp handler\n"); + return ret; + } + iova_cache = kmem_cache_create( "iommu_iova", sizeof(struct iova), 0, SLAB_HWCACHE_ALIGN, NULL); if (!iova_cache) { + cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); mutex_unlock(&iova_cache_mutex); pr_err("Couldn't create iova cache\n"); return -ENOMEM; @@ -282,8 +348,10 @@ void iova_cache_put(void) return; } iova_cache_users--; - if (!iova_cache_users) + if (!iova_cache_users) { + cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); kmem_cache_destroy(iova_cache); + } mutex_unlock(&iova_cache_mutex); } EXPORT_SYMBOL_GPL(iova_cache_put); @@ -331,7 +399,7 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) assert_spin_locked(&iovad->iova_rbtree_lock); while (node) { - struct iova *iova = rb_entry(node, struct iova, node); + struct iova *iova = to_iova(node); if (pfn < iova->pfn_lo) node = node->rb_left; @@ -467,7 +535,6 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) free_iova(iovad, pfn); } -EXPORT_SYMBOL_GPL(free_iova_fast); #define fq_ring_for_each(i, fq) \ for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) @@ -606,6 +673,9 @@ void put_iova_domain(struct iova_domain *iovad) { struct iova *iova, *tmp; + cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + free_iova_flush_queue(iovad); free_iova_rcaches(iovad); rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) @@ -617,7 +687,7 @@ static int __is_range_overlap(struct rb_node *node, unsigned long pfn_lo, unsigned long pfn_hi) { - struct iova *iova = rb_entry(node, struct iova, node); + struct iova *iova = to_iova(node); if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) return 1; @@ -685,7 +755,7 @@ reserve_iova(struct iova_domain *iovad, spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { - iova = rb_entry(node, struct iova, node); + iova = to_iova(node); __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); if ((pfn_lo >= iova->pfn_lo) && (pfn_hi <= iova->pfn_hi)) @@ -970,7 +1040,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) /* * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) */ -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { struct iova_cpu_rcache *cpu_rcache; struct iova_rcache *rcache; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6ecc007f07cd..cdda3a85fc08 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -17,6 +17,7 @@ #include <linux/iopoll.h> #include <linux/list.h> #include <linux/mfd/syscon.h> +#include <linux/module.h> #include <linux/of_address.h> #include <linux/of_iommu.h> #include <linux/of_irq.h> @@ -683,18 +684,12 @@ static const struct iommu_ops mtk_iommu_ops = { .get_resv_regions = mtk_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, + .owner = THIS_MODULE, }; static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) { u32 regval; - int ret; - - ret = clk_prepare_enable(data->bclk); - if (ret) { - dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret); - return ret; - } if (data->plat_data->m4u_plat == M4U_MT8173) { regval = F_MMU_PREFETCH_RT_REPLACE_MOD | @@ -760,7 +755,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); - clk_disable_unprepare(data->bclk); dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq); return -ENODEV; } @@ -977,14 +971,19 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) void __iomem *base = data->base; int ret; - /* Avoid first resume to affect the default value of registers below. */ - if (!m4u_dom) - return 0; ret = clk_prepare_enable(data->bclk); if (ret) { dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); return ret; } + + /* + * Uppon first resume, only enable the clk and return, since the values of the + * registers are not yet set. + */ + if (!m4u_dom) + return 0; + writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL); writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); @@ -1079,16 +1078,7 @@ static struct platform_driver mtk_iommu_driver = { .pm = &mtk_iommu_pm_ops, } }; +module_platform_driver(mtk_iommu_driver); -static int __init mtk_iommu_init(void) -{ - int ret; - - ret = platform_driver_register(&mtk_iommu_driver); - if (ret != 0) - pr_err("Failed to register MTK IOMMU driver\n"); - - return ret; -} - -subsys_initcall(mtk_iommu_init) +MODULE_DESCRIPTION("IOMMU API for MediaTek M4U implementations"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 82ddfe9170d4..8ba9a2ec5509 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -20,6 +20,7 @@ #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/list.h> +#include <linux/module.h> #include <linux/of_address.h> #include <linux/of_iommu.h> #include <linux/of_irq.h> @@ -423,23 +424,21 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args iommu_spec; - struct of_phandle_iterator it; struct mtk_iommu_data *data; - int err; + int err, idx = 0; - of_for_each_phandle(&it, err, dev->of_node, "iommus", - "#iommu-cells", -1) { - int count = of_phandle_iterator_args(&it, iommu_spec.args, - MAX_PHANDLE_ARGS); - iommu_spec.np = of_node_get(it.node); - iommu_spec.args_count = count; + while (!of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", + idx, &iommu_spec)) { - mtk_iommu_create_mapping(dev, &iommu_spec); + err = mtk_iommu_create_mapping(dev, &iommu_spec); + of_node_put(iommu_spec.np); + if (err) + return ERR_PTR(err); /* dev->iommu_fwspec might have changed */ fwspec = dev_iommu_fwspec_get(dev); - - of_node_put(iommu_spec.np); + idx++; } if (!fwspec || fwspec->ops != &mtk_iommu_ops) @@ -529,6 +528,7 @@ static const struct iommu_ops mtk_iommu_ops = { .def_domain_type = mtk_iommu_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, + .owner = THIS_MODULE, }; static const struct of_device_id mtk_iommu_of_ids[] = { @@ -547,10 +547,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct resource *res; struct component_match *match = NULL; - struct of_phandle_args larb_spec; - struct of_phandle_iterator it; void *protect; - int larb_nr, ret, err; + int larb_nr, ret, i; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -578,35 +576,33 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (IS_ERR(data->bclk)) return PTR_ERR(data->bclk); - larb_nr = 0; - of_for_each_phandle(&it, err, dev->of_node, - "mediatek,larbs", NULL, 0) { + larb_nr = of_count_phandle_with_args(dev->of_node, + "mediatek,larbs", NULL); + if (larb_nr < 0) + return larb_nr; + + for (i = 0; i < larb_nr; i++) { + struct device_node *larbnode; struct platform_device *plarbdev; - int count = of_phandle_iterator_args(&it, larb_spec.args, - MAX_PHANDLE_ARGS); - if (count) - continue; + larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); + if (!larbnode) + return -EINVAL; - larb_spec.np = of_node_get(it.node); - if (!of_device_is_available(larb_spec.np)) + if (!of_device_is_available(larbnode)) { + of_node_put(larbnode); continue; + } - plarbdev = of_find_device_by_node(larb_spec.np); + plarbdev = of_find_device_by_node(larbnode); if (!plarbdev) { - plarbdev = of_platform_device_create( - larb_spec.np, NULL, - platform_bus_type.dev_root); - if (!plarbdev) { - of_node_put(larb_spec.np); - return -EPROBE_DEFER; - } + of_node_put(larbnode); + return -EPROBE_DEFER; } + data->larb_imu[i].dev = &plarbdev->dev; - data->larb_imu[larb_nr].dev = &plarbdev->dev; component_match_add_release(dev, &match, release_of, - compare_of, larb_spec.np); - larb_nr++; + compare_of, larbnode); } platform_set_drvdata(pdev, data); @@ -624,12 +620,26 @@ static int mtk_iommu_probe(struct platform_device *pdev) ret = iommu_device_register(&data->iommu); if (ret) - return ret; + goto out_sysfs_remove; - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (!iommu_present(&platform_bus_type)) { + ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + if (ret) + goto out_dev_unreg; + } + + ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); + if (ret) + goto out_bus_set_null; + return ret; - return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); +out_bus_set_null: + bus_set_iommu(&platform_bus_type, NULL); +out_dev_unreg: + iommu_device_unregister(&data->iommu); +out_sysfs_remove: + iommu_device_sysfs_remove(&data->iommu); + return ret; } static int mtk_iommu_remove(struct platform_device *pdev) @@ -691,9 +701,7 @@ static struct platform_driver mtk_iommu_driver = { .pm = &mtk_iommu_pm_ops, } }; +module_platform_driver(mtk_iommu_driver); -static int __init m4u_init(void) -{ - return platform_driver_register(&mtk_iommu_driver); -} -subsys_initcall(m4u_init); +MODULE_DESCRIPTION("IOMMU API for MediaTek M4U v1 implementations"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e505b9130a1c..a9d2df001149 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -210,11 +210,6 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, of_pci_iommu_init, &info); } else { err = of_iommu_configure_device(master_np, dev, id); - - fwspec = dev_iommu_fwspec_get(dev); - if (!err && fwspec) - of_property_read_u32(master_np, "pasid-num-bits", - &fwspec->num_pasid_bits); } /* diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 97eb62f667d2..602aab98c079 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -849,12 +849,11 @@ static struct iommu_device *tegra_smmu_probe_device(struct device *dev) smmu = tegra_smmu_find(args.np); if (smmu) { err = tegra_smmu_configure(smmu, dev, &args); - of_node_put(args.np); - if (err < 0) + if (err < 0) { + of_node_put(args.np); return ERR_PTR(err); - - break; + } } of_node_put(args.np); |