diff options
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- | drivers/iommu/intel-iommu.c | 400 |
1 files changed, 318 insertions, 82 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 35365f046923..f1042daef9ad 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -34,6 +34,7 @@ #include <linux/mempool.h> #include <linux/memory.h> #include <linux/timer.h> +#include <linux/io.h> #include <linux/iova.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> @@ -418,10 +419,13 @@ struct device_domain_info { struct list_head global; /* link to global list */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct { - u8 enabled:1; - u8 qdep; - } ats; /* ATS state */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ @@ -497,13 +501,37 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; +static int iommu_identity_mapping; + +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 -/* We only actually use ECS when PASID support (on the new bit 40) - * is also advertised. Some early implementations — the ones with - * PASID support on bit 28 — have issues even when we *only* use - * extended root/context tables. */ +/* Broadwell and Skylake have broken ECS support — normal so-called "second + * level" translation of DMA requests-without-PASID doesn't actually happen + * unless you also set the NESTE bit in an extended context-entry. Which of + * course means that SVM doesn't work because it's trying to do nested + * translation of the physical addresses it finds in the process page tables, + * through the IOVA->phys mapping found in the "second level" page tables. + * + * The VT-d specification was retroactively changed to change the definition + * of the capability bits and pretend that Broadwell/Skylake never happened... + * but unfortunately the wrong bit was changed. It's ECS which is broken, but + * for some reason it was the PASID capability bit which was redefined (from + * bit 28 on BDW/SKL to bit 40 in future). + * + * So our test for ECS needs to eschew those implementations which set the old + * PASID capabiity bit 28, since those are the ones on which ECS is broken. + * Unless we are working around the 'pasid28' limitations, that is, by putting + * the device into passthrough mode for normal DMA and thus masking the bug. + */ #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - ecap_pasid(iommu->ecap)) + (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) +/* PASID support is thus enabled if ECS is enabled and *either* of the old + * or new capability bits are set. */ +#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ + (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -566,6 +594,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } str += strcspn(str, ","); @@ -1407,37 +1440,22 @@ static struct device_domain_info * iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) { - bool found = false; struct device_domain_info *info; - struct pci_dev *pdev; assert_spin_locked(&device_domain_lock); - if (!ecap_dev_iotlb_support(iommu->ecap)) - return NULL; - if (!iommu->qi) return NULL; list_for_each_entry(info, &domain->devices, link) if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - found = true; + if (info->ats_supported && info->dev) + return info; break; } - if (!found || !info->dev || !dev_is_pci(info->dev)) - return NULL; - - pdev = to_pci_dev(info->dev); - - if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) - return NULL; - - if (!dmar_find_matched_atsr_unit(pdev)) - return NULL; - - return info; + return NULL; } static void iommu_enable_dev_iotlb(struct device_domain_info *info) @@ -1448,20 +1466,48 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - if (pci_enable_ats(pdev, VTD_PAGE_SHIFT)) - return; - info->ats.enabled = 1; - info->ats.qdep = pci_ats_queue_depth(pdev); +#ifdef CONFIG_INTEL_IOMMU_SVM + /* The PCIe spec, in its wisdom, declares that the behaviour of + the device if you enable PASID support after ATS support is + undefined. So always enable PASID support on devices which + have it, even if we can't yet know if we're ever going to + use it. */ + if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + info->pri_enabled = 1; +#endif + if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + info->ats_enabled = 1; + info->ats_qdep = pci_ats_queue_depth(pdev); + } } static void iommu_disable_dev_iotlb(struct device_domain_info *info) { - if (!info->ats.enabled) + struct pci_dev *pdev; + + if (dev_is_pci(info->dev)) return; - pci_disable_ats(to_pci_dev(info->dev)); - info->ats.enabled = 0; + pdev = to_pci_dev(info->dev); + + if (info->ats_enabled) { + pci_disable_ats(pdev); + info->ats_enabled = 0; + } +#ifdef CONFIG_INTEL_IOMMU_SVM + if (info->pri_enabled) { + pci_disable_pri(pdev); + info->pri_enabled = 0; + } + if (info->pasid_enabled) { + pci_disable_pasid(pdev); + info->pasid_enabled = 0; + } +#endif } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1473,11 +1519,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->ats.enabled) + if (!info->ats_enabled) continue; sid = info->bus << 8 | info->devfn; - qdep = info->ats.qdep; + qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1667,6 +1713,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) { + if (ecap_prs(iommu->ecap)) + intel_svm_finish_prq(iommu); + intel_svm_free_pasid_tables(iommu); + } +#endif } static struct dmar_domain *alloc_domain(int flags) @@ -1934,8 +1988,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - translation = info ? CONTEXT_TT_DEV_IOTLB : - CONTEXT_TT_MULTI_LEVEL; + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); context_set_address_width(context, iommu->agaw); @@ -2115,15 +2171,19 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -ENOMEM; /* It is large page*/ if (largepage_lvl > 1) { + unsigned long nr_superpages, end_pfn; + pteval |= DMA_PTE_LARGE_PAGE; lvl_pages = lvl_to_nr_pages(largepage_lvl); + + nr_superpages = sg_res / lvl_pages; + end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; + /* * Ensure that old small page tables are - * removed to make room for superpage, - * if they exist. + * removed to make room for superpage(s). */ - dma_pte_free_pagetable(domain, iov_pfn, - iov_pfn + lvl_pages - 1); + dma_pte_free_pagetable(domain, iov_pfn, end_pfn); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -2269,12 +2329,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->bus = bus; info->devfn = devfn; - info->ats.enabled = 0; - info->ats.qdep = 0; + info->ats_supported = info->pasid_supported = info->pri_supported = 0; + info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; + info->ats_qdep = 0; info->dev = dev; info->domain = domain; info->iommu = iommu; + if (dev && dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(info->dev); + + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + dmar_find_matched_atsr_unit(pdev)) + info->ats_supported = 1; + + if (ecs_enabled(iommu)) { + if (pasid_enabled(iommu)) { + int features = pci_pasid_features(pdev); + if (features >= 0) + info->pasid_supported = features | 1; + } + + if (info->ats_supported && ecap_prs(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI)) + info->pri_supported = 1; + } + } + spin_lock_irqsave(&device_domain_lock, flags); if (dev) found = find_domain(dev); @@ -2400,11 +2482,6 @@ found_domain: return domain; } -static int iommu_identity_mapping; -#define IDENTMAP_ALL 1 -#define IDENTMAP_GFX 2 -#define IDENTMAP_AZALIA 4 - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) @@ -2430,17 +2507,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) +static int domain_prepare_identity_map(struct device *dev, + struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - /* For _hardware_ passthrough, don't bother. But for software passthrough, we do it anyway -- it may indicate a memory range which is reserved in E820, so which didn't get set @@ -2460,8 +2531,7 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } if (end >> agaw_to_width(domain->agaw)) { @@ -2471,18 +2541,27 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } - ret = iommu_domain_identity_map(domain, start, end); - if (ret) - goto error; + return iommu_domain_identity_map(domain, start, end); +} - return 0; +static int iommu_prepare_identity_map(struct device *dev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = domain_prepare_identity_map(dev, domain, start, end); + if (ret) + domain_exit(domain); - error: - domain_exit(domain); return ret; } @@ -2808,18 +2887,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu) } static int copy_context_table(struct intel_iommu *iommu, - struct root_entry __iomem *old_re, + struct root_entry *old_re, struct context_entry **tbl, int bus, bool ext) { int tbl_idx, pos = 0, idx, devfn, ret = 0, did; - struct context_entry __iomem *old_ce = NULL; struct context_entry *new_ce = NULL, ce; + struct context_entry *old_ce = NULL; struct root_entry re; phys_addr_t old_ce_phys; tbl_idx = ext ? bus * 2 : bus; - memcpy_fromio(&re, old_re, sizeof(re)); + memcpy(&re, old_re, sizeof(re)); for (devfn = 0; devfn < 256; devfn++) { /* First calculate the correct index */ @@ -2854,7 +2933,8 @@ static int copy_context_table(struct intel_iommu *iommu, } ret = -ENOMEM; - old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE); + old_ce = memremap(old_ce_phys, PAGE_SIZE, + MEMREMAP_WB); if (!old_ce) goto out; @@ -2866,7 +2946,7 @@ static int copy_context_table(struct intel_iommu *iommu, } /* Now copy the context entry */ - memcpy_fromio(&ce, old_ce + idx, sizeof(ce)); + memcpy(&ce, old_ce + idx, sizeof(ce)); if (!__context_present(&ce)) continue; @@ -2902,7 +2982,7 @@ static int copy_context_table(struct intel_iommu *iommu, __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); out_unmap: - iounmap(old_ce); + memunmap(old_ce); out: return ret; @@ -2910,8 +2990,8 @@ out: static int copy_translation_tables(struct intel_iommu *iommu) { - struct root_entry __iomem *old_rt; struct context_entry **ctxt_tbls; + struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; unsigned long flags; @@ -2936,7 +3016,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (!old_rt_phys) return -EINVAL; - old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE); + old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); if (!old_rt) return -ENOMEM; @@ -2985,7 +3065,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) ret = 0; out_unmap: - iounmap(old_rt); + memunmap(old_rt); return ret; } @@ -3096,6 +3176,10 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif } if (iommu_pass_through) @@ -3183,6 +3267,13 @@ domains_done: iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto free_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -3242,7 +3333,10 @@ static struct iova *intel_alloc_iova(struct device *dev, static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { + struct dmar_rmrr_unit *rmrr; struct dmar_domain *domain; + struct device *i_dev; + int i, ret; domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { @@ -3251,6 +3345,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) return NULL; } + /* We have a new domain - setup possible RMRRs for the device */ + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, i_dev) { + if (i_dev != dev) + continue; + + ret = domain_prepare_identity_map(dev, domain, + rmrr->base_address, + rmrr->end_address); + if (ret) + dev_err(dev, "Mapping reserved region failed\n"); + } + } + rcu_read_unlock(); + return domain; } @@ -3536,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA32; } - if (flags & __GFP_WAIT) { + if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; page = dma_alloc_from_contiguous(dev, count, order); @@ -4111,6 +4222,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif + if (dmaru->ignored) { /* * we always have to disable PMRs or DMA may fail on this device @@ -4122,6 +4238,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto disable_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4190,14 +4314,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) dev = pci_physfn(dev); for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; - if (!bridge || !pci_is_pcie(bridge) || + /* If it's an integrated device, allow ATS */ + if (!bridge) + return 1; + /* Connected via non-PCIe: no ATS */ + if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; + /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; } - if (!bridge) - return 0; rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { @@ -4861,6 +4988,114 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +{ + struct device_domain_info *info; + struct context_entry *context; + struct dmar_domain *domain; + unsigned long flags; + u64 ctx_lo; + int ret; + + domain = get_valid_domain_for_dev(sdev->dev); + if (!domain) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + + ret = -EINVAL; + info = sdev->dev->archdata.iommu; + if (!info || !info->pasid_supported) + goto out; + + context = iommu_context_addr(iommu, info->bus, info->devfn, 0); + if (WARN_ON(!context)) + goto out; + + ctx_lo = context[0].lo; + + sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + + if (!(ctx_lo & CONTEXT_PASIDE)) { + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + wmb(); + /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both + * extended to permit requests-with-PASID if the PASIDE bit + * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, + * however, the PASIDE bit is ignored and requests-with-PASID + * are unconditionally blocked. Which makes less sense. + * So convert from CONTEXT_TT_PASS_THROUGH to one of the new + * "guest mode" translation types depending on whether ATS + * is available or not. Annoyingly, we can't use the new + * modes *unless* PASIDE is set. */ + if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { + ctx_lo &= ~CONTEXT_TT_MASK; + if (info->ats_supported) + ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; + else + ctx_lo |= CONTEXT_TT_PT_PASID << 2; + } + ctx_lo |= CONTEXT_PASIDE; + if (iommu->pasid_state_table) + ctx_lo |= CONTEXT_DINVE; + if (info->pri_supported) + ctx_lo |= CONTEXT_PRS; + context[0].lo = ctx_lo; + wmb(); + iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + + /* Enable PASID support in the device, if it wasn't already */ + if (!info->pasid_enabled) + iommu_enable_dev_iotlb(info); + + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + ret = 0; + + out: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} + +struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + if (iommu_dummy(dev)) { + dev_warn(dev, + "No IOMMU translation for device; cannot enable SVM\n"); + return NULL; + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if ((!iommu)) { + dev_err(dev, "No IOMMU for device; cannot enable SVM\n"); + return NULL; + } + + if (!iommu->pasid_table) { + dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + return NULL; + } + + return iommu; +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + static const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4873,6 +5108,7 @@ static const struct iommu_ops intel_iommu_ops = { .iova_to_phys = intel_iommu_iova_to_phys, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, + .device_group = pci_device_group, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; |