diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- | drivers/iommu/intel/iommu.c | 337 |
1 files changed, 158 insertions, 179 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 11652e0bcab3..50eb9aed47cc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,6 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" @@ -97,6 +96,81 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } +static int device_rid_cmp_key(const void *key, const struct rb_node *node) +{ + struct device_domain_info *info = + rb_entry(node, struct device_domain_info, node); + const u16 *rid_lhs = key; + + if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) + return -1; + + if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) + return 1; + + return 0; +} + +static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) +{ + struct device_domain_info *info = + rb_entry(lhs, struct device_domain_info, node); + u16 key = PCI_DEVID(info->bus, info->devfn); + + return device_rid_cmp_key(&key, rhs); +} + +/* + * Looks up an IOMMU-probed device using its source ID. + * + * Returns the pointer to the device if there is a match. Otherwise, + * returns NULL. + * + * Note that this helper doesn't guarantee that the device won't be + * released by the iommu subsystem after being returned. The caller + * should use its own synchronization mechanism to avoid the device + * being released during its use if its possibly the case. + */ +struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) +{ + struct device_domain_info *info = NULL; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); + if (node) + info = rb_entry(node, struct device_domain_info, node); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + + return info ? info->dev : NULL; +} + +static int device_rbtree_insert(struct intel_iommu *iommu, + struct device_domain_info *info) +{ + struct rb_node *curr; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); + if (WARN_ON(curr)) + return -EEXIST; + + return 0; +} + +static void device_rbtree_remove(struct device_domain_info *info) +{ + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->device_rbtree_lock, flags); + rb_erase(&info->node, &iommu->device_rbtree); + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. @@ -1776,34 +1850,17 @@ static void domain_exit(struct dmar_domain *domain) kfree(domain); } -/* - * Get the PASID directory size for scalable mode context entry. - * Value of X in the PDTS field of a scalable mode context entry - * indicates PASID directory with 2^(X + 7) entries. - */ -static unsigned long context_get_sm_pds(struct pasid_table *table) -{ - unsigned long pds, max_pde; - - max_pde = table->max_pasid >> PASID_PDE_SHIFT; - pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); - if (pds < 7) - return 0; - - return pds - 7; -} - static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, - struct pasid_table *table, u8 bus, u8 devfn) { struct device_domain_info *info = domain_lookup_dev_info(domain, iommu, bus, devfn); u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; + struct dma_pte *pgd = domain->pgd; struct context_entry *context; - int ret; + int agaw, ret; if (hw_pass_through && domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; @@ -1846,65 +1903,37 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_clear_entry(context); + context_set_domain_id(context, did); - if (sm_supported(iommu)) { - unsigned long pds; - - /* Setup the PASID DIR pointer: */ - pds = context_get_sm_pds(table); - context->lo = (u64)virt_to_phys(table->table) | - context_pdts(pds); - - /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, IOMMU_NO_PASID); - + if (translation != CONTEXT_TT_PASS_THROUGH) { /* - * Setup the Device-TLB enable bit and Page request - * Enable bit: + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. */ - if (info && info->ats_supported) - context_set_sm_dte(context); - if (info && info->pri_supported) - context_set_sm_pre(context); - if (info && info->pasid_supported) - context_set_pasid(context); - } else { - struct dma_pte *pgd = domain->pgd; - int agaw; - - context_set_domain_id(context, did); - - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; - - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } - context_set_translation_type(context, translation); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); } + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); if (!ecap_coherent(iommu->ecap)) @@ -1934,43 +1963,29 @@ out_unlock: return ret; } -struct domain_context_mapping_data { - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct pasid_table *table; -}; - static int domain_context_mapping_cb(struct pci_dev *pdev, u16 alias, void *opaque) { - struct domain_context_mapping_data *data = opaque; + struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain = opaque; - return domain_context_mapping_one(data->domain, data->iommu, - data->table, PCI_BUS_NUM(alias), - alias & 0xff); + return domain_context_mapping_one(domain, iommu, + PCI_BUS_NUM(alias), alias & 0xff); } static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct domain_context_mapping_data data; struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; - struct pasid_table *table; - - table = intel_pasid_get_table(dev); if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, table, - bus, devfn); - - data.domain = domain; - data.iommu = iommu; - data.table = table; + return domain_context_mapping_one(domain, iommu, bus, devfn); return pci_for_each_dma_alias(to_pci_dev(dev), - &domain_context_mapping_cb, &data); + domain_context_mapping_cb, domain); } /* Returns a number of VTD pages, but aligned to MM page size */ @@ -2160,9 +2175,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 struct context_entry *context; u16 did_old; - if (!iommu) - return; - spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) { @@ -2170,14 +2182,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - if (sm_supported(iommu)) { - if (hw_pass_through && domain_type_is_si(info->domain)) - did_old = FLPT_DEFAULT_DID; - else - did_old = domain_id_iommu(info->domain, iommu); - } else { - did_old = context_domain_id(context); - } + did_old = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); @@ -2188,9 +2193,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - if (sm_supported(iommu)) - qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0); - iommu->flush.flush_iotlb(iommu, did_old, 0, @@ -2330,28 +2332,19 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&domain->lock, flags); - /* PASID table is mandatory for a PCI device in scalable mode. */ - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - /* Setup the PASID entry for requests without PASID: */ - if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, - dev, IOMMU_NO_PASID); - else if (domain->use_first_level) - ret = domain_setup_first_level(iommu, domain, dev, - IOMMU_NO_PASID); - else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, IOMMU_NO_PASID); - if (ret) { - dev_err(dev, "Setup RID2PASID failed\n"); - device_block_translation(dev); - return ret; - } - } + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (!sm_supported(iommu)) + ret = domain_context_mapping(domain, dev); + else if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + else if (domain->use_first_level) + ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - ret = domain_context_mapping(domain, dev); if (ret) { - dev_err(dev, "Domain context map failed\n"); device_block_translation(dev); return ret; } @@ -2712,10 +2705,6 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } -#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - dmar_map_gfx = 0; -#endif - if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; @@ -3799,30 +3788,6 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *domain = info->domain; - struct intel_iommu *iommu = info->iommu; - unsigned long flags; - - if (!dev_is_real_dma_subdevice(info->dev)) { - if (dev_is_pci(info->dev) && sm_supported(iommu)) - intel_pasid_tear_down_entry(iommu, info->dev, - IOMMU_NO_PASID, false); - - iommu_disable_pci_caps(info); - domain_context_clear(info); - } - - spin_lock_irqsave(&domain->lock, flags); - list_del(&info->link); - spin_unlock_irqrestore(&domain->lock, flags); - - domain_detach_iommu(domain, iommu); - info->domain = NULL; -} - /* * Clear the page table pointer in context or pasid table entries so that * all DMA requests without PASID from the device are blocked. If the page @@ -4027,6 +3992,10 @@ int prepare_domain_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + context_copied(iommu, info->bus, info->devfn)) + return intel_pasid_setup_sm_context(dev); + return 0; } @@ -4330,26 +4299,50 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } dev_iommu_priv_set(dev, info); + ret = device_rbtree_insert(iommu, info); + if (ret) + goto free; if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - kfree(info); - return ERR_PTR(ret); + goto clear_rbtree; + } + + if (!context_copied(iommu, info->bus, info->devfn)) { + ret = intel_pasid_setup_sm_context(dev); + if (ret) + goto free_table; } } intel_iommu_debugfs_create_dev(info); return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + mutex_lock(&iommu->iopf_lock); + device_rbtree_remove(info); + mutex_unlock(&iommu->iopf_lock); + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && + !context_copied(iommu, info->bus, info->devfn)) + intel_pasid_teardown_sm_context(dev); - dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); @@ -4492,23 +4485,15 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); - if (ret) - goto iopf_remove_device; - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto iopf_unregister_handler; + if (ret) { + iopf_queue_remove_device(iommu->iopf_queue, dev); + return ret; + } + info->pri_enabled = 1; return 0; - -iopf_unregister_handler: - iommu_unregister_device_fault_handler(dev); -iopf_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4529,14 +4514,7 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - - /* - * With PRI disabled and outstanding PRQs drained, unregistering - * fault handler and removing device from iopf queue should never - * fail. - */ - WARN_ON(iommu_unregister_device_fault_handler(dev)); - WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev)); + iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } @@ -4855,6 +4833,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = { const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, + .release_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, |