Diffstat (limited to 'drivers/iommu')
-rw-r--r--	drivers/iommu/amd/iommu.c               |   5
-rw-r--r--	drivers/iommu/dma-iommu.c               |  18
-rw-r--r--	drivers/iommu/intel/iommu.c             |  38
-rw-r--r--	drivers/iommu/intel/iommu.h             |   2
-rw-r--r--	drivers/iommu/intel/irq_remapping.c     |   3
-rw-r--r--	drivers/iommu/intel/pasid.c             |   2
-rw-r--r--	drivers/iommu/iommu.c                   |  77
-rw-r--r--	drivers/iommu/iommufd/Kconfig           |   2
-rw-r--r--	drivers/iommu/iommufd/device.c          |   8
-rw-r--r--	drivers/iommu/iommufd/iommufd_private.h |   2
-rw-r--r--	drivers/iommu/iommufd/main.c            |   3
-rw-r--r--	drivers/iommu/iommufd/pages.c           |   6
-rw-r--r--	drivers/iommu/iommufd/vfio_compat.c     | 107
-rw-r--r--	drivers/iommu/s390-iommu.c              |  17
14 files changed, 191 insertions, 99 deletions
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cbeaab55c0db..321d50e9df5b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2271,8 +2271,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
 	switch (cap) {
 	case IOMMU_CAP_CACHE_COHERENCY:
 		return true;
-	case IOMMU_CAP_INTR_REMAP:
-		return (irq_remapping_enabled == 1);
 	case IOMMU_CAP_NOEXEC:
 		return false;
 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
@@ -3671,7 +3669,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 	}
 
 	irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
-	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+				   IRQ_DOMAIN_FLAG_ISOLATED_MSI;
 
 	if (amd_iommu_np_cache)
 		iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f798c44e0903..c99e4bc55d8c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -713,7 +713,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 	if (!iova)
 		return DMA_MAPPING_ERROR;
 
-	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
+	if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
 		iommu_dma_free_iova(cookie, iova, size, NULL);
 		return DMA_MAPPING_ERROR;
 	}
@@ -822,7 +822,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
 	if (!iova)
 		goto out_free_pages;
 
-	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
+	/*
+	 * Remove the zone/policy flags from the GFP - these are applied to the
+	 * __iommu_dma_alloc_pages() but are not used for the supporting
+	 * internal allocations that follow.
+	 */
+	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);
+
+	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
 		goto out_free_iova;
 
 	if (!(ioprot & IOMMU_CACHE)) {
@@ -833,7 +840,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
 			arch_dma_prep_coherent(sg_page(sg), sg->length);
 	}
 
-	ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
+	ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
+			   gfp);
 	if (ret < 0 || ret < size)
 		goto out_free_sg;
 
@@ -1281,7 +1289,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	 * We'll leave any physical concatenation to the IOMMU driver's
 	 * implementation - it knows better than we do.
 	 */
-	ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+	ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
 	if (ret < 0 || ret < iova_len)
 		goto out_free_iova;
 
@@ -1615,7 +1623,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	if (!iova)
 		goto out_free_page;
 
-	if (iommu_map(domain, iova, msi_addr, size, prot))
+	if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
 		goto out_free_iova;
 
 	INIT_LIST_HEAD(&msi_page->list);
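The comment added above explains why dma-iommu now masks zone/policy GFP flags before its supporting allocations: a caller may pass such a flag to steer only the backing pages. As a hypothetical, caller-side sketch (not part of this patch; the helper name, device and size are illustrative assumptions), this is roughly how a zone flag reaches __iommu_dma_alloc_noncontiguous() through the DMA API:

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

/*
 * Hypothetical example, not from this series: request 64K of noncontiguous
 * DMA memory whose backing pages come from the DMA32 zone.  The zone flag is
 * only meant for __iommu_dma_alloc_pages(); dma-iommu now strips it before
 * the sg_table and IOMMU page-table allocations shown in the hunk above.
 */
static struct sg_table *example_alloc_dma32(struct device *dev)
{
        return dma_alloc_noncontiguous(dev, SZ_64K, DMA_BIDIRECTIONAL,
                                       GFP_KERNEL | __GFP_DMA32, 0);
}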
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..6fd223cf639f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
 }
 __setup("intel_iommu=", intel_iommu_setup);
 
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
 {
 	struct page *page;
 	void *vaddr = NULL;
 
-	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+	page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
 	if (page)
 		vaddr = page_address(page);
 	return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 		if (!alloc)
 			return NULL;
 
-		context = alloc_pgtable_page(iommu->node);
+		context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
 		if (!context)
 			return NULL;
 
@@ -908,7 +908,8 @@ pgtable_walk:
 #endif
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn, int *target_level)
+				      unsigned long pfn, int *target_level,
+				      gfp_t gfp)
 {
 	struct dma_pte *parent, *pte;
 	int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 		if (!dma_pte_present(pte)) {
 			uint64_t pteval;
 
-			tmp_page = alloc_pgtable_page(domain->nid);
+			tmp_page = alloc_pgtable_page(domain->nid, gfp);
 
 			if (!tmp_page)
 				return NULL;
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
 	struct root_entry *root;
 
-	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+	root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
 	if (!root) {
 		pr_err("Allocating root entry for %s failed\n",
 			iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
 	while (start_pfn <= end_pfn) {
 		if (!pte)
-			pte = pfn_to_dma_pte(domain, start_pfn, &level);
+			pte = pfn_to_dma_pte(domain, start_pfn, &level,
+					     GFP_ATOMIC);
 
 		if (dma_pte_present(pte)) {
 			dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
 static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
+		 unsigned long phys_pfn, unsigned long nr_pages, int prot,
+		 gfp_t gfp)
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
 					phys_pfn, nr_pages);
 
-			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+					     gfp);
 			if (!pte)
 				return -ENOMEM;
 			first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
 
 	return __domain_mapping(domain, first_vpfn,
 				first_vpfn, last_vpfn - first_vpfn + 1,
-				DMA_PTE_READ|DMA_PTE_WRITE);
+				DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
 		if (!old_ce)
 			goto out;
 
-		new_ce = alloc_pgtable_page(iommu->node);
+		new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
 		if (!new_ce)
 			goto out_unmap;
 
@@ -4136,7 +4140,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->max_addr = 0;
 
 	/* always allocate the top pgd */
-	domain->pgd = alloc_pgtable_page(domain->nid);
+	domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
 	if (!domain->pgd)
 		return -ENOMEM;
 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	   the low bits of hpa would take us onto the next page */
 	size = aligned_nrpages(hpa, size);
 	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
-				hpa >> VTD_PAGE_SHIFT, size, prot);
+				hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
 }
 
 static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
 	/* Cope with horrid API which requires us to unmap more than the
 	   size argument if it happens to be a large-page mapping. */
-	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			       GFP_ATOMIC));
 
 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	int level = 0;
 	u64 phys = 0;
 
-	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			     GFP_ATOMIC);
 	if (pte && dma_pte_present(pte))
 		phys = dma_pte_addr(pte) +
 			(iova & (BIT_MASK(level_to_offset_bits(level) +
@@ -4464,8 +4470,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 	switch (cap) {
 	case IOMMU_CAP_CACHE_COHERENCY:
 		return true;
-	case IOMMU_CAP_INTR_REMAP:
-		return irq_remapping_enabled == 1;
 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
 		return dmar_platform_optin();
 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 06e61e474856..ca9a035e0110 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 
 extern int dmar_ir_support(void);
 
-void *alloc_pgtable_page(int node);
+void *alloc_pgtable_page(int node, gfp_t gfp);
 void free_pgtable_page(void *vaddr);
 void iommu_flush_write_buffer(struct intel_iommu *iommu);
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f58f5f57af78..6d01fa078c36 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 	}
 
 	irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
-	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+				   IRQ_DOMAIN_FLAG_ISOLATED_MSI;
 
 	if (cap_caching_mode(iommu->cap))
 		iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb3c7020028d..c5bf74e9372d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
 retry:
 	entries = get_pasid_table_from_pde(&dir[dir_index]);
 	if (!entries) {
-		entries = alloc_pgtable_page(info->iommu->node);
+		entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
 		if (!entries)
 			return NULL;
 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5f6a85aea501..501e8bcf1aaa 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -30,6 +30,7 @@
 #include <linux/cc_platform.h>
 #include <trace/events/iommu.h>
 #include <linux/sched/mm.h>
+#include <linux/msi.h>
 
 #include "dma-iommu.h"
 
@@ -930,7 +931,7 @@ map_end:
 			if (map_size) {
 				ret = iommu_map(domain, addr - map_size,
 						addr - map_size, map_size,
-						entry->prot);
+						entry->prot, GFP_KERNEL);
 				if (ret)
 					goto out;
 				map_size = 0;
@@ -1898,6 +1899,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
 EXPORT_SYMBOL_GPL(device_iommu_capable);
 
 /**
+ * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
+ *       for a group
+ * @group: Group to query
+ *
+ * IOMMU groups should not have differing values of
+ * msi_device_has_isolated_msi() for devices in a group. However nothing
+ * directly prevents this, so ensure mistakes don't result in isolation failures
+ * by checking that all the devices are the same.
+ */
+bool iommu_group_has_isolated_msi(struct iommu_group *group)
+{
+	struct group_device *group_dev;
+	bool ret = true;
+
+	mutex_lock(&group->mutex);
+	list_for_each_entry(group_dev, &group->devices, list)
+		ret &= msi_device_has_isolated_msi(group_dev->dev);
+	mutex_unlock(&group->mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
+
+/**
  * iommu_set_fault_handler() - set a fault handler for an iommu domain
  * @domain: iommu domain
  * @handler: fault handler
@@ -2360,34 +2384,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
 	return ret;
 }
 
-static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
-		      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
 	const struct iommu_domain_ops *ops = domain->ops;
 	int ret;
 
+	might_sleep_if(gfpflags_allow_blocking(gfp));
+
+	/* Discourage passing strange GFP flags */
+	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+				__GFP_HIGHMEM)))
+		return -EINVAL;
+
 	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
 	if (ret == 0 && ops->iotlb_sync_map)
 		ops->iotlb_sync_map(domain, iova, size);
 
 	return ret;
 }
-
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
-	      phys_addr_t paddr, size_t size, int prot)
-{
-	might_sleep();
-	return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
-}
 EXPORT_SYMBOL_GPL(iommu_map);
 
-int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
-		     phys_addr_t paddr, size_t size, int prot)
-{
-	return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(iommu_map_atomic);
-
 static size_t __iommu_unmap_pages(struct iommu_domain *domain,
 				  unsigned long iova, size_t size,
 				  struct iommu_iotlb_gather *iotlb_gather)
@@ -2477,9 +2494,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
-static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-			      struct scatterlist *sg, unsigned int nents, int prot,
-			      gfp_t gfp)
+ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		     struct scatterlist *sg, unsigned int nents, int prot,
+		     gfp_t gfp)
 {
 	const struct iommu_domain_ops *ops = domain->ops;
 	size_t len = 0, mapped = 0;
@@ -2487,6 +2504,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 	unsigned int i = 0;
 	int ret;
 
+	might_sleep_if(gfpflags_allow_blocking(gfp));
+
+	/* Discourage passing strange GFP flags */
+	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+				__GFP_HIGHMEM)))
+		return -EINVAL;
+
 	while (i <= nents) {
 		phys_addr_t s_phys = sg_phys(sg);
 
@@ -2526,21 +2550,8 @@ out_err:
 
 	return ret;
 }
-
-ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-		     struct scatterlist *sg, unsigned int nents, int prot)
-{
-	might_sleep();
-	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
-}
 EXPORT_SYMBOL_GPL(iommu_map_sg);
 
-ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
-			    struct scatterlist *sg, unsigned int nents, int prot)
-{
-	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
-}
-
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
  * @domain: the iommu domain where the fault has happened
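With iommu_map_atomic() and iommu_map_sg_atomic() removed, every caller now states its allocation context explicitly. A hedged caller-side sketch of the conversion this hunk implies (the helper and its arguments are illustrative, not from the patch):

#include <linux/iommu.h>
#include <linux/sizes.h>

/*
 * Hypothetical caller: map one page of physical memory into @domain.
 * Sleepable paths pass GFP_KERNEL; atomic paths pass GFP_ATOMIC, which is
 * what the removed iommu_map_atomic() did implicitly.  Zone/policy flags
 * (__GFP_DMA, __GFP_DMA32, __GFP_HIGHMEM, __GFP_COMP) are now rejected by
 * the WARN_ON_ONCE() added above.
 */
static int example_map_one_page(struct iommu_domain *domain,
                                unsigned long iova, phys_addr_t paddr)
{
        return iommu_map(domain, iova, paddr, SZ_4K,
                         IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
}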
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 8306616b6d81..ada693ea51a7 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
 	  removed.
 
 	  IOMMUFD VFIO container emulation is known to lack certain features
-	  of the native VFIO container, such as no-IOMMU support, peer-to-peer
+	  of the native VFIO container, such as peer-to-peer
 	  DMA mapping, PPC IOMMU support, as well as other potentially
 	  undiscovered gaps. This option is currently intended for the purpose
 	  of testing IOMMUFD with unmodified userspace supporting VFIO
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321af..a0c66f47a65a 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,7 +4,6 @@
 #include <linux/iommufd.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
-#include <linux/irqdomain.h>
 
 #include "io_pagetable.h"
 #include "iommufd_private.h"
@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
 	 * operation from the device (eg a simple DMA) cannot trigger an
 	 * interrupt outside this iommufd context.
 	 */
-	if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
-	    !irq_domain_check_msi_remap()) {
+	if (!iommu_group_has_isolated_msi(idev->group)) {
 		if (!allow_unsafe_interrupts)
 			return -EPERM;
 
@@ -346,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 		rc = iommufd_device_do_attach(idev, hwpt);
 		if (rc)
 			goto out_put_pt_obj;
-
-		mutex_lock(&hwpt->ioas->mutex);
-		list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
-		mutex_unlock(&hwpt->ioas->mutex);
 		break;
 	}
 	case IOMMUFD_OBJ_IOAS: {
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 222e86591f8a..9d7f71510ca1 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -18,6 +18,8 @@ struct iommufd_ctx {
 	struct xarray objects;
 
 	u8 account_mode;
+	/* Compatibility with VFIO no iommu */
+	u8 no_iommu_mode;
 	struct iommufd_ioas *vfio_ioas;
 };
 
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 083e6fcbe10a..3fbe636c3d8a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -252,9 +252,12 @@ union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_ioas_alloc alloc;
 	struct iommu_ioas_allow_iovas allow_iovas;
+	struct iommu_ioas_copy ioas_copy;
 	struct iommu_ioas_iova_ranges iova_ranges;
 	struct iommu_ioas_map map;
 	struct iommu_ioas_unmap unmap;
+	struct iommu_option option;
+	struct iommu_vfio_ioas vfio_ioas;
 #ifdef CONFIG_IOMMUFD_TEST
 	struct iommu_test_cmd test;
 #endif
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 1e1d3509efae..f8d92c9bb65b 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain,
 			size % PAGE_SIZE);
 
 	while (size) {
-		rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
+		rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot,
+			       GFP_KERNEL_ACCOUNT);
 		if (rc)
 			goto err_unmap;
 		iova += PAGE_SIZE;
@@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
 		else
 			rc = iommu_map(domain, iova,
 				       PFN_PHYS(batch->pfns[cur]) + page_offset,
-				       next_iova - iova, area->iommu_prot);
+				       next_iova - iova, area->iommu_prot,
+				       GFP_KERNEL_ACCOUNT);
 		if (rc)
 			goto err_unmap;
 		iova = next_iova;
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 3ceca0e8311c..514494a0025b 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -26,39 +26,84 @@ out_unlock:
 }
 
 /**
- * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
+ * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists
+ * @ictx: Context to operate on
+ * @out_ioas_id: The IOAS ID of the compatibility IOAS
+ *
+ * Return the ID of the current compatibility IOAS. The ID can be passed into
+ * other functions that take an ioas_id.
+ */
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+{
+	struct iommufd_ioas *ioas;
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+	*out_ioas_id = ioas->obj.id;
+	iommufd_put_object(&ioas->obj);
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached
+ * @ictx: Context to operate on
+ *
+ * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types.
+ */
+int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
+{
+	int ret;
+
+	xa_lock(&ictx->objects);
+	if (!ictx->vfio_ioas) {
+		ictx->no_iommu_mode = 1;
+		ret = 0;
+	} else {
+		ret = -EINVAL;
+	}
+	xa_unlock(&ictx->objects);
+	return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created
  * @ictx: Context to operate on
- * @out_ioas_id: The ioas_id the caller should use
  *
  * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
  * on since they do not have an IOAS ID input in their ABI. Only attaching a
- * group should cause a default creation of the internal ioas, this returns the
- * existing ioas if it has already been assigned somehow.
+ * group should cause a default creation of the internal ioas, this does nothing
+ * if an existing ioas has already been assigned somehow.
  */
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
 {
 	struct iommufd_ioas *ioas = NULL;
-	struct iommufd_ioas *out_ioas;
+	int ret;
 
 	ioas = iommufd_ioas_alloc(ictx);
 	if (IS_ERR(ioas))
 		return PTR_ERR(ioas);
 
 	xa_lock(&ictx->objects);
-	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj))
-		out_ioas = ictx->vfio_ioas;
-	else {
-		out_ioas = ioas;
-		ictx->vfio_ioas = ioas;
+	/*
+	 * VFIO won't allow attaching a container to both iommu and no iommu
+	 * operation
+	 */
+	if (ictx->no_iommu_mode) {
+		ret = -EINVAL;
+		goto out_abort;
 	}
-	xa_unlock(&ictx->objects);
 
-	*out_ioas_id = out_ioas->obj.id;
-	if (out_ioas != ioas) {
-		iommufd_put_object(&out_ioas->obj);
-		iommufd_object_abort(ictx, &ioas->obj);
-		return 0;
+	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
+		ret = 0;
+		iommufd_put_object(&ictx->vfio_ioas->obj);
+		goto out_abort;
 	}
+	ictx->vfio_ioas = ioas;
+	xa_unlock(&ictx->objects);
+
 	/*
 	 * An automatically created compat IOAS is treated as a userspace
 	 * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
@@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
 	 */
 	iommufd_object_finalize(ictx, &ioas->obj);
 	return 0;
+
+out_abort:
+	xa_unlock(&ictx->objects);
+	iommufd_object_abort(ictx, &ioas->obj);
+	return ret;
 }
-EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO);
 
 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
 {
@@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
 	case VFIO_UNMAP_ALL:
 		return 1;
 
+	case VFIO_NOIOMMU_IOMMU:
+		return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
+
 	case VFIO_DMA_CC_IOMMU:
 		return iommufd_vfio_cc_iommu(ictx);
 
@@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
 
 static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
 {
+	bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode);
 	struct iommufd_ioas *ioas = NULL;
 	int rc = 0;
 
-	if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
+	/*
+	 * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all
+	 * other ioctls. We let them keep working but they mostly fail since no
+	 * IOAS should exist.
+	 */
+	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU &&
+	    no_iommu_mode) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		return 0;
+	}
+
+	if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) ||
+	    no_iommu_mode)
 		return -EINVAL;
 
 	/* VFIO fails the set_iommu if there is no group */
@@ -381,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
 	};
 	size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 	struct vfio_info_cap_header __user *last_cap = NULL;
-	struct vfio_iommu_type1_info info;
+	struct vfio_iommu_type1_info info = {};
 	struct iommufd_ioas *ioas;
 	size_t total_cap_size;
 	int rc;
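For reference, the VFIO container ABI being emulated above is driven from userspace through the container ioctls. A hedged sketch (hypothetical, not part of this patch; paths and error handling simplified) of how an unmodified VFIO application would probe and select the no-iommu type against the compat container:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Hypothetical userspace probe: with CONFIG_VFIO_NOIOMMU enabled,
 * VFIO_CHECK_EXTENSION now reports VFIO_NOIOMMU_IOMMU as available, and
 * VFIO_SET_IOMMU succeeds for a CAP_SYS_RAWIO caller once a no-iommu group
 * has been attached (see iommufd_vfio_compat_set_no_iommu() above).
 */
static int example_select_noiommu(void)
{
        int container = open("/dev/vfio/vfio", O_RDWR);

        if (container < 0)
                return -1;
        if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_NOIOMMU_IOMMU) != 1)
                return -1;      /* no-iommu backend not available */
        return ioctl(container, VFIO_SET_IOMMU, VFIO_NOIOMMU_IOMMU);
}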
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index ed33c6cce083..c489714ddc4d 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
 	switch (cap) {
 	case IOMMU_CAP_CACHE_COHERENCY:
 		return true;
-	case IOMMU_CAP_INTR_REMAP:
-		return true;
 	default:
 		return false;
 	}
@@ -52,7 +50,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 	if (!s390_domain)
 		return NULL;
 
-	s390_domain->dma_table = dma_alloc_cpu_table();
+	s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
 	if (!s390_domain->dma_table) {
 		kfree(s390_domain);
 		return NULL;
@@ -260,7 +258,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
 
 static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 				     phys_addr_t pa, dma_addr_t dma_addr,
-				     unsigned long nr_pages, int flags)
+				     unsigned long nr_pages, int flags,
+				     gfp_t gfp)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
 	unsigned long *entry;
@@ -268,7 +267,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 	int rc;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+					   gfp);
 		if (unlikely(!entry)) {
 			rc = -ENOMEM;
 			goto undo_cpu_trans;
@@ -284,7 +284,7 @@ undo_cpu_trans:
 	while (i-- > 0) {
 		dma_addr -= PAGE_SIZE;
 		entry = dma_walk_cpu_trans(s390_domain->dma_table,
-					   dma_addr);
+					   dma_addr, gfp);
 		if (!entry)
 			break;
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -301,7 +301,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 	int rc = 0;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+					   GFP_ATOMIC);
 		if (unlikely(!entry)) {
 			rc = -EINVAL;
 			break;
@@ -339,7 +340,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
-				       pgcount, flags);
+				       pgcount, flags, gfp);
 	if (!rc)
 		*mapped = size;
 