author		Jason Gunthorpe <jgg@nvidia.com>	2023-01-30 18:53:04 +0100
committer	Jason Gunthorpe <jgg@nvidia.com>	2023-01-30 18:54:35 +0100
commit		fd9f2a912255106d3b53163d816a4bd99ba29664
tree		892f38b617b534c93dc5c0b1f1efc3950812e828 /drivers/iommu/intel
parent		iommufd: Add three missing structures in ucmd_buffer
parent		iommu/s390: Use GFP_KERNEL in sleepable contexts
Merge branch 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu into iommufd/for-next
Jason Gunthorpe says:
====================
iommufd follows the same design as KVM and uses memory cgroups to limit
the amount of kernel memory an iommufd file descriptor can pin down. The
various internal data structures already use GFP_KERNEL_ACCOUNT to charge
their own memory.
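For context, GFP_KERNEL_ACCOUNT is GFP_KERNEL | __GFP_ACCOUNT, which makes
the allocation chargeable to the calling task's memory cgroup. A minimal
sketch of the pattern (demo_obj and demo_obj_alloc are hypothetical names,
not from this series):

  #include <linux/slab.h>

  /* Illustrative object pinned on behalf of a userspace file descriptor. */
  struct demo_obj {
          u64 payload;
  };

  static struct demo_obj *demo_obj_alloc(void)
  {
          /*
           * GFP_KERNEL_ACCOUNT == GFP_KERNEL | __GFP_ACCOUNT: the memory
           * cgroup of the current task is charged, so its kernel-memory
           * limit can bound how much memory the fd pins down.
           */
          return kzalloc(sizeof(struct demo_obj), GFP_KERNEL_ACCOUNT);
  }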
However, one of the biggest consumers of kernel memory is the IOPTEs
stored under the iommu_domain, and these allocations are not tracked.
This series is the first step toward fixing that.
The iommu driver contract already includes a 'gfp' argument to the
map_pages op, so iommufd can specify GFP_KERNEL_ACCOUNT; having the
driver allocate its IOPTE tables with that flag captures a significant
share of the allocations.
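On the driver side the contract looks roughly like this (the signature
below matches the map_pages member of struct iommu_domain_ops at the time
of this series; demo_map_pages and its body are an illustrative sketch,
not a real driver):

  static int demo_map_pages(struct iommu_domain *domain, unsigned long iova,
                            phys_addr_t paddr, size_t pgsize, size_t pgcount,
                            int prot, gfp_t gfp, size_t *mapped)
  {
          /*
           * Allocate IOPTE table memory with the caller-supplied gfp, so
           * that when iommufd passes GFP_KERNEL_ACCOUNT the page tables
           * are charged to the user's memory cgroup.
           */
          void *table = (void *)__get_free_page(gfp | __GFP_ZERO);

          if (!table)
                  return -ENOMEM;
          /* ... link the table into the hierarchy, write pgcount PTEs ... */
          *mapped = pgsize * pgcount;
          return 0;
  }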
Update the iommu_map() API to pass in the GFP argument, and fix all call
sites. Replace iommu_map_atomic().
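After the conversion, each caller picks the gfp that matches its context.
A sketch of the two typical call sites (signature per the final patch in
the series):

  /*
   * Sleepable context: the mapping may block, and GFP_KERNEL_ACCOUNT
   * variants are charged to the memory cgroup.
   */
  ret = iommu_map(domain, iova, paddr, size,
                  IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);

  /* Former iommu_map_atomic() users simply pass GFP_ATOMIC instead. */
  ret = iommu_map(domain, iova, paddr, size,
                  IOMMU_READ | IOMMU_WRITE, GFP_ATOMIC);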
Audit the "enterprise" iommu drivers to make sure they do the right thing.
Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is
problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are
already correct.
A follow up series will be needed to capture the allocations made when the
iommu_domain itself is allocated, which will complete the job.
====================
* 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
iommu/s390: Use GFP_KERNEL in sleepable contexts
iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s
iommu/intel: Use GFP_KERNEL in sleepable contexts
iommu/intel: Support the gfp argument to the map_pages op
iommu/intel: Add a gfp parameter to alloc_pgtable_page()
iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map()
iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous()
iommu: Add a gfp parameter to iommu_map_sg()
iommu: Remove iommu_map_atomic()
iommu: Add a gfp parameter to iommu_map()
Link: https://lore.kernel.org/linux-iommu/0-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--	drivers/iommu/intel/iommu.c |  36
-rw-r--r--	drivers/iommu/intel/iommu.h |   2
-rw-r--r--	drivers/iommu/intel/pasid.c |   2
3 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7cfab5fd5e59..6fd223cf639f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
 }
 __setup("intel_iommu=", intel_iommu_setup);
 
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
 {
         struct page *page;
         void *vaddr = NULL;
 
-        page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+        page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
         if (page)
                 vaddr = page_address(page);
         return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
                 if (!alloc)
                         return NULL;
 
-                context = alloc_pgtable_page(iommu->node);
+                context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
                 if (!context)
                         return NULL;
 
@@ -908,7 +908,8 @@ pgtable_walk:
 #endif
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-                                      unsigned long pfn, int *target_level)
+                                      unsigned long pfn, int *target_level,
+                                      gfp_t gfp)
 {
         struct dma_pte *parent, *pte;
         int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
                 if (!dma_pte_present(pte)) {
                         uint64_t pteval;
 
-                        tmp_page = alloc_pgtable_page(domain->nid);
+                        tmp_page = alloc_pgtable_page(domain->nid, gfp);
                         if (!tmp_page)
                                 return NULL;
 
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
         struct root_entry *root;
 
-        root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+        root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
         if (!root) {
                 pr_err("Allocating root entry for %s failed\n",
                        iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
         while (start_pfn <= end_pfn) {
                 if (!pte)
-                        pte = pfn_to_dma_pte(domain, start_pfn, &level);
+                        pte = pfn_to_dma_pte(domain, start_pfn, &level,
+                                             GFP_ATOMIC);
 
                 if (dma_pte_present(pte)) {
                         dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
 
 static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-                 unsigned long phys_pfn, unsigned long nr_pages, int prot)
+                 unsigned long phys_pfn, unsigned long nr_pages, int prot,
+                 gfp_t gfp)
 {
         struct dma_pte *first_pte = NULL, *pte = NULL;
         unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
                                         phys_pfn, nr_pages);
 
-                        pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+                        pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+                                             gfp);
                         if (!pte)
                                 return -ENOMEM;
                         first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
 
         return __domain_mapping(domain, first_vpfn,
                                 first_vpfn, last_vpfn - first_vpfn + 1,
-                                DMA_PTE_READ|DMA_PTE_WRITE);
+                                DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
                         if (!old_ce)
                                 goto out;
 
-                        new_ce = alloc_pgtable_page(iommu->node);
+                        new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
                         if (!new_ce)
                                 goto out_unmap;
 
@@ -4136,7 +4140,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
         domain->max_addr = 0;
 
         /* always allocate the top pgd */
-        domain->pgd = alloc_pgtable_page(domain->nid);
+        domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
         if (!domain->pgd)
                 return -ENOMEM;
         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
            the low bits of hpa would take us onto the next page */
         size = aligned_nrpages(hpa, size);
         return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
-                                hpa >> VTD_PAGE_SHIFT, size, prot);
+                                hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
 }
 
 static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
         /* Cope with horrid API which requires us to unmap more than the
            size argument if it happens to be a large-page mapping. */
-        BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+        BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+                               GFP_ATOMIC));
 
         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
         int level = 0;
         u64 phys = 0;
 
-        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+                             GFP_ATOMIC);
         if (pte && dma_pte_present(pte))
                 phys = dma_pte_addr(pte) +
                         (iova & (BIT_MASK(level_to_offset_bits(level) +
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 06e61e474856..ca9a035e0110 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 
 extern int dmar_ir_support(void);
 
-void *alloc_pgtable_page(int node);
+void *alloc_pgtable_page(int node, gfp_t gfp);
 void free_pgtable_page(void *vaddr);
 void iommu_flush_write_buffer(struct intel_iommu *iommu);
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb3c7020028d..c5bf74e9372d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
 retry:
         entries = get_pasid_table_from_pde(&dir[dir_index]);
         if (!entries) {
-                entries = alloc_pgtable_page(info->iommu->node);
+                entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
                 if (!entries)
                         return NULL;