From 1672730cffaf56ea3f7b697d499d9697a34b77f1 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sun, 16 Jul 2023 00:16:21 +0800 Subject: iommu/arm-smmu-v3: Change vmid alloc strategy from bitmap to ida In the current implementation of VMID allocation for arm smmu-v3, a per-smmu device bitmap of 64K bits (8K bytes) is allocated to cover the possible VMID range, which is two pages on some architectures. Besides that, its memory consumption is 'static', regardless of how many VMIDs are actually allocated. That is memory-inefficient and lacks scalability. So an IDA-based implementation is introduced to address this issue, which is capable of self-expanding according to the actual need of VMID allocation. Signed-off-by: Dawei Li Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/TYCP286MB2323E0C525FF9F94E3B07C7ACA35A@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29 +++++++---------------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- 2 files changed, 8 insertions(+), 23 deletions(-) (limited to 'drivers/iommu/arm/arm-smmu-v3') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9b0dc3505601..d9487602701f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2055,24 +2055,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) return &smmu_domain->domain; } -static int arm_smmu_bitmap_alloc(unsigned long *map, int span) -{ - int idx, size = 1 << span; - - do { - idx = find_first_zero_bit(map, size); - if (idx == size) - return -ENOSPC; - } while (test_and_set_bit(idx, map)); - - return idx; -} - -static void arm_smmu_bitmap_free(unsigned long *map, int idx) -{ - clear_bit(idx, map); -} - static void arm_smmu_domain_free(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2093,7 +2075,7 @@ static void arm_smmu_domain_free(struct 
iommu_domain *domain) } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; if (cfg->vmid) - arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid); + ida_free(&smmu->vmid_map, cfg->vmid); } kfree(smmu_domain); @@ -2167,7 +2149,9 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; - vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); + /* Reserve VMID 0 for stage-2 bypass STEs */ + vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, + GFP_KERNEL); if (vmid < 0) return vmid; @@ -3098,8 +3082,8 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) reg |= STRTAB_BASE_RA; smmu->strtab_cfg.strtab_base = reg; - /* Allocate the first VMID for stage-2 bypass STEs */ - set_bit(0, smmu->vmid_map); + ida_init(&smmu->vmid_map); + return 0; } @@ -3923,6 +3907,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); iopf_queue_free(smmu->evtq.iopf); + ida_destroy(&smmu->vmid_map); } static void arm_smmu_device_shutdown(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index dcab85698a4e..9915850dd4db 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -670,7 +670,7 @@ struct arm_smmu_device { #define ARM_SMMU_MAX_VMIDS (1 << 16) unsigned int vmid_bits; - DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS); + struct ida vmid_map; unsigned int ssid_bits; unsigned int sid_bits; -- cgit v1.2.3 From 4298780126c298f20ae4bc8676591eaf8c48767e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 9 Aug 2023 20:47:54 +0800 Subject: iommu: Generalize PASID 0 for normal DMA w/o PASID PCIe Process address space ID (PASID) is used to tag DMA traffic, it provides finer grained isolation than requester ID (RID). 
For each device/RID, 0 is a special PASID for the normal DMA (no PASID). This is universal across all architectures that support PASID, and therefore warrants being reserved globally and declared in the common header. Consequently, we can avoid conflicts between different PASID use cases in the generic code, e.g. SVA and DMA API with PASIDs. This paves the way for device drivers to choose a global PASID policy while continuing to do normal DMA. Note that VT-d could support a non-zero RID/NO_PASID, but this is currently not used. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jason Gunthorpe Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230802212427.1497170-2-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ++++++++-------- drivers/iommu/intel/iommu.c | 24 ++++++++++++------------ drivers/iommu/intel/pasid.c | 2 +- drivers/iommu/intel/pasid.h | 2 -- include/linux/iommu.h | 1 + 6 files changed, 23 insertions(+), 24 deletions(-) (limited to 'drivers/iommu/arm/arm-smmu-v3') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a5a63b1c947e..5e6b39881c04 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -80,7 +80,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. 
*/ - arm_smmu_write_ctx_desc(smmu_domain, 0, cd); + arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9b0dc3505601..ee70687f060b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1059,7 +1059,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, /* * This function handles the following cases: * - * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). * (2) Install a secondary CD, for SID+SSID traffic. * (3) Update ASID of a CD. Atomically write the first 64 bits of the * CD, then invalidate the old entry and mappings. @@ -1607,7 +1607,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) sid = FIELD_GET(PRIQ_0_SID, evt[0]); ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); - ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; + ssid = ssv ? 
FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID; last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); @@ -1748,7 +1748,7 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, */ *cmd = (struct arm_smmu_cmdq_ent) { .opcode = CMDQ_OP_ATC_INV, - .substream_valid = !!ssid, + .substream_valid = (ssid != IOMMU_NO_PASID), .atc.ssid = ssid, }; @@ -1795,7 +1795,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -1875,7 +1875,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -1968,7 +1968,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2142,7 +2142,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, * the master has been added to the devices list for this domain. * This isn't an issue because the STE hasn't been installed yet. 
*/ - ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); if (ret) goto out_free_cd_tables; @@ -2328,7 +2328,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) pdev = to_pci_dev(master->dev); atomic_inc(&smmu_domain->nr_ats_masters); - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c8c5cdc36cf..ddff43def3ab 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -877,7 +877,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, } /* For request-without-pasid, get the pasid from context entry */ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) - pasid = PASID_RID2PASID; + pasid = IOMMU_NO_PASID; dir_index = pasid >> PASID_PDE_SHIFT; pde = &dir[dir_index]; @@ -1449,7 +1449,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); + quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1484,7 +1484,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1554,7 +1554,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, 
0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1940,7 +1940,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_pdts(pds); /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, PASID_RID2PASID); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); /* * Setup the Device-TLB enable bit and Page request @@ -2420,13 +2420,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); + IOMMU_NO_PASID); else ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); device_block_translation(dev); @@ -3968,7 +3968,7 @@ static void dmar_remove_one_dev_info(struct device *dev) if (!dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); iommu_disable_pci_caps(info); domain_context_clear(info); @@ -3997,7 +3997,7 @@ static void device_block_translation(struct device *dev) if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, - PASID_RID2PASID, false); + IOMMU_NO_PASID, false); else domain_context_clear(info); } @@ -4331,7 +4331,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) list_for_each_entry(info, &domain->devices, link) intel_pasid_setup_page_snoop_control(info->iommu, info->dev, - PASID_RID2PASID); + IOMMU_NO_PASID); } static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) @@ -4987,7 +4987,7 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, 
return; sid = PCI_DEVID(info->bus, info->devfn); - if (pasid == PASID_RID2PASID) { + if (pasid == IOMMU_NO_PASID) { qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask); } else { diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c5d479770e12..23dca3bc319d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -438,7 +438,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, * SVA usage, device could do DMA with multiple PASIDs. It is more * efficient to flush devTLB specific to the PASID. */ - if (pasid == PASID_RID2PASID) + if (pasid == IOMMU_NO_PASID) qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); else qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d6b7d21244b1..4e9e68c3c388 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -10,8 +10,6 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H -#define PASID_RID2PASID 0x0 -#define PASID_MIN 0x1 #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..2870bc29d456 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -196,6 +196,7 @@ enum iommu_dev_features { IOMMU_DEV_FEAT_IOPF, }; +#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; -- cgit v1.2.3