diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-05 18:49:54 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-05 18:49:54 +0100 |
commit | 4fc2ea6a8608d9a649eff5e3c2ee477eb70f0fb6 (patch) | |
tree | 5b8c5656538961ffa67515dde551f470068904cb /drivers | |
parent | Merge tag 'for-linus-5.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kerne... (diff) | |
parent | Merge branches 'iommu/fixes', 'arm/smmu', 'x86/amd', 'x86/vt-d' and 'core' in... (diff) | |
download | linux-4fc2ea6a8608d9a649eff5e3c2ee477eb70f0fb6.tar.xz linux-4fc2ea6a8608d9a649eff5e3c2ee477eb70f0fb6.zip |
Merge tag 'iommu-updates-v5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
- Allow compiling the ARM-SMMU drivers as modules.
- Fixes and cleanups for the ARM-SMMU drivers and io-pgtable code
collected by Will Deacon. The merge-commit (6855d1ba7537) has all the
details.
- Cleanup of the iommu_put_resv_regions() call-backs in various
drivers.
- AMD IOMMU driver cleanups.
- Update for the x2APIC support in the AMD IOMMU driver.
- Preparation patches for Intel VT-d nested mode support.
- RMRR and identity domain handling fixes for the Intel VT-d driver.
- More small fixes and cleanups.
* tag 'iommu-updates-v5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (87 commits)
iommu/amd: Remove the unnecessary assignment
iommu/vt-d: Remove unnecessary WARN_ON_ONCE()
iommu/vt-d: Unnecessary to handle default identity domain
iommu/vt-d: Allow devices with RMRRs to use identity domain
iommu/vt-d: Add RMRR base and end addresses sanity check
iommu/vt-d: Mark firmware tainted if RMRR fails sanity check
iommu/amd: Remove unused struct member
iommu/amd: Replace two consecutive readl calls with one readq
iommu/vt-d: Don't reject Host Bridge due to scope mismatch
PCI/ATS: Add PASID stubs
iommu/arm-smmu-v3: Return -EBUSY when trying to re-add a device
iommu/arm-smmu-v3: Improve add_device() error handling
iommu/arm-smmu-v3: Use WRITE_ONCE() when changing validity of an STE
iommu/arm-smmu-v3: Add second level of context descriptor table
iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure
iommu/arm-smmu-v3: Propagate ssid_bits
iommu/arm-smmu-v3: Add support for Substream IDs
iommu/arm-smmu-v3: Add context descriptor tables allocators
iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
ACPI/IORT: Parse SSID property of named component node
...
Diffstat (limited to 'drivers')
30 files changed, 1578 insertions, 802 deletions
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 6078064684c6..ed3d2d1a7ae9 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "ACPI: IORT: " fmt #include <linux/acpi_iort.h> +#include <linux/bitfield.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/list.h> @@ -902,9 +903,9 @@ static inline bool iort_iommu_driver_enabled(u8 type) { switch (type) { case ACPI_IORT_NODE_SMMU_V3: - return IS_BUILTIN(CONFIG_ARM_SMMU_V3); + return IS_ENABLED(CONFIG_ARM_SMMU_V3); case ACPI_IORT_NODE_SMMU: - return IS_BUILTIN(CONFIG_ARM_SMMU); + return IS_ENABLED(CONFIG_ARM_SMMU); default: pr_warn("IORT node type %u does not describe an SMMU\n", type); return false; @@ -976,6 +977,20 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) return iort_iommu_xlate(info->dev, parent, streamid); } +static void iort_named_component_init(struct device *dev, + struct acpi_iort_node *node) +{ + struct acpi_iort_named_component *nc; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (!fwspec) + return; + + nc = (struct acpi_iort_named_component *)node->node_data; + fwspec->num_pasid_bits = FIELD_GET(ACPI_IORT_NC_PASID_BITS, + nc->node_flags); +} + /** * iort_iommu_configure - Set-up IOMMU configuration for a device. * @@ -1030,6 +1045,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (parent) err = iort_iommu_xlate(dev, parent, streamid); } while (parent && !err); + + if (!err) + iort_named_component_init(dev, node); } /* diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 0b9d78a0f3ac..d2fade984999 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -82,7 +82,7 @@ config IOMMU_DEBUGFS config IOMMU_DEFAULT_PASSTHROUGH bool "IOMMU passthrough by default" depends on IOMMU_API - help + help Enable passthrough by default, removing the need to pass in iommu.passthrough=on or iommu=pt through command line. If this is enabled, you can still disable with iommu.passthrough=off @@ -91,8 +91,8 @@ config IOMMU_DEFAULT_PASSTHROUGH If unsure, say N here. config OF_IOMMU - def_bool y - depends on OF && IOMMU_API + def_bool y + depends on OF && IOMMU_API # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA @@ -214,6 +214,7 @@ config INTEL_IOMMU_SVM select PCI_PASID select PCI_PRI select MMU_NOTIFIER + select IOASID help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by @@ -248,6 +249,18 @@ config INTEL_IOMMU_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16MiB to make floppy (an ISA device) work. +config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON + bool "Enable Intel IOMMU scalable mode by default" + depends on INTEL_IOMMU + help + Selecting this option will enable by default the scalable mode if + hardware presents the capability. The scalable mode is defined in + VT-d 3.0. The scalable mode capability could be checked by reading + /sys/devices/virtual/iommu/dmar*/intel-iommu/ecap. If this option + is not selected, scalable mode support could also be enabled by + passing intel_iommu=sm_on to the kernel. If not sure, please use + the default value. + config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI @@ -356,7 +369,7 @@ config SPAPR_TCE_IOMMU # ARM IOMMU support config ARM_SMMU - bool "ARM Ltd. System MMU (SMMU) Support" + tristate "ARM Ltd. System MMU (SMMU) Support" depends on (ARM64 || ARM) && MMU select IOMMU_API select IOMMU_IO_PGTABLE_LPAE @@ -368,6 +381,18 @@ config ARM_SMMU Say Y here if your SoC includes an IOMMU device implementing the ARM SMMU architecture. +config ARM_SMMU_LEGACY_DT_BINDINGS + bool "Support the legacy \"mmu-masters\" devicetree bindings" + depends on ARM_SMMU=y && OF + help + Support for the badly designed and deprecated "mmu-masters" + devicetree bindings. This allows some DMA masters to attach + to the SMMU but does not provide any support via the DMA API. + If you're lucky, you might be able to get VFIO up and running. + + If you say Y here then you'll make me very sad. Instead, say N + and move your firmware to the utopian future that was 2016. + config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT bool "Default to disabling bypass on ARM SMMU v1 and v2" depends on ARM_SMMU @@ -394,7 +419,7 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT config. config ARM_SMMU_V3 - bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support" + tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support" depends on ARM64 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 97814cc861ea..2104fb8afc06 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -14,7 +14,8 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o -obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o +obj-$(CONFIG_ARM_SMMU) += arm-smmu-mod.o +arm-smmu-mod-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7a6c056b9b9c..aac132bd1ef0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2294,7 +2294,6 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; - iommu_detected = 1; if (amd_iommu_unmap_flush) pr_info("IO/TLB flush on unmap enabled\n"); @@ -2638,15 +2637,6 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, head); } -static void amd_iommu_put_resv_regions(struct device *dev, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { @@ -2685,7 +2675,7 @@ const struct iommu_ops amd_iommu_ops = { .device_group = amd_iommu_device_group, .domain_get_attr = amd_iommu_domain_get_attr, .get_resv_regions = amd_iommu_get_resv_regions, - .put_resv_regions = amd_iommu_put_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .flush_iotlb_all = amd_iommu_flush_iotlb_all, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 823cc4ef51fd..2759a8d57b7f 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -71,6 +71,8 @@ #define IVHD_FLAG_ISOC_EN_MASK 0x08 #define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_IW 0x04 +#define IVMD_FLAG_IR 0x02 #define IVMD_FLAG_UNITY_MAP 0x01 #define ACPI_DEVFLAG_INITPASS 0x01 @@ -147,7 +149,7 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; -static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; +static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -714,7 +716,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - iommu_feature_enable(iommu, CONTROL_PPFLOG_EN); + iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); iommu_feature_enable(iommu, CONTROL_PPR_EN); } @@ -1116,21 +1118,17 @@ static int __init add_early_maps(void) */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) return; - if (iommu) { - /* - * We only can configure exclusion ranges per IOMMU, not - * per device. But we can enable the exclusion range per - * device. This is done here - */ - set_dev_entry_bit(devid, DEV_ENTRY_EX); - iommu->exclusion_start = m->range_start; - iommu->exclusion_length = m->range_length; - } + /* + * Treat per-device exclusion ranges as r/w unity-mapped regions + * since some buggy BIOSes might lead to the overwritten exclusion + * range (exclusion_start and exclusion_length members). This + * happens when there are multiple exclusion ranges (IVMD entries) + * defined in ACPI table. + */ + m->flags = (IVMD_FLAG_IW | IVMD_FLAG_IR | IVMD_FLAG_UNITY_MAP); } /* @@ -1523,8 +1521,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; - if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0)) - amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; break; case 0x11: case 0x40: @@ -1534,8 +1530,15 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; - if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0)) - amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; + /* + * Note: Since iommu_update_intcapxt() leverages + * the IOMMU MMIO access to MSI capability block registers + * for MSI address lo/hi/data, we need to check both + * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support. + */ + if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && + (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) + amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; break; default: return -EINVAL; @@ -1727,7 +1730,6 @@ static const struct attribute_group *amd_iommu_groups[] = { static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range, misc, low, high; int ret; iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid), @@ -1740,19 +1742,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, - &range); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, - &misc); if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; /* read extended feature bits */ - low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); - high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); - - iommu->features = ((u64)high << 32) | low; + iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); if (iommu_feature(iommu, FEATURE_GT)) { int glxval; @@ -1996,8 +1991,8 @@ static int iommu_init_intcapxt(struct amd_iommu *iommu) struct irq_affinity_notify *notify = &iommu->intcapxt_notify; /** - * IntCapXT requires XTSup=1, which can be inferred - * amd_iommu_xt_mode. + * IntCapXT requires XTSup=1 and MsiCapMmioSup=1, + * which can be inferred from amd_iommu_xt_mode. */ if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) return 0; @@ -2044,7 +2039,7 @@ enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPFINT_EN); + iommu_feature_enable(iommu, CONTROL_PPRINT_EN); iommu_ga_log_enable(iommu); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f52f59d5c6bd..f8d01d6b00da 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -147,8 +147,8 @@ #define CONTROL_COHERENT_EN 0x0aULL #define CONTROL_ISOC_EN 0x0bULL #define CONTROL_CMDBUF_EN 0x0cULL -#define CONTROL_PPFLOG_EN 0x0dULL -#define CONTROL_PPFINT_EN 0x0eULL +#define CONTROL_PPRLOG_EN 0x0dULL +#define CONTROL_PPRINT_EN 0x0eULL #define CONTROL_PPR_EN 0x0fULL #define CONTROL_GT_EN 0x10ULL #define CONTROL_GA_EN 0x11ULL @@ -377,12 +377,12 @@ #define IOMMU_CAP_EFR 27 /* IOMMU Feature Reporting Field (for IVHD type 10h */ -#define IOMMU_FEAT_XTSUP_SHIFT 0 #define IOMMU_FEAT_GASUP_SHIFT 6 /* IOMMU Extended Feature Register (EFR) */ #define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 +#define IOMMU_EFR_MSICAPMMIOSUP_SHIFT 46 #define MAX_DOMAIN_ID 65536 @@ -463,7 +463,6 @@ struct amd_irte_ops; * independent of their use. */ struct protection_domain { - struct list_head list; /* for list of all protection domains */ struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index b2fe72a8f019..74d97a886e93 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -119,7 +119,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) * Secure has also cleared SACR.CACHE_LOCK for this to take effect... */ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7); - major = FIELD_GET(ID7_MAJOR, reg); + major = FIELD_GET(ARM_SMMU_ID7_MAJOR, reg); reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR); if (major >= 2) reg &= ~ARM_MMU500_ACR_CACHE_LOCK; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index effe72eb89e7..aa3ac2a03807 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -21,8 +21,7 @@ #include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/iopoll.h> -#include <linux/init.h> -#include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/msi.h> #include <linux/of.h> #include <linux/of_address.h> @@ -224,9 +223,15 @@ #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 +#define STRTAB_STE_0_S1FMT_64K_L2 2 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) +#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0) +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0 +#define STRTAB_STE_1_S1DSS_BYPASS 0x1 +#define STRTAB_STE_1_S1DSS_SSID0 0x2 + #define STRTAB_STE_1_S1C_CACHE_NC 0UL #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL #define STRTAB_STE_1_S1C_CACHE_WT 2UL @@ -251,6 +256,13 @@ #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0) #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32) +#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0) +#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6) +#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8) +#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10) +#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12) +#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14) +#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16) #define STRTAB_STE_2_S2AA64 (1UL << 51) #define STRTAB_STE_2_S2ENDI (1UL << 52) #define STRTAB_STE_2_S2PTW (1UL << 54) @@ -258,30 +270,34 @@ #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) -/* Context descriptor (stage-1 only) */ +/* + * Context descriptors. + * + * Linear: when less than 1024 SSIDs are supported + * 2lvl: at most 1024 L1 entries, + * 1024 lazy entries per table. + */ +#define CTXDESC_SPLIT 10 +#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) + +#define CTXDESC_L1_DESC_DWORDS 1 +#define CTXDESC_L1_DESC_V (1UL << 0) +#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12) + #define CTXDESC_CD_DWORDS 8 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) -#define ARM64_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) -#define ARM64_TCR_TG0 GENMASK_ULL(15, 14) #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) -#define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8) #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) -#define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10) #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12) -#define ARM64_TCR_SH0 GENMASK_ULL(13, 12) #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14) -#define ARM64_TCR_EPD0 (1ULL << 7) #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30) -#define ARM64_TCR_EPD1 (1ULL << 23) #define CTXDESC_CD_0_ENDI (1UL << 15) #define CTXDESC_CD_0_V (1UL << 31) #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) -#define ARM64_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) -#define ARM64_TCR_TBI0 (1ULL << 37) #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) @@ -292,9 +308,11 @@ #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4) -/* Convert between AArch64 (CPU) TCR format and SMMU CD format */ -#define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \ - FIELD_GET(ARM64_TCR_##fld, tcr)) +/* + * When the SMMU only supports linear context descriptor tables, pick a + * reasonable size limit (64kB). + */ +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 @@ -323,6 +341,7 @@ #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0) #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12) +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12) #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32) #define CMDQ_CFGI_1_LEAF (1UL << 0) #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) @@ -384,10 +403,6 @@ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 -/* - * not really modular, but the easiest way to keep compat with existing - * bootargs behaviour is to continue using module_param_named here. - */ static bool disable_bypass = 1; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, @@ -440,8 +455,11 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_CFGI_STE 0x3 #define CMDQ_OP_CFGI_ALL 0x4 + #define CMDQ_OP_CFGI_CD 0x5 + #define CMDQ_OP_CFGI_CD_ALL 0x6 struct { u32 sid; + u32 ssid; union { bool leaf; u8 span; @@ -547,16 +565,30 @@ struct arm_smmu_strtab_l1_desc { dma_addr_t l2ptr_dma; }; +struct arm_smmu_ctx_desc { + u16 asid; + u64 ttbr; + u64 tcr; + u64 mair; +}; + +struct arm_smmu_l1_ctx_desc { + __le64 *l2ptr; + dma_addr_t l2ptr_dma; +}; + +struct arm_smmu_ctx_desc_cfg { + __le64 *cdtab; + dma_addr_t cdtab_dma; + struct arm_smmu_l1_ctx_desc *l1_desc; + unsigned int num_l1_ents; +}; + struct arm_smmu_s1_cfg { - __le64 *cdptr; - dma_addr_t cdptr_dma; - - struct arm_smmu_ctx_desc { - u16 asid; - u64 ttbr; - u64 tcr; - u64 mair; - } cd; + struct arm_smmu_ctx_desc_cfg cdcfg; + struct arm_smmu_ctx_desc cd; + u8 s1fmt; + u8 s1cdmax; }; struct arm_smmu_s2_cfg { @@ -638,6 +670,7 @@ struct arm_smmu_master { u32 *sids; unsigned int num_sids; bool ats_enabled; + unsigned int ssid_bits; }; /* SMMU private data for an IOMMU domain */ @@ -847,15 +880,22 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; break; + case CMDQ_OP_CFGI_CD: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); + /* Fallthrough */ case CMDQ_OP_CFGI_STE: cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); break; + case CMDQ_OP_CFGI_CD_ALL: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); + break; case CMDQ_OP_CFGI_ALL: /* Cover the entire SID range */ cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; @@ -1443,50 +1483,238 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) } /* Context descriptor manipulation functions */ -static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, + int ssid, bool leaf) { - u64 val = 0; + size_t i; + unsigned long flags; + struct arm_smmu_master *master; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_CFGI_CD, + .cfgi = { + .ssid = ssid, + .leaf = leaf, + }, + }; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + for (i = 0; i < master->num_sids; i++) { + cmd.cfgi.sid = master->sids[i]; + arm_smmu_cmdq_issue_cmd(smmu, &cmd); + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_cmdq_issue_sync(smmu); +} - /* Repack the TCR. Just care about TTBR0 for now */ - val |= ARM_SMMU_TCR2CD(tcr, T0SZ); - val |= ARM_SMMU_TCR2CD(tcr, TG0); - val |= ARM_SMMU_TCR2CD(tcr, IRGN0); - val |= ARM_SMMU_TCR2CD(tcr, ORGN0); - val |= ARM_SMMU_TCR2CD(tcr, SH0); - val |= ARM_SMMU_TCR2CD(tcr, EPD0); - val |= ARM_SMMU_TCR2CD(tcr, EPD1); - val |= ARM_SMMU_TCR2CD(tcr, IPS); +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, + struct arm_smmu_l1_ctx_desc *l1_desc) +{ + size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - return val; + l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, + &l1_desc->l2ptr_dma, GFP_KERNEL); + if (!l1_desc->l2ptr) { + dev_warn(smmu->dev, + "failed to allocate context descriptor table\n"); + return -ENOMEM; + } + return 0; } -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, - struct arm_smmu_s1_cfg *cfg) +static void arm_smmu_write_cd_l1_desc(__le64 *dst, + struct arm_smmu_l1_ctx_desc *l1_desc) { - u64 val; + u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | + CTXDESC_L1_DESC_V; + + WRITE_ONCE(*dst, cpu_to_le64(val)); +} + +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, + u32 ssid) +{ + __le64 *l1ptr; + unsigned int idx; + struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + + idx = ssid >> CTXDESC_SPLIT; + l1_desc = &cdcfg->l1_desc[idx]; + if (!l1_desc->l2ptr) { + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + return NULL; + + l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + /* An invalid L1CD can be cached */ + arm_smmu_sync_cd(smmu_domain, ssid, false); + } + idx = ssid & (CTXDESC_L2_ENTRIES - 1); + return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; +} + +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, + int ssid, struct arm_smmu_ctx_desc *cd) +{ /* - * We don't need to issue any invalidation here, as we'll invalidate - * the STE when installing the new entry anyway. + * This function handles the following cases: + * + * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (2) Install a secondary CD, for SID+SSID traffic. + * (3) Update ASID of a CD. Atomically write the first 64 bits of the + * CD, then invalidate the old entry and mappings. + * (4) Remove a secondary CD. */ - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | + u64 val; + bool cd_live; + __le64 *cdptr; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + return -E2BIG; + + cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + if (!cdptr) + return -ENOMEM; + + val = le64_to_cpu(cdptr[0]); + cd_live = !!(val & CTXDESC_CD_0_V); + + if (!cd) { /* (4) */ + val = 0; + } else if (cd_live) { /* (3) */ + val &= ~CTXDESC_CD_0_ASID; + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); + /* + * Until CD+TLB invalidation, both ASIDs may be used for tagging + * this substream's traffic + */ + } else { /* (1) and (2) */ + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); + cdptr[2] = 0; + cdptr[3] = cpu_to_le64(cd->mair); + + /* + * STE is live, and the SMMU might read dwords of this CD in any + * order. Ensure that it observes valid values before reading + * V=1. + */ + arm_smmu_sync_cd(smmu_domain, ssid, true); + + val = cd->tcr | #ifdef __BIG_ENDIAN - CTXDESC_CD_0_ENDI | + CTXDESC_CD_0_ENDI | #endif - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | - CTXDESC_CD_0_V; + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | + CTXDESC_CD_0_AA64 | + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | + CTXDESC_CD_0_V; - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) - val |= CTXDESC_CD_0_S; + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + val |= CTXDESC_CD_0_S; + } - cfg->cdptr[0] = cpu_to_le64(val); + /* + * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3 + * "Configuration structures and configuration invalidation completion" + * + * The size of single-copy atomic reads made by the SMMU is + * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single + * field within an aligned 64-bit span of a structure can be altered + * without first making the structure invalid. + */ + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); + arm_smmu_sync_cd(smmu_domain, ssid, true); + return 0; +} + +static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + int ret; + size_t l1size; + size_t max_contexts; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + + max_contexts = 1 << cfg->s1cdmax; + + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || + max_contexts <= CTXDESC_L2_ENTRIES) { + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cdcfg->num_l1_ents = max_contexts; - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; - cfg->cdptr[1] = cpu_to_le64(val); + l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); + } else { + cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + CTXDESC_L2_ENTRIES); + + cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, + sizeof(*cdcfg->l1_desc), + GFP_KERNEL); + if (!cdcfg->l1_desc) + return -ENOMEM; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } + + cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + GFP_KERNEL); + if (!cdcfg->cdtab) { + dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + ret = -ENOMEM; + goto err_free_l1; + } + + return 0; - cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair); +err_free_l1: + if (cdcfg->l1_desc) { + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + } + return ret; +} + +static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + int i; + size_t size, l1size; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + + if (cdcfg->l1_desc) { + size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + + for (i = 0; i < cdcfg->num_l1_ents; i++) { + if (!cdcfg->l1_desc[i].l2ptr) + continue; + + dmam_free_coherent(smmu->dev, size, + cdcfg->l1_desc[i].l2ptr, + cdcfg->l1_desc[i].l2ptr_dma); + } + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } else { + l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + } + + dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); + cdcfg->cdtab_dma = 0; + cdcfg->cdtab = NULL; } /* Stream table manipulation functions */ @@ -1608,6 +1836,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (s1_cfg) { BUG_ON(ste_live); dst[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1617,8 +1846,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); + val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | + FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); } if (s2_cfg) { @@ -1642,7 +1873,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ @@ -1675,7 +1907,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) desc->span = STRTAB_SPLIT + 1; desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -2131,12 +2363,8 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - if (cfg->cdptr) { - dmam_free_coherent(smmu_domain->smmu->dev, - CTXDESC_CD_DWORDS << 3, - cfg->cdptr, - cfg->cdptr_dma); - + if (cfg->cdcfg.cdtab) { + arm_smmu_free_cd_tables(smmu_domain); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } } else { @@ -2149,55 +2377,82 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits); if (asid < 0) return asid; - cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, - GFP_KERNEL | __GFP_ZERO); - if (!cfg->cdptr) { - dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - ret = -ENOMEM; + cfg->s1cdmax = master->ssid_bits; + + ret = arm_smmu_alloc_cd_tables(smmu_domain); + if (ret) goto out_free_asid; - } cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | + CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; + + /* + * Note that this will end up calling arm_smmu_sync_cd() before + * the master has been added to the devices list for this domain. + * This isn't an issue because the STE hasn't been installed yet. + */ + ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + if (ret) + goto out_free_cd_tables; + return 0; +out_free_cd_tables: + arm_smmu_free_cd_tables(smmu_domain); out_free_asid: arm_smmu_bitmap_free(smmu->asid_map, asid); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; + typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); if (vmid < 0) return vmid; + vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain) +static int arm_smmu_domain_finalise(struct iommu_domain *domain, + struct arm_smmu_master *master) { int ret; unsigned long ias, oas; @@ -2205,6 +2460,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, + struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2259,7 +2515,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2412,7 +2668,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain); + ret = arm_smmu_domain_finalise(domain, master); if (ret) { smmu_domain->smmu = NULL; goto out_unlock; @@ -2424,6 +2680,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_name(smmu->dev)); ret = -ENXIO; goto out_unlock; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && + master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + dev_err(dev, + "cannot attach to incompatible domain (%u SSID bits != %u)\n", + smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); + ret = -EINVAL; + goto out_unlock; } master->domain = smmu_domain; @@ -2431,9 +2694,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) - arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); - arm_smmu_install_ste_for_dev(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); @@ -2534,51 +2794,66 @@ static int arm_smmu_add_device(struct device *dev) if (!fwspec || fwspec->ops != &arm_smmu_ops) return -ENODEV; - /* - * We _can_ actually withstand dodgy bus code re-calling add_device() - * without an intervening remove_device()/of_xlate() sequence, but - * we're not going to do so quietly... - */ - if (WARN_ON_ONCE(fwspec->iommu_priv)) { - master = fwspec->iommu_priv; - smmu = master->smmu; - } else { - smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - if (!smmu) - return -ENODEV; - master = kzalloc(sizeof(*master), GFP_KERNEL); - if (!master) - return -ENOMEM; - master->dev = dev; - master->smmu = smmu; - master->sids = fwspec->ids; - master->num_sids = fwspec->num_ids; - fwspec->iommu_priv = master; - } + if (WARN_ON_ONCE(fwspec->iommu_priv)) + return -EBUSY; + + smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); + if (!smmu) + return -ENODEV; + + master = kzalloc(sizeof(*master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + master->dev = dev; + master->smmu = smmu; + master->sids = fwspec->ids; + master->num_sids = fwspec->num_ids; + fwspec->iommu_priv = master; /* Check the SIDs are in range of the SMMU and our stream table */ for (i = 0; i < master->num_sids; i++) { u32 sid = master->sids[i]; - if (!arm_smmu_sid_in_range(smmu, sid)) - return -ERANGE; + if (!arm_smmu_sid_in_range(smmu, sid)) { + ret = -ERANGE; + goto err_free_master; + } /* Ensure l2 strtab is initialised */ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_l2_strtab(smmu, sid); if (ret) - return ret; + goto err_free_master; } } + master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); + + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) + master->ssid_bits = min_t(u8, master->ssid_bits, + CTXDESC_LINEAR_CDMAX); + + ret = iommu_device_link(&smmu->iommu, dev); + if (ret) + goto err_free_master; + group = iommu_group_get_for_dev(dev); - if (!IS_ERR(group)) { - iommu_group_put(group); - iommu_device_link(&smmu->iommu, dev); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto err_unlink; } - return PTR_ERR_OR_ZERO(group); + iommu_group_put(group); + return 0; + +err_unlink: + iommu_device_unlink(&smmu->iommu, dev); +err_free_master: + kfree(master); + fwspec->iommu_priv = NULL; + return ret; } static void arm_smmu_remove_device(struct device *dev) @@ -2710,15 +2985,6 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static void arm_smmu_put_resv_regions(struct device *dev, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -2736,7 +3002,7 @@ static struct iommu_ops arm_smmu_ops = { .domain_set_attr = arm_smmu_domain_set_attr, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .put_resv_regions = arm_smmu_put_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; @@ -2883,7 +3149,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", @@ -2910,7 +3176,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", @@ -3570,6 +3836,43 @@ static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) return SZ_128K; } +static int arm_smmu_set_bus_ops(struct iommu_ops *ops) +{ + int err; + +#ifdef CONFIG_PCI + if (pci_bus_type.iommu_ops != ops) { + err = bus_set_iommu(&pci_bus_type, ops); + if (err) + return err; + } +#endif +#ifdef CONFIG_ARM_AMBA + if (amba_bustype.iommu_ops != ops) { + err = bus_set_iommu(&amba_bustype, ops); + if (err) + goto err_reset_pci_ops; + } +#endif + if (platform_bus_type.iommu_ops != ops) { + err = bus_set_iommu(&platform_bus_type, ops); + if (err) + goto err_reset_amba_ops; + } + + return 0; + +err_reset_amba_ops: +#ifdef CONFIG_ARM_AMBA + bus_set_iommu(&amba_bustype, NULL); +#endif +err_reset_pci_ops: __maybe_unused; +#ifdef CONFIG_PCI + bus_set_iommu(&pci_bus_type, NULL); +#endif + return err; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -3599,7 +3902,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { + if (resource_size(res) < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } @@ -3660,48 +3963,45 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return ret; } -#ifdef CONFIG_PCI - if (pci_bus_type.iommu_ops != &arm_smmu_ops) { - pci_request_acs(); - ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops); - if (ret) - return ret; - } -#endif -#ifdef CONFIG_ARM_AMBA - if (amba_bustype.iommu_ops != &arm_smmu_ops) { - ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops); - if (ret) - return ret; - } -#endif - if (platform_bus_type.iommu_ops != &arm_smmu_ops) { - ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops); - if (ret) - return ret; - } - return 0; + return arm_smmu_set_bus_ops(&arm_smmu_ops); } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + arm_smmu_set_bus_ops(NULL); + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); + + return 0; +} + +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); } static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, }; +MODULE_DEVICE_TABLE(of, arm_smmu_of_match); static struct platform_driver arm_smmu_driver = { .driver = { - .name = "arm-smmu-v3", - .of_match_table = of_match_ptr(arm_smmu_of_match), - .suppress_bind_attrs = true, + .name = "arm-smmu-v3", + .of_match_table = arm_smmu_of_match, + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, + .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; -builtin_platform_driver(arm_smmu_driver); +module_platform_driver(arm_smmu_driver); + +MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); +MODULE_AUTHOR("Will Deacon <will@kernel.org>"); +MODULE_ALIAS("platform:arm-smmu-v3"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 4f1a350d9529..16c4b87af42b 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -27,8 +27,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/iopoll.h> -#include <linux/init.h> -#include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> @@ -60,10 +59,6 @@ #define MSI_IOVA_LENGTH 0x100000 static int force_stage; -/* - * not really modular, but the easiest way to keep compat with existing - * bootargs behaviour is to continue using module_param() here. - */ module_param(force_stage, int, S_IRUGO); MODULE_PARM_DESC(force_stage, "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); @@ -131,6 +126,12 @@ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) return container_of(dom, struct arm_smmu_domain, domain); } +static struct platform_driver arm_smmu_driver; +static struct iommu_ops arm_smmu_ops; + +#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS +static int arm_smmu_bus_init(struct iommu_ops *ops); + static struct device_node *dev_get_dev_node(struct device *dev) { if (dev_is_pci(dev)) { @@ -166,9 +167,6 @@ static int __find_legacy_master_phandle(struct device *dev, void *data) return err == -ENOENT ? 0 : err; } -static struct platform_driver arm_smmu_driver; -static struct iommu_ops arm_smmu_ops; - static int arm_smmu_register_legacy_master(struct device *dev, struct arm_smmu_device **smmu) { @@ -220,6 +218,27 @@ static int arm_smmu_register_legacy_master(struct device *dev, return err; } +/* + * With the legacy DT binding in play, we have no guarantees about + * probe order, but then we're also not doing default domains, so we can + * delay setting bus ops until we're sure every possible SMMU is ready, + * and that way ensure that no add_device() calls get missed. + */ +static int arm_smmu_legacy_bus_init(void) +{ + if (using_legacy_binding) + return arm_smmu_bus_init(&arm_smmu_ops); + return 0; +} +device_initcall_sync(arm_smmu_legacy_bus_init); +#else +static int arm_smmu_register_legacy_master(struct device *dev, + struct arm_smmu_device **smmu) +{ + return -ENODEV; +} +#endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */ + static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end) { int idx; @@ -252,7 +271,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { reg = arm_smmu_readl(smmu, page, status); - if (!(reg & sTLBGSTATUS_GSACTIVE)) + if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE)) return; cpu_relax(); } @@ -459,7 +478,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int idx = smmu_domain->cfg.cbndx; fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -491,7 +510,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) if (__ratelimit(&rs)) { if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && - (gfsr & sGFSR_USF)) + (gfsr & ARM_SMMU_sGFSR_USF)) dev_err(smmu->dev, "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", (u16)gfsynr1); @@ -521,26 +540,28 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr; } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM); + cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg); + cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg); if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - cb->tcr[1] |= TCR2_AS; + cb->tcr[1] |= ARM_SMMU_TCR2_AS; + else + cb->tcr[0] |= ARM_SMMU_TCR_EAE; } } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg); } /* TTBRs */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; + cb->ttbr[1] = 0; } else { - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; @@ -576,31 +597,33 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) /* CBA2R */ if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg = CBA2R_VA64; + reg = ARM_SMMU_CBA2R_VA64; else reg = 0; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg); } /* CBAR */ - reg = FIELD_PREP(CBAR_TYPE, cfg->cbar); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar); if (smmu->version < ARM_SMMU_V2) - reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx); + reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx); /* * Use the weakest shareability/memory types, so they are * overridden by the ttbcr/pte. */ if (stage1) { - reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) | - FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB); + reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG, + ARM_SMMU_CBAR_S1_BPSHCFG_NSH) | + FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR, + ARM_SMMU_CBAR_S1_MEMATTR_WB); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= FIELD_PREP(CBAR_VMID, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid); } arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg); @@ -632,11 +655,12 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) } /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE | + ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M; if (stage1) - reg |= SCTLR_S1_ASIDPNE; + reg |= ARM_SMMU_SCTLR_S1_ASIDPNE; if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg); } @@ -818,7 +842,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); - cfg->irptndx = INVALID_IRPTNDX; + cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX; } mutex_unlock(&smmu_domain->init_mutex); @@ -856,7 +880,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) smmu->cbs[cfg->cbndx].cfg = NULL; arm_smmu_write_context_bank(smmu, cfg->cbndx); - if (cfg->irptndx != INVALID_IRPTNDX) { + if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; devm_free_irq(smmu->dev, irq, domain); } @@ -912,23 +936,24 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_smr *smr = smmu->smrs + idx; - u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask); + u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) | + FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask); if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid) - reg |= SMR_VALID; + reg |= ARM_SMMU_SMR_VALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg); } static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; - u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) | - FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) | - FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg); + u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs && smmu->smrs[idx].valid) - reg |= S2CR_EXIDVALID; + reg |= ARM_SMMU_S2CR_EXIDVALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg); } @@ -946,24 +971,37 @@ static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx) static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu) { u32 smr; + int i; if (!smmu->smrs) return; - + /* + * If we've had to accommodate firmware memory regions, we may + * have live SMRs by now; tread carefully... + * + * Somewhat perversely, not having a free SMR for this test implies we + * can get away without it anyway, as we'll only be able to 'allocate' + * these SMRs for the ID/mask values we're already trusting to be OK. + */ + for (i = 0; i < smmu->num_mapping_groups; i++) + if (!smmu->smrs[i].valid) + goto smr_ok; + return; +smr_ok: /* * SMR.ID bits may not be preserved if the corresponding MASK * bits are set, so check each one separately. We can reject * masters later if they try to claim IDs outside these masks. */ - smr = FIELD_PREP(SMR_ID, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->streamid_mask = FIELD_GET(SMR_ID, smr); + smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); + smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr); - smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr); + smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); + smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); } static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask) @@ -1032,8 +1070,8 @@ static int arm_smmu_master_alloc_smes(struct device *dev) mutex_lock(&smmu->stream_map_mutex); /* Figure out a viable stream map entry allocation */ for_each_cfg_sme(fwspec, i, idx) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (idx != INVALID_SMENDX) { ret = -EEXIST; @@ -1277,7 +1315,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", @@ -1287,7 +1326,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR); spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); - if (phys & CB_PAR_F) { + if (phys & ARM_SMMU_CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); return 0; @@ -1368,8 +1407,8 @@ static int arm_smmu_add_device(struct device *dev) ret = -EINVAL; for (i = 0; i < fwspec->num_ids; i++) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (sid & ~smmu->streamid_mask) { dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n", @@ -1550,12 +1589,12 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) u32 mask, fwid = 0; if (args->args_count > 0) - fwid |= FIELD_PREP(SMR_ID, args->args[0]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]); if (args->args_count > 1) - fwid |= FIELD_PREP(SMR_MASK, args->args[1]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]); else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) - fwid |= FIELD_PREP(SMR_MASK, mask); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask); return iommu_fwspec_add_ids(dev, &fwid, 1); } @@ -1576,15 +1615,6 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static void arm_smmu_put_resv_regions(struct device *dev, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -1602,7 +1632,7 @@ static struct iommu_ops arm_smmu_ops = { .domain_set_attr = arm_smmu_domain_set_attr, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .put_resv_regions = arm_smmu_put_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; @@ -1625,7 +1655,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); } /* Invalidate the TLB, just in case */ @@ -1635,29 +1665,30 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0); /* Enable fault reporting */ - reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); + reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE | + ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE); /* Disable TLB broadcasting. */ - reg |= (sCR0_VMIDPNE | sCR0_PTM); + reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM); /* Enable client access, handling unmatched streams as appropriate */ - reg &= ~sCR0_CLIENTPD; + reg &= ~ARM_SMMU_sCR0_CLIENTPD; if (disable_bypass) - reg |= sCR0_USFCFG; + reg |= ARM_SMMU_sCR0_USFCFG; else - reg &= ~sCR0_USFCFG; + reg &= ~ARM_SMMU_sCR0_USFCFG; /* Disable forced broadcasting */ - reg &= ~sCR0_FB; + reg &= ~ARM_SMMU_sCR0_FB; /* Don't upgrade barriers */ - reg &= ~(sCR0_BSU); + reg &= ~(ARM_SMMU_sCR0_BSU); if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= sCR0_VMID16EN; + reg |= ARM_SMMU_sCR0_VMID16EN; if (smmu->features & ARM_SMMU_FEAT_EXIDS) - reg |= sCR0_EXIDENABLE; + reg |= ARM_SMMU_sCR0_EXIDENABLE; if (smmu->impl && smmu->impl->reset) smmu->impl->reset(smmu); @@ -1702,21 +1733,21 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Restrict available stages based on module parameter */ if (force_stage == 1) - id &= ~(ID0_S2TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS); else if (force_stage == 2) - id &= ~(ID0_S1TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS); - if (id & ID0_S1TS) { + if (id & ARM_SMMU_ID0_S1TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S1; dev_notice(smmu->dev, "\tstage 1 translation\n"); } - if (id & ID0_S2TS) { + if (id & ARM_SMMU_ID0_S2TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S2; dev_notice(smmu->dev, "\tstage 2 translation\n"); } - if (id & ID0_NTS) { + if (id & ARM_SMMU_ID0_NTS) { smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED; dev_notice(smmu->dev, "\tnested translation\n"); } @@ -1727,8 +1758,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && - ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) { + if ((id & ARM_SMMU_ID0_S1TS) && + ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } @@ -1739,7 +1770,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) * Fortunately, this also opens up a workaround for systems where the * ID register value has ended up configured incorrectly. */ - cttw_reg = !!(id & ID0_CTTW); + cttw_reg = !!(id & ARM_SMMU_ID0_CTTW); if (cttw_fw || cttw_reg) dev_notice(smmu->dev, "\t%scoherent table walk\n", cttw_fw ? "" : "non-"); @@ -1748,16 +1779,16 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) "\t(IDR0.CTTW overridden by FW configuration)\n"); /* Max. number of entries we have for stream matching/indexing */ - if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) { + if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) { smmu->features |= ARM_SMMU_FEAT_EXIDS; size = 1 << 16; } else { - size = 1 << FIELD_GET(ID0_NUMSIDB, id); + size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id); } smmu->streamid_mask = size - 1; - if (id & ID0_SMS) { + if (id & ARM_SMMU_ID0_SMS) { smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH; - size = FIELD_GET(ID0_NUMSMRG, id); + size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id); if (size == 0) { dev_err(smmu->dev, "stream-matching supported, but no SMRs present!\n"); @@ -1785,18 +1816,19 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) mutex_init(&smmu->stream_map_mutex); spin_lock_init(&smmu->global_sync_lock); - if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) { + if (smmu->version < ARM_SMMU_V2 || + !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) { smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L; - if (!(id & ID0_PTFS_NO_AARCH32S)) + if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S)) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S; } /* ID1 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1); - smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; + smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12; /* Check for size mismatch of SMMU address space from mapped region */ - size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1); + size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1); if (smmu->numpage != 2 * size << smmu->pgshift) dev_warn(smmu->dev, "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n", @@ -1804,8 +1836,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */ smmu->numpage = size; - smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id); - smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id); + smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id); + smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id); if (smmu->num_s2_context_banks > smmu->num_context_banks) { dev_err(smmu->dev, "impossible number of S2 context banks!\n"); return -ENODEV; @@ -1819,14 +1851,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID2 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2); - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id)); smmu->ipa_size = size; /* The output mask is also applied for bypass */ - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id)); smmu->pa_size = size; - if (id & ID2_VMID16) + if (id & ARM_SMMU_ID2_VMID16) smmu->features |= ARM_SMMU_FEAT_VMID16; /* @@ -1843,13 +1875,13 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->version == ARM_SMMU_V1_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } else { - size = FIELD_GET(ID2_UBS, id); + size = FIELD_GET(ARM_SMMU_ID2_UBS, id); smmu->va_size = arm_smmu_id_size_to_bits(size); - if (id & ID2_PTFS_4K) + if (id & ARM_SMMU_ID2_PTFS_4K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K; - if (id & ID2_PTFS_16K) + if (id & ARM_SMMU_ID2_PTFS_16K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K; - if (id & ID2_PTFS_64K) + if (id & ARM_SMMU_ID2_PTFS_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } @@ -1911,6 +1943,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; +MODULE_DEVICE_TABLE(of, arm_smmu_of_match); #ifdef CONFIG_ACPI static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) @@ -1997,8 +2030,10 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL); if (legacy_binding && !using_generic_binding) { - if (!using_legacy_binding) - pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n"); + if (!using_legacy_binding) { + pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n", + IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU"); + } using_legacy_binding = true; } else if (!legacy_binding && !using_legacy_binding) { using_generic_binding = true; @@ -2013,25 +2048,50 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return 0; } -static void arm_smmu_bus_init(void) +static int arm_smmu_bus_init(struct iommu_ops *ops) { + int err; + /* Oh, for a proper bus abstraction */ - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &arm_smmu_ops); + if (!iommu_present(&platform_bus_type)) { + err = bus_set_iommu(&platform_bus_type, ops); + if (err) + return err; + } #ifdef CONFIG_ARM_AMBA - if (!iommu_present(&amba_bustype)) - bus_set_iommu(&amba_bustype, &arm_smmu_ops); + if (!iommu_present(&amba_bustype)) { + err = bus_set_iommu(&amba_bustype, ops); + if (err) + goto err_reset_platform_ops; + } #endif #ifdef CONFIG_PCI if (!iommu_present(&pci_bus_type)) { - pci_request_acs(); - bus_set_iommu(&pci_bus_type, &arm_smmu_ops); + err = bus_set_iommu(&pci_bus_type, ops); + if (err) + goto err_reset_amba_ops; } #endif #ifdef CONFIG_FSL_MC_BUS - if (!iommu_present(&fsl_mc_bus_type)) - bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops); + if (!iommu_present(&fsl_mc_bus_type)) { + err = bus_set_iommu(&fsl_mc_bus_type, ops); + if (err) + goto err_reset_pci_ops; + } #endif + return 0; + +err_reset_pci_ops: __maybe_unused; +#ifdef CONFIG_PCI + bus_set_iommu(&pci_bus_type, NULL); +#endif +err_reset_amba_ops: __maybe_unused; +#ifdef CONFIG_ARM_AMBA + bus_set_iommu(&amba_bustype, NULL); +#endif +err_reset_platform_ops: __maybe_unused; + bus_set_iommu(&platform_bus_type, NULL); + return err; } static int arm_smmu_device_probe(struct platform_device *pdev) @@ -2177,38 +2237,28 @@ static int arm_smmu_device_probe(struct platform_device *pdev) * ready to handle default domain setup as soon as any SMMU exists. */ if (!using_legacy_binding) - arm_smmu_bus_init(); + return arm_smmu_bus_init(&arm_smmu_ops); return 0; } -/* - * With the legacy DT binding in play, though, we have no guarantees about - * probe order, but then we're also not doing default domains, so we can - * delay setting bus ops until we're sure every possible SMMU is ready, - * and that way ensure that no add_device() calls get missed. - */ -static int arm_smmu_legacy_bus_init(void) -{ - if (using_legacy_binding) - arm_smmu_bus_init(); - return 0; -} -device_initcall_sync(arm_smmu_legacy_bus_init); - -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return; + return -ENODEV; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_err(&pdev->dev, "removing device with active domains!\n"); + arm_smmu_bus_init(NULL); + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + arm_smmu_rpm_get(smmu); /* Turn the thing off */ - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); arm_smmu_rpm_put(smmu); if (pm_runtime_enabled(smmu->dev)) @@ -2217,6 +2267,12 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) clk_bulk_disable(smmu->num_clks, smmu->clks); clk_bulk_unprepare(smmu->num_clks, smmu->clks); + return 0; +} + +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) @@ -2267,11 +2323,17 @@ static const struct dev_pm_ops arm_smmu_pm_ops = { static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .of_match_table = arm_smmu_of_match, .pm = &arm_smmu_pm_ops, - .suppress_bind_attrs = true, + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, + .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; -builtin_platform_driver(arm_smmu_driver); +module_platform_driver(arm_smmu_driver); + +MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); +MODULE_AUTHOR("Will Deacon <will@kernel.org>"); +MODULE_ALIAS("platform:arm-smmu"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 62b9f0cec49b..8d1cd54d82a6 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -11,6 +11,7 @@ #define _ARM_SMMU_H #include <linux/atomic.h> +#include <linux/bitfield.h> #include <linux/bits.h> #include <linux/clk.h> #include <linux/device.h> @@ -23,51 +24,51 @@ /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 -#define sCR0_VMID16EN BIT(31) -#define sCR0_BSU GENMASK(15, 14) -#define sCR0_FB BIT(13) -#define sCR0_PTM BIT(12) -#define sCR0_VMIDPNE BIT(11) -#define sCR0_USFCFG BIT(10) -#define sCR0_GCFGFIE BIT(5) -#define sCR0_GCFGFRE BIT(4) -#define sCR0_EXIDENABLE BIT(3) -#define sCR0_GFIE BIT(2) -#define sCR0_GFRE BIT(1) -#define sCR0_CLIENTPD BIT(0) +#define ARM_SMMU_sCR0_VMID16EN BIT(31) +#define ARM_SMMU_sCR0_BSU GENMASK(15, 14) +#define ARM_SMMU_sCR0_FB BIT(13) +#define ARM_SMMU_sCR0_PTM BIT(12) +#define ARM_SMMU_sCR0_VMIDPNE BIT(11) +#define ARM_SMMU_sCR0_USFCFG BIT(10) +#define ARM_SMMU_sCR0_GCFGFIE BIT(5) +#define ARM_SMMU_sCR0_GCFGFRE BIT(4) +#define ARM_SMMU_sCR0_EXIDENABLE BIT(3) +#define ARM_SMMU_sCR0_GFIE BIT(2) +#define ARM_SMMU_sCR0_GFRE BIT(1) +#define ARM_SMMU_sCR0_CLIENTPD BIT(0) /* Auxiliary Configuration register */ #define ARM_SMMU_GR0_sACR 0x10 /* Identification registers */ #define ARM_SMMU_GR0_ID0 0x20 -#define ID0_S1TS BIT(30) -#define ID0_S2TS BIT(29) -#define ID0_NTS BIT(28) -#define ID0_SMS BIT(27) -#define ID0_ATOSNS BIT(26) -#define ID0_PTFS_NO_AARCH32 BIT(25) -#define ID0_PTFS_NO_AARCH32S BIT(24) -#define ID0_NUMIRPT GENMASK(23, 16) -#define ID0_CTTW BIT(14) -#define ID0_NUMSIDB GENMASK(12, 9) -#define ID0_EXIDS BIT(8) -#define ID0_NUMSMRG GENMASK(7, 0) +#define ARM_SMMU_ID0_S1TS BIT(30) +#define ARM_SMMU_ID0_S2TS BIT(29) +#define ARM_SMMU_ID0_NTS BIT(28) +#define ARM_SMMU_ID0_SMS BIT(27) +#define ARM_SMMU_ID0_ATOSNS BIT(26) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32 BIT(25) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32S BIT(24) +#define ARM_SMMU_ID0_NUMIRPT GENMASK(23, 16) +#define ARM_SMMU_ID0_CTTW BIT(14) +#define ARM_SMMU_ID0_NUMSIDB GENMASK(12, 9) +#define ARM_SMMU_ID0_EXIDS BIT(8) +#define ARM_SMMU_ID0_NUMSMRG GENMASK(7, 0) #define ARM_SMMU_GR0_ID1 0x24 -#define ID1_PAGESIZE BIT(31) -#define ID1_NUMPAGENDXB GENMASK(30, 28) -#define ID1_NUMS2CB GENMASK(23, 16) -#define ID1_NUMCB GENMASK(7, 0) +#define ARM_SMMU_ID1_PAGESIZE BIT(31) +#define ARM_SMMU_ID1_NUMPAGENDXB GENMASK(30, 28) +#define ARM_SMMU_ID1_NUMS2CB GENMASK(23, 16) +#define ARM_SMMU_ID1_NUMCB GENMASK(7, 0) #define ARM_SMMU_GR0_ID2 0x28 -#define ID2_VMID16 BIT(15) -#define ID2_PTFS_64K BIT(14) -#define ID2_PTFS_16K BIT(13) -#define ID2_PTFS_4K BIT(12) -#define ID2_UBS GENMASK(11, 8) -#define ID2_OAS GENMASK(7, 4) -#define ID2_IAS GENMASK(3, 0) +#define ARM_SMMU_ID2_VMID16 BIT(15) +#define ARM_SMMU_ID2_PTFS_64K BIT(14) +#define ARM_SMMU_ID2_PTFS_16K BIT(13) +#define ARM_SMMU_ID2_PTFS_4K BIT(12) +#define ARM_SMMU_ID2_UBS GENMASK(11, 8) +#define ARM_SMMU_ID2_OAS GENMASK(7, 4) +#define ARM_SMMU_ID2_IAS GENMASK(3, 0) #define ARM_SMMU_GR0_ID3 0x2c #define ARM_SMMU_GR0_ID4 0x30 @@ -75,11 +76,11 @@ #define ARM_SMMU_GR0_ID6 0x38 #define ARM_SMMU_GR0_ID7 0x3c -#define ID7_MAJOR GENMASK(7, 4) -#define ID7_MINOR GENMASK(3, 0) +#define ARM_SMMU_ID7_MAJOR GENMASK(7, 4) +#define ARM_SMMU_ID7_MINOR GENMASK(3, 0) #define ARM_SMMU_GR0_sGFSR 0x48 -#define sGFSR_USF BIT(1) +#define ARM_SMMU_sGFSR_USF BIT(1) #define ARM_SMMU_GR0_sGFSYNR0 0x50 #define ARM_SMMU_GR0_sGFSYNR1 0x54 @@ -92,106 +93,132 @@ #define ARM_SMMU_GR0_sTLBGSYNC 0x70 #define ARM_SMMU_GR0_sTLBGSTATUS 0x74 -#define sTLBGSTATUS_GSACTIVE BIT(0) +#define ARM_SMMU_sTLBGSTATUS_GSACTIVE BIT(0) /* Stream mapping registers */ #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) -#define SMR_VALID BIT(31) -#define SMR_MASK GENMASK(31, 16) -#define SMR_ID GENMASK(15, 0) +#define ARM_SMMU_SMR_VALID BIT(31) +#define ARM_SMMU_SMR_MASK GENMASK(31, 16) +#define ARM_SMMU_SMR_ID GENMASK(15, 0) #define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) -#define S2CR_PRIVCFG GENMASK(25, 24) +#define ARM_SMMU_S2CR_PRIVCFG GENMASK(25, 24) enum arm_smmu_s2cr_privcfg { S2CR_PRIVCFG_DEFAULT, S2CR_PRIVCFG_DIPAN, S2CR_PRIVCFG_UNPRIV, S2CR_PRIVCFG_PRIV, }; -#define S2CR_TYPE GENMASK(17, 16) +#define ARM_SMMU_S2CR_TYPE GENMASK(17, 16) enum arm_smmu_s2cr_type { S2CR_TYPE_TRANS, S2CR_TYPE_BYPASS, S2CR_TYPE_FAULT, }; -#define S2CR_EXIDVALID BIT(10) -#define S2CR_CBNDX GENMASK(7, 0) +#define ARM_SMMU_S2CR_EXIDVALID BIT(10) +#define ARM_SMMU_S2CR_CBNDX GENMASK(7, 0) /* Context bank attribute registers */ #define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) -#define CBAR_IRPTNDX GENMASK(31, 24) -#define CBAR_TYPE GENMASK(17, 16) +#define ARM_SMMU_CBAR_IRPTNDX GENMASK(31, 24) +#define ARM_SMMU_CBAR_TYPE GENMASK(17, 16) enum arm_smmu_cbar_type { CBAR_TYPE_S2_TRANS, CBAR_TYPE_S1_TRANS_S2_BYPASS, CBAR_TYPE_S1_TRANS_S2_FAULT, CBAR_TYPE_S1_TRANS_S2_TRANS, }; -#define CBAR_S1_MEMATTR GENMASK(15, 12) -#define CBAR_S1_MEMATTR_WB 0xf -#define CBAR_S1_BPSHCFG GENMASK(9, 8) -#define CBAR_S1_BPSHCFG_NSH 3 -#define CBAR_VMID GENMASK(7, 0) +#define ARM_SMMU_CBAR_S1_MEMATTR GENMASK(15, 12) +#define ARM_SMMU_CBAR_S1_MEMATTR_WB 0xf +#define ARM_SMMU_CBAR_S1_BPSHCFG GENMASK(9, 8) +#define ARM_SMMU_CBAR_S1_BPSHCFG_NSH 3 +#define ARM_SMMU_CBAR_VMID GENMASK(7, 0) #define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2)) #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) -#define CBA2R_VMID16 GENMASK(31, 16) -#define CBA2R_VA64 BIT(0) +#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16) +#define ARM_SMMU_CBA2R_VA64 BIT(0) #define ARM_SMMU_CB_SCTLR 0x0 -#define SCTLR_S1_ASIDPNE BIT(12) -#define SCTLR_CFCFG BIT(7) -#define SCTLR_CFIE BIT(6) -#define SCTLR_CFRE BIT(5) -#define SCTLR_E BIT(4) -#define SCTLR_AFE BIT(2) -#define SCTLR_TRE BIT(1) -#define SCTLR_M BIT(0) +#define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12) +#define ARM_SMMU_SCTLR_CFCFG BIT(7) +#define ARM_SMMU_SCTLR_CFIE BIT(6) +#define ARM_SMMU_SCTLR_CFRE BIT(5) +#define ARM_SMMU_SCTLR_E BIT(4) +#define ARM_SMMU_SCTLR_AFE BIT(2) +#define ARM_SMMU_SCTLR_TRE BIT(1) +#define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 #define ARM_SMMU_CB_RESUME 0x8 -#define RESUME_TERMINATE BIT(0) +#define ARM_SMMU_RESUME_TERMINATE BIT(0) #define ARM_SMMU_CB_TCR2 0x10 -#define TCR2_SEP GENMASK(17, 15) -#define TCR2_SEP_UPSTREAM 0x7 -#define TCR2_AS BIT(4) +#define ARM_SMMU_TCR2_SEP GENMASK(17, 15) +#define ARM_SMMU_TCR2_SEP_UPSTREAM 0x7 +#define ARM_SMMU_TCR2_AS BIT(4) +#define ARM_SMMU_TCR2_PASIZE GENMASK(3, 0) #define ARM_SMMU_CB_TTBR0 0x20 #define ARM_SMMU_CB_TTBR1 0x28 -#define TTBRn_ASID GENMASK_ULL(63, 48) +#define ARM_SMMU_TTBRn_ASID GENMASK_ULL(63, 48) #define ARM_SMMU_CB_TCR 0x30 +#define ARM_SMMU_TCR_EAE BIT(31) +#define ARM_SMMU_TCR_EPD1 BIT(23) +#define ARM_SMMU_TCR_TG0 GENMASK(15, 14) +#define ARM_SMMU_TCR_SH0 GENMASK(13, 12) +#define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) +#define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) +#define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) + +#define ARM_SMMU_VTCR_RES1 BIT(31) +#define ARM_SMMU_VTCR_PS GENMASK(18, 16) +#define ARM_SMMU_VTCR_TG0 ARM_SMMU_TCR_TG0 +#define ARM_SMMU_VTCR_SH0 ARM_SMMU_TCR_SH0 +#define ARM_SMMU_VTCR_ORGN0 ARM_SMMU_TCR_ORGN0 +#define ARM_SMMU_VTCR_IRGN0 ARM_SMMU_TCR_IRGN0 +#define ARM_SMMU_VTCR_SL0 GENMASK(7, 6) +#define ARM_SMMU_VTCR_T0SZ ARM_SMMU_TCR_T0SZ + #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c #define ARM_SMMU_CB_PAR 0x50 -#define CB_PAR_F BIT(0) +#define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define FSR_MULTI BIT(31) -#define FSR_SS BIT(30) -#define FSR_UUT BIT(8) -#define FSR_ASF BIT(7) -#define FSR_TLBLKF BIT(6) -#define FSR_TLBMCF BIT(5) -#define FSR_EF BIT(4) -#define FSR_PF BIT(3) -#define FSR_AFF BIT(2) -#define FSR_TF BIT(1) - -#define FSR_IGN (FSR_AFF | FSR_ASF | \ - FSR_TLBMCF | FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF | FSR_IGN) +#define ARM_SMMU_FSR_MULTI BIT(31) +#define ARM_SMMU_FSR_SS BIT(30) +#define ARM_SMMU_FSR_UUT BIT(8) +#define ARM_SMMU_FSR_ASF BIT(7) +#define ARM_SMMU_FSR_TLBLKF BIT(6) +#define ARM_SMMU_FSR_TLBMCF BIT(5) +#define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_PF BIT(3) +#define ARM_SMMU_FSR_AFF BIT(2) +#define ARM_SMMU_FSR_TF BIT(1) + +#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ + ARM_SMMU_FSR_ASF | \ + ARM_SMMU_FSR_TLBMCF | \ + ARM_SMMU_FSR_TLBLKF) + +#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ + ARM_SMMU_FSR_SS | \ + ARM_SMMU_FSR_UUT | \ + ARM_SMMU_FSR_EF | \ + ARM_SMMU_FSR_PF | \ + ARM_SMMU_FSR_TF | \ + ARM_SMMU_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define FSYNR0_WNR BIT(4) +#define ARM_SMMU_FSYNR0_WNR BIT(4) #define ARM_SMMU_CB_S1_TLBIVA 0x600 #define ARM_SMMU_CB_S1_TLBIASID 0x610 @@ -203,7 +230,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ATSR_ACTIVE BIT(0) +#define ARM_SMMU_ATSR_ACTIVE BIT(0) /* Maximum number of context banks per SMMU */ @@ -297,7 +324,7 @@ struct arm_smmu_cfg { enum arm_smmu_cbar_type cbar; enum arm_smmu_context_fmt fmt; }; -#define INVALID_IRPTNDX 0xff +#define ARM_SMMU_INVALID_IRPTNDX 0xff enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, @@ -318,6 +345,33 @@ struct arm_smmu_domain { struct iommu_domain domain; }; +static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) +{ + return ARM_SMMU_TCR_EPD1 | + FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); +} + +static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) +{ + return FIELD_PREP(ARM_SMMU_TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) | + FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM); +} + +static inline u32 arm_smmu_lpae_vtcr(struct io_pgtable_cfg *cfg) +{ + return ARM_SMMU_VTCR_RES1 | + FIELD_PREP(ARM_SMMU_VTCR_PS, cfg->arm_lpae_s2_cfg.vtcr.ps) | + FIELD_PREP(ARM_SMMU_VTCR_TG0, cfg->arm_lpae_s2_cfg.vtcr.tg) | + FIELD_PREP(ARM_SMMU_VTCR_SH0, cfg->arm_lpae_s2_cfg.vtcr.sh) | + FIELD_PREP(ARM_SMMU_VTCR_ORGN0, cfg->arm_lpae_s2_cfg.vtcr.orgn) | + FIELD_PREP(ARM_SMMU_VTCR_IRGN0, cfg->arm_lpae_s2_cfg.vtcr.irgn) | + FIELD_PREP(ARM_SMMU_VTCR_SL0, cfg->arm_lpae_s2_cfg.vtcr.sl) | + FIELD_PREP(ARM_SMMU_VTCR_T0SZ, cfg->arm_lpae_s2_cfg.vtcr.tsz); +} /* Implementation details, yay! */ struct arm_smmu_impl { diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3acfa6a25fa2..071bb42bbbc5 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -244,7 +244,7 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) || (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE && (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL && - info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) { + info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) { pr_warn("Device scope type does not match for %s\n", pci_name(info->dev)); return -EINVAL; @@ -1354,7 +1354,6 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, struct qi_desc desc; if (mask) { - WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else @@ -1371,6 +1370,47 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, qi_submit_sync(&desc, iommu); } +/* PASID-based IOTLB invalidation */ +void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, + unsigned long npages, bool ih) +{ + struct qi_desc desc = {.qw2 = 0, .qw3 = 0}; + + /* + * npages == -1 means a PASID-selective invalidation, otherwise, + * a positive value for Page-selective-within-PASID invalidation. + * 0 is not a valid input. + */ + if (WARN_ON(!npages)) { + pr_err("Invalid input npages = %ld\n", npages); + return; + } + + if (npages == -1) { + desc.qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(npages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); + + if (WARN_ON_ONCE(!ALIGN(addr, align))) + addr &= ~(align - 1); + + desc.qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = QI_EIOTLB_ADDR(addr) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); + } + + qi_submit_sync(&desc, iommu); +} + /* * Disable Queued Invalidation interface. */ diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 471f05d452e0..c1257bef553c 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -5,6 +5,7 @@ * Authors: Gayatri Kammela <gayatri.kammela@intel.com> * Sohil Mehta <sohil.mehta@intel.com> * Jacob Pan <jacob.jun.pan@linux.intel.com> + * Lu Baolu <baolu.lu@linux.intel.com> */ #include <linux/debugfs.h> @@ -283,6 +284,77 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused) } DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct); +static inline unsigned long level_to_directory_size(int level) +{ + return BIT_ULL(VTD_PAGE_SHIFT + VTD_STRIDE_SHIFT * (level - 1)); +} + +static inline void +dump_page_info(struct seq_file *m, unsigned long iova, u64 *path) +{ + seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n", + iova >> VTD_PAGE_SHIFT, path[5], path[4], + path[3], path[2], path[1]); +} + +static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, + int level, unsigned long start, + u64 *path) +{ + int i; + + if (level > 5 || level < 1) + return; + + for (i = 0; i < BIT_ULL(VTD_STRIDE_SHIFT); + i++, pde++, start += level_to_directory_size(level)) { + if (!dma_pte_present(pde)) + continue; + + path[level] = pde->val; + if (dma_pte_superpage(pde) || level == 1) + dump_page_info(m, start, path); + else + pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)), + level - 1, start, path); + path[level] = 0; + } +} + +static int show_device_domain_translation(struct device *dev, void *data) +{ + struct dmar_domain *domain = find_domain(dev); + struct seq_file *m = data; + u64 path[6] = { 0 }; + + if (!domain) + return 0; + + seq_printf(m, "Device %s with pasid %d @0x%llx\n", + dev_name(dev), domain->default_pasid, + (u64)virt_to_phys(domain->pgd)); + seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); + + pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path); + seq_putc(m, '\n'); + + return 0; +} + +static int domain_translation_struct_show(struct seq_file *m, void *unused) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&device_domain_lock, flags); + ret = bus_for_each_dev(&pci_bus_type, NULL, m, + show_device_domain_translation); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} +DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); + #ifdef CONFIG_IRQ_REMAP static void ir_tbl_remap_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -396,6 +468,9 @@ void __init intel_iommu_debugfs_init(void) &iommu_regset_fops); debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, NULL, &dmar_translation_struct_fops); + debugfs_create_file("domain_translation_struct", 0444, + intel_iommu_debug, NULL, + &domain_translation_struct_fops); #ifdef CONFIG_IRQ_REMAP debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 35a4a3abedc6..9dc37672bf89 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -307,6 +307,20 @@ static int hw_pass_through = 1; */ #define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) +/* + * When VT-d works in the scalable mode, it allows DMA translation to + * happen through either first level or second level page table. This + * bit marks that the DMA translation for the domain goes through the + * first level page table, otherwise, it goes through the second level. + */ +#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) + +/* + * Domain represents a virtual machine which demands iommu nested + * translation mode support. + */ +#define DOMAIN_FLAG_NESTING_MODE BIT(3) + #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -355,9 +369,14 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int dmar_disabled = 0; #else int dmar_disabled = 1; -#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ +#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */ +#ifdef INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON +int intel_iommu_sm = 1; +#else int intel_iommu_sm; +#endif /* INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */ + int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); @@ -368,7 +387,6 @@ static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int intel_no_bounce; -#define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 @@ -377,7 +395,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) -static DEFINE_SPINLOCK(device_domain_lock); +DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \ @@ -552,6 +570,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain) return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; } +static inline bool domain_use_first_level(struct dmar_domain *domain) +{ + return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL; +} + static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -661,11 +684,12 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) return ret; } -static int domain_update_iommu_superpage(struct intel_iommu *skip) +static int domain_update_iommu_superpage(struct dmar_domain *domain, + struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int mask = 0xf; + int mask = 0x3; if (!intel_iommu_superpage) { return 0; @@ -675,7 +699,13 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - mask &= cap_super_page_val(iommu->cap); + if (domain && domain_use_first_level(domain)) { + if (!cap_fl1gp_support(iommu->cap)) + mask = 0x1; + } else { + mask &= cap_super_page_val(iommu->cap); + } + if (!mask) break; } @@ -690,7 +720,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain->iommu_snooping = domain_update_iommu_snooping(NULL); - domain->iommu_superpage = domain_update_iommu_superpage(NULL); + domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -907,6 +937,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (domain_use_first_level(domain)) + pteval |= DMA_FL_PTE_XD; if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -1477,6 +1509,20 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); } +static void domain_flush_piotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, + u64 addr, unsigned long npages, bool ih) +{ + u16 did = domain->iommu_did[iommu->seq_id]; + + if (domain->default_pasid) + qi_flush_piotlb(iommu, did, domain->default_pasid, + addr, npages, ih); + + if (!list_empty(&domain->devices)) + qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1490,18 +1536,23 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - /* - * Fallback to domain selective flush if no PSI support or the size is - * too big. - * PSI requires page size to be 2 ^ x, and the base address is naturally - * aligned to the size - */ - if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); + + if (domain_use_first_level(domain)) { + domain_flush_piotlb(iommu, domain, addr, pages, ih); + } else { + /* + * Fallback to domain selective flush if no PSI support or + * the size is too big. PSI requires page size to be 2 ^ x, + * and the base address is naturally aligned to the size. + */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); + } /* * In caching mode, changes of pages from non-present to present require @@ -1516,8 +1567,11 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages) { - /* It's a non-present to present mapping. Only flush if caching mode */ - if (cap_caching_mode(iommu->cap)) + /* + * It's a non-present to present mapping. Only flush if caching mode + * and second level. + */ + if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain)) iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); else iommu_flush_write_buffer(iommu); @@ -1534,7 +1588,11 @@ static void iommu_flush_iova(struct iova_domain *iovad) struct intel_iommu *iommu = g_iommus[idx]; u16 did = domain->iommu_did[iommu->seq_id]; - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + if (domain_use_first_level(domain)) + domain_flush_piotlb(iommu, domain, 0, -1, 0); + else + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); if (!cap_caching_mode(iommu->cap)) iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), @@ -1703,6 +1761,33 @@ static void free_dmar_iommu(struct intel_iommu *iommu) #endif } +/* + * Check and return whether first level is used by default for + * DMA translation. + */ +static bool first_level_by_default(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + static int first_level_support = -1; + + if (likely(first_level_support != -1)) + return first_level_support; + + first_level_support = 1; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) { + first_level_support = 0; + break; + } + } + rcu_read_unlock(); + + return first_level_support; +} + static struct dmar_domain *alloc_domain(int flags) { struct dmar_domain *domain; @@ -1714,6 +1799,8 @@ static struct dmar_domain *alloc_domain(int flags) memset(domain, 0, sizeof(*domain)); domain->nid = NUMA_NO_NODE; domain->flags = flags; + if (first_level_by_default()) + domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); @@ -1843,14 +1930,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, { int adjust_width, agaw; unsigned long sagaw; - int err; + int ret; init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - err = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (err) - return err; + if (!intel_iommu_strict) { + ret = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (ret) + pr_info("iova flush queue initialization failed\n"); + } domain_reserve_special_ranges(domain); @@ -2223,17 +2312,20 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + u64 attr; BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); + if (domain_use_first_level(domain)) + attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD; if (!sg) { sg_res = nr_pages; - pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; } while (nr_pages > 0) { @@ -2245,7 +2337,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg_res = aligned_nrpages(sg->offset, sg->length); sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; sg->dma_length = sg->length; - pteval = (sg_phys(sg) - pgoff) | prot; + pteval = (sg_phys(sg) - pgoff) | attr; phys_pfn = pteval >> VTD_PAGE_SHIFT; } @@ -2414,7 +2506,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); } -static struct dmar_domain *find_domain(struct device *dev) +struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; @@ -2460,6 +2552,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) return NULL; } +static int domain_setup_first_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, + int pasid) +{ + int flags = PASID_FLAG_SUPERVISOR_MODE; + struct dma_pte *pgd = domain->pgd; + int agaw, level; + + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. + */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + return -ENOMEM; + } + + level = agaw_to_level(agaw); + if (level != 4 && level != 5) + return -EINVAL; + + flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + + return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, + domain->iommu_did[iommu->seq_id], + flags); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -2559,6 +2681,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); + else if (domain_use_first_level(domain)) + ret = domain_setup_first_level(iommu, domain, dev, + PASID_RID2PASID); else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); @@ -2764,10 +2889,8 @@ static int __init si_domain_init(int hw) } /* - * Normally we use DMA domains for devices which have RMRRs. But we - * loose this requirement for graphic and usb devices. Identity map - * the RMRRs for graphic and USB devices so that they could use the - * si_domain. + * Identity map the RMRRs so that devices with RMRRs could also use + * the si_domain. */ for_each_rmrr_units(rmrr) { for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, @@ -2775,9 +2898,6 @@ static int __init si_domain_init(int hw) unsigned long long start = rmrr->base_address; unsigned long long end = rmrr->end_address; - if (device_is_rmrr_locked(dev)) - continue; - if (WARN_ON(end < start || end >> agaw_to_width(si_domain->agaw))) continue; @@ -2916,9 +3036,6 @@ static int device_def_domain_type(struct device *dev) if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - if (device_is_rmrr_locked(dev)) - return IOMMU_DOMAIN_DMA; - /* * Prevent any device marked as untrusted from getting * placed into the statically identity mapping domain. @@ -2956,13 +3073,9 @@ static int device_def_domain_type(struct device *dev) return IOMMU_DOMAIN_DMA; } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) return IOMMU_DOMAIN_DMA; - } else { - if (device_has_rmrr(dev)) - return IOMMU_DOMAIN_DMA; } - return (iommu_identity_mapping & IDENTMAP_ALL) ? - IOMMU_DOMAIN_IDENTITY : 0; + return 0; } static void intel_iommu_init_qi(struct intel_iommu *iommu) @@ -3291,10 +3404,7 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; -#ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_supported(iommu)) - intel_svm_init(iommu); -#endif + intel_svm_check(iommu); } /* @@ -3309,9 +3419,6 @@ static int __init init_dmars(void) iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } - if (iommu_default_passthrough()) - iommu_identity_mapping |= IDENTMAP_ALL; - #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA dmar_map_gfx = 0; #endif @@ -3384,8 +3491,21 @@ static unsigned long intel_alloc_iova(struct device *dev, { unsigned long iova_pfn; - /* Restrict dma_mask to the width that the iommu can handle */ - dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + /* + * Restrict dma_mask to the width that the iommu can handle. + * First-level translation restricts the input-address to a + * canonical address (i.e., address bits 63:N have the same + * value as address bit [N-1], where N is 48-bits with 4-level + * paging and 57-bits with 5-level paging). Hence, skip bit + * [N-1]. + */ + if (domain_use_first_level(domain)) + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1), + dma_mask); + else + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), + dma_mask); + /* Ensure we reserve the whole size-aligned region */ nrpages = __roundup_pow_of_two(nrpages); @@ -3403,7 +3523,8 @@ static unsigned long intel_alloc_iova(struct device *dev, iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { - dev_err(dev, "Allocating %ld-page iova failed", nrpages); + dev_err_once(dev, "Allocating %ld-page iova failed\n", + nrpages); return 0; } @@ -3771,8 +3892,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return 0; } - trace_map_sg(dev, iova_pfn << PAGE_SHIFT, - sg_phys(sglist), size << VTD_PAGE_SHIFT); + for_each_sg(sglist, sg, nelems, i) + trace_map_sg(dev, i + 1, nelems, sg); return nelems; } @@ -3984,6 +4105,9 @@ bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, sg_dma_len(sg) = sg->length; } + for_each_sg(sglist, sg, nelems, i) + trace_bounce_map_sg(dev, i + 1, nelems, sg); + return nelems; out_unmap: @@ -4312,16 +4436,31 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ +static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ + if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || + !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) || + rmrr->end_address <= rmrr->base_address || + arch_rmrr_sanity_check(rmrr)) + return -EINVAL; + + return 0; +} + int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; - int ret; rmrr = (struct acpi_dmar_reserved_memory *)header; - ret = arch_rmrr_sanity_check(rmrr); - if (ret) - return ret; + if (rmrr_sanity_check(rmrr)) + WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + rmrr->base_address, rmrr->end_address, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) @@ -4467,7 +4606,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu->name); return -ENXIO; } - sp = domain_update_iommu_superpage(iommu) - 1; + sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", iommu->name); @@ -4487,10 +4626,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; -#ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_supported(iommu)) - intel_svm_init(iommu); -#endif + intel_svm_check(iommu); if (dmaru->ignored) { /* @@ -4895,7 +5031,7 @@ static int __init platform_optin_force_iommu(void) * map for all devices except those marked as being untrusted. */ if (dmar_disabled) - iommu_identity_mapping |= IDENTMAP_ALL; + iommu_set_default_passthrough(false); dmar_disabled = 0; no_iommu = 0; @@ -5195,6 +5331,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; + int ret; switch (type) { case IOMMU_DOMAIN_DMA: @@ -5211,11 +5348,12 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } - if (type == IOMMU_DOMAIN_DMA && - init_iova_flush_queue(&dmar_domain->iovad, - iommu_flush_iova, iova_entry_free)) { - pr_warn("iova flush queue initialization failed\n"); - intel_iommu_strict = 1; + if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { + ret = init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, + iova_entry_free); + if (ret) + pr_info("iova flush queue initialization failed\n"); } domain_update_iommu_cap(dmar_domain); @@ -5281,7 +5419,7 @@ static void auxiliary_unlink_device(struct dmar_domain *domain, domain->auxd_refcnt--; if (!domain->auxd_refcnt && domain->default_pasid > 0) - intel_pasid_free_id(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int aux_domain_add_dev(struct dmar_domain *domain, @@ -5299,10 +5437,11 @@ static int aux_domain_add_dev(struct dmar_domain *domain, if (domain->default_pasid <= 0) { int pasid; - pasid = intel_pasid_alloc_id(domain, PASID_MIN, - pci_max_pasids(to_pci_dev(dev)), - GFP_KERNEL); - if (pasid <= 0) { + /* No private data needed for the default pasid */ + pasid = ioasid_alloc(NULL, PASID_MIN, + pci_max_pasids(to_pci_dev(dev)) - 1, + NULL); + if (pasid == INVALID_IOASID) { pr_err("Can't allocate default pasid\n"); return -ENODEV; } @@ -5320,8 +5459,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain, goto attach_failed; /* Setup the PASID entry for mediated devices: */ - ret = intel_pasid_setup_second_level(iommu, domain, dev, - domain->default_pasid); + if (domain_use_first_level(domain)) + ret = domain_setup_first_level(iommu, domain, dev, + domain->default_pasid); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, + domain->default_pasid); if (ret) goto table_failed; spin_unlock(&iommu->lock); @@ -5338,7 +5481,7 @@ attach_failed: spin_unlock(&iommu->lock); spin_unlock_irqrestore(&device_domain_lock, flags); if (!domain->auxd_refcnt && domain->default_pasid > 0) - intel_pasid_free_id(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -5592,6 +5735,24 @@ static inline bool iommu_pasid_support(void) return ret; } +static inline bool nested_mode_support(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + return ret; +} + static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -5749,15 +5910,6 @@ static void intel_iommu_get_resv_regions(struct device *device, list_add_tail(®->list, head); } -static void intel_iommu_put_resv_regions(struct device *dev, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) { struct device_domain_info *info; @@ -5984,10 +6136,42 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; } +static int +intel_iommu_domain_set_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long flags; + int ret = 0; + + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + + switch (attr) { + case DOMAIN_ATTR_NESTING: + spin_lock_irqsave(&device_domain_lock, flags); + if (nested_mode_support() && + list_empty(&dmar_domain->devices)) { + dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; + dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; + } else { + ret = -ENODEV; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, .domain_free = intel_iommu_domain_free, + .domain_set_attr = intel_iommu_domain_set_attr, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, @@ -5999,7 +6183,7 @@ const struct iommu_ops intel_iommu_ops = { .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, .get_resv_regions = intel_iommu_get_resv_regions, - .put_resv_regions = intel_iommu_put_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .apply_resv_region = intel_iommu_apply_resv_region, .device_group = intel_iommu_device_group, .dev_has_feat = intel_iommu_dev_has_feat, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 040a445be300..22b30f10b396 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -26,42 +26,6 @@ */ static DEFINE_SPINLOCK(pasid_lock); u32 intel_pasid_max_id = PASID_MAX; -static DEFINE_IDR(pasid_idr); - -int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp) -{ - int ret, min, max; - - min = max_t(int, start, PASID_MIN); - max = min_t(int, end, intel_pasid_max_id); - - WARN_ON(in_interrupt()); - idr_preload(gfp); - spin_lock(&pasid_lock); - ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC); - spin_unlock(&pasid_lock); - idr_preload_end(); - - return ret; -} - -void intel_pasid_free_id(int pasid) -{ - spin_lock(&pasid_lock); - idr_remove(&pasid_idr, pasid); - spin_unlock(&pasid_lock); -} - -void *intel_pasid_lookup_id(int pasid) -{ - void *p; - - spin_lock(&pasid_lock); - p = idr_find(&pasid_idr, pasid); - spin_unlock(&pasid_lock); - - return p; -} /* * Per device pasid table management: @@ -465,6 +429,21 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, devtlb_invalidation_with_pasid(iommu, dev, pasid); } +static void pasid_flush_caches(struct intel_iommu *iommu, + struct pasid_entry *pte, + int pasid, u16 did) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } +} + /* * Set up the scalable mode pasid table entry for first only * translation type. @@ -498,10 +477,15 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_sre(pte); } -#ifdef CONFIG_X86 - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_set_flpm(pte, 1); -#endif /* CONFIG_X86 */ + if (flags & PASID_FLAG_FL5LP) { + if (cap_5lp_support(iommu->cap)) { + pasid_set_flpm(pte, 1); + } else { + pr_err("No 5-level paging support for first-level\n"); + pasid_clear_entry(pte); + return -EINVAL; + } + } pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); @@ -510,16 +494,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, 1); pasid_set_present(pte); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(pte, sizeof(*pte)); - - if (cap_caching_mode(iommu->cap)) { - pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); - } else { - iommu_flush_write_buffer(iommu); - } + pasid_flush_caches(iommu, pte, pasid, did); return 0; } @@ -583,16 +558,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, */ pasid_set_sre(pte); pasid_set_present(pte); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(pte, sizeof(*pte)); - - if (cap_caching_mode(iommu->cap)) { - pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); - } else { - iommu_flush_write_buffer(iommu); - } + pasid_flush_caches(iommu, pte, pasid, did); return 0; } @@ -626,16 +592,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, */ pasid_set_sre(pte); pasid_set_present(pte); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(pte, sizeof(*pte)); - - if (cap_caching_mode(iommu->cap)) { - pasid_cache_invalidation_with_pasid(iommu, did, pasid); - iotlb_invalidation_with_pasid(iommu, did, pasid); - } else { - iommu_flush_write_buffer(iommu); - } + pasid_flush_caches(iommu, pte, pasid, did); return 0; } diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index fc8cd8f17de1..92de6df24ccb 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -37,6 +37,12 @@ */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) +/* + * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first- + * level translation, otherwise, 4-level paging will be used. + */ +#define PASID_FLAG_FL5LP BIT(1) + struct pasid_dir_entry { u64 val; }; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index dca88f9fdf29..d7f2a5358900 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -17,25 +17,13 @@ #include <linux/dmar.h> #include <linux/interrupt.h> #include <linux/mm_types.h> +#include <linux/ioasid.h> #include <asm/page.h> #include "intel-pasid.h" static irqreturn_t prq_event_thread(int irq, void *d); -int intel_svm_init(struct intel_iommu *iommu) -{ - if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && - !cap_fl1gp_support(iommu->cap)) - return -EINVAL; - - if (cpu_feature_enabled(X86_FEATURE_LA57) && - !cap_5lp_support(iommu->cap)) - return -EINVAL; - - return 0; -} - #define PRQ_ORDER 0 int intel_svm_enable_prq(struct intel_iommu *iommu) @@ -99,6 +87,33 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) return 0; } +static inline bool intel_svm_capable(struct intel_iommu *iommu) +{ + return iommu->flags & VTD_FLAG_SVM_CAPABLE; +} + +void intel_svm_check(struct intel_iommu *iommu) +{ + if (!pasid_supported(iommu)) + return; + + if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && + !cap_fl1gp_support(iommu->cap)) { + pr_err("%s SVM disabled, incompatible 1GB page capability\n", + iommu->name); + return; + } + + if (cpu_feature_enabled(X86_FEATURE_LA57) && + !cap_5lp_support(iommu->cap)) { + pr_err("%s SVM disabled, incompatible paging mode\n", + iommu->name); + return; + } + + iommu->flags |= VTD_FLAG_SVM_CAPABLE; +} + static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, unsigned long address, unsigned long pages, int ih) { @@ -207,6 +222,10 @@ static const struct mmu_notifier_ops intel_mmuops = { static DEFINE_MUTEX(pasid_mutex); static LIST_HEAD(global_svm_list); +#define for_each_svm_dev(sdev, svm, d) \ + list_for_each_entry((sdev), &(svm)->devs, list) \ + if ((d) != (sdev)->dev) {} else + int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); @@ -220,6 +239,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ if (!iommu || dmar_disabled) return -EINVAL; + if (!intel_svm_capable(iommu)) + return -ENOTSUPP; + if (dev_is_pci(dev)) { pasid_max = pci_max_pasids(to_pci_dev(dev)); if (pasid_max < 0) @@ -252,15 +274,14 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ goto out; } - list_for_each_entry(sdev, &svm->devs, list) { - if (dev == sdev->dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } - sdev->users++; - goto success; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; } + sdev->users++; + goto success; } break; @@ -314,16 +335,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ if (pasid_max > intel_pasid_max_id) pasid_max = intel_pasid_max_id; - /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ - ret = intel_pasid_alloc_id(svm, - !!cap_caching_mode(iommu->cap), - pasid_max - 1, GFP_KERNEL); - if (ret < 0) { + /* Do not use PASID 0, reserved for RID to PASID */ + svm->pasid = ioasid_alloc(NULL, PASID_MIN, + pasid_max - 1, svm); + if (svm->pasid == INVALID_IOASID) { kfree(svm); kfree(sdev); + ret = -ENOSPC; goto out; } - svm->pasid = ret; svm->notifier.ops = &intel_mmuops; svm->mm = mm; svm->flags = flags; @@ -333,7 +353,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { - intel_pasid_free_id(svm->pasid); + ioasid_free(svm->pasid); kfree(svm); kfree(sdev); goto out; @@ -344,12 +364,14 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, - mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | + (cpu_feature_enabled(X86_FEATURE_LA57) ? + PASID_FLAG_FL5LP : 0)); spin_unlock(&iommu->lock); if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); - intel_pasid_free_id(svm->pasid); + ioasid_free(svm->pasid); kfree(svm); kfree(sdev); goto out; @@ -365,7 +387,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, - mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | + (cpu_feature_enabled(X86_FEATURE_LA57) ? + PASID_FLAG_FL5LP : 0)); spin_unlock(&iommu->lock); if (ret) { kfree(sdev); @@ -397,44 +421,45 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) if (!iommu) goto out; - svm = intel_pasid_lookup_id(pasid); + svm = ioasid_find(NULL, pasid, NULL); if (!svm) goto out; - list_for_each_entry(sdev, &svm->devs, list) { - if (dev == sdev->dev) { - ret = 0; - sdev->users--; - if (!sdev->users) { - list_del_rcu(&sdev->list); - /* Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); - kfree_rcu(sdev, rcu); - - if (list_empty(&svm->devs)) { - intel_pasid_free_id(svm->pasid); - if (svm->mm) - mmu_notifier_unregister(&svm->notifier, svm->mm); - - list_del(&svm->list); - - /* We mandate that no page faults may be outstanding - * for the PASID when intel_svm_unbind_mm() is called. - * If that is not obeyed, subtle errors will happen. - * Let's make them less subtle... */ - memset(svm, 0x6b, sizeof(*svm)); - kfree(svm); - } + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. */ + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + ioasid_free(svm->pasid); + if (svm->mm) + mmu_notifier_unregister(&svm->notifier, svm->mm); + list_del(&svm->list); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); } - break; } + break; } out: mutex_unlock(&pasid_mutex); @@ -454,10 +479,14 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid) if (!iommu) goto out; - svm = intel_pasid_lookup_id(pasid); + svm = ioasid_find(NULL, pasid, NULL); if (!svm) goto out; + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } /* init_mm is used in this case */ if (!svm->mm) ret = 1; @@ -564,13 +593,12 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = intel_pasid_lookup_id(req->pasid); + svm = ioasid_find(NULL, req->pasid, NULL); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ rcu_read_unlock(); - - if (!svm) { + if (IS_ERR_OR_NULL(svm)) { pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", iommu->name, req->pasid, ((unsigned long long *)req)[0], ((unsigned long long *)req)[1]); @@ -654,11 +682,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (req->priv_data_present) memcpy(&resp.qw2, req->priv_data, sizeof(req->priv_data)); + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(&resp, iommu); } - resp.qw2 = 0; - resp.qw3 = 0; - qi_submit_sync(&resp, iommu); - head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 7c3bd2c3cdca..4272fe4e17f4 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -149,8 +149,6 @@ #define ARM_V7S_TTBR_IRGN_ATTR(attr) \ ((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1)) -#define ARM_V7S_TCR_PD1 BIT(5) - #ifdef CONFIG_ZONE_DMA32 #define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 #define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 @@ -798,8 +796,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, */ cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M; - /* TCR: T0SZ=0, disable TTBR1 */ - cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1; + /* TCR: T0SZ=0, EAE=0 (if applicable) */ + cfg->arm_v7s_cfg.tcr = 0; /* * TEX remap: the indices used map to the closest equivalent types @@ -822,15 +820,13 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) | - ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | - (cfg->coherent_walk ? - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); - cfg->arm_v7s_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S | + (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS | + ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); return &data->iop; out_free_data: diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index bdf47f745268..983b08477e64 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -100,40 +100,29 @@ #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) /* Register bits */ -#define ARM_32_LPAE_TCR_EAE (1 << 31) -#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) +#define ARM_LPAE_TCR_TG0_4K 0 +#define ARM_LPAE_TCR_TG0_64K 1 +#define ARM_LPAE_TCR_TG0_16K 2 -#define ARM_LPAE_TCR_EPD1 (1 << 23) +#define ARM_LPAE_TCR_TG1_16K 1 +#define ARM_LPAE_TCR_TG1_4K 2 +#define ARM_LPAE_TCR_TG1_64K 3 -#define ARM_LPAE_TCR_TG0_4K (0 << 14) -#define ARM_LPAE_TCR_TG0_64K (1 << 14) -#define ARM_LPAE_TCR_TG0_16K (2 << 14) - -#define ARM_LPAE_TCR_SH0_SHIFT 12 -#define ARM_LPAE_TCR_SH0_MASK 0x3 #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 -#define ARM_LPAE_TCR_ORGN0_SHIFT 10 -#define ARM_LPAE_TCR_IRGN0_SHIFT 8 -#define ARM_LPAE_TCR_RGN_MASK 0x3 #define ARM_LPAE_TCR_RGN_NC 0 #define ARM_LPAE_TCR_RGN_WBWA 1 #define ARM_LPAE_TCR_RGN_WT 2 #define ARM_LPAE_TCR_RGN_WB 3 -#define ARM_LPAE_TCR_SL0_SHIFT 6 -#define ARM_LPAE_TCR_SL0_MASK 0x3 +#define ARM_LPAE_VTCR_SL0_MASK 0x3 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 -#define ARM_LPAE_TCR_SZ_MASK 0xf - -#define ARM_LPAE_TCR_PS_SHIFT 16 -#define ARM_LPAE_TCR_PS_MASK 0x7 -#define ARM_LPAE_TCR_IPS_SHIFT 32 -#define ARM_LPAE_TCR_IPS_MASK 0x7 +#define ARM_LPAE_VTCR_PS_SHIFT 16 +#define ARM_LPAE_VTCR_PS_MASK 0x7 #define ARM_LPAE_TCR_PS_32_BIT 0x0ULL #define ARM_LPAE_TCR_PS_36_BIT 0x1ULL @@ -293,17 +282,11 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = prot; - if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; - if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else pte |= ARM_LPAE_PTE_TYPE_BLOCK; - if (data->iop.fmt != ARM_MALI_LPAE) - pte |= ARM_LPAE_PTE_AF; - pte |= ARM_LPAE_PTE_SH_IS; pte |= paddr_to_iopte(paddr, data); __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); @@ -460,9 +443,20 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, << ARM_LPAE_PTE_ATTRINDX_SHIFT); } + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_SH_IS; + else + pte |= ARM_LPAE_PTE_SH_OS; + if (prot & IOMMU_NOEXEC) pte |= ARM_LPAE_PTE_XN; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_AF; + return pte; } @@ -474,6 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, arm_lpae_iopte *ptep = data->pgd; int ret, lvl = data->start_level; arm_lpae_iopte prot; + long iaext = (long)iova >> cfg->ias; /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) @@ -482,7 +477,9 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return -EINVAL; - if (WARN_ON(iova >> data->iop.cfg.ias || paddr >> data->iop.cfg.oas)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext || paddr >> cfg->oas)) return -ERANGE; prot = arm_lpae_prot_to_pte(data, iommu_prot); @@ -648,11 +645,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; + long iaext = (long)iova >> cfg->ias; if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return 0; - if (WARN_ON(iova >> data->iop.cfg.ias)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) return 0; return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep); @@ -787,9 +787,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { u64 reg; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; + bool tg1; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | - IO_PGTABLE_QUIRK_NON_STRICT)) + IO_PGTABLE_QUIRK_NON_STRICT | + IO_PGTABLE_QUIRK_ARM_TTBR1)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -798,58 +801,55 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* TCR */ if (cfg->coherent_walk) { - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_IS; + tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; } else { - reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_OS; + tcr->irgn = ARM_LPAE_TCR_RGN_NC; + tcr->orgn = ARM_LPAE_TCR_RGN_NC; } + tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K; break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - - /* Disable speculative walks through TTBR1 */ - reg |= ARM_LPAE_TCR_EPD1; - cfg->arm_lpae_s1_cfg.tcr = reg; + tcr->tsz = 64ULL - cfg->ias; /* MAIRs */ reg = (ARM_LPAE_MAIR_ATTR_NC @@ -872,9 +872,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); - cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd); return &data->iop; out_free_data: @@ -885,8 +884,9 @@ out_free_data: static struct io_pgtable * arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - u64 reg, sl; + u64 sl; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; /* The NS quirk doesn't apply at stage 2 */ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) @@ -911,55 +911,59 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) } /* VTCR */ - reg = ARM_64_LPAE_S2_TCR_RES1 | - (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + if (cfg->coherent_walk) { + vtcr->sh = ARM_LPAE_TCR_SH_IS; + vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + } else { + vtcr->sh = ARM_LPAE_TCR_SH_OS; + vtcr->irgn = ARM_LPAE_TCR_RGN_NC; + vtcr->orgn = ARM_LPAE_TCR_RGN_NC; + } sl = data->start_level; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + vtcr->tg = ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + vtcr->tg = ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + vtcr->tg = ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; - cfg->arm_lpae_s2_cfg.vtcr = reg; + vtcr->tsz = 64ULL - cfg->ias; + vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK; /* Allocate pgd pages */ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), @@ -982,35 +986,21 @@ out_free_data: static struct io_pgtable * arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 32 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; - cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; - } - - return iop; + return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); } static struct io_pgtable * arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 40 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); - if (iop) - cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; - - return iop; + return arm_64_lpae_alloc_pgtable_s2(cfg, cookie); } static struct io_pgtable * diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index ced53e5b72b5..94394c81468f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -63,7 +63,7 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) if (!ops) return; - iop = container_of(ops, struct io_pgtable, ops); + iop = io_pgtable_ops_to_pgtable(ops); io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index e436ff813e7e..99869217fbec 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -87,6 +87,7 @@ error: put_device(iommu->dev); return ret; } +EXPORT_SYMBOL_GPL(iommu_device_sysfs_add); void iommu_device_sysfs_remove(struct iommu_device *iommu) { @@ -94,6 +95,8 @@ void iommu_device_sysfs_remove(struct iommu_device *iommu) device_unregister(iommu->dev); iommu->dev = NULL; } +EXPORT_SYMBOL_GPL(iommu_device_sysfs_remove); + /* * IOMMU drivers can indicate a device is managed by a given IOMMU using * this interface. A link to the device will be created in the "devices" @@ -119,6 +122,7 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) return ret; } +EXPORT_SYMBOL_GPL(iommu_device_link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link) { @@ -128,3 +132,4 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link) sysfs_remove_link(&link->kobj, "iommu"); sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } +EXPORT_SYMBOL_GPL(iommu_device_unlink); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3ead597e1c57..3e3528436e0b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -22,6 +22,7 @@ #include <linux/bitops.h> #include <linux/property.h> #include <linux/fsl/mc.h> +#include <linux/module.h> #include <trace/events/iommu.h> static struct kset *iommu_group_kset; @@ -141,6 +142,7 @@ int iommu_device_register(struct iommu_device *iommu) spin_unlock(&iommu_device_lock); return 0; } +EXPORT_SYMBOL_GPL(iommu_device_register); void iommu_device_unregister(struct iommu_device *iommu) { @@ -148,6 +150,7 @@ void iommu_device_unregister(struct iommu_device *iommu) list_del(&iommu->list); spin_unlock(&iommu_device_lock); } +EXPORT_SYMBOL_GPL(iommu_device_unregister); static struct iommu_param *iommu_get_dev_param(struct device *dev) { @@ -183,10 +186,21 @@ int iommu_probe_device(struct device *dev) if (!iommu_get_dev_param(dev)) return -ENOMEM; + if (!try_module_get(ops->owner)) { + ret = -EINVAL; + goto err_free_dev_param; + } + ret = ops->add_device(dev); if (ret) - iommu_free_dev_param(dev); + goto err_module_put; + + return 0; +err_module_put: + module_put(ops->owner); +err_free_dev_param: + iommu_free_dev_param(dev); return ret; } @@ -197,7 +211,10 @@ void iommu_release_device(struct device *dev) if (dev->iommu_group) ops->remove_device(dev); - iommu_free_dev_param(dev); + if (dev->iommu_param) { + module_put(ops->owner); + iommu_free_dev_param(dev); + } } static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, @@ -887,6 +904,7 @@ struct iommu_group *iommu_group_ref_get(struct iommu_group *group) kobject_get(group->devices_kobj); return group; } +EXPORT_SYMBOL_GPL(iommu_group_ref_get); /** * iommu_group_put - Decrement group reference @@ -1260,6 +1278,7 @@ struct iommu_group *generic_device_group(struct device *dev) { return iommu_group_alloc(); } +EXPORT_SYMBOL_GPL(generic_device_group); /* * Use standard PCI bus topology, isolation features, and DMA alias quirks @@ -1327,6 +1346,7 @@ struct iommu_group *pci_device_group(struct device *dev) /* No shared group found, allocate new */ return iommu_group_alloc(); } +EXPORT_SYMBOL_GPL(pci_device_group); /* Get the IOMMU group for device on fsl-mc bus */ struct iommu_group *fsl_mc_device_group(struct device *dev) @@ -1339,6 +1359,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) group = iommu_group_alloc(); return group; } +EXPORT_SYMBOL_GPL(fsl_mc_device_group); /** * iommu_group_get_for_dev - Find or create the IOMMU group for a device @@ -1407,6 +1428,7 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) return group; } +EXPORT_SYMBOL(iommu_group_get_for_dev); struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { @@ -1537,6 +1559,11 @@ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { int err; + if (ops == NULL) { + bus->iommu_ops = NULL; + return 0; + } + if (bus->iommu_ops != NULL) return -EBUSY; @@ -2230,6 +2257,25 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) ops->put_resv_regions(dev, list); } +/** + * generic_iommu_put_resv_regions - Reserved region driver helper + * @dev: device for which to free reserved regions + * @list: reserved region list for device + * + * IOMMU drivers can use this to implement their .put_resv_regions() callback + * for simple reservations. Memory allocated for each reserved region will be + * freed. If an IOMMU driver allocates additional resources per region, it is + * going to have to implement a custom callback. + */ +void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, list, list) + kfree(entry); +} +EXPORT_SYMBOL(generic_iommu_put_resv_regions); + struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type) @@ -2247,6 +2293,7 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, region->type = type; return region; } +EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); static int request_default_domain_for_dev(struct device *dev, unsigned long type) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c7a914b9bbbc..0e6a9536eca6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -233,7 +233,7 @@ static DEFINE_MUTEX(iova_cache_mutex); struct iova *alloc_iova_mem(void) { - return kmem_cache_zalloc(iova_cache, GFP_ATOMIC); + return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN); } EXPORT_SYMBOL(alloc_iova_mem); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d02edd2751f3..ecb3f9464dd5 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -374,7 +374,7 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain) u32 tmp; /* TTBR0 */ - ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; + ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr; ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 93f14bca26ee..94a6df1bddd6 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -279,8 +279,8 @@ static void __program_context(void __iomem *base, int ctx, SET_V2PCFG(base, ctx, 0x3); SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr); - SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]); - SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]); + SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr); + SET_TTBR1(base, ctx, 0); /* Set prrr and nmrr */ SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6fc1f5ecf91e..95945f467c03 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -367,7 +367,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { data->m4u_dom = dom; - writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); } @@ -765,7 +765,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); if (m4u_dom) - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); return 0; } diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 026ad2b29dcd..20738aacac89 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -8,11 +8,12 @@ #include <linux/export.h> #include <linux/iommu.h> #include <linux/limits.h> -#include <linux/pci.h> +#include <linux/module.h> #include <linux/msi.h> #include <linux/of.h> #include <linux/of_iommu.h> #include <linux/of_pci.h> +#include <linux/pci.h> #include <linux/slab.h> #include <linux/fsl/mc.h> @@ -91,16 +92,16 @@ static int of_iommu_xlate(struct device *dev, { const struct iommu_ops *ops; struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; - int err; + int ret; ops = iommu_ops_from_fwnode(fwnode); if ((ops && !ops->of_xlate) || !of_device_is_available(iommu_spec->np)) return NO_IOMMU; - err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); - if (err) - return err; + ret = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); + if (ret) + return ret; /* * The otherwise-empty fwspec handily serves to indicate the specific * IOMMU device we're waiting for, which will be useful if we ever get @@ -109,7 +110,12 @@ static int of_iommu_xlate(struct device *dev, if (!ops) return driver_deferred_probe_check_state(dev); - return ops->of_xlate(dev, iommu_spec); + if (!try_module_get(ops->owner)) + return -ENODEV; + + ret = ops->of_xlate(dev, iommu_spec); + module_put(ops->owner); + return ret; } struct of_pci_iommu_alias_info { @@ -179,6 +185,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, .np = master_np, }; + pci_request_acs(); err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); } else if (dev_is_fsl_mc(dev)) { @@ -196,8 +203,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, if (err) break; } - } + fwspec = dev_iommu_fwspec_get(dev); + if (!err && fwspec) + of_property_read_u32(master_np, "pasid-num-bits", + &fwspec->num_pasid_bits); + } /* * Two success conditions can be represented by non-negative err here: diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 52f38292df5b..39759db4f003 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -201,7 +201,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -215,7 +215,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) } iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); - iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE); + iommu_writel(ctx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); return IRQ_HANDLED; } @@ -269,18 +269,15 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | - FIELD_PREP(TTBRn_ASID, ctx->asid)); - iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | - FIELD_PREP(TTBRn_ASID, ctx->asid)); + pgtbl_cfg.arm_lpae_s1_cfg.ttbr | + FIELD_PREP(ARM_SMMU_TTBRn_ASID, ctx->asid)); + iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, 0); /* TCR */ iommu_writel(ctx, ARM_SMMU_CB_TCR2, - (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | - FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM)); + arm_smmu_lpae_tcr2(&pgtbl_cfg)); iommu_writel(ctx, ARM_SMMU_CB_TCR, - pgtbl_cfg.arm_lpae_s1_cfg.tcr); + arm_smmu_lpae_tcr(&pgtbl_cfg) | ARM_SMMU_TCR_EAE); /* MAIRs (stage-1 only) */ iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, @@ -289,11 +286,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32); /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | - SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | + ARM_SMMU_SCTLR_AFE | ARM_SMMU_SCTLR_TRE | + ARM_SMMU_SCTLR_M | ARM_SMMU_SCTLR_S1_ASIDPNE | + ARM_SMMU_SCTLR_CFCFG; if (IS_ENABLED(CONFIG_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 315c7cc4f99d..cce329d71fba 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -837,14 +837,6 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head) iommu_dma_get_resv_regions(dev, head); } -static void viommu_put_resv_regions(struct device *dev, struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - static struct iommu_ops viommu_ops; static struct virtio_driver virtio_iommu_drv; @@ -914,7 +906,7 @@ static int viommu_add_device(struct device *dev) err_unlink_dev: iommu_device_unlink(&viommu->iommu, dev); err_free_dev: - viommu_put_resv_regions(dev, &vdev->resv_regions); + generic_iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); return ret; @@ -932,7 +924,7 @@ static void viommu_remove_device(struct device *dev) iommu_group_remove_device(dev); iommu_device_unlink(&vdev->viommu->iommu, dev); - viommu_put_resv_regions(dev, &vdev->resv_regions); + generic_iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); } @@ -961,7 +953,7 @@ static struct iommu_ops viommu_ops = { .remove_device = viommu_remove_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, - .put_resv_regions = viommu_put_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .of_xlate = viommu_of_xlate, }; diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 982b46f0a54d..dcbcf1331bb2 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -69,6 +69,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps) dev->ats_enabled = 1; return 0; } +EXPORT_SYMBOL_GPL(pci_enable_ats); /** * pci_disable_ats - disable the ATS capability @@ -87,6 +88,7 @@ void pci_disable_ats(struct pci_dev *dev) dev->ats_enabled = 0; } +EXPORT_SYMBOL_GPL(pci_disable_ats); void pci_restore_ats_state(struct pci_dev *dev) { diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3c30e72e79ec..d828ca835a98 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -131,6 +131,7 @@ bool pci_ats_disabled(void) { return pcie_ats_disabled; } +EXPORT_SYMBOL_GPL(pci_ats_disabled); /* Disable bridge_d3 for all PCIe ports */ static bool pci_bridge_d3_disable; |