summaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig18
-rw-r--r--drivers/iommu/amd/init.c16
-rw-r--r--drivers/iommu/amd/iommu.c94
-rw-r--r--drivers/iommu/apple-dart.c632
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c4
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c25
-rw-r--r--drivers/iommu/dma-iommu.c21
-rw-r--r--drivers/iommu/exynos-iommu.c227
-rw-r--r--drivers/iommu/fsl_pamu_domain.c6
-rw-r--r--drivers/iommu/intel/Kconfig11
-rw-r--r--drivers/iommu/intel/Makefile1
-rw-r--r--drivers/iommu/intel/dmar.c36
-rw-r--r--drivers/iommu/intel/iommu.c124
-rw-r--r--drivers/iommu/intel/iommu.h117
-rw-r--r--drivers/iommu/intel/irq_remapping.c9
-rw-r--r--drivers/iommu/intel/pasid.c20
-rw-r--r--drivers/iommu/intel/perfmon.c897
-rw-r--r--drivers/iommu/intel/perfmon.h64
-rw-r--r--drivers/iommu/intel/svm.c90
-rw-r--r--drivers/iommu/iommu-traces.c1
-rw-r--r--drivers/iommu/iommu.c195
-rw-r--r--drivers/iommu/iommufd/Kconfig2
-rw-r--r--drivers/iommu/iommufd/device.c8
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h2
-rw-r--r--drivers/iommu/iommufd/main.c3
-rw-r--r--drivers/iommu/iommufd/pages.c22
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c107
-rw-r--r--drivers/iommu/ipmmu-vmsa.c28
-rw-r--r--drivers/iommu/msm_iommu.c6
-rw-r--r--drivers/iommu/mtk_iommu.c9
-rw-r--r--drivers/iommu/mtk_iommu_v1.c4
-rw-r--r--drivers/iommu/of_iommu.c96
-rw-r--r--drivers/iommu/omap-iommu.c6
-rw-r--r--drivers/iommu/rockchip-iommu.c1
-rw-r--r--drivers/iommu/s390-iommu.c24
-rw-r--r--drivers/iommu/sprd-iommu.c16
-rw-r--r--drivers/iommu/sun50i-iommu.c1
-rw-r--r--drivers/iommu/tegra-gart.c6
-rw-r--r--drivers/iommu/tegra-smmu.c9
40 files changed, 2364 insertions, 596 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 79707685d54a..889c7efd050b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -32,7 +32,8 @@ config IOMMU_IO_PGTABLE
config IOMMU_IO_PGTABLE_LPAE
bool "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
- depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARM || ARM64 || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for cpmxchg64()
help
Enable support for the ARM long descriptor pagetable format.
This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -70,7 +71,8 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
config IOMMU_IO_PGTABLE_DART
bool "Apple DART Formats"
select IOMMU_IO_PGTABLE
- depends on ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARM64 || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for cpmxchg64()
help
Enable support for the Apple DART pagetable formats. These include
the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family
@@ -284,7 +286,8 @@ config EXYNOS_IOMMU_DEBUG
config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
- depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARCH_RENESAS || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU
@@ -304,7 +307,8 @@ config SPAPR_TCE_IOMMU
config APPLE_DART
tristate "Apple DART IOMMU Support"
- depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARCH_APPLE || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_DART
select IOMMU_API
select IOMMU_IO_PGTABLE_DART
default ARCH_APPLE
@@ -319,7 +323,8 @@ config APPLE_DART
# ARM IOMMU support
config ARM_SMMU
tristate "ARM Ltd. System MMU (SMMU) Support"
- depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARM64 || ARM || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
@@ -466,7 +471,8 @@ config MTK_IOMMU_V1
config QCOM_IOMMU
# Note: iommu drivers cannot (yet?) be built as modules
bool "Qualcomm IOMMU Support"
- depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on ARCH_QCOM || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
select QCOM_SCM
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ found:
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cbeaab55c0db..5a505ba5467e 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -558,6 +558,15 @@ static void amd_iommu_report_page_fault(struct amd_iommu *iommu,
* prevent logging it.
*/
if (IS_IOMMU_MEM_TRANSACTION(flags)) {
+ /* Device not attached to domain properly */
+ if (dev_data->domain == NULL) {
+ pr_err_ratelimited("Event logged [Device not attached to domain properly]\n");
+ pr_err_ratelimited(" device=%04x:%02x:%02x.%x domain=0x%04x\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), domain_id);
+ goto out;
+ }
+
if (!report_iommu_fault(&dev_data->domain->domain,
&pdev->dev, address,
IS_WRITE_REQUEST(flags) ?
@@ -667,7 +676,14 @@ retry:
event[0], event[1], event[2], event[3]);
}
- memset(__evt, 0, 4 * sizeof(u32));
+ /*
+ * To detect the hardware errata 732 we need to clear the
+ * entry back to zero. This issue does not exist on SNP
+ * enabled system. Also this buffer is not writeable on
+ * SNP enabled system.
+ */
+ if (!amd_iommu_snp_en)
+ memset(__evt, 0, 4 * sizeof(u32));
}
static void iommu_poll_events(struct amd_iommu *iommu)
@@ -736,10 +752,13 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
entry[1] = raw[1];
/*
- * To detect the hardware bug we need to clear the entry
- * back to zero.
+ * To detect the hardware errata 733 we need to clear the
+ * entry back to zero. This issue does not exist on SNP
+ * enabled system. Also this buffer is not writeable on
+ * SNP enabled system.
*/
- raw[0] = raw[1] = 0UL;
+ if (!amd_iommu_snp_en)
+ raw[0] = raw[1] = 0UL;
/* Update head pointer of hardware ring-buffer */
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
@@ -1702,27 +1721,29 @@ static int pdev_pri_ats_enable(struct pci_dev *pdev)
/* Only allow access to user-accessible pages */
ret = pci_enable_pasid(pdev, 0);
if (ret)
- goto out_err;
+ return ret;
/* First reset the PRI state of the device */
ret = pci_reset_pri(pdev);
if (ret)
- goto out_err;
+ goto out_err_pasid;
/* Enable PRI */
/* FIXME: Hardcode number of outstanding requests for now */
ret = pci_enable_pri(pdev, 32);
if (ret)
- goto out_err;
+ goto out_err_pasid;
ret = pci_enable_ats(pdev, PAGE_SHIFT);
if (ret)
- goto out_err;
+ goto out_err_pri;
return 0;
-out_err:
+out_err_pri:
pci_disable_pri(pdev);
+
+out_err_pasid:
pci_disable_pasid(pdev);
return ret;
@@ -2072,6 +2093,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
if (ret)
goto out_err;
+ /* No need to allocate io pgtable ops in passthrough mode */
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ return domain;
+
pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
if (!pgtbl_ops) {
domain_id_free(domain->id);
@@ -2126,31 +2151,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
protection_domain_free(domain);
}
-static void amd_iommu_detach_device(struct iommu_domain *dom,
- struct device *dev)
-{
- struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- struct amd_iommu *iommu;
-
- if (!check_device(dev))
- return;
-
- if (dev_data->domain != NULL)
- detach_device(dev);
-
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return;
-
-#ifdef CONFIG_IRQ_REMAP
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- (dom->type == IOMMU_DOMAIN_UNMANAGED))
- dev_data->use_vapic = 0;
-#endif
-
- iommu_completion_wait(iommu);
-}
-
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
@@ -2159,6 +2159,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
struct amd_iommu *iommu = rlookup_amd_iommu(dev);
int ret;
+ /*
+ * Skip attach device to domain if new domain is same as
+ * devices current domain
+ */
+ if (dev_data->domain == domain)
+ return 0;
+
dev_data->defer_attach = false;
if (dev_data->domain)
@@ -2271,8 +2278,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
@@ -2387,12 +2392,17 @@ static int amd_iommu_def_domain_type(struct device *dev)
return 0;
/*
- * Do not identity map IOMMUv2 capable devices when memory encryption is
- * active, because some of those devices (AMD GPUs) don't have the
- * encryption bit in their DMA-mask and require remapping.
+ * Do not identity map IOMMUv2 capable devices when:
+ * - memory encryption is active, because some of those devices
+ * (AMD GPUs) don't have the encryption bit in their DMA-mask
+ * and require remapping.
+ * - SNP is enabled, because it prohibits DTE[Mode]=0.
*/
- if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT) && dev_data->iommu_v2)
+ if (dev_data->iommu_v2 &&
+ !cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
+ !amd_iommu_snp_en) {
return IOMMU_DOMAIN_IDENTITY;
+ }
return 0;
}
@@ -2416,7 +2426,6 @@ const struct iommu_ops amd_iommu_ops = {
.def_domain_type = amd_iommu_def_domain_type,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
- .detach_dev = amd_iommu_detach_device,
.map_pages = amd_iommu_map_pages,
.unmap_pages = amd_iommu_unmap_pages,
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
@@ -3671,7 +3680,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (amd_iommu_np_cache)
iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 4f4a323be0d0..06169d36eab8 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -34,57 +34,154 @@
#include "dma-iommu.h"
-#define DART_MAX_STREAMS 16
+#define DART_MAX_STREAMS 256
#define DART_MAX_TTBR 4
#define MAX_DARTS_PER_DEVICE 2
-#define DART_STREAM_ALL 0xffff
+/* Common registers */
#define DART_PARAMS1 0x00
-#define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24)
+#define DART_PARAMS1_PAGE_SHIFT GENMASK(27, 24)
#define DART_PARAMS2 0x04
-#define DART_PARAMS_BYPASS_SUPPORT BIT(0)
+#define DART_PARAMS2_BYPASS_SUPPORT BIT(0)
-#define DART_STREAM_COMMAND 0x20
-#define DART_STREAM_COMMAND_BUSY BIT(2)
-#define DART_STREAM_COMMAND_INVALIDATE BIT(20)
+/* T8020/T6000 registers */
-#define DART_STREAM_SELECT 0x34
+#define DART_T8020_STREAM_COMMAND 0x20
+#define DART_T8020_STREAM_COMMAND_BUSY BIT(2)
+#define DART_T8020_STREAM_COMMAND_INVALIDATE BIT(20)
-#define DART_ERROR 0x40
-#define DART_ERROR_STREAM GENMASK(27, 24)
-#define DART_ERROR_CODE GENMASK(11, 0)
-#define DART_ERROR_FLAG BIT(31)
+#define DART_T8020_STREAM_SELECT 0x34
-#define DART_ERROR_READ_FAULT BIT(4)
-#define DART_ERROR_WRITE_FAULT BIT(3)
-#define DART_ERROR_NO_PTE BIT(2)
-#define DART_ERROR_NO_PMD BIT(1)
-#define DART_ERROR_NO_TTBR BIT(0)
+#define DART_T8020_ERROR 0x40
+#define DART_T8020_ERROR_STREAM GENMASK(27, 24)
+#define DART_T8020_ERROR_CODE GENMASK(11, 0)
+#define DART_T8020_ERROR_FLAG BIT(31)
-#define DART_CONFIG 0x60
-#define DART_CONFIG_LOCK BIT(15)
+#define DART_T8020_ERROR_READ_FAULT BIT(4)
+#define DART_T8020_ERROR_WRITE_FAULT BIT(3)
+#define DART_T8020_ERROR_NO_PTE BIT(2)
+#define DART_T8020_ERROR_NO_PMD BIT(1)
+#define DART_T8020_ERROR_NO_TTBR BIT(0)
-#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100
-
-#define DART_ERROR_ADDR_HI 0x54
-#define DART_ERROR_ADDR_LO 0x50
-
-#define DART_STREAMS_ENABLE 0xfc
+#define DART_T8020_CONFIG 0x60
+#define DART_T8020_CONFIG_LOCK BIT(15)
-#define DART_TCR(sid) (0x100 + 4 * (sid))
-#define DART_TCR_TRANSLATE_ENABLE BIT(7)
-#define DART_TCR_BYPASS0_ENABLE BIT(8)
-#define DART_TCR_BYPASS1_ENABLE BIT(12)
+#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100
-#define DART_TTBR(sid, idx) (0x200 + 16 * (sid) + 4 * (idx))
-#define DART_TTBR_VALID BIT(31)
-#define DART_TTBR_SHIFT 12
+#define DART_T8020_ERROR_ADDR_HI 0x54
+#define DART_T8020_ERROR_ADDR_LO 0x50
+
+#define DART_T8020_STREAMS_ENABLE 0xfc
+
+#define DART_T8020_TCR 0x100
+#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7)
+#define DART_T8020_TCR_BYPASS_DART BIT(8)
+#define DART_T8020_TCR_BYPASS_DAPF BIT(12)
+
+#define DART_T8020_TTBR 0x200
+#define DART_T8020_TTBR_VALID BIT(31)
+#define DART_T8020_TTBR_ADDR_FIELD_SHIFT 0
+#define DART_T8020_TTBR_SHIFT 12
+
+/* T8110 registers */
+
+#define DART_T8110_PARAMS3 0x08
+#define DART_T8110_PARAMS3_PA_WIDTH GENMASK(29, 24)
+#define DART_T8110_PARAMS3_VA_WIDTH GENMASK(21, 16)
+#define DART_T8110_PARAMS3_VER_MAJ GENMASK(15, 8)
+#define DART_T8110_PARAMS3_VER_MIN GENMASK(7, 0)
+
+#define DART_T8110_PARAMS4 0x0c
+#define DART_T8110_PARAMS4_NUM_CLIENTS GENMASK(24, 16)
+#define DART_T8110_PARAMS4_NUM_SIDS GENMASK(8, 0)
+
+#define DART_T8110_TLB_CMD 0x80
+#define DART_T8110_TLB_CMD_BUSY BIT(31)
+#define DART_T8110_TLB_CMD_OP GENMASK(10, 8)
+#define DART_T8110_TLB_CMD_OP_FLUSH_ALL 0
+#define DART_T8110_TLB_CMD_OP_FLUSH_SID 1
+#define DART_T8110_TLB_CMD_STREAM GENMASK(7, 0)
+
+#define DART_T8110_ERROR 0x100
+#define DART_T8110_ERROR_STREAM GENMASK(27, 20)
+#define DART_T8110_ERROR_CODE GENMASK(14, 0)
+#define DART_T8110_ERROR_FLAG BIT(31)
+
+#define DART_T8110_ERROR_MASK 0x104
+
+#define DART_T8110_ERROR_READ_FAULT BIT(5)
+#define DART_T8110_ERROR_WRITE_FAULT BIT(4)
+#define DART_T8110_ERROR_NO_PTE BIT(3)
+#define DART_T8110_ERROR_NO_PMD BIT(2)
+#define DART_T8110_ERROR_NO_PGD BIT(1)
+#define DART_T8110_ERROR_NO_TTBR BIT(0)
+
+#define DART_T8110_ERROR_ADDR_LO 0x170
+#define DART_T8110_ERROR_ADDR_HI 0x174
+
+#define DART_T8110_PROTECT 0x200
+#define DART_T8110_UNPROTECT 0x204
+#define DART_T8110_PROTECT_LOCK 0x208
+#define DART_T8110_PROTECT_TTBR_TCR BIT(0)
+
+#define DART_T8110_ENABLE_STREAMS 0xc00
+#define DART_T8110_DISABLE_STREAMS 0xc20
+
+#define DART_T8110_TCR 0x1000
+#define DART_T8110_TCR_REMAP GENMASK(11, 8)
+#define DART_T8110_TCR_REMAP_EN BIT(7)
+#define DART_T8110_TCR_BYPASS_DAPF BIT(2)
+#define DART_T8110_TCR_BYPASS_DART BIT(1)
+#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0)
+
+#define DART_T8110_TTBR 0x1400
+#define DART_T8110_TTBR_VALID BIT(0)
+#define DART_T8110_TTBR_ADDR_FIELD_SHIFT 2
+#define DART_T8110_TTBR_SHIFT 14
+
+#define DART_TCR(dart, sid) ((dart)->hw->tcr + ((sid) << 2))
+
+#define DART_TTBR(dart, sid, idx) ((dart)->hw->ttbr + \
+ (((dart)->hw->ttbr_count * (sid)) << 2) + \
+ ((idx) << 2))
+
+struct apple_dart_stream_map;
+
+enum dart_type {
+ DART_T8020,
+ DART_T6000,
+ DART_T8110,
+};
struct apple_dart_hw {
+ enum dart_type type;
+ irqreturn_t (*irq_handler)(int irq, void *dev);
+ int (*invalidate_tlb)(struct apple_dart_stream_map *stream_map);
+
u32 oas;
enum io_pgtable_fmt fmt;
+
+ int max_sid_count;
+
+ u64 lock;
+ u64 lock_bit;
+
+ u64 error;
+
+ u64 enable_streams;
+
+ u64 tcr;
+ u64 tcr_enabled;
+ u64 tcr_disabled;
+ u64 tcr_bypass;
+
+ u64 ttbr;
+ u64 ttbr_valid;
+ u64 ttbr_addr_field_shift;
+ u64 ttbr_shift;
+ int ttbr_count;
};
/*
@@ -115,12 +212,18 @@ struct apple_dart {
spinlock_t lock;
+ u32 ias;
+ u32 oas;
u32 pgsize;
+ u32 num_streams;
u32 supports_bypass : 1;
u32 force_bypass : 1;
struct iommu_group *sid2group[DART_MAX_STREAMS];
struct iommu_device iommu;
+
+ u32 save_tcr[DART_MAX_STREAMS];
+ u32 save_ttbr[DART_MAX_STREAMS][DART_MAX_TTBR];
};
/*
@@ -140,11 +243,11 @@ struct apple_dart {
*/
struct apple_dart_stream_map {
struct apple_dart *dart;
- unsigned long sidmap;
+ DECLARE_BITMAP(sidmap, DART_MAX_STREAMS);
};
struct apple_dart_atomic_stream_map {
struct apple_dart *dart;
- atomic64_t sidmap;
+ atomic_long_t sidmap[BITS_TO_LONGS(DART_MAX_STREAMS)];
};
/*
@@ -202,50 +305,55 @@ static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom)
static void
apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map)
{
+ struct apple_dart *dart = stream_map->dart;
int sid;
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
- writel(DART_TCR_TRANSLATE_ENABLE,
- stream_map->dart->regs + DART_TCR(sid));
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
+ writel(dart->hw->tcr_enabled, dart->regs + DART_TCR(dart, sid));
}
static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map)
{
+ struct apple_dart *dart = stream_map->dart;
int sid;
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
- writel(0, stream_map->dart->regs + DART_TCR(sid));
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
+ writel(dart->hw->tcr_disabled, dart->regs + DART_TCR(dart, sid));
}
static void
apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map)
{
+ struct apple_dart *dart = stream_map->dart;
int sid;
WARN_ON(!stream_map->dart->supports_bypass);
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
- writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE,
- stream_map->dart->regs + DART_TCR(sid));
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
+ writel(dart->hw->tcr_bypass,
+ dart->regs + DART_TCR(dart, sid));
}
static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map,
u8 idx, phys_addr_t paddr)
{
+ struct apple_dart *dart = stream_map->dart;
int sid;
- WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1));
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
- writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT),
- stream_map->dart->regs + DART_TTBR(sid, idx));
+ WARN_ON(paddr & ((1 << dart->hw->ttbr_shift) - 1));
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
+ writel(dart->hw->ttbr_valid |
+ (paddr >> dart->hw->ttbr_shift) << dart->hw->ttbr_addr_field_shift,
+ dart->regs + DART_TTBR(dart, sid, idx));
}
static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map,
u8 idx)
{
+ struct apple_dart *dart = stream_map->dart;
int sid;
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
- writel(0, stream_map->dart->regs + DART_TTBR(sid, idx));
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams)
+ writel(0, dart->regs + DART_TTBR(dart, sid, idx));
}
static void
@@ -253,12 +361,12 @@ apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map)
{
int i;
- for (i = 0; i < DART_MAX_TTBR; ++i)
+ for (i = 0; i < stream_map->dart->hw->ttbr_count; ++i)
apple_dart_hw_clear_ttbr(stream_map, i);
}
static int
-apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
+apple_dart_t8020_hw_stream_command(struct apple_dart_stream_map *stream_map,
u32 command)
{
unsigned long flags;
@@ -267,12 +375,12 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
spin_lock_irqsave(&stream_map->dart->lock, flags);
- writel(stream_map->sidmap, stream_map->dart->regs + DART_STREAM_SELECT);
- writel(command, stream_map->dart->regs + DART_STREAM_COMMAND);
+ writel(stream_map->sidmap[0], stream_map->dart->regs + DART_T8020_STREAM_SELECT);
+ writel(command, stream_map->dart->regs + DART_T8020_STREAM_COMMAND);
ret = readl_poll_timeout_atomic(
- stream_map->dart->regs + DART_STREAM_COMMAND, command_reg,
- !(command_reg & DART_STREAM_COMMAND_BUSY), 1,
+ stream_map->dart->regs + DART_T8020_STREAM_COMMAND, command_reg,
+ !(command_reg & DART_T8020_STREAM_COMMAND_BUSY), 1,
DART_STREAM_COMMAND_BUSY_TIMEOUT);
spin_unlock_irqrestore(&stream_map->dart->lock, flags);
@@ -280,7 +388,45 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
if (ret) {
dev_err(stream_map->dart->dev,
"busy bit did not clear after command %x for streams %lx\n",
- command, stream_map->sidmap);
+ command, stream_map->sidmap[0]);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+apple_dart_t8110_hw_tlb_command(struct apple_dart_stream_map *stream_map,
+ u32 command)
+{
+ struct apple_dart *dart = stream_map->dart;
+ unsigned long flags;
+ int ret = 0;
+ int sid;
+
+ spin_lock_irqsave(&dart->lock, flags);
+
+ for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) {
+ u32 val = FIELD_PREP(DART_T8110_TLB_CMD_OP, command) |
+ FIELD_PREP(DART_T8110_TLB_CMD_STREAM, sid);
+ writel(val, dart->regs + DART_T8110_TLB_CMD);
+
+ ret = readl_poll_timeout_atomic(
+ dart->regs + DART_T8110_TLB_CMD, val,
+ !(val & DART_T8110_TLB_CMD_BUSY), 1,
+ DART_STREAM_COMMAND_BUSY_TIMEOUT);
+
+ if (ret)
+ break;
+
+ }
+
+ spin_unlock_irqrestore(&dart->lock, flags);
+
+ if (ret) {
+ dev_err(stream_map->dart->dev,
+ "busy bit did not clear after command %x for stream %d\n",
+ command, sid);
return ret;
}
@@ -288,48 +434,64 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
}
static int
-apple_dart_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map)
+apple_dart_t8020_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map)
+{
+ return apple_dart_t8020_hw_stream_command(
+ stream_map, DART_T8020_STREAM_COMMAND_INVALIDATE);
+}
+
+static int
+apple_dart_t8110_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map)
{
- return apple_dart_hw_stream_command(stream_map,
- DART_STREAM_COMMAND_INVALIDATE);
+ return apple_dart_t8110_hw_tlb_command(
+ stream_map, DART_T8110_TLB_CMD_OP_FLUSH_SID);
}
static int apple_dart_hw_reset(struct apple_dart *dart)
{
u32 config;
struct apple_dart_stream_map stream_map;
+ int i;
- config = readl(dart->regs + DART_CONFIG);
- if (config & DART_CONFIG_LOCK) {
+ config = readl(dart->regs + dart->hw->lock);
+ if (config & dart->hw->lock_bit) {
dev_err(dart->dev, "DART is locked down until reboot: %08x\n",
config);
return -EINVAL;
}
stream_map.dart = dart;
- stream_map.sidmap = DART_STREAM_ALL;
+ bitmap_zero(stream_map.sidmap, DART_MAX_STREAMS);
+ bitmap_set(stream_map.sidmap, 0, dart->num_streams);
apple_dart_hw_disable_dma(&stream_map);
apple_dart_hw_clear_all_ttbrs(&stream_map);
/* enable all streams globally since TCR is used to control isolation */
- writel(DART_STREAM_ALL, dart->regs + DART_STREAMS_ENABLE);
+ for (i = 0; i < BITS_TO_U32(dart->num_streams); i++)
+ writel(U32_MAX, dart->regs + dart->hw->enable_streams + 4 * i);
/* clear any pending errors before the interrupt is unmasked */
- writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR);
+ writel(readl(dart->regs + dart->hw->error), dart->regs + dart->hw->error);
+
+ if (dart->hw->type == DART_T8110)
+ writel(0, dart->regs + DART_T8110_ERROR_MASK);
- return apple_dart_hw_invalidate_tlb(&stream_map);
+ return dart->hw->invalidate_tlb(&stream_map);
}
static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain)
{
- int i;
+ int i, j;
struct apple_dart_atomic_stream_map *domain_stream_map;
struct apple_dart_stream_map stream_map;
for_each_stream_map(i, domain, domain_stream_map) {
stream_map.dart = domain_stream_map->dart;
- stream_map.sidmap = atomic64_read(&domain_stream_map->sidmap);
- apple_dart_hw_invalidate_tlb(&stream_map);
+
+ for (j = 0; j < BITS_TO_LONGS(stream_map.dart->num_streams); j++)
+ stream_map.sidmap[j] = atomic_long_read(&domain_stream_map->sidmap[j]);
+
+ stream_map.dart->hw->invalidate_tlb(&stream_map);
}
}
@@ -399,11 +561,11 @@ apple_dart_setup_translation(struct apple_dart_domain *domain,
for (i = 0; i < pgtbl_cfg->apple_dart_cfg.n_ttbrs; ++i)
apple_dart_hw_set_ttbr(stream_map, i,
pgtbl_cfg->apple_dart_cfg.ttbr[i]);
- for (; i < DART_MAX_TTBR; ++i)
+ for (; i < stream_map->dart->hw->ttbr_count; ++i)
apple_dart_hw_clear_ttbr(stream_map, i);
apple_dart_hw_enable_translation(stream_map);
- apple_dart_hw_invalidate_tlb(stream_map);
+ stream_map->dart->hw->invalidate_tlb(stream_map);
}
static int apple_dart_finalize_domain(struct iommu_domain *domain,
@@ -413,7 +575,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
struct apple_dart *dart = cfg->stream_maps[0].dart;
struct io_pgtable_cfg pgtbl_cfg;
int ret = 0;
- int i;
+ int i, j;
mutex_lock(&dart_domain->init_lock);
@@ -422,14 +584,15 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
dart_domain->stream_maps[i].dart = cfg->stream_maps[i].dart;
- atomic64_set(&dart_domain->stream_maps[i].sidmap,
- cfg->stream_maps[i].sidmap);
+ for (j = 0; j < BITS_TO_LONGS(dart->num_streams); j++)
+ atomic_long_set(&dart_domain->stream_maps[i].sidmap[j],
+ cfg->stream_maps[i].sidmap[j]);
}
pgtbl_cfg = (struct io_pgtable_cfg){
.pgsize_bitmap = dart->pgsize,
- .ias = 32,
- .oas = dart->hw->oas,
+ .ias = dart->ias,
+ .oas = dart->oas,
.coherent_walk = 1,
.iommu_dev = dart->dev,
};
@@ -443,7 +606,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end = DMA_BIT_MASK(32);
+ domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias);
domain->geometry.force_aperture = true;
dart_domain->finalized = true;
@@ -458,7 +621,7 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps,
struct apple_dart_stream_map *master_maps,
bool add_streams)
{
- int i;
+ int i, j;
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (domain_maps[i].dart != master_maps[i].dart)
@@ -468,12 +631,14 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps,
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (!domain_maps[i].dart)
break;
- if (add_streams)
- atomic64_or(master_maps[i].sidmap,
- &domain_maps[i].sidmap);
- else
- atomic64_and(~master_maps[i].sidmap,
- &domain_maps[i].sidmap);
+ for (j = 0; j < BITS_TO_LONGS(domain_maps[i].dart->num_streams); j++) {
+ if (add_streams)
+ atomic_long_or(master_maps[i].sidmap[j],
+ &domain_maps[i].sidmap[j]);
+ else
+ atomic_long_and(~master_maps[i].sidmap[j],
+ &domain_maps[i].sidmap[j]);
+ }
}
return 0;
@@ -486,13 +651,6 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
true);
}
-static int apple_dart_domain_remove_streams(struct apple_dart_domain *domain,
- struct apple_dart_master_cfg *cfg)
-{
- return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps,
- false);
-}
-
static int apple_dart_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
@@ -535,22 +693,6 @@ static int apple_dart_attach_dev(struct iommu_domain *domain,
return ret;
}
-static void apple_dart_detach_dev(struct iommu_domain *domain,
- struct device *dev)
-{
- int i;
- struct apple_dart_stream_map *stream_map;
- struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
- struct apple_dart_domain *dart_domain = to_dart_domain(domain);
-
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_hw_disable_dma(stream_map);
-
- if (domain->type == IOMMU_DOMAIN_DMA ||
- domain->type == IOMMU_DOMAIN_UNMANAGED)
- apple_dart_domain_remove_streams(dart_domain, cfg);
-}
-
static struct iommu_device *apple_dart_probe_device(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
@@ -637,14 +779,14 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (cfg->stream_maps[i].dart == dart) {
- cfg->stream_maps[i].sidmap |= 1 << sid;
+ set_bit(sid, cfg->stream_maps[i].sidmap);
return 0;
}
}
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (!cfg->stream_maps[i].dart) {
cfg->stream_maps[i].dart = dart;
- cfg->stream_maps[i].sidmap = 1 << sid;
+ set_bit(sid, cfg->stream_maps[i].sidmap);
return 0;
}
}
@@ -663,13 +805,36 @@ static void apple_dart_release_group(void *iommu_data)
mutex_lock(&apple_dart_groups_lock);
for_each_stream_map(i, group_master_cfg, stream_map)
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+ for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams)
stream_map->dart->sid2group[sid] = NULL;
kfree(iommu_data);
mutex_unlock(&apple_dart_groups_lock);
}
+static int apple_dart_merge_master_cfg(struct apple_dart_master_cfg *dst,
+ struct apple_dart_master_cfg *src)
+{
+ /*
+ * We know that this function is only called for groups returned from
+ * pci_device_group and that all Apple Silicon platforms never spread
+ * PCIe devices from the same bus across multiple DARTs such that we can
+ * just assume that both src and dst only have the same single DART.
+ */
+ if (src->stream_maps[1].dart)
+ return -EINVAL;
+ if (dst->stream_maps[1].dart)
+ return -EINVAL;
+ if (src->stream_maps[0].dart != dst->stream_maps[0].dart)
+ return -EINVAL;
+
+ bitmap_or(dst->stream_maps[0].sidmap,
+ dst->stream_maps[0].sidmap,
+ src->stream_maps[0].sidmap,
+ dst->stream_maps[0].dart->num_streams);
+ return 0;
+}
+
static struct iommu_group *apple_dart_device_group(struct device *dev)
{
int i, sid;
@@ -682,7 +847,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev)
mutex_lock(&apple_dart_groups_lock);
for_each_stream_map(i, cfg, stream_map) {
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) {
+ for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) {
struct iommu_group *stream_group =
stream_map->dart->sid2group[sid];
@@ -711,17 +876,31 @@ static struct iommu_group *apple_dart_device_group(struct device *dev)
if (!group)
goto out;
- group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), GFP_KERNEL);
- if (!group_master_cfg) {
- iommu_group_put(group);
- goto out;
- }
+ group_master_cfg = iommu_group_get_iommudata(group);
+ if (group_master_cfg) {
+ int ret;
+
+ ret = apple_dart_merge_master_cfg(group_master_cfg, cfg);
+ if (ret) {
+ dev_err(dev, "Failed to merge DART IOMMU grups.\n");
+ iommu_group_put(group);
+ res = ERR_PTR(ret);
+ goto out;
+ }
+ } else {
+ group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg),
+ GFP_KERNEL);
+ if (!group_master_cfg) {
+ iommu_group_put(group);
+ goto out;
+ }
- iommu_group_set_iommudata(group, group_master_cfg,
- apple_dart_release_group);
+ iommu_group_set_iommudata(group, group_master_cfg,
+ apple_dart_release_group);
+ }
for_each_stream_map(i, cfg, stream_map)
- for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+ for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams)
stream_map->dart->sid2group[sid] = group;
res = group;
@@ -780,7 +959,6 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = apple_dart_attach_dev,
- .detach_dev = apple_dart_detach_dev,
.map_pages = apple_dart_map_pages,
.unmap_pages = apple_dart_unmap_pages,
.flush_iotlb_all = apple_dart_flush_iotlb_all,
@@ -791,30 +969,69 @@ static const struct iommu_ops apple_dart_iommu_ops = {
}
};
-static irqreturn_t apple_dart_irq(int irq, void *dev)
+static irqreturn_t apple_dart_t8020_irq(int irq, void *dev)
+{
+ struct apple_dart *dart = dev;
+ const char *fault_name = NULL;
+ u32 error = readl(dart->regs + DART_T8020_ERROR);
+ u32 error_code = FIELD_GET(DART_T8020_ERROR_CODE, error);
+ u32 addr_lo = readl(dart->regs + DART_T8020_ERROR_ADDR_LO);
+ u32 addr_hi = readl(dart->regs + DART_T8020_ERROR_ADDR_HI);
+ u64 addr = addr_lo | (((u64)addr_hi) << 32);
+ u8 stream_idx = FIELD_GET(DART_T8020_ERROR_STREAM, error);
+
+ if (!(error & DART_T8020_ERROR_FLAG))
+ return IRQ_NONE;
+
+ /* there should only be a single bit set but let's use == to be sure */
+ if (error_code == DART_T8020_ERROR_READ_FAULT)
+ fault_name = "READ FAULT";
+ else if (error_code == DART_T8020_ERROR_WRITE_FAULT)
+ fault_name = "WRITE FAULT";
+ else if (error_code == DART_T8020_ERROR_NO_PTE)
+ fault_name = "NO PTE FOR IOVA";
+ else if (error_code == DART_T8020_ERROR_NO_PMD)
+ fault_name = "NO PMD FOR IOVA";
+ else if (error_code == DART_T8020_ERROR_NO_TTBR)
+ fault_name = "NO TTBR FOR IOVA";
+ else
+ fault_name = "unknown";
+
+ dev_err_ratelimited(
+ dart->dev,
+ "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx",
+ error, stream_idx, error_code, fault_name, addr);
+
+ writel(error, dart->regs + DART_T8020_ERROR);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t apple_dart_t8110_irq(int irq, void *dev)
{
struct apple_dart *dart = dev;
const char *fault_name = NULL;
- u32 error = readl(dart->regs + DART_ERROR);
- u32 error_code = FIELD_GET(DART_ERROR_CODE, error);
- u32 addr_lo = readl(dart->regs + DART_ERROR_ADDR_LO);
- u32 addr_hi = readl(dart->regs + DART_ERROR_ADDR_HI);
+ u32 error = readl(dart->regs + DART_T8110_ERROR);
+ u32 error_code = FIELD_GET(DART_T8110_ERROR_CODE, error);
+ u32 addr_lo = readl(dart->regs + DART_T8110_ERROR_ADDR_LO);
+ u32 addr_hi = readl(dart->regs + DART_T8110_ERROR_ADDR_HI);
u64 addr = addr_lo | (((u64)addr_hi) << 32);
- u8 stream_idx = FIELD_GET(DART_ERROR_STREAM, error);
+ u8 stream_idx = FIELD_GET(DART_T8110_ERROR_STREAM, error);
- if (!(error & DART_ERROR_FLAG))
+ if (!(error & DART_T8110_ERROR_FLAG))
return IRQ_NONE;
/* there should only be a single bit set but let's use == to be sure */
- if (error_code == DART_ERROR_READ_FAULT)
+ if (error_code == DART_T8110_ERROR_READ_FAULT)
fault_name = "READ FAULT";
- else if (error_code == DART_ERROR_WRITE_FAULT)
+ else if (error_code == DART_T8110_ERROR_WRITE_FAULT)
fault_name = "WRITE FAULT";
- else if (error_code == DART_ERROR_NO_PTE)
+ else if (error_code == DART_T8110_ERROR_NO_PTE)
fault_name = "NO PTE FOR IOVA";
- else if (error_code == DART_ERROR_NO_PMD)
+ else if (error_code == DART_T8110_ERROR_NO_PMD)
fault_name = "NO PMD FOR IOVA";
- else if (error_code == DART_ERROR_NO_TTBR)
+ else if (error_code == DART_T8110_ERROR_NO_PGD)
+ fault_name = "NO PGD FOR IOVA";
+ else if (error_code == DART_T8110_ERROR_NO_TTBR)
fault_name = "NO TTBR FOR IOVA";
else
fault_name = "unknown";
@@ -824,14 +1041,14 @@ static irqreturn_t apple_dart_irq(int irq, void *dev)
"translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx",
error, stream_idx, error_code, fault_name, addr);
- writel(error, dart->regs + DART_ERROR);
+ writel(error, dart->regs + DART_T8110_ERROR);
return IRQ_HANDLED;
}
static int apple_dart_probe(struct platform_device *pdev)
{
int ret;
- u32 dart_params[2];
+ u32 dart_params[4];
struct resource *res;
struct apple_dart *dart;
struct device *dev = &pdev->dev;
@@ -866,17 +1083,42 @@ static int apple_dart_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = apple_dart_hw_reset(dart);
- if (ret)
- goto err_clk_disable;
-
dart_params[0] = readl(dart->regs + DART_PARAMS1);
dart_params[1] = readl(dart->regs + DART_PARAMS2);
- dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]);
- dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT;
+ dart->pgsize = 1 << FIELD_GET(DART_PARAMS1_PAGE_SHIFT, dart_params[0]);
+ dart->supports_bypass = dart_params[1] & DART_PARAMS2_BYPASS_SUPPORT;
+
+ switch (dart->hw->type) {
+ case DART_T8020:
+ case DART_T6000:
+ dart->ias = 32;
+ dart->oas = dart->hw->oas;
+ dart->num_streams = dart->hw->max_sid_count;
+ break;
+
+ case DART_T8110:
+ dart_params[2] = readl(dart->regs + DART_T8110_PARAMS3);
+ dart_params[3] = readl(dart->regs + DART_T8110_PARAMS4);
+ dart->ias = FIELD_GET(DART_T8110_PARAMS3_VA_WIDTH, dart_params[2]);
+ dart->oas = FIELD_GET(DART_T8110_PARAMS3_PA_WIDTH, dart_params[2]);
+ dart->num_streams = FIELD_GET(DART_T8110_PARAMS4_NUM_SIDS, dart_params[3]);
+ break;
+ }
+
+ if (dart->num_streams > DART_MAX_STREAMS) {
+ dev_err(&pdev->dev, "Too many streams (%d > %d)\n",
+ dart->num_streams, DART_MAX_STREAMS);
+ ret = -EINVAL;
+ goto err_clk_disable;
+ }
+
dart->force_bypass = dart->pgsize > PAGE_SIZE;
- ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED,
+ ret = apple_dart_hw_reset(dart);
+ if (ret)
+ goto err_clk_disable;
+
+ ret = request_irq(dart->irq, dart->hw->irq_handler, IRQF_SHARED,
"apple-dart fault handler", dart);
if (ret)
goto err_clk_disable;
@@ -894,8 +1136,8 @@ static int apple_dart_probe(struct platform_device *pdev)
dev_info(
&pdev->dev,
- "DART [pagesize %x, bypass support: %d, bypass forced: %d] initialized\n",
- dart->pgsize, dart->supports_bypass, dart->force_bypass);
+ "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n",
+ dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass);
return 0;
err_sysfs_remove:
@@ -924,16 +1166,123 @@ static int apple_dart_remove(struct platform_device *pdev)
}
static const struct apple_dart_hw apple_dart_hw_t8103 = {
+ .type = DART_T8020,
+ .irq_handler = apple_dart_t8020_irq,
+ .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb,
.oas = 36,
.fmt = APPLE_DART,
+ .max_sid_count = 16,
+
+ .enable_streams = DART_T8020_STREAMS_ENABLE,
+ .lock = DART_T8020_CONFIG,
+ .lock_bit = DART_T8020_CONFIG_LOCK,
+
+ .error = DART_T8020_ERROR,
+
+ .tcr = DART_T8020_TCR,
+ .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = 0,
+ .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART,
+
+ .ttbr = DART_T8020_TTBR,
+ .ttbr_valid = DART_T8020_TTBR_VALID,
+ .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT,
+ .ttbr_shift = DART_T8020_TTBR_SHIFT,
+ .ttbr_count = 4,
};
static const struct apple_dart_hw apple_dart_hw_t6000 = {
+ .type = DART_T6000,
+ .irq_handler = apple_dart_t8020_irq,
+ .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb,
.oas = 42,
.fmt = APPLE_DART2,
+ .max_sid_count = 16,
+
+ .enable_streams = DART_T8020_STREAMS_ENABLE,
+ .lock = DART_T8020_CONFIG,
+ .lock_bit = DART_T8020_CONFIG_LOCK,
+
+ .error = DART_T8020_ERROR,
+
+ .tcr = DART_T8020_TCR,
+ .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = 0,
+ .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART,
+
+ .ttbr = DART_T8020_TTBR,
+ .ttbr_valid = DART_T8020_TTBR_VALID,
+ .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT,
+ .ttbr_shift = DART_T8020_TTBR_SHIFT,
+ .ttbr_count = 4,
};
+static const struct apple_dart_hw apple_dart_hw_t8110 = {
+ .type = DART_T8110,
+ .irq_handler = apple_dart_t8110_irq,
+ .invalidate_tlb = apple_dart_t8110_hw_invalidate_tlb,
+ .fmt = APPLE_DART2,
+ .max_sid_count = 256,
+
+ .enable_streams = DART_T8110_ENABLE_STREAMS,
+ .lock = DART_T8110_PROTECT,
+ .lock_bit = DART_T8110_PROTECT_TTBR_TCR,
+
+ .error = DART_T8110_ERROR,
+
+ .tcr = DART_T8110_TCR,
+ .tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = 0,
+ .tcr_bypass = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART,
+
+ .ttbr = DART_T8110_TTBR,
+ .ttbr_valid = DART_T8110_TTBR_VALID,
+ .ttbr_addr_field_shift = DART_T8110_TTBR_ADDR_FIELD_SHIFT,
+ .ttbr_shift = DART_T8110_TTBR_SHIFT,
+ .ttbr_count = 1,
+};
+
+static __maybe_unused int apple_dart_suspend(struct device *dev)
+{
+ struct apple_dart *dart = dev_get_drvdata(dev);
+ unsigned int sid, idx;
+
+ for (sid = 0; sid < dart->num_streams; sid++) {
+ dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(dart, sid));
+ for (idx = 0; idx < dart->hw->ttbr_count; idx++)
+ dart->save_ttbr[sid][idx] =
+ readl(dart->regs + DART_TTBR(dart, sid, idx));
+ }
+
+ return 0;
+}
+
+static __maybe_unused int apple_dart_resume(struct device *dev)
+{
+ struct apple_dart *dart = dev_get_drvdata(dev);
+ unsigned int sid, idx;
+ int ret;
+
+ ret = apple_dart_hw_reset(dart);
+ if (ret) {
+ dev_err(dev, "Failed to reset DART on resume\n");
+ return ret;
+ }
+
+ for (sid = 0; sid < dart->num_streams; sid++) {
+ for (idx = 0; idx < dart->hw->ttbr_count; idx++)
+ writel(dart->save_ttbr[sid][idx],
+ dart->regs + DART_TTBR(dart, sid, idx));
+ writel(dart->save_tcr[sid], dart->regs + DART_TCR(dart, sid));
+ }
+
+ return 0;
+}
+
+DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume);
+
static const struct of_device_id apple_dart_of_match[] = {
{ .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 },
+ { .compatible = "apple,t8110-dart", .data = &apple_dart_hw_t8110 },
{ .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 },
{},
};
@@ -944,6 +1293,7 @@ static struct platform_driver apple_dart_driver = {
.name = "apple-dart",
.of_match_table = apple_dart_of_match,
.suppress_bind_attrs = true,
+ .pm = pm_sleep_ptr(&apple_dart_pm_ops),
},
.probe = apple_dart_probe,
.remove = apple_dart_remove,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index 74e9ef2fd580..b5b14108e086 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -4,7 +4,7 @@
*/
#include <linux/of_device.h>
-#include <linux/qcom_scm.h>
+#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/ratelimit.h>
#include "arm-smmu.h"
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 91d404deb115..d1b296b95c86 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -7,7 +7,7 @@
#include <linux/adreno-smmu-priv.h>
#include <linux/delay.h>
#include <linux/of_device.h>
-#include <linux/qcom_scm.h>
+#include <linux/firmware/qcom/qcom_scm.h>
#include "arm-smmu.h"
#include "arm-smmu-qcom.h"
@@ -250,6 +250,8 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mdss" },
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
+ { .compatible = "qcom,sc8280xp-mdss" },
+ { .compatible = "qcom,sm8150-mdss" },
{ .compatible = "qcom,sm8250-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 270c3d9128ba..c8b70f476cd8 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -27,7 +27,7 @@
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
-#include <linux/qcom_scm.h>
+#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -387,28 +387,6 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
return 0;
}
-static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
-{
- struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
- unsigned i;
-
- if (WARN_ON(!qcom_domain->iommu))
- return;
-
- pm_runtime_get_sync(qcom_iommu->dev);
- for (i = 0; i < fwspec->num_ids; i++) {
- struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
-
- /* Disable the context bank: */
- iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
-
- ctx->domain = NULL;
- }
- pm_runtime_put_sync(qcom_iommu->dev);
-}
-
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -583,7 +561,6 @@ static const struct iommu_ops qcom_iommu_ops = {
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = qcom_iommu_attach_dev,
- .detach_dev = qcom_iommu_detach_dev,
.map_pages = qcom_iommu_map,
.unmap_pages = qcom_iommu_unmap,
.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f798c44e0903..99b2646cb5c7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -23,6 +23,7 @@
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
@@ -391,6 +392,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
iort_iommu_get_resv_regions(dev, list);
+ if (dev->of_node)
+ of_iommu_get_resv_regions(dev, list);
}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);
@@ -713,7 +716,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
if (!iova)
return DMA_MAPPING_ERROR;
- if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
+ if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
@@ -822,7 +825,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
if (!iova)
goto out_free_pages;
- if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
+ /*
+ * Remove the zone/policy flags from the GFP - these are applied to the
+ * __iommu_dma_alloc_pages() but are not used for the supporting
+ * internal allocations that follow.
+ */
+ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);
+
+ if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
goto out_free_iova;
if (!(ioprot & IOMMU_CACHE)) {
@@ -833,7 +843,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
+ ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
+ gfp);
if (ret < 0 || ret < size)
goto out_free_sg;
@@ -1281,7 +1292,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
* We'll leave any physical concatenation to the IOMMU driver's
* implementation - it knows better than we do.
*/
- ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+ ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
if (ret < 0 || ret < iova_len)
goto out_free_iova;
@@ -1615,7 +1626,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
if (!iova)
goto out_free_page;
- if (iommu_map(domain, iova, msi_addr, size, prot))
+ if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
goto out_free_iova;
INIT_LIST_HEAD(&msi_page->list);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index b0cde2211987..1abd187c6075 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -185,38 +185,43 @@ static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova)
lv2table_base(sent)) + lv2ent_offset(iova);
}
-/*
- * IOMMU fault information register
- */
-struct sysmmu_fault_info {
- unsigned int bit; /* bit number in STATUS register */
- unsigned short addr_reg; /* register to read VA fault address */
+struct sysmmu_fault {
+ sysmmu_iova_t addr; /* IOVA address that caused fault */
+ const char *name; /* human readable fault name */
+ unsigned int type; /* fault type for report_iommu_fault() */
+};
+
+struct sysmmu_v1_fault_info {
+ unsigned short addr_reg; /* register to read IOVA fault address */
const char *name; /* human readable fault name */
unsigned int type; /* fault type for report_iommu_fault */
};
-static const struct sysmmu_fault_info sysmmu_faults[] = {
- { 0, REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ },
- { 1, REG_AR_FAULT_ADDR, "AR MULTI-HIT", IOMMU_FAULT_READ },
- { 2, REG_AW_FAULT_ADDR, "AW MULTI-HIT", IOMMU_FAULT_WRITE },
- { 3, REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ },
- { 4, REG_AR_FAULT_ADDR, "AR SECURITY PROTECTION", IOMMU_FAULT_READ },
- { 5, REG_AR_FAULT_ADDR, "AR ACCESS PROTECTION", IOMMU_FAULT_READ },
- { 6, REG_AW_FAULT_ADDR, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE },
- { 7, REG_AW_FAULT_ADDR, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE },
+static const struct sysmmu_v1_fault_info sysmmu_v1_faults[] = {
+ { REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ },
+ { REG_AR_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_READ },
+ { REG_AW_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_WRITE },
+ { REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ },
+ { REG_AR_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_READ },
+ { REG_AR_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_READ },
+ { REG_AW_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_WRITE },
+ { REG_AW_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_WRITE },
};
-static const struct sysmmu_fault_info sysmmu_v5_faults[] = {
- { 0, REG_V5_FAULT_AR_VA, "AR PTW", IOMMU_FAULT_READ },
- { 1, REG_V5_FAULT_AR_VA, "AR PAGE", IOMMU_FAULT_READ },
- { 2, REG_V5_FAULT_AR_VA, "AR MULTI-HIT", IOMMU_FAULT_READ },
- { 3, REG_V5_FAULT_AR_VA, "AR ACCESS PROTECTION", IOMMU_FAULT_READ },
- { 4, REG_V5_FAULT_AR_VA, "AR SECURITY PROTECTION", IOMMU_FAULT_READ },
- { 16, REG_V5_FAULT_AW_VA, "AW PTW", IOMMU_FAULT_WRITE },
- { 17, REG_V5_FAULT_AW_VA, "AW PAGE", IOMMU_FAULT_WRITE },
- { 18, REG_V5_FAULT_AW_VA, "AW MULTI-HIT", IOMMU_FAULT_WRITE },
- { 19, REG_V5_FAULT_AW_VA, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE },
- { 20, REG_V5_FAULT_AW_VA, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE },
+/* SysMMU v5 has the same faults for AR (0..4 bits) and AW (16..20 bits) */
+static const char * const sysmmu_v5_fault_names[] = {
+ "PTW",
+ "PAGE",
+ "MULTI-HIT",
+ "ACCESS PROTECTION",
+ "SECURITY PROTECTION"
+};
+
+static const char * const sysmmu_v7_fault_names[] = {
+ "PTW",
+ "PAGE",
+ "ACCESS PROTECTION",
+ "RESERVED"
};
/*
@@ -246,9 +251,12 @@ struct exynos_iommu_domain {
struct iommu_domain domain; /* generic domain data structure */
};
+struct sysmmu_drvdata;
+
/*
* SysMMU version specific data. Contains offsets for the registers which can
* be found in different SysMMU variants, but have different offset values.
+ * Also contains version specific callbacks to abstract the hardware.
*/
struct sysmmu_variant {
u32 pt_base; /* page table base address (physical) */
@@ -259,6 +267,11 @@ struct sysmmu_variant {
u32 flush_end; /* end address of range invalidation */
u32 int_status; /* interrupt status information */
u32 int_clear; /* clear the interrupt */
+ u32 fault_va; /* IOVA address that caused fault */
+ u32 fault_info; /* fault transaction info */
+
+ int (*get_fault_info)(struct sysmmu_drvdata *data, unsigned int itype,
+ struct sysmmu_fault *fault);
};
/*
@@ -293,6 +306,59 @@ struct sysmmu_drvdata {
#define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg)
+static int exynos_sysmmu_v1_get_fault_info(struct sysmmu_drvdata *data,
+ unsigned int itype,
+ struct sysmmu_fault *fault)
+{
+ const struct sysmmu_v1_fault_info *finfo;
+
+ if (itype >= ARRAY_SIZE(sysmmu_v1_faults))
+ return -ENXIO;
+
+ finfo = &sysmmu_v1_faults[itype];
+ fault->addr = readl(data->sfrbase + finfo->addr_reg);
+ fault->name = finfo->name;
+ fault->type = finfo->type;
+
+ return 0;
+}
+
+static int exynos_sysmmu_v5_get_fault_info(struct sysmmu_drvdata *data,
+ unsigned int itype,
+ struct sysmmu_fault *fault)
+{
+ unsigned int addr_reg;
+
+ if (itype < ARRAY_SIZE(sysmmu_v5_fault_names)) {
+ fault->type = IOMMU_FAULT_READ;
+ addr_reg = REG_V5_FAULT_AR_VA;
+ } else if (itype >= 16 && itype <= 20) {
+ fault->type = IOMMU_FAULT_WRITE;
+ addr_reg = REG_V5_FAULT_AW_VA;
+ itype -= 16;
+ } else {
+ return -ENXIO;
+ }
+
+ fault->name = sysmmu_v5_fault_names[itype];
+ fault->addr = readl(data->sfrbase + addr_reg);
+
+ return 0;
+}
+
+static int exynos_sysmmu_v7_get_fault_info(struct sysmmu_drvdata *data,
+ unsigned int itype,
+ struct sysmmu_fault *fault)
+{
+ u32 info = readl(SYSMMU_REG(data, fault_info));
+
+ fault->addr = readl(SYSMMU_REG(data, fault_va));
+ fault->name = sysmmu_v7_fault_names[itype % 4];
+ fault->type = (info & BIT(20)) ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
+
+ return 0;
+}
+
/* SysMMU v1..v3 */
static const struct sysmmu_variant sysmmu_v1_variant = {
.flush_all = 0x0c,
@@ -300,9 +366,11 @@ static const struct sysmmu_variant sysmmu_v1_variant = {
.pt_base = 0x14,
.int_status = 0x18,
.int_clear = 0x1c,
+
+ .get_fault_info = exynos_sysmmu_v1_get_fault_info,
};
-/* SysMMU v5 and v7 (non-VM capable) */
+/* SysMMU v5 */
static const struct sysmmu_variant sysmmu_v5_variant = {
.pt_base = 0x0c,
.flush_all = 0x10,
@@ -312,9 +380,27 @@ static const struct sysmmu_variant sysmmu_v5_variant = {
.flush_end = 0x24,
.int_status = 0x60,
.int_clear = 0x64,
+
+ .get_fault_info = exynos_sysmmu_v5_get_fault_info,
+};
+
+/* SysMMU v7: non-VM capable register layout */
+static const struct sysmmu_variant sysmmu_v7_variant = {
+ .pt_base = 0x0c,
+ .flush_all = 0x10,
+ .flush_entry = 0x14,
+ .flush_range = 0x18,
+ .flush_start = 0x20,
+ .flush_end = 0x24,
+ .int_status = 0x60,
+ .int_clear = 0x64,
+ .fault_va = 0x70,
+ .fault_info = 0x78,
+
+ .get_fault_info = exynos_sysmmu_v7_get_fault_info,
};
-/* SysMMU v7: VM capable register set */
+/* SysMMU v7: VM capable register layout */
static const struct sysmmu_variant sysmmu_v7_vm_variant = {
.pt_base = 0x800c,
.flush_all = 0x8010,
@@ -324,6 +410,10 @@ static const struct sysmmu_variant sysmmu_v7_vm_variant = {
.flush_end = 0x8024,
.int_status = 0x60,
.int_clear = 0x64,
+ .fault_va = 0x1000,
+ .fault_info = 0x1004,
+
+ .get_fault_info = exynos_sysmmu_v7_get_fault_info,
};
static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom)
@@ -446,75 +536,63 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data)
if (data->has_vcr)
data->variant = &sysmmu_v7_vm_variant;
else
- data->variant = &sysmmu_v5_variant;
+ data->variant = &sysmmu_v7_variant;
}
__sysmmu_disable_clocks(data);
}
static void show_fault_information(struct sysmmu_drvdata *data,
- const struct sysmmu_fault_info *finfo,
- sysmmu_iova_t fault_addr)
+ const struct sysmmu_fault *fault)
{
sysmmu_pte_t *ent;
- dev_err(data->sysmmu, "%s: %s FAULT occurred at %#x\n",
- dev_name(data->master), finfo->name, fault_addr);
+ dev_err(data->sysmmu, "%s: [%s] %s FAULT occurred at %#x\n",
+ dev_name(data->master),
+ fault->type == IOMMU_FAULT_READ ? "READ" : "WRITE",
+ fault->name, fault->addr);
dev_dbg(data->sysmmu, "Page table base: %pa\n", &data->pgtable);
- ent = section_entry(phys_to_virt(data->pgtable), fault_addr);
+ ent = section_entry(phys_to_virt(data->pgtable), fault->addr);
dev_dbg(data->sysmmu, "\tLv1 entry: %#x\n", *ent);
if (lv1ent_page(ent)) {
- ent = page_entry(ent, fault_addr);
+ ent = page_entry(ent, fault->addr);
dev_dbg(data->sysmmu, "\t Lv2 entry: %#x\n", *ent);
}
}
static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
{
- /* SYSMMU is in blocked state when interrupt occurred. */
struct sysmmu_drvdata *data = dev_id;
- const struct sysmmu_fault_info *finfo;
- unsigned int i, n, itype;
- sysmmu_iova_t fault_addr;
+ unsigned int itype;
+ struct sysmmu_fault fault;
int ret = -ENOSYS;
WARN_ON(!data->active);
- if (MMU_MAJ_VER(data->version) < 5) {
- finfo = sysmmu_faults;
- n = ARRAY_SIZE(sysmmu_faults);
- } else {
- finfo = sysmmu_v5_faults;
- n = ARRAY_SIZE(sysmmu_v5_faults);
- }
-
spin_lock(&data->lock);
-
clk_enable(data->clk_master);
itype = __ffs(readl(SYSMMU_REG(data, int_status)));
- for (i = 0; i < n; i++, finfo++)
- if (finfo->bit == itype)
- break;
- /* unknown/unsupported fault */
- BUG_ON(i == n);
-
- /* print debug message */
- fault_addr = readl(data->sfrbase + finfo->addr_reg);
- show_fault_information(data, finfo, fault_addr);
-
- if (data->domain)
- ret = report_iommu_fault(&data->domain->domain,
- data->master, fault_addr, finfo->type);
- /* fault is not recovered by fault handler */
- BUG_ON(ret != 0);
+ ret = data->variant->get_fault_info(data, itype, &fault);
+ if (ret) {
+ dev_err(data->sysmmu, "Unhandled interrupt bit %u\n", itype);
+ goto out;
+ }
+ show_fault_information(data, &fault);
+
+ if (data->domain) {
+ ret = report_iommu_fault(&data->domain->domain, data->master,
+ fault.addr, fault.type);
+ }
+ if (ret)
+ panic("Unrecoverable System MMU Fault!");
+out:
writel(1 << itype, SYSMMU_REG(data, int_clear));
+ /* SysMMU is in blocked state when interrupt occurred */
sysmmu_unblock(data);
-
clk_disable(data->clk_master);
-
spin_unlock(&data->lock);
return IRQ_HANDLED;
@@ -1337,21 +1415,26 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
return &data->iommu;
}
-static void exynos_iommu_release_device(struct device *dev)
+static void exynos_iommu_set_platform_dma(struct device *dev)
{
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
- struct sysmmu_drvdata *data;
if (owner->domain) {
struct iommu_group *group = iommu_group_get(dev);
if (group) {
- WARN_ON(owner->domain !=
- iommu_group_default_domain(group));
exynos_iommu_detach_device(owner->domain, dev);
iommu_group_put(group);
}
}
+}
+
+static void exynos_iommu_release_device(struct device *dev)
+{
+ struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
+ struct sysmmu_drvdata *data;
+
+ exynos_iommu_set_platform_dma(dev);
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1398,13 +1481,15 @@ static int exynos_iommu_of_xlate(struct device *dev,
static const struct iommu_ops exynos_iommu_ops = {
.domain_alloc = exynos_iommu_domain_alloc,
.device_group = generic_device_group,
+#ifdef CONFIG_ARM
+ .set_platform_dma_ops = exynos_iommu_set_platform_dma,
+#endif
.probe_device = exynos_iommu_probe_device,
.release_device = exynos_iommu_release_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
.of_xlate = exynos_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = exynos_iommu_attach_device,
- .detach_dev = exynos_iommu_detach_device,
.map = exynos_iommu_map,
.unmap = exynos_iommu_unmap,
.iova_to_phys = exynos_iommu_iova_to_phys,
@@ -1446,7 +1531,7 @@ static int __init exynos_iommu_init(void)
return 0;
err_reg_driver:
- platform_driver_unregister(&exynos_sysmmu_driver);
+ kmem_cache_free(lv2table_kmem_cache, zero_lv2_table);
err_zero_lv2:
kmem_cache_destroy(lv2table_kmem_cache);
return ret;
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 4408ac3c49b6..bce372297099 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -283,9 +283,9 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
return ret;
}
-static void fsl_pamu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+static void fsl_pamu_set_platform_dma(struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
const u32 *prop;
int len;
@@ -452,9 +452,9 @@ static const struct iommu_ops fsl_pamu_ops = {
.domain_alloc = fsl_pamu_domain_alloc,
.probe_device = fsl_pamu_probe_device,
.device_group = fsl_pamu_device_group,
+ .set_platform_dma_ops = fsl_pamu_set_platform_dma,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = fsl_pamu_attach_device,
- .detach_dev = fsl_pamu_detach_device,
.iova_to_phys = fsl_pamu_iova_to_phys,
.free = fsl_pamu_domain_free,
}
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index b7dff5092fd2..12e1e90fdae1 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
passing intel_iommu=sm_on to the kernel. If not sure, please use
the default value.
+config INTEL_IOMMU_PERF_EVENTS
+ def_bool y
+ bool "Intel IOMMU performance events"
+ depends on INTEL_IOMMU && PERF_EVENTS
+ help
+ Selecting this option will enable the performance monitoring
+ infrastructure in the Intel IOMMU. It collects information about
+ key events occurring during operation of the remapping hardware,
+ to aid performance tuning and debug. These are available on modern
+ processors which support Intel VT-d 4.0 and later.
+
endif # INTEL_IOMMU
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index fa0dae16441c..7af3b8a4f2a0 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
+obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index b00a0ceb2d13..23828d189c2a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -34,6 +34,7 @@
#include "../irq_remapping.h"
#include "perf.h"
#include "trace.h"
+#include "perfmon.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -427,6 +428,8 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
memcpy(dmaru->hdr, header, header->length);
dmaru->reg_base_addr = drhd->address;
dmaru->segment = drhd->segment;
+ /* The size of the register set is 2 ^ N 4 KB pages. */
+ dmaru->reg_size = 1UL << (drhd->size + 12);
dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
((void *)drhd) + drhd->header.length,
@@ -956,17 +959,18 @@ static void unmap_iommu(struct intel_iommu *iommu)
/**
* map_iommu: map the iommu's registers
* @iommu: the iommu to map
- * @phys_addr: the physical address of the base resgister
+ * @drhd: DMA remapping hardware definition structure
*
* Memory map the iommu's registers. Start w/ a single page, and
* possibly expand if that turns out to be insufficent.
*/
-static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
+static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
{
+ u64 phys_addr = drhd->reg_base_addr;
int map_size, err=0;
iommu->reg_phys = phys_addr;
- iommu->reg_size = VTD_PAGE_SIZE;
+ iommu->reg_size = drhd->reg_size;
if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
pr_err("Can't reserve memory\n");
@@ -1013,6 +1017,16 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
goto release;
}
}
+
+ if (cap_ecmds(iommu->cap)) {
+ int i;
+
+ for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) {
+ iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG +
+ i * DMA_ECMD_REG_STEP);
+ }
+ }
+
err = 0;
goto out;
@@ -1050,14 +1064,15 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
}
sprintf(iommu->name, "dmar%d", iommu->seq_id);
- err = map_iommu(iommu, drhd->reg_base_addr);
+ err = map_iommu(iommu, drhd);
if (err) {
pr_err("Failed to map %s\n", iommu->name);
goto error_free_seq_id;
}
err = -EINVAL;
- if (cap_sagaw(iommu->cap) == 0) {
+ if (!cap_sagaw(iommu->cap) &&
+ (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
iommu->name);
drhd->ignored = 1;
@@ -1103,6 +1118,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (sts & DMA_GSTS_QIES)
iommu->gcmd |= DMA_GCMD_QIE;
+ if (alloc_iommu_pmu(iommu))
+ pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id);
+
raw_spin_lock_init(&iommu->register_lock);
/*
@@ -1127,6 +1145,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
if (err)
goto err_sysfs;
+
+ iommu_pmu_register(iommu);
}
drhd->iommu = iommu;
@@ -1137,6 +1157,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
err_unmap:
+ free_iommu_pmu(iommu);
unmap_iommu(iommu);
error_free_seq_id:
ida_free(&dmar_seq_ids, iommu->seq_id);
@@ -1148,10 +1169,13 @@ error:
static void free_iommu(struct intel_iommu *iommu)
{
if (intel_iommu_enabled && !iommu->drhd->ignored) {
+ iommu_pmu_unregister(iommu);
iommu_device_unregister(&iommu->iommu);
iommu_device_sysfs_remove(&iommu->iommu);
}
+ free_iommu_pmu(iommu);
+
if (iommu->irq) {
if (iommu->pr_irq) {
free_irq(iommu->pr_irq, iommu);
@@ -1859,6 +1883,8 @@ static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
return DMAR_FECTL_REG;
else if (iommu->pr_irq == irq)
return DMAR_PECTL_REG;
+ else if (iommu->perf_irq == irq)
+ return DMAR_PERFINTRCTL_REG;
else
BUG();
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..7c2f4bd33582 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -16,7 +16,6 @@
#include <linux/crash_dump.h>
#include <linux/dma-direct.h>
#include <linux/dmi.h>
-#include <linux/intel-svm.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
@@ -30,6 +29,7 @@
#include "../iommu-sva.h"
#include "pasid.h"
#include "cap_audit.h"
+#include "perfmon.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
{
struct page *page;
void *vaddr = NULL;
- page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+ page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (page)
vaddr = page_address(page);
return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node);
+ context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!context)
return NULL;
@@ -908,7 +908,8 @@ pgtable_walk:
#endif
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn, int *target_level)
+ unsigned long pfn, int *target_level,
+ gfp_t gfp)
{
struct dma_pte *parent, *pte;
int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page(domain->nid);
+ tmp_page = alloc_pgtable_page(domain->nid, gfp);
if (!tmp_page)
return NULL;
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+ root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
while (start_pfn <= end_pfn) {
if (!pte)
- pte = pfn_to_dma_pte(domain, start_pfn, &level);
+ pte = pfn_to_dma_pte(domain, start_pfn, &level,
+ GFP_ATOMIC);
if (dma_pte_present(pte)) {
dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot)
+ unsigned long phys_pfn, unsigned long nr_pages, int prot,
+ gfp_t gfp)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
phys_pfn, nr_pages);
- pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+ pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+ gfp);
if (!pte)
return -ENOMEM;
first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
return __domain_mapping(domain, first_vpfn,
first_vpfn, last_vpfn - first_vpfn + 1,
- DMA_PTE_READ|DMA_PTE_WRITE);
+ DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node);
+ new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
if (!new_ce)
goto out_unmap;
@@ -4005,7 +4009,8 @@ int __init intel_iommu_init(void)
* is likely to be much lower than the overhead of synchronizing
* the virtual and physical IOMMU page-tables.
*/
- if (cap_caching_mode(iommu->cap)) {
+ if (cap_caching_mode(iommu->cap) &&
+ !first_level_by_default(IOMMU_DOMAIN_DMA)) {
pr_info_once("IOMMU batching disallowed due to virtualization\n");
iommu_set_dma_strict();
}
@@ -4013,6 +4018,8 @@ int __init intel_iommu_init(void)
intel_iommu_groups,
"%s", iommu->name);
iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
+
+ iommu_pmu_register(iommu);
}
up_read(&dmar_global_lock);
@@ -4136,7 +4143,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid);
+ domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4305,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot);
+ hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
}
static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4340,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
- BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC));
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4346,7 +4354,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
- iommu_iotlb_gather_add_page(domain, gather, iova, size);
+ /*
+ * We do not use page-selective IOTLB invalidation in flush queue,
+ * so there is no need to track page and sync iotlb.
+ */
+ if (!iommu_iotlb_gather_queued(gather))
+ iommu_iotlb_gather_add_page(domain, gather, iova, size);
return size;
}
@@ -4392,7 +4405,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int level = 0;
u64 phys = 0;
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC);
if (pte && dma_pte_present(pte))
phys = dma_pte_addr(pte) +
(iova & (BIT_MASK(level_to_offset_bits(level) +
@@ -4464,8 +4478,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return irq_remapping_enabled == 1;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
@@ -4642,8 +4654,12 @@ static int intel_iommu_enable_sva(struct device *dev)
return -EINVAL;
ret = iopf_queue_add_device(iommu->iopf_queue, dev);
- if (!ret)
- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ if (ret)
+ return ret;
+
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ if (ret)
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
return ret;
}
@@ -4655,8 +4671,12 @@ static int intel_iommu_disable_sva(struct device *dev)
int ret;
ret = iommu_unregister_device_fault_handler(dev);
- if (!ret)
- ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+ if (ret)
+ return ret;
+
+ ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+ if (ret)
+ iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
return ret;
}
@@ -5023,3 +5043,59 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
pasid, qdep, address, mask);
}
}
+
+#define ecmd_get_status_code(res) (((res) & 0xff) >> 1)
+
+/*
+ * Function to submit a command to the enhanced command interface. The
+ * valid enhanced command descriptions are defined in Table 47 of the
+ * VT-d spec. The VT-d hardware implementation may support some but not
+ * all commands, which can be determined by checking the Enhanced
+ * Command Capability Register.
+ *
+ * Return values:
+ * - 0: Command successful without any error;
+ * - Negative: software error value;
+ * - Nonzero positive: failure status code defined in Table 48.
+ */
+int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
+{
+ unsigned long flags;
+ u64 res;
+ int ret;
+
+ if (!cap_ecmds(iommu->cap))
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+
+ res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
+ if (res & DMA_ECMD_ECRSP_IP) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ /*
+ * Unconditionally write the operand B, because
+ * - There is no side effect if an ecmd doesn't require an
+ * operand B, but we set the register to some value.
+ * - It's not invoked in any critical path. The extra MMIO
+ * write doesn't bring any performance concerns.
+ */
+ dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
+ dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
+
+ IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
+ !(res & DMA_ECMD_ECRSP_IP), res);
+
+ if (res & DMA_ECMD_ECRSP_IP) {
+ ret = -ETIMEDOUT;
+ goto err;
+ }
+
+ ret = ecmd_get_status_code(res);
+err:
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ return ret;
+}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 06e61e474856..694ab9b7d3e9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -22,6 +22,7 @@
#include <linux/ioasid.h>
#include <linux/bitfield.h>
#include <linux/xarray.h>
+#include <linux/perf_event.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -125,6 +126,17 @@
#define DMAR_MTRR_PHYSMASK8_REG 0x208
#define DMAR_MTRR_PHYSBASE9_REG 0x210
#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_PERFCAP_REG 0x300
+#define DMAR_PERFCFGOFF_REG 0x310
+#define DMAR_PERFOVFOFF_REG 0x318
+#define DMAR_PERFCNTROFF_REG 0x31c
+#define DMAR_PERFINTRSTS_REG 0x324
+#define DMAR_PERFINTRCTL_REG 0x328
+#define DMAR_PERFEVNTCAP_REG 0x380
+#define DMAR_ECMD_REG 0x400
+#define DMAR_ECEO_REG 0x408
+#define DMAR_ECRSP_REG 0x410
+#define DMAR_ECCAP_REG 0x430
#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */
#define DMAR_VCMD_REG 0xe00 /* Virtual command register */
#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */
@@ -148,6 +160,7 @@
*/
#define cap_esrtps(c) (((c) >> 63) & 1)
#define cap_esirtps(c) (((c) >> 62) & 1)
+#define cap_ecmds(c) (((c) >> 61) & 1)
#define cap_fl5lp_support(c) (((c) >> 60) & 1)
#define cap_pi_support(c) (((c) >> 59) & 1)
#define cap_fl1gp_support(c) (((c) >> 56) & 1)
@@ -179,7 +192,8 @@
* Extended Capability Register
*/
-#define ecap_rps(e) (((e) >> 49) & 0x1)
+#define ecap_pms(e) (((e) >> 51) & 0x1)
+#define ecap_rps(e) (((e) >> 49) & 0x1)
#define ecap_smpwc(e) (((e) >> 48) & 0x1)
#define ecap_flts(e) (((e) >> 47) & 0x1)
#define ecap_slts(e) (((e) >> 46) & 0x1)
@@ -210,6 +224,22 @@
#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf)
#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */
+/*
+ * Decoding Perf Capability Register
+ */
+#define pcap_num_cntr(p) ((p) & 0xffff)
+#define pcap_cntr_width(p) (((p) >> 16) & 0x7f)
+#define pcap_num_event_group(p) (((p) >> 24) & 0x1f)
+#define pcap_filters_mask(p) (((p) >> 32) & 0x1f)
+#define pcap_interrupt(p) (((p) >> 50) & 0x1)
+/* The counter stride is calculated as 2 ^ (x+10) bytes */
+#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10))
+
+/*
+ * Decoding Perf Event Capability Register
+ */
+#define pecap_es(p) ((p) & 0xfffffff)
+
/* Virtual command interface capability */
#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */
@@ -281,6 +311,26 @@
#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+/* ECMD_REG */
+#define DMA_MAX_NUM_ECMD 256
+#define DMA_MAX_NUM_ECMDCAP (DMA_MAX_NUM_ECMD / 64)
+#define DMA_ECMD_REG_STEP 8
+#define DMA_ECMD_ENABLE 0xf0
+#define DMA_ECMD_DISABLE 0xf1
+#define DMA_ECMD_FREEZE 0xf4
+#define DMA_ECMD_UNFREEZE 0xf5
+#define DMA_ECMD_OA_SHIFT 16
+#define DMA_ECMD_ECRSP_IP 0x1
+#define DMA_ECMD_ECCAP3 3
+#define DMA_ECMD_ECCAP3_ECNTS BIT_ULL(48)
+#define DMA_ECMD_ECCAP3_DCNTS BIT_ULL(49)
+#define DMA_ECMD_ECCAP3_FCNTS BIT_ULL(52)
+#define DMA_ECMD_ECCAP3_UFCNTS BIT_ULL(53)
+#define DMA_ECMD_ECCAP3_ESSENTIAL (DMA_ECMD_ECCAP3_ECNTS | \
+ DMA_ECMD_ECCAP3_DCNTS | \
+ DMA_ECMD_ECCAP3_FCNTS | \
+ DMA_ECMD_ECCAP3_UFCNTS)
+
/* FECTL_REG */
#define DMA_FECTL_IM (((u32)1) << 31)
@@ -309,6 +359,9 @@
#define DMA_VCS_PAS ((u64)1)
+/* PERFINTRSTS_REG */
+#define DMA_PERFINTRSTS_PIS ((u32)1)
+
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do { \
cycles_t start_time = get_cycles(); \
@@ -438,6 +491,11 @@ struct q_inval {
int free_cnt;
};
+/* Page Request Queue depth */
+#define PRQ_ORDER 4
+#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
+#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5)
+
struct dmar_pci_notify_info;
#ifdef CONFIG_IRQ_REMAP
@@ -554,6 +612,42 @@ struct dmar_domain {
iommu core */
};
+/*
+ * In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters.
+ * But in practice, there are only 14 counters for the existing
+ * platform. Setting the max number of counters to 64 should be good
+ * enough for a long time. Also, supporting more than 64 counters
+ * requires more extras, e.g., extra freeze and overflow registers,
+ * which is not necessary for now.
+ */
+#define IOMMU_PMU_IDX_MAX 64
+
+struct iommu_pmu {
+ struct intel_iommu *iommu;
+ u32 num_cntr; /* Number of counters */
+ u32 num_eg; /* Number of event group */
+ u32 cntr_width; /* Counter width */
+ u32 cntr_stride; /* Counter Stride */
+ u32 filter; /* Bitmask of filter support */
+ void __iomem *base; /* the PerfMon base address */
+ void __iomem *cfg_reg; /* counter configuration base address */
+ void __iomem *cntr_reg; /* counter 0 address*/
+ void __iomem *overflow; /* overflow status register */
+
+ u64 *evcap; /* Indicates all supported events */
+ u32 **cntr_evcap; /* Supported events of each counter. */
+
+ struct pmu pmu;
+ DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
+ struct perf_event *event_list[IOMMU_PMU_IDX_MAX];
+ unsigned char irq_name[16];
+ struct hlist_node cpuhp_node;
+ int cpu;
+};
+
+#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
+#define IOMMU_IRQ_ID_OFFSET_PERF (2 * DMAR_UNITS_SUPPORTED)
+
struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 reg_phys; /* physical address of hw register set */
@@ -561,12 +655,13 @@ struct intel_iommu {
u64 cap;
u64 ecap;
u64 vccap;
+ u64 ecmdcap[DMA_MAX_NUM_ECMDCAP];
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
raw_spinlock_t register_lock; /* protect register handling */
int seq_id; /* sequence id of the iommu */
int agaw; /* agaw of this iommu */
int msagaw; /* max sagaw of this iommu */
- unsigned int irq, pr_irq;
+ unsigned int irq, pr_irq, perf_irq;
u16 segment; /* PCI segment# */
unsigned char name[13]; /* Device Name */
@@ -600,6 +695,8 @@ struct intel_iommu {
struct dmar_drhd_unit *drhd;
void *perf_statistic;
+
+ struct iommu_pmu *pmu;
};
/* PCI domain-device relationship */
@@ -737,7 +834,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
extern int dmar_ir_support(void);
-void *alloc_pgtable_page(int node);
+void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
@@ -756,19 +853,13 @@ struct intel_svm_dev {
struct rcu_head rcu;
struct device *dev;
struct intel_iommu *iommu;
- struct iommu_sva sva;
- u32 pasid;
- int users;
u16 did;
- u16 dev_iotlb:1;
u16 sid, qdep;
};
struct intel_svm {
struct mmu_notifier notifier;
struct mm_struct *mm;
-
- unsigned int flags;
u32 pasid;
struct list_head devs;
};
@@ -800,6 +891,14 @@ extern const struct iommu_ops intel_iommu_ops;
extern int intel_iommu_sm;
extern int iommu_calculate_agaw(struct intel_iommu *iommu);
extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
+int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob);
+
+static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu)
+{
+ return (iommu->ecmdcap[DMA_ECMD_ECCAP3] & DMA_ECMD_ECCAP3_ESSENTIAL) ==
+ DMA_ECMD_ECCAP3_ESSENTIAL;
+}
+
extern int dmar_disabled;
extern int intel_iommu_enabled;
#else
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f58f5f57af78..df9e261af0b5 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -311,14 +311,12 @@ static int set_ioapic_sid(struct irte *irte, int apic)
if (!irte)
return -1;
- down_read(&dmar_global_lock);
for (i = 0; i < MAX_IO_APICS; i++) {
if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
break;
}
}
- up_read(&dmar_global_lock);
if (sid == 0) {
pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic);
@@ -338,14 +336,12 @@ static int set_hpet_sid(struct irte *irte, u8 id)
if (!irte)
return -1;
- down_read(&dmar_global_lock);
for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
break;
}
}
- up_read(&dmar_global_lock);
if (sid == 0) {
pr_warn("Failed to set source-id of HPET block (%d)\n", id);
@@ -573,7 +569,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (cap_caching_mode(iommu->cap))
iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
@@ -1338,9 +1335,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
if (!data)
goto out_free_parent;
- down_read(&dmar_global_lock);
index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
- up_read(&dmar_global_lock);
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
kfree(data);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb3c7020028d..633e0a4a01e7 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -200,7 +203,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node);
+ entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
@@ -215,6 +218,10 @@ retry:
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];
@@ -365,6 +372,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
}
/*
+ * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID
+ * entry. It is required when XD bit of the first level page table
+ * entry is about to be set.
+ */
+static inline void pasid_set_nxe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5);
+}
+
+/*
* Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
* PASID entry.
*/
@@ -557,6 +574,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ pasid_set_nxe(pte);
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
new file mode 100644
index 000000000000..cf43e798eca4
--- /dev/null
+++ b/drivers/iommu/intel/perfmon.c
@@ -0,0 +1,897 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support Intel IOMMU PerfMon
+ * Copyright(c) 2023 Intel Corporation.
+ */
+#define pr_fmt(fmt) "DMAR: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/dmar.h>
+#include "iommu.h"
+#include "perfmon.h"
+
+PMU_FORMAT_ATTR(event, "config:0-27"); /* ES: Events Select */
+PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */
+
+static struct attribute *iommu_pmu_format_attrs[] = {
+ &format_attr_event_group.attr,
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group iommu_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = iommu_pmu_format_attrs,
+};
+
+/* The available events are added in attr_update later */
+static struct attribute *attrs_empty[] = {
+ NULL
+};
+
+static struct attribute_group iommu_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = attrs_empty,
+};
+
+static cpumask_t iommu_pmu_cpu_mask;
+
+static ssize_t
+cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *iommu_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group iommu_pmu_cpumask_attr_group = {
+ .attrs = iommu_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *iommu_pmu_attr_groups[] = {
+ &iommu_pmu_format_attr_group,
+ &iommu_pmu_events_attr_group,
+ &iommu_pmu_cpumask_attr_group,
+ NULL
+};
+
+static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
+{
+ /*
+ * The perf_event creates its own dev for each PMU.
+ * See pmu_dev_alloc()
+ */
+ return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
+}
+
+#define IOMMU_PMU_ATTR(_name, _format, _filter) \
+ PMU_FORMAT_ATTR(_name, _format); \
+ \
+static struct attribute *_name##_attr[] = { \
+ &format_attr_##_name.attr, \
+ NULL \
+}; \
+ \
+static umode_t \
+_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \
+{ \
+ struct device *dev = kobj_to_dev(kobj); \
+ struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \
+ \
+ if (!iommu_pmu) \
+ return 0; \
+ return (iommu_pmu->filter & _filter) ? attr->mode : 0; \
+} \
+ \
+static struct attribute_group _name = { \
+ .name = "format", \
+ .attrs = _name##_attr, \
+ .is_visible = _name##_is_visible, \
+};
+
+IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID);
+IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN);
+IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID);
+IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS);
+IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE);
+IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID);
+IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN);
+IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID);
+IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS);
+IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE);
+
+#define iommu_pmu_en_requester_id(e) ((e) & 0x1)
+#define iommu_pmu_en_domain(e) (((e) >> 1) & 0x1)
+#define iommu_pmu_en_pasid(e) (((e) >> 2) & 0x1)
+#define iommu_pmu_en_ats(e) (((e) >> 3) & 0x1)
+#define iommu_pmu_en_page_table(e) (((e) >> 4) & 0x1)
+#define iommu_pmu_get_requester_id(filter) (((filter) >> 16) & 0xffff)
+#define iommu_pmu_get_domain(filter) (((filter) >> 32) & 0xffff)
+#define iommu_pmu_get_pasid(filter) ((filter) & 0x3fffff)
+#define iommu_pmu_get_ats(filter) (((filter) >> 24) & 0x1f)
+#define iommu_pmu_get_page_table(filter) (((filter) >> 32) & 0x1f)
+
+#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \
+{ \
+ if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \
+ dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
+ IOMMU_PMU_CFG_SIZE + \
+ (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
+ iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\
+ } \
+}
+
+#define iommu_pmu_clear_filter(_filter, _idx) \
+{ \
+ if (iommu_pmu->filter & _filter) { \
+ dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
+ IOMMU_PMU_CFG_SIZE + \
+ (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
+ 0); \
+ } \
+}
+
+/*
+ * Define the event attr related functions
+ * Input: _name: event attr name
+ * _string: string of the event in sysfs
+ * _g_idx: event group encoding
+ * _event: event encoding
+ */
+#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \
+ PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \
+ \
+static struct attribute *_name##_attr[] = { \
+ &event_attr_##_name.attr.attr, \
+ NULL \
+}; \
+ \
+static umode_t \
+_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \
+{ \
+ struct device *dev = kobj_to_dev(kobj); \
+ struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \
+ \
+ if (!iommu_pmu) \
+ return 0; \
+ return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0; \
+} \
+ \
+static struct attribute_group _name = { \
+ .name = "events", \
+ .attrs = _name##_attr, \
+ .is_visible = _name##_is_visible, \
+};
+
+IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001)
+IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002)
+IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004)
+IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008)
+IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001)
+IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020)
+IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040)
+IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001)
+IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002)
+IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004)
+IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008)
+IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010)
+IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020)
+IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040)
+IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080)
+IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100)
+IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200)
+IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001)
+IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002)
+IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004)
+IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008)
+IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001)
+IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002)
+IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004)
+
+static const struct attribute_group *iommu_pmu_attr_update[] = {
+ &filter_requester_id_en,
+ &filter_domain_en,
+ &filter_pasid_en,
+ &filter_ats_en,
+ &filter_page_table_en,
+ &filter_requester_id,
+ &filter_domain,
+ &filter_pasid,
+ &filter_ats,
+ &filter_page_table,
+ &iommu_clocks,
+ &iommu_requests,
+ &pw_occupancy,
+ &ats_blocked,
+ &iommu_mrds,
+ &iommu_mem_blocked,
+ &pg_req_posted,
+ &ctxt_cache_lookup,
+ &ctxt_cache_hit,
+ &pasid_cache_lookup,
+ &pasid_cache_hit,
+ &ss_nonleaf_lookup,
+ &ss_nonleaf_hit,
+ &fs_nonleaf_lookup,
+ &fs_nonleaf_hit,
+ &hpt_nonleaf_lookup,
+ &hpt_nonleaf_hit,
+ &iotlb_lookup,
+ &iotlb_hit,
+ &hpt_leaf_lookup,
+ &hpt_leaf_hit,
+ &int_cache_lookup,
+ &int_cache_hit_nonposted,
+ &int_cache_hit_posted,
+ NULL
+};
+
+static inline void __iomem *
+iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
+{
+ return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
+}
+
+static inline void __iomem *
+iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
+{
+ return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
+}
+
+static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct iommu_pmu, pmu);
+}
+
+static inline u64 iommu_event_config(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
+ (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
+ IOMMU_EVENT_CFG_INT;
+}
+
+static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
+ struct perf_event *event)
+{
+ return event->pmu == &iommu_pmu->pmu;
+}
+
+static int iommu_pmu_validate_event(struct perf_event *event)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ u32 event_group = iommu_event_group(event->attr.config);
+
+ if (event_group >= iommu_pmu->num_eg)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int iommu_pmu_validate_group(struct perf_event *event)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ struct perf_event *sibling;
+ int nr = 0;
+
+ /*
+ * All events in a group must be scheduled simultaneously.
+ * Check whether there is enough counters for all the events.
+ */
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
+ sibling->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (++nr > iommu_pmu->num_cntr)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int iommu_pmu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* sampling not supported */
+ if (event->attr.sample_period)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ if (iommu_pmu_validate_event(event))
+ return -EINVAL;
+
+ hwc->config = iommu_event_config(event);
+
+ return iommu_pmu_validate_group(event);
+}
+
+static void iommu_pmu_event_update(struct perf_event *event)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_count, new_count, delta;
+ int shift = 64 - iommu_pmu->cntr_width;
+
+again:
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
+ if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
+ goto again;
+
+ /*
+ * The counter width is enumerated. Always shift the counter
+ * before using it.
+ */
+ delta = (new_count << shift) - (prev_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+}
+
+static void iommu_pmu_start(struct perf_event *event, int flags)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ struct intel_iommu *iommu = iommu_pmu->iommu;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* Always reprogram the period */
+ count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
+ local64_set((&hwc->prev_count), count);
+
+ /*
+ * The error of ecmd will be ignored.
+ * - The existing perf_event subsystem doesn't handle the error.
+ * Only IOMMU PMU returns runtime HW error. We don't want to
+ * change the existing generic interfaces for the specific case.
+ * - It's a corner case caused by HW, which is very unlikely to
+ * happen. There is nothing SW can do.
+ * - The worst case is that the user will get <not count> with
+ * perf command, which can give the user some hints.
+ */
+ ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);
+
+ perf_event_update_userpage(event);
+}
+
+static void iommu_pmu_stop(struct perf_event *event, int flags)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ struct intel_iommu *iommu = iommu_pmu->iommu;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);
+
+ iommu_pmu_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static inline int
+iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
+ int idx, struct perf_event *event)
+{
+ u32 event_group = iommu_event_group(event->attr.config);
+ u32 select = iommu_event_select(event->attr.config);
+
+ if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ /*
+ * The counters which support limited events are usually at the end.
+ * Schedule them first to accommodate more events.
+ */
+ for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
+ if (test_and_set_bit(idx, iommu_pmu->used_mask))
+ continue;
+ /* Check per-counter event capabilities */
+ if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
+ break;
+ clear_bit(idx, iommu_pmu->used_mask);
+ }
+ if (idx < 0)
+ return -EINVAL;
+
+ iommu_pmu->event_list[idx] = event;
+ hwc->idx = idx;
+
+ /* config events */
+ dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);
+
+ iommu_pmu_set_filter(requester_id, event->attr.config1,
+ IOMMU_PMU_FILTER_REQUESTER_ID, idx,
+ event->attr.config1);
+ iommu_pmu_set_filter(domain, event->attr.config1,
+ IOMMU_PMU_FILTER_DOMAIN, idx,
+ event->attr.config1);
+ iommu_pmu_set_filter(pasid, event->attr.config1,
+ IOMMU_PMU_FILTER_PASID, idx,
+ event->attr.config1);
+ iommu_pmu_set_filter(ats, event->attr.config2,
+ IOMMU_PMU_FILTER_ATS, idx,
+ event->attr.config1);
+ iommu_pmu_set_filter(page_table, event->attr.config2,
+ IOMMU_PMU_FILTER_PAGE_TABLE, idx,
+ event->attr.config1);
+
+ return 0;
+}
+
+static int iommu_pmu_add(struct perf_event *event, int flags)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ struct hw_perf_event *hwc = &event->hw;
+ int ret;
+
+ ret = iommu_pmu_assign_event(iommu_pmu, event);
+ if (ret < 0)
+ return ret;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ iommu_pmu_start(event, 0);
+
+ return 0;
+}
+
+static void iommu_pmu_del(struct perf_event *event, int flags)
+{
+ struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
+ int idx = event->hw.idx;
+
+ iommu_pmu_stop(event, PERF_EF_UPDATE);
+
+ iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
+ iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
+ iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
+ iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
+ iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);
+
+ iommu_pmu->event_list[idx] = NULL;
+ event->hw.idx = -1;
+ clear_bit(idx, iommu_pmu->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static void iommu_pmu_enable(struct pmu *pmu)
+{
+ struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
+ struct intel_iommu *iommu = iommu_pmu->iommu;
+
+ ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
+}
+
+static void iommu_pmu_disable(struct pmu *pmu)
+{
+ struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
+ struct intel_iommu *iommu = iommu_pmu->iommu;
+
+ ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
+}
+
+static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
+{
+ struct perf_event *event;
+ u64 status;
+ int i;
+
+ /*
+ * Two counters may be overflowed very close. Always check
+ * whether there are more to handle.
+ */
+ while ((status = dmar_readq(iommu_pmu->overflow))) {
+ for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
+ /*
+ * Find the assigned event of the counter.
+ * Accumulate the value into the event->count.
+ */
+ event = iommu_pmu->event_list[i];
+ if (!event) {
+ pr_warn_once("Cannot find the assigned event for counter %d\n", i);
+ continue;
+ }
+ iommu_pmu_event_update(event);
+ }
+
+ dmar_writeq(iommu_pmu->overflow, status);
+ }
+}
+
+static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
+{
+ struct intel_iommu *iommu = dev_id;
+
+ if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
+ return IRQ_NONE;
+
+ iommu_pmu_counter_overflow(iommu->pmu);
+
+ /* Clear the status bit */
+ dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);
+
+ return IRQ_HANDLED;
+}
+
+static int __iommu_pmu_register(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ iommu_pmu->pmu.name = iommu->name;
+ iommu_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ iommu_pmu->pmu.event_init = iommu_pmu_event_init;
+ iommu_pmu->pmu.pmu_enable = iommu_pmu_enable;
+ iommu_pmu->pmu.pmu_disable = iommu_pmu_disable;
+ iommu_pmu->pmu.add = iommu_pmu_add;
+ iommu_pmu->pmu.del = iommu_pmu_del;
+ iommu_pmu->pmu.start = iommu_pmu_start;
+ iommu_pmu->pmu.stop = iommu_pmu_stop;
+ iommu_pmu->pmu.read = iommu_pmu_event_update;
+ iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
+ iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
+ iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ iommu_pmu->pmu.module = THIS_MODULE;
+
+ return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
+}
+
+static inline void __iomem *
+get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
+{
+ u32 off = dmar_readl(iommu->reg + offset);
+
+ return iommu->reg + off;
+}
+
+int alloc_iommu_pmu(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu;
+ int i, j, ret;
+ u64 perfcap;
+ u32 cap;
+
+ if (!ecap_pms(iommu->ecap))
+ return 0;
+
+ /* The IOMMU PMU requires the ECMD support as well */
+ if (!cap_ecmds(iommu->cap))
+ return -ENODEV;
+
+ perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
+ /* The performance monitoring is not supported. */
+ if (!perfcap)
+ return -ENODEV;
+
+ /* Sanity check for the number of the counters and event groups */
+ if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
+ return -ENODEV;
+
+ /* The interrupt on overflow is required */
+ if (!pcap_interrupt(perfcap))
+ return -ENODEV;
+
+ /* Check required Enhanced Command Capability */
+ if (!ecmd_has_pmu_essential(iommu))
+ return -ENODEV;
+
+ iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
+ if (!iommu_pmu)
+ return -ENOMEM;
+
+ iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
+ if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
+ pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
+ iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
+ iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
+ }
+
+ iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
+ iommu_pmu->filter = pcap_filters_mask(perfcap);
+ iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
+ iommu_pmu->num_eg = pcap_num_event_group(perfcap);
+
+ iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
+ if (!iommu_pmu->evcap) {
+ ret = -ENOMEM;
+ goto free_pmu;
+ }
+
+ /* Parse event group capabilities */
+ for (i = 0; i < iommu_pmu->num_eg; i++) {
+ u64 pcap;
+
+ pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
+ i * IOMMU_PMU_CAP_REGS_STEP);
+ iommu_pmu->evcap[i] = pecap_es(pcap);
+ }
+
+ iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
+ if (!iommu_pmu->cntr_evcap) {
+ ret = -ENOMEM;
+ goto free_pmu_evcap;
+ }
+ for (i = 0; i < iommu_pmu->num_cntr; i++) {
+ iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
+ if (!iommu_pmu->cntr_evcap[i]) {
+ ret = -ENOMEM;
+ goto free_pmu_cntr_evcap;
+ }
+ /*
+ * Set to the global capabilities, will adjust according
+ * to per-counter capabilities later.
+ */
+ for (j = 0; j < iommu_pmu->num_eg; j++)
+ iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
+ }
+
+ iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
+ iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
+ iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);
+
+ /*
+ * Check per-counter capabilities. All counters should have the
+ * same capabilities on Interrupt on Overflow Support and Counter
+ * Width.
+ */
+ for (i = 0; i < iommu_pmu->num_cntr; i++) {
+ cap = dmar_readl(iommu_pmu->cfg_reg +
+ i * IOMMU_PMU_CFG_OFFSET +
+ IOMMU_PMU_CFG_CNTRCAP_OFFSET);
+ if (!iommu_cntrcap_pcc(cap))
+ continue;
+
+ /*
+ * It's possible that some counters have a different
+ * capability because of e.g., HW bug. Check the corner
+ * case here and simply drop those counters.
+ */
+ if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
+ !iommu_cntrcap_ios(cap)) {
+ iommu_pmu->num_cntr = i;
+ pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
+ iommu_pmu->num_cntr);
+ }
+
+ /* Clear the pre-defined events group */
+ for (j = 0; j < iommu_pmu->num_eg; j++)
+ iommu_pmu->cntr_evcap[i][j] = 0;
+
+ /* Override with per-counter event capabilities */
+ for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
+ cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
+ IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
+ (j * IOMMU_PMU_OFF_REGS_STEP));
+ iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
+ /*
+ * Some events may only be supported by a specific counter.
+ * Track them in the evcap as well.
+ */
+ iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
+ }
+ }
+
+ iommu_pmu->iommu = iommu;
+ iommu->pmu = iommu_pmu;
+
+ return 0;
+
+free_pmu_cntr_evcap:
+ for (i = 0; i < iommu_pmu->num_cntr; i++)
+ kfree(iommu_pmu->cntr_evcap[i]);
+ kfree(iommu_pmu->cntr_evcap);
+free_pmu_evcap:
+ kfree(iommu_pmu->evcap);
+free_pmu:
+ kfree(iommu_pmu);
+
+ return ret;
+}
+
+void free_iommu_pmu(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
+ return;
+
+ if (iommu_pmu->evcap) {
+ int i;
+
+ for (i = 0; i < iommu_pmu->num_cntr; i++)
+ kfree(iommu_pmu->cntr_evcap[i]);
+ kfree(iommu_pmu->cntr_evcap);
+ }
+ kfree(iommu_pmu->evcap);
+ kfree(iommu_pmu);
+ iommu->pmu = NULL;
+}
+
+static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+ int irq, ret;
+
+ irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
+ if (irq <= 0)
+ return -EINVAL;
+
+ snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);
+
+ iommu->perf_irq = irq;
+ ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
+ IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
+ if (ret) {
+ dmar_free_hwirq(irq);
+ iommu->perf_irq = 0;
+ return ret;
+ }
+ return 0;
+}
+
+static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
+{
+ if (!iommu->perf_irq)
+ return;
+
+ free_irq(iommu->perf_irq, iommu);
+ dmar_free_hwirq(iommu->perf_irq);
+ iommu->perf_irq = 0;
+}
+
+static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+
+ if (cpumask_empty(&iommu_pmu_cpu_mask))
+ cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
+
+ if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
+ iommu_pmu->cpu = cpu;
+
+ return 0;
+}
+
+static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+ int target = cpumask_first(&iommu_pmu_cpu_mask);
+
+ /*
+ * The iommu_pmu_cpu_mask has been updated when offline the CPU
+ * for the first iommu_pmu. Migrate the other iommu_pmu to the
+ * new target.
+ */
+ if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
+ perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+ iommu_pmu->cpu = target;
+ return 0;
+ }
+
+ if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
+ else
+ return 0;
+
+ perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+ iommu_pmu->cpu = target;
+
+ return 0;
+}
+
+static int nr_iommu_pmu;
+static enum cpuhp_state iommu_cpuhp_slot;
+
+static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
+{
+ int ret;
+
+ if (!nr_iommu_pmu) {
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "driver/iommu/intel/perfmon:online",
+ iommu_pmu_cpu_online,
+ iommu_pmu_cpu_offline);
+ if (ret < 0)
+ return ret;
+ iommu_cpuhp_slot = ret;
+ }
+
+ ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+ if (ret) {
+ if (!nr_iommu_pmu)
+ cpuhp_remove_multi_state(iommu_cpuhp_slot);
+ return ret;
+ }
+ nr_iommu_pmu++;
+
+ return 0;
+}
+
+static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
+{
+ cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+
+ if (--nr_iommu_pmu)
+ return;
+
+ cpuhp_remove_multi_state(iommu_cpuhp_slot);
+}
+
+void iommu_pmu_register(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
+ return;
+
+ if (__iommu_pmu_register(iommu))
+ goto err;
+
+ if (iommu_pmu_cpuhp_setup(iommu_pmu))
+ goto unregister;
+
+ /* Set interrupt for overflow */
+ if (iommu_pmu_set_interrupt(iommu))
+ goto cpuhp_free;
+
+ return;
+
+cpuhp_free:
+ iommu_pmu_cpuhp_free(iommu_pmu);
+unregister:
+ perf_pmu_unregister(&iommu_pmu->pmu);
+err:
+ pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
+ free_iommu_pmu(iommu);
+}
+
+void iommu_pmu_unregister(struct intel_iommu *iommu)
+{
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
+ return;
+
+ iommu_pmu_unset_interrupt(iommu);
+ iommu_pmu_cpuhp_free(iommu_pmu);
+ perf_pmu_unregister(&iommu_pmu->pmu);
+}
diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h
new file mode 100644
index 000000000000..58606af9a2b9
--- /dev/null
+++ b/drivers/iommu/intel/perfmon.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * PERFCFGOFF_REG, PERFFRZOFF_REG
+ * PERFOVFOFF_REG, PERFCNTROFF_REG
+ */
+#define IOMMU_PMU_NUM_OFF_REGS 4
+#define IOMMU_PMU_OFF_REGS_STEP 4
+
+#define IOMMU_PMU_FILTER_REQUESTER_ID 0x01
+#define IOMMU_PMU_FILTER_DOMAIN 0x02
+#define IOMMU_PMU_FILTER_PASID 0x04
+#define IOMMU_PMU_FILTER_ATS 0x08
+#define IOMMU_PMU_FILTER_PAGE_TABLE 0x10
+
+#define IOMMU_PMU_FILTER_EN BIT(31)
+
+#define IOMMU_PMU_CFG_OFFSET 0x100
+#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80
+#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84
+#define IOMMU_PMU_CFG_SIZE 0x8
+#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4
+
+#define IOMMU_PMU_CAP_REGS_STEP 8
+
+#define iommu_cntrcap_pcc(p) ((p) & 0x1)
+#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff)
+#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1)
+#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf)
+
+#define IOMMU_EVENT_CFG_EGI_SHIFT 8
+#define IOMMU_EVENT_CFG_ES_SHIFT 32
+#define IOMMU_EVENT_CFG_INT BIT_ULL(1)
+
+#define iommu_event_select(p) ((p) & 0xfffffff)
+#define iommu_event_group(p) (((p) >> 28) & 0xf)
+
+#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS
+int alloc_iommu_pmu(struct intel_iommu *iommu);
+void free_iommu_pmu(struct intel_iommu *iommu);
+void iommu_pmu_register(struct intel_iommu *iommu);
+void iommu_pmu_unregister(struct intel_iommu *iommu);
+#else
+static inline int
+alloc_iommu_pmu(struct intel_iommu *iommu)
+{
+ return 0;
+}
+
+static inline void
+free_iommu_pmu(struct intel_iommu *iommu)
+{
+}
+
+static inline void
+iommu_pmu_register(struct intel_iommu *iommu)
+{
+}
+
+static inline void
+iommu_pmu_unregister(struct intel_iommu *iommu)
+{
+}
+#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c76b66263467..7367f56c3bad 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -9,7 +9,6 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
-#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
@@ -79,7 +78,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
}
iommu->prq = page_address(pages);
- irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
+ irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
@@ -299,9 +298,8 @@ out:
return 0;
}
-static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
- struct device *dev,
- struct mm_struct *mm)
+static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
+ struct mm_struct *mm)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_svm_dev *sdev;
@@ -313,7 +311,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
if (!svm) {
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
svm->pasid = mm->pasid;
svm->mm = mm;
@@ -323,24 +321,17 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
kfree(svm);
- return ERR_PTR(ret);
+ return ret;
}
ret = pasid_private_add(svm->pasid, svm);
if (ret) {
mmu_notifier_unregister(&svm->notifier, mm);
kfree(svm);
- return ERR_PTR(ret);
+ return ret;
}
}
- /* Find the matching device in svm list */
- sdev = svm_lookup_device_by_dev(svm, dev);
- if (sdev) {
- sdev->users++;
- goto success;
- }
-
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
@@ -351,12 +342,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
sdev->iommu = iommu;
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
- sdev->users = 1;
- sdev->pasid = svm->pasid;
- sdev->sva.dev = dev;
init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
- sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
sdev->qdep = 0;
@@ -370,8 +357,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
goto free_sdev;
list_add_rcu(&sdev->list, &svm->devs);
-success:
- return &sdev->sva;
+
+ return 0;
free_sdev:
kfree(sdev);
@@ -382,7 +369,7 @@ free_svm:
kfree(svm);
}
- return ERR_PTR(ret);
+ return ret;
}
/* Caller must hold pasid_mutex */
@@ -404,32 +391,32 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
mm = svm->mm;
if (sdev) {
- sdev->users--;
- if (!sdev->users) {
- list_del_rcu(&sdev->list);
- /* Flush the PASID cache and IOTLB for this device.
- * Note that we do depend on the hardware *not* using
- * the PASID any more. Just as we depend on other
- * devices never using PASIDs that they have no right
- * to use. We have a *shared* PASID table, because it's
- * large and has to be physically contiguous. So it's
- * hard to be as defensive as we might like. */
- intel_pasid_tear_down_entry(iommu, dev,
- svm->pasid, false);
- intel_svm_drain_prq(dev, svm->pasid);
- kfree_rcu(sdev, rcu);
-
- if (list_empty(&svm->devs)) {
- if (svm->notifier.ops)
- mmu_notifier_unregister(&svm->notifier, mm);
- pasid_private_remove(svm->pasid);
- /* We mandate that no page faults may be outstanding
- * for the PASID when intel_svm_unbind_mm() is called.
- * If that is not obeyed, subtle errors will happen.
- * Let's make them less subtle... */
- memset(svm, 0x6b, sizeof(*svm));
- kfree(svm);
- }
+ list_del_rcu(&sdev->list);
+ /*
+ * Flush the PASID cache and IOTLB for this device.
+ * Note that we do depend on the hardware *not* using
+ * the PASID any more. Just as we depend on other
+ * devices never using PASIDs that they have no right
+ * to use. We have a *shared* PASID table, because it's
+ * large and has to be physically contiguous. So it's
+ * hard to be as defensive as we might like.
+ */
+ intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ pasid_private_remove(svm->pasid);
+ /*
+ * We mandate that no page faults may be outstanding
+ * for the PASID when intel_svm_unbind_mm() is called.
+ * If that is not obeyed, subtle errors will happen.
+ * Let's make them less subtle...
+ */
+ memset(svm, 0x6b, sizeof(*svm));
+ kfree(svm);
}
}
out:
@@ -854,13 +841,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
- struct iommu_sva *sva;
- int ret = 0;
+ int ret;
mutex_lock(&pasid_mutex);
- sva = intel_svm_bind_mm(iommu, dev, mm);
- if (IS_ERR(sva))
- ret = PTR_ERR(sva);
+ ret = intel_svm_bind_mm(iommu, dev, mm);
mutex_unlock(&pasid_mutex);
return ret;
diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c
index 1e9ca7789de1..23416bf76df9 100644
--- a/drivers/iommu/iommu-traces.c
+++ b/drivers/iommu/iommu-traces.c
@@ -18,7 +18,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group);
/* iommu_device_event */
EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain);
-EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain);
/* iommu_map_unmap */
EXPORT_TRACEPOINT_SYMBOL_GPL(map);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5f6a85aea501..10db680acaed 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -30,6 +30,7 @@
#include <linux/cc_platform.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
+#include <linux/msi.h>
#include "dma-iommu.h"
@@ -371,6 +372,30 @@ err_unlock:
return ret;
}
+static bool iommu_is_attach_deferred(struct device *dev)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+
+ if (ops->is_attach_deferred)
+ return ops->is_attach_deferred(dev);
+
+ return false;
+}
+
+static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+
+ lockdep_assert_held(&dev->iommu_group->mutex);
+
+ if (iommu_is_attach_deferred(dev)) {
+ dev->iommu->attach_deferred = 1;
+ return 0;
+ }
+
+ return __iommu_attach_device(domain, dev);
+}
+
int iommu_probe_device(struct device *dev)
{
const struct iommu_ops *ops;
@@ -401,7 +426,7 @@ int iommu_probe_device(struct device *dev)
* attach the default domain.
*/
if (group->default_domain && !group->owner) {
- ret = __iommu_attach_device(group->default_domain, dev);
+ ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
if (ret) {
mutex_unlock(&group->mutex);
iommu_group_put(group);
@@ -774,12 +799,16 @@ struct iommu_group *iommu_group_alloc(void)
ret = iommu_group_create_file(group,
&iommu_group_attr_reserved_regions);
- if (ret)
+ if (ret) {
+ kobject_put(group->devices_kobj);
return ERR_PTR(ret);
+ }
ret = iommu_group_create_file(group, &iommu_group_attr_type);
- if (ret)
+ if (ret) {
+ kobject_put(group->devices_kobj);
return ERR_PTR(ret);
+ }
pr_debug("Allocated group %d\n", group->id);
@@ -930,7 +959,7 @@ map_end:
if (map_size) {
ret = iommu_map(domain, addr - map_size,
addr - map_size, map_size,
- entry->prot);
+ entry->prot, GFP_KERNEL);
if (ret)
goto out;
map_size = 0;
@@ -947,16 +976,6 @@ out:
return ret;
}
-static bool iommu_is_attach_deferred(struct device *dev)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->is_attach_deferred)
- return ops->is_attach_deferred(dev);
-
- return false;
-}
-
/**
* iommu_group_add_device - add a device to an iommu group
* @group: the group into which to add the device (reference should be held)
@@ -1009,8 +1028,8 @@ rename:
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
- if (group->domain && !iommu_is_attach_deferred(dev))
- ret = __iommu_attach_device(group->domain, dev);
+ if (group->domain)
+ ret = iommu_group_do_dma_first_attach(dev, group->domain);
mutex_unlock(&group->mutex);
if (ret)
goto err_put_group;
@@ -1776,21 +1795,10 @@ static void probe_alloc_default_domain(struct bus_type *bus,
}
-static int iommu_group_do_dma_attach(struct device *dev, void *data)
-{
- struct iommu_domain *domain = data;
- int ret = 0;
-
- if (!iommu_is_attach_deferred(dev))
- ret = __iommu_attach_device(domain, dev);
-
- return ret;
-}
-
-static int __iommu_group_dma_attach(struct iommu_group *group)
+static int __iommu_group_dma_first_attach(struct iommu_group *group)
{
return __iommu_group_for_each_dev(group, group->default_domain,
- iommu_group_do_dma_attach);
+ iommu_group_do_dma_first_attach);
}
static int iommu_group_do_probe_finalize(struct device *dev, void *data)
@@ -1855,7 +1863,7 @@ int bus_iommu_probe(struct bus_type *bus)
iommu_group_create_direct_mappings(group);
- ret = __iommu_group_dma_attach(group);
+ ret = __iommu_group_dma_first_attach(group);
mutex_unlock(&group->mutex);
@@ -1898,6 +1906,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
EXPORT_SYMBOL_GPL(device_iommu_capable);
/**
+ * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
+ * for a group
+ * @group: Group to query
+ *
+ * IOMMU groups should not have differing values of
+ * msi_device_has_isolated_msi() for devices in a group. However nothing
+ * directly prevents this, so ensure mistakes don't result in isolation failures
+ * by checking that all the devices are the same.
+ */
+bool iommu_group_has_isolated_msi(struct iommu_group *group)
+{
+ struct group_device *group_dev;
+ bool ret = true;
+
+ mutex_lock(&group->mutex);
+ list_for_each_entry(group_dev, &group->devices, list)
+ ret &= msi_device_has_isolated_msi(group_dev->dev);
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
+
+/**
* iommu_set_fault_handler() - set a fault handler for an iommu domain
* @domain: iommu domain
* @handler: fault handler
@@ -1987,9 +2018,11 @@ static int __iommu_attach_device(struct iommu_domain *domain,
return -ENODEV;
ret = domain->ops->attach_dev(domain, dev);
- if (!ret)
- trace_attach_device_to_domain(dev);
- return ret;
+ if (ret)
+ return ret;
+ dev->iommu->attach_deferred = 0;
+ trace_attach_device_to_domain(dev);
+ return 0;
}
/**
@@ -2034,22 +2067,12 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
{
- if (iommu_is_attach_deferred(dev))
+ if (dev->iommu && dev->iommu->attach_deferred)
return __iommu_attach_device(domain, dev);
return 0;
}
-static void __iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
-{
- if (iommu_is_attach_deferred(dev))
- return;
-
- domain->ops->detach_dev(domain, dev);
- trace_detach_device_from_domain(dev);
-}
-
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
struct iommu_group *group;
@@ -2124,8 +2147,22 @@ static int __iommu_attach_group(struct iommu_domain *domain,
ret = __iommu_group_for_each_dev(group, domain,
iommu_group_do_attach_device);
- if (ret == 0)
+ if (ret == 0) {
group->domain = domain;
+ } else {
+ /*
+ * To recover from the case when certain device within the
+ * group fails to attach to the new domain, we need force
+ * attaching all devices back to the old domain. The old
+ * domain is compatible for all devices in the group,
+ * hence the iommu driver should always return success.
+ */
+ struct iommu_domain *old_domain = group->domain;
+
+ group->domain = NULL;
+ WARN(__iommu_group_set_domain(group, old_domain),
+ "iommu driver failed to attach a compatible domain");
+ }
return ret;
}
@@ -2154,11 +2191,12 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
-static int iommu_group_do_detach_device(struct device *dev, void *data)
+static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
{
- struct iommu_domain *domain = data;
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
- __iommu_detach_device(domain, dev);
+ if (!WARN_ON(!ops->set_platform_dma_ops))
+ ops->set_platform_dma_ops(dev);
return 0;
}
@@ -2172,15 +2210,13 @@ static int __iommu_group_set_domain(struct iommu_group *group,
return 0;
/*
- * New drivers should support default domains and so the detach_dev() op
- * will never be called. Otherwise the NULL domain represents some
+ * New drivers should support default domains, so set_platform_dma()
+ * op will never be called. Otherwise the NULL domain represents some
* platform specific behavior.
*/
if (!new_domain) {
- if (WARN_ON(!group->domain->ops->detach_dev))
- return -EINVAL;
- __iommu_group_for_each_dev(group, group->domain,
- iommu_group_do_detach_device);
+ __iommu_group_for_each_dev(group, NULL,
+ iommu_group_do_set_platform_dma);
group->domain = NULL;
return 0;
}
@@ -2360,34 +2396,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
if (ret == 0 && ops->iotlb_sync_map)
ops->iotlb_sync_map(domain, iova, size);
return ret;
}
-
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- might_sleep();
- return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map);
-int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(iommu_map_atomic);
-
static size_t __iommu_unmap_pages(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2477,9 +2506,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
-static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot,
- gfp_t gfp)
+ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
@@ -2487,6 +2516,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
unsigned int i = 0;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
@@ -2526,21 +2562,8 @@ out_err:
return ret;
}
-
-ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- might_sleep();
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map_sg);
-ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
-}
-
/**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
* @domain: the iommu domain where the fault has happened
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 8306616b6d81..ada693ea51a7 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
removed.
IOMMUFD VFIO container emulation is known to lack certain features
- of the native VFIO container, such as no-IOMMU support, peer-to-peer
+ of the native VFIO container, such as peer-to-peer
DMA mapping, PPC IOMMU support, as well as other potentially
undiscovered gaps. This option is currently intended for the
purpose of testing IOMMUFD with unmodified userspace supporting VFIO
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321af..a0c66f47a65a 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,7 +4,6 @@
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
-#include <linux/irqdomain.h>
#include "io_pagetable.h"
#include "iommufd_private.h"
@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
* operation from the device (eg a simple DMA) cannot trigger an
* interrupt outside this iommufd context.
*/
- if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
- !irq_domain_check_msi_remap()) {
+ if (!iommu_group_has_isolated_msi(idev->group)) {
if (!allow_unsafe_interrupts)
return -EPERM;
@@ -346,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
rc = iommufd_device_do_attach(idev, hwpt);
if (rc)
goto out_put_pt_obj;
-
- mutex_lock(&hwpt->ioas->mutex);
- list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
- mutex_unlock(&hwpt->ioas->mutex);
break;
}
case IOMMUFD_OBJ_IOAS: {
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 222e86591f8a..9d7f71510ca1 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -18,6 +18,8 @@ struct iommufd_ctx {
struct xarray objects;
u8 account_mode;
+ /* Compatibility with VFIO no iommu */
+ u8 no_iommu_mode;
struct iommufd_ioas *vfio_ioas;
};
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 083e6fcbe10a..3fbe636c3d8a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -252,9 +252,12 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
+ struct iommu_ioas_copy ioas_copy;
struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
+ struct iommu_option option;
+ struct iommu_vfio_ioas vfio_ioas;
#ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test;
#endif
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 1e1d3509efae..3c47846cc5ef 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
batch->npfns[batch->end - 1] < keep_pfns);
batch->total_pfns = keep_pfns;
- batch->npfns[0] = keep_pfns;
batch->pfns[0] = batch->pfns[batch->end - 1] +
(batch->npfns[batch->end - 1] - keep_pfns);
+ batch->npfns[0] = keep_pfns;
batch->end = 0;
}
@@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain,
size % PAGE_SIZE);
while (size) {
- rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
+ rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova += PAGE_SIZE;
@@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
else
rc = iommu_map(domain, iova,
PFN_PHYS(batch->pfns[cur]) + page_offset,
- next_iova - iova, area->iommu_prot);
+ next_iova - iova, area->iommu_prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova = next_iova;
@@ -1140,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
bool writable)
{
struct iopt_pages *pages;
+ unsigned long end;
/*
* The iommu API uses size_t as the length, and protect the DIV_ROUND_UP
@@ -1148,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
if (length > SIZE_MAX - PAGE_SIZE || length == 0)
return ERR_PTR(-EINVAL);
+ if (check_add_overflow((unsigned long)uptr, length, &end))
+ return ERR_PTR(-EOVERFLOW);
+
pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
if (!pages)
return ERR_PTR(-ENOMEM);
@@ -1201,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area,
unsigned long start =
max(start_index, *unmapped_end_index);
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+ batch->total_pfns)
+ WARN_ON(*unmapped_end_index -
+ batch->total_pfns !=
+ start_index);
batch_from_domain(batch, domain, area, start,
last_index);
- batch_last_index = start + batch->total_pfns - 1;
+ batch_last_index = start_index + batch->total_pfns - 1;
} else {
batch_last_index = last_index;
}
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+ WARN_ON(batch_last_index > real_last_index);
+
/*
* unmaps must always 'cut' at a place where the pfns are not
* contiguous to pair with the maps that always install
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 3ceca0e8311c..514494a0025b 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -26,39 +26,84 @@ out_unlock:
}
/**
- * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
+ * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists
+ * @ictx: Context to operate on
+ * @out_ioas_id: The IOAS ID of the compatibility IOAS
+ *
+ * Return the ID of the current compatibility IOAS. The ID can be passed into
+ * other functions that take an ioas_id.
+ */
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+{
+ struct iommufd_ioas *ioas;
+
+ ioas = get_compat_ioas(ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+ *out_ioas_id = ioas->obj.id;
+ iommufd_put_object(&ioas->obj);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached
+ * @ictx: Context to operate on
+ *
+ * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types.
+ */
+int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
+{
+ int ret;
+
+ xa_lock(&ictx->objects);
+ if (!ictx->vfio_ioas) {
+ ictx->no_iommu_mode = 1;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ xa_unlock(&ictx->objects);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created
* @ictx: Context to operate on
- * @out_ioas_id: The ioas_id the caller should use
*
* The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
* on since they do not have an IOAS ID input in their ABI. Only attaching a
- * group should cause a default creation of the internal ioas, this returns the
- * existing ioas if it has already been assigned somehow.
+ * group should cause a default creation of the internal ioas, this does nothing
+ * if an existing ioas has already been assigned somehow.
*/
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
{
struct iommufd_ioas *ioas = NULL;
- struct iommufd_ioas *out_ioas;
+ int ret;
ioas = iommufd_ioas_alloc(ictx);
if (IS_ERR(ioas))
return PTR_ERR(ioas);
xa_lock(&ictx->objects);
- if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj))
- out_ioas = ictx->vfio_ioas;
- else {
- out_ioas = ioas;
- ictx->vfio_ioas = ioas;
+ /*
+ * VFIO won't allow attaching a container to both iommu and no iommu
+ * operation
+ */
+ if (ictx->no_iommu_mode) {
+ ret = -EINVAL;
+ goto out_abort;
}
- xa_unlock(&ictx->objects);
- *out_ioas_id = out_ioas->obj.id;
- if (out_ioas != ioas) {
- iommufd_put_object(&out_ioas->obj);
- iommufd_object_abort(ictx, &ioas->obj);
- return 0;
+ if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
+ ret = 0;
+ iommufd_put_object(&ictx->vfio_ioas->obj);
+ goto out_abort;
}
+ ictx->vfio_ioas = ioas;
+ xa_unlock(&ictx->objects);
+
/*
* An automatically created compat IOAS is treated as a userspace
* created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
@@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
*/
iommufd_object_finalize(ictx, &ioas->obj);
return 0;
+
+out_abort:
+ xa_unlock(&ictx->objects);
+ iommufd_object_abort(ictx, &ioas->obj);
+ return ret;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
{
@@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
case VFIO_UNMAP_ALL:
return 1;
+ case VFIO_NOIOMMU_IOMMU:
+ return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
+
case VFIO_DMA_CC_IOMMU:
return iommufd_vfio_cc_iommu(ictx);
@@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
{
+ bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode);
struct iommufd_ioas *ioas = NULL;
int rc = 0;
- if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
+ /*
+ * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all
+ * other ioctls. We let them keep working but they mostly fail since no
+ * IOAS should exist.
+ */
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU &&
+ no_iommu_mode) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ return 0;
+ }
+
+ if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) ||
+ no_iommu_mode)
return -EINVAL;
/* VFIO fails the set_iommu if there is no group */
@@ -381,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
};
size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
struct vfio_info_cap_header __user *last_cap = NULL;
- struct vfio_iommu_type1_info info;
+ struct vfio_iommu_type1_info info = {};
struct iommufd_ioas *ioas;
size_t total_cap_size;
int rc;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index a003bd5fc65c..bdf1a4e5eae0 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -299,18 +299,6 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain,
mmu->utlb_ctx[utlb] = domain->context_id;
}
-/*
- * Disable MMU translation for the microTLB.
- */
-static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain,
- unsigned int utlb)
-{
- struct ipmmu_vmsa_device *mmu = domain->mmu;
-
- ipmmu_imuctr_write(mmu, utlb, 0);
- mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID;
-}
-
static void ipmmu_tlb_flush_all(void *cookie)
{
struct ipmmu_vmsa_domain *domain = cookie;
@@ -643,21 +631,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
return 0;
}
-static void ipmmu_detach_device(struct iommu_domain *io_domain,
- struct device *dev)
-{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
- unsigned int i;
-
- for (i = 0; i < fwspec->num_ids; ++i)
- ipmmu_utlb_disable(domain, fwspec->ids[i]);
-
- /*
- * TODO: Optimize by disabling the context when no device is attached.
- */
-}
-
static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -876,7 +849,6 @@ static const struct iommu_ops ipmmu_ops = {
.of_xlate = ipmmu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = ipmmu_attach_device,
- .detach_dev = ipmmu_detach_device,
.map_pages = ipmmu_map,
.unmap_pages = ipmmu_unmap,
.flush_iotlb_all = ipmmu_flush_iotlb_all,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index c60624910872..454f6331c889 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -443,9 +443,9 @@ fail:
return ret;
}
-static void msm_iommu_detach_dev(struct iommu_domain *domain,
- struct device *dev)
+static void msm_iommu_set_platform_dma(struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct msm_priv *priv = to_msm_priv(domain);
unsigned long flags;
struct msm_iommu_dev *iommu;
@@ -678,11 +678,11 @@ static struct iommu_ops msm_iommu_ops = {
.domain_alloc = msm_iommu_domain_alloc,
.probe_device = msm_iommu_probe_device,
.device_group = generic_device_group,
+ .set_platform_dma_ops = msm_iommu_set_platform_dma,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = msm_iommu_attach_dev,
- .detach_dev = msm_iommu_detach_dev,
.map_pages = msm_iommu_map,
.unmap_pages = msm_iommu_unmap,
/*
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 2badd6acfb23..d5a4955910ff 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -710,14 +710,6 @@ err_unlock:
return ret;
}
-static void mtk_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
-{
- struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
-
- mtk_iommu_config(data, dev, false, 0);
-}
-
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -946,7 +938,6 @@ static const struct iommu_ops mtk_iommu_ops = {
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_attach_device,
- .detach_dev = mtk_iommu_detach_device,
.map_pages = mtk_iommu_map,
.unmap_pages = mtk_iommu_unmap,
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index ca581ff1c769..43e4c8f89e23 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -319,7 +319,7 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device
return 0;
}
-static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct device *dev)
+static void mtk_iommu_v1_set_platform_dma(struct device *dev)
{
struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
@@ -585,10 +585,10 @@ static const struct iommu_ops mtk_iommu_v1_ops = {
.def_domain_type = mtk_iommu_v1_def_domain_type,
.device_group = generic_device_group,
.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE,
+ .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_v1_attach_device,
- .detach_dev = mtk_iommu_v1_detach_device,
.map_pages = mtk_iommu_v1_map,
.unmap_pages = mtk_iommu_v1_unmap,
.iova_to_phys = mtk_iommu_v1_iova_to_phys,
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 00d98f08732f..40f57d293a79 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -10,6 +10,7 @@
#include <linux/limits.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
@@ -171,3 +172,98 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
return ops;
}
+
+static enum iommu_resv_type __maybe_unused
+iommu_resv_region_get_type(struct device *dev,
+ struct resource *phys,
+ phys_addr_t start, size_t length)
+{
+ phys_addr_t end = start + length - 1;
+
+ /*
+ * IOMMU regions without an associated physical region cannot be
+ * mapped and are simply reservations.
+ */
+ if (phys->start >= phys->end)
+ return IOMMU_RESV_RESERVED;
+
+ /* may be IOMMU_RESV_DIRECT_RELAXABLE for certain cases */
+ if (start == phys->start && end == phys->end)
+ return IOMMU_RESV_DIRECT;
+
+ dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys,
+ &start, &end);
+ return IOMMU_RESV_RESERVED;
+}
+
+/**
+ * of_iommu_get_resv_regions - reserved region driver helper for device tree
+ * @dev: device for which to get reserved regions
+ * @list: reserved region list
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions() callback
+ * for memory regions attached to a device tree node. See the reserved-memory
+ * device tree bindings on how to use these:
+ *
+ * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
+void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
+{
+#if IS_ENABLED(CONFIG_OF_ADDRESS)
+ struct of_phandle_iterator it;
+ int err;
+
+ of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
+ const __be32 *maps, *end;
+ struct resource phys;
+ int size;
+
+ memset(&phys, 0, sizeof(phys));
+
+ /*
+ * The "reg" property is optional and can be omitted by reserved-memory regions
+ * that represent reservations in the IOVA space, which are regions that should
+ * not be mapped.
+ */
+ if (of_find_property(it.node, "reg", NULL)) {
+ err = of_address_to_resource(it.node, 0, &phys);
+ if (err < 0) {
+ dev_err(dev, "failed to parse memory region %pOF: %d\n",
+ it.node, err);
+ continue;
+ }
+ }
+
+ maps = of_get_property(it.node, "iommu-addresses", &size);
+ if (!maps)
+ continue;
+
+ end = maps + size / sizeof(__be32);
+
+ while (maps < end) {
+ struct device_node *np;
+ u32 phandle;
+
+ phandle = be32_to_cpup(maps++);
+ np = of_find_node_by_phandle(phandle);
+
+ if (np == dev->of_node) {
+ int prot = IOMMU_READ | IOMMU_WRITE;
+ struct iommu_resv_region *region;
+ enum iommu_resv_type type;
+ phys_addr_t iova;
+ size_t length;
+
+ maps = of_translate_dma_region(np, maps, &iova, &length);
+ type = iommu_resv_region_get_type(dev, &phys, iova, length);
+
+ region = iommu_alloc_resv_region(iova, length, prot, type,
+ GFP_KERNEL);
+ if (region)
+ list_add_tail(&region->list, list);
+ }
+ }
+ }
+#endif
+}
+EXPORT_SYMBOL(of_iommu_get_resv_regions);
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 2fd7702c6709..3ab078112a7c 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1556,9 +1556,9 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
omap_domain->dev = NULL;
}
-static void omap_iommu_detach_dev(struct iommu_domain *domain,
- struct device *dev)
+static void omap_iommu_set_platform_dma(struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
spin_lock(&omap_domain->lock);
@@ -1737,10 +1737,10 @@ static const struct iommu_ops omap_iommu_ops = {
.probe_device = omap_iommu_probe_device,
.release_device = omap_iommu_release_device,
.device_group = omap_iommu_device_group,
+ .set_platform_dma_ops = omap_iommu_set_platform_dma,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = omap_iommu_attach_dev,
- .detach_dev = omap_iommu_detach_dev,
.map = omap_iommu_map,
.unmap = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index a68eadd64f38..f30db22ea5d7 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1192,7 +1192,6 @@ static const struct iommu_ops rk_iommu_ops = {
.of_xlate = rk_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = rk_iommu_attach_device,
- .detach_dev = rk_iommu_detach_device,
.map = rk_iommu_map,
.unmap = rk_iommu_unmap,
.iova_to_phys = rk_iommu_iova_to_phys,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index ed33c6cce083..fbf59a8db29b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return true;
default:
return false;
}
@@ -52,7 +50,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
if (!s390_domain)
return NULL;
- s390_domain->dma_table = dma_alloc_cpu_table();
+ s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
if (!s390_domain->dma_table) {
kfree(s390_domain);
return NULL;
@@ -144,13 +142,10 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return 0;
}
-static void s390_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+static void s390_iommu_set_platform_dma(struct device *dev)
{
struct zpci_dev *zdev = to_zpci_dev(dev);
- WARN_ON(zdev->s390_domain != to_s390_domain(domain));
-
__s390_iommu_detach_device(zdev);
zpci_dma_init_device(zdev);
}
@@ -260,7 +255,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
phys_addr_t pa, dma_addr_t dma_addr,
- unsigned long nr_pages, int flags)
+ unsigned long nr_pages, int flags,
+ gfp_t gfp)
{
phys_addr_t page_addr = pa & PAGE_MASK;
unsigned long *entry;
@@ -268,7 +264,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
int rc;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ gfp);
if (unlikely(!entry)) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -284,7 +281,7 @@ undo_cpu_trans:
while (i-- > 0) {
dma_addr -= PAGE_SIZE;
entry = dma_walk_cpu_trans(s390_domain->dma_table,
- dma_addr);
+ dma_addr, gfp);
if (!entry)
break;
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -301,7 +298,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
int rc = 0;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ GFP_ATOMIC);
if (unlikely(!entry)) {
rc = -EINVAL;
break;
@@ -339,7 +337,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags);
+ pgcount, flags, gfp);
if (!rc)
*mapped = size;
@@ -435,11 +433,11 @@ static const struct iommu_ops s390_iommu_ops = {
.probe_device = s390_iommu_probe_device,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
+ .set_platform_dma_ops = s390_iommu_set_platform_dma,
.pgsize_bitmap = SZ_4K,
.get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = s390_iommu_attach_device,
- .detach_dev = s390_iommu_detach_device,
.map_pages = s390_iommu_map_pages,
.unmap_pages = s390_iommu_unmap_pages,
.flush_iotlb_all = s390_iommu_flush_iotlb_all,
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 219bfa11f7f4..ae94d74b73f4 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -255,21 +255,6 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain,
return 0;
}
-static void sprd_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
-{
- struct sprd_iommu_domain *dom = to_sprd_domain(domain);
- struct sprd_iommu_device *sdev = dom->sdev;
- size_t pgt_size = sprd_iommu_pgt_size(domain);
-
- if (!sdev)
- return;
-
- dma_free_coherent(sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa);
- sprd_iommu_hw_en(sdev, false);
- dom->sdev = NULL;
-}
-
static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -414,7 +399,6 @@ static const struct iommu_ops sprd_iommu_ops = {
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = sprd_iommu_attach_device,
- .detach_dev = sprd_iommu_detach_device,
.map_pages = sprd_iommu_map,
.unmap_pages = sprd_iommu_unmap,
.iotlb_sync_map = sprd_iommu_sync_map,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 5b585eace3d4..2d993d0cea7d 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -834,7 +834,6 @@ static const struct iommu_ops sun50i_iommu_ops = {
.probe_device = sun50i_iommu_probe_device,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = sun50i_iommu_attach_device,
- .detach_dev = sun50i_iommu_detach_device,
.flush_iotlb_all = sun50i_iommu_flush_iotlb_all,
.iotlb_sync_map = sun50i_iommu_iotlb_sync_map,
.iotlb_sync = sun50i_iommu_iotlb_sync,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index ed53279d1106..a482ff838b53 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -124,9 +124,9 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain,
return ret;
}
-static void gart_iommu_detach_dev(struct iommu_domain *domain,
- struct device *dev)
+static void gart_iommu_set_platform_dma(struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct gart_device *gart = gart_handle;
spin_lock(&gart->dom_lock);
@@ -270,11 +270,11 @@ static const struct iommu_ops gart_iommu_ops = {
.domain_alloc = gart_iommu_domain_alloc,
.probe_device = gart_iommu_probe_device,
.device_group = generic_device_group,
+ .set_platform_dma_ops = gart_iommu_set_platform_dma,
.pgsize_bitmap = GART_IOMMU_PGSIZES,
.of_xlate = gart_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = gart_iommu_attach_dev,
- .detach_dev = gart_iommu_detach_dev,
.map = gart_iommu_map,
.unmap = gart_iommu_unmap,
.iova_to_phys = gart_iommu_iova_to_phys,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 5b1af40221ec..1cbf063ccf14 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -511,8 +511,9 @@ disable:
return err;
}
-static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
+static void tegra_smmu_set_platform_dma(struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu_as *as = to_smmu_as(domain);
struct tegra_smmu *smmu = as->smmu;
@@ -671,12 +672,12 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as,
* allocate page in a sleeping context if GFP flags permit. Hence
* spinlock needs to be unlocked and re-locked after allocation.
*/
- if (!(gfp & __GFP_ATOMIC))
+ if (gfpflags_allow_blocking(gfp))
spin_unlock_irqrestore(&as->lock, *flags);
page = alloc_page(gfp | __GFP_DMA | __GFP_ZERO);
- if (!(gfp & __GFP_ATOMIC))
+ if (gfpflags_allow_blocking(gfp))
spin_lock_irqsave(&as->lock, *flags);
/*
@@ -965,11 +966,11 @@ static const struct iommu_ops tegra_smmu_ops = {
.domain_alloc = tegra_smmu_domain_alloc,
.probe_device = tegra_smmu_probe_device,
.device_group = tegra_smmu_device_group,
+ .set_platform_dma_ops = tegra_smmu_set_platform_dma,
.of_xlate = tegra_smmu_of_xlate,
.pgsize_bitmap = SZ_4K,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = tegra_smmu_attach_dev,
- .detach_dev = tegra_smmu_detach_dev,
.map = tegra_smmu_map,
.unmap = tegra_smmu_unmap,
.iova_to_phys = tegra_smmu_iova_to_phys,