summaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
authorJoerg Roedel <jroedel@suse.de>2022-09-09 09:27:09 +0200
committerJoerg Roedel <jroedel@suse.de>2022-09-09 09:27:09 +0200
commit7f34891b151187c95511f2830a3994c8a54cabc1 (patch)
tree2b1d5fb4f1b1f7d272c48558a6363d053a803b91 /drivers/iommu
parentiova: Remove iovad->rcaches check in iova_rcache_get() (diff)
parentiommu/virtio: Fix interaction with VFIO (diff)
downloadlinux-7f34891b151187c95511f2830a3994c8a54cabc1.tar.xz
linux-7f34891b151187c95511f2830a3994c8a54cabc1.zip
Merge branch 'iommu/fixes' into core
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/amd/iommu.c3
-rw-r--r--drivers/iommu/intel/iommu.c214
-rw-r--r--drivers/iommu/intel/iommu.h9
-rw-r--r--drivers/iommu/virtio-iommu.c11
4 files changed, 130 insertions, 107 deletions
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b46b0e8ecbdf..67de9c42205a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -937,7 +937,8 @@ static void build_completion_wait(struct iommu_cmd *cmd,
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
cmd->data[1] = upper_32_bits(paddr);
- cmd->data[2] = data;
+ cmd->data[2] = lower_32_bits(data);
+ cmd->data[3] = upper_32_bits(data);
CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3bbd865910a6..2c6eca037f0a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -163,38 +163,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
return re->hi & VTD_PAGE_MASK;
}
-static inline void context_clear_pasid_enable(struct context_entry *context)
-{
- context->lo &= ~(1ULL << 11);
-}
-
-static inline bool context_pasid_enabled(struct context_entry *context)
-{
- return !!(context->lo & (1ULL << 11));
-}
-
-static inline void context_set_copied(struct context_entry *context)
-{
- context->hi |= (1ull << 3);
-}
-
-static inline bool context_copied(struct context_entry *context)
-{
- return !!(context->hi & (1ULL << 3));
-}
-
-static inline bool __context_present(struct context_entry *context)
-{
- return (context->lo & 1);
-}
-
-bool context_present(struct context_entry *context)
-{
- return context_pasid_enabled(context) ?
- __context_present(context) :
- __context_present(context) && !context_copied(context);
-}
-
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
@@ -242,6 +210,26 @@ static inline void context_clear_entry(struct context_entry *context)
context->hi = 0;
}
+static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ if (!iommu->copied_tables)
+ return false;
+
+ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
+static inline void
+set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
+static inline void
+clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -402,14 +390,36 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}
+/*
+ * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
+ * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
+ * the returned SAGAW.
+ */
+static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
+{
+ unsigned long fl_sagaw, sl_sagaw;
+
+ fl_sagaw = BIT(2) | (cap_fl1gp_support(iommu->cap) ? BIT(3) : 0);
+ sl_sagaw = cap_sagaw(iommu->cap);
+
+ /* Second level only. */
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
+ return sl_sagaw;
+
+ /* First level only. */
+ if (!ecap_slts(iommu->ecap))
+ return fl_sagaw;
+
+ return fl_sagaw & sl_sagaw;
+}
+
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
unsigned long sagaw;
int agaw;
- sagaw = cap_sagaw(iommu->cap);
- for (agaw = width_to_agaw(max_gaw);
- agaw >= 0; agaw--) {
+ sagaw = __iommu_calculate_sagaw(iommu);
+ for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
if (test_bit(agaw, &sagaw))
break;
}
@@ -505,8 +515,9 @@ static int domain_update_device_node(struct dmar_domain *domain)
{
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
+ unsigned long flags;
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link) {
/*
* There could possibly be multiple device numa nodes as devices
@@ -518,7 +529,7 @@ static int domain_update_device_node(struct dmar_domain *domain)
if (nid != NUMA_NO_NODE)
break;
}
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
return nid;
}
@@ -578,6 +589,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
struct context_entry *context;
u64 *entry;
+ /*
+ * Except that the caller requested to allocate a new entry,
+ * returning a copied context entry makes no sense.
+ */
+ if (!alloc && context_copied(iommu, bus, devfn))
+ return NULL;
+
entry = &root->lo;
if (sm_supported(iommu)) {
if (devfn >= 0x80) {
@@ -795,32 +813,11 @@ static void free_context_table(struct intel_iommu *iommu)
}
#ifdef CONFIG_DMAR_DEBUG
-static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
+static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
+ u8 bus, u8 devfn, struct dma_pte *parent, int level)
{
- struct device_domain_info *info;
- struct dma_pte *parent, *pte;
- struct dmar_domain *domain;
- struct pci_dev *pdev;
- int offset, level;
-
- pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
- if (!pdev)
- return;
-
- info = dev_iommu_priv_get(&pdev->dev);
- if (!info || !info->domain) {
- pr_info("device [%02x:%02x.%d] not probed\n",
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- return;
- }
-
- domain = info->domain;
- level = agaw_to_level(domain->agaw);
- parent = domain->pgd;
- if (!parent) {
- pr_info("no page table setup\n");
- return;
- }
+ struct dma_pte *pte;
+ int offset;
while (1) {
offset = pfn_level_offset(pfn, level);
@@ -847,9 +844,10 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
struct pasid_entry *entries, *pte;
struct context_entry *ctx_entry;
struct root_entry *rt_entry;
+ int i, dir_index, index, level;
u8 devfn = source_id & 0xff;
u8 bus = source_id >> 8;
- int i, dir_index, index;
+ struct dma_pte *pgtable;
pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
@@ -877,8 +875,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
ctx_entry->hi, ctx_entry->lo);
/* legacy mode does not require PASID entries */
- if (!sm_supported(iommu))
+ if (!sm_supported(iommu)) {
+ level = agaw_to_level(ctx_entry->hi & 7);
+ pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
goto pgtable_walk;
+ }
/* get the pointer to pasid directory entry */
dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
@@ -905,8 +906,16 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
for (i = 0; i < ARRAY_SIZE(pte->val); i++)
pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+ if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
+ level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
+ pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
+ } else {
+ level = agaw_to_level((pte->val[0] >> 2) & 0x7);
+ pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
+ }
+
pgtable_walk:
- pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
+ pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
}
#endif
@@ -1345,19 +1354,20 @@ iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
u8 bus, u8 devfn)
{
struct device_domain_info *info;
+ unsigned long flags;
if (!iommu->qi)
return NULL;
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link) {
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
return info->ats_supported ? info : NULL;
}
}
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
return NULL;
}
@@ -1366,8 +1376,9 @@ static void domain_update_iotlb(struct dmar_domain *domain)
{
struct device_domain_info *info;
bool has_iotlb_device = false;
+ unsigned long flags;
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link) {
if (info->ats_enabled) {
has_iotlb_device = true;
@@ -1375,7 +1386,7 @@ static void domain_update_iotlb(struct dmar_domain *domain)
}
}
domain->has_iotlb_device = has_iotlb_device;
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
@@ -1467,14 +1478,15 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
u64 addr, unsigned mask)
{
struct device_domain_info *info;
+ unsigned long flags;
if (!domain->has_iotlb_device)
return;
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_for_each_entry(info, &domain->devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1688,6 +1700,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
iommu->domain_ids = NULL;
}
+ if (iommu->copied_tables) {
+ bitmap_free(iommu->copied_tables);
+ iommu->copied_tables = NULL;
+ }
+
/* free context mapping */
free_context_table(iommu);
@@ -1913,7 +1930,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
goto out_unlock;
ret = 0;
- if (context_present(context))
+ if (context_present(context) && !context_copied(iommu, bus, devfn))
goto out_unlock;
/*
@@ -1925,7 +1942,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
* in-flight DMA will exist, and we don't need to worry anymore
* hereafter.
*/
- if (context_copied(context)) {
+ if (context_copied(iommu, bus, devfn)) {
u16 did_old = context_domain_id(context);
if (did_old < cap_ndoms(iommu->cap)) {
@@ -1936,6 +1953,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
DMA_TLB_DSI_FLUSH);
}
+
+ clear_context_copied(iommu, bus, devfn);
}
context_clear_entry(context);
@@ -2429,6 +2448,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
+ unsigned long flags;
u8 bus, devfn;
int ret;
@@ -2440,9 +2460,9 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
if (ret)
return ret;
info->domain = domain;
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
/* PASID table is mandatory for a PCI device in scalable mode. */
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
@@ -2684,32 +2704,14 @@ static int copy_context_table(struct intel_iommu *iommu,
/* Now copy the context entry */
memcpy(&ce, old_ce + idx, sizeof(ce));
- if (!__context_present(&ce))
+ if (!context_present(&ce))
continue;
did = context_domain_id(&ce);
if (did >= 0 && did < cap_ndoms(iommu->cap))
set_bit(did, iommu->domain_ids);
- /*
- * We need a marker for copied context entries. This
- * marker needs to work for the old format as well as
- * for extended context entries.
- *
- * Bit 67 of the context entry is used. In the old
- * format this bit is available to software, in the
- * extended format it is the PGE bit, but PGE is ignored
- * by HW if PASIDs are disabled (and thus still
- * available).
- *
- * So disable PASIDs first and then mark the entry
- * copied. This means that we don't copy PASID
- * translations from the old kernel, but this is fine as
- * faults there are not fatal.
- */
- context_clear_pasid_enable(&ce);
- context_set_copied(&ce);
-
+ set_context_copied(iommu, bus, devfn);
new_ce[idx] = ce;
}
@@ -2735,8 +2737,8 @@ static int copy_translation_tables(struct intel_iommu *iommu)
bool new_ext, ext;
rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
- ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
- new_ext = !!ecap_ecs(iommu->ecap);
+ ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
+ new_ext = !!sm_supported(iommu);
/*
* The RTT bit can only be changed when translation is disabled,
@@ -2747,6 +2749,10 @@ static int copy_translation_tables(struct intel_iommu *iommu)
if (new_ext != ext)
return -EINVAL;
+ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
+ if (!iommu->copied_tables)
+ return -ENOMEM;
+
old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
if (!old_rt_phys)
return -EINVAL;
@@ -4078,6 +4084,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *domain = info->domain;
struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
if (!dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
@@ -4089,9 +4096,9 @@ static void dmar_remove_one_dev_info(struct device *dev)
intel_pasid_free_table(info->dev);
}
- spin_lock(&domain->lock);
+ spin_lock_irqsave(&domain->lock, flags);
list_del(&info->link);
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&domain->lock, flags);
domain_detach_iommu(domain, iommu);
info->domain = NULL;
@@ -4410,19 +4417,20 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long flags;
if (dmar_domain->force_snooping)
return true;
- spin_lock(&dmar_domain->lock);
+ spin_lock_irqsave(&dmar_domain->lock, flags);
if (!domain_support_force_snooping(dmar_domain)) {
- spin_unlock(&dmar_domain->lock);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
return false;
}
domain_set_force_snooping(dmar_domain);
dmar_domain->force_snooping = true;
- spin_unlock(&dmar_domain->lock);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
return true;
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index fae45bbb0c7f..74b0e19e23ee 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -197,7 +197,6 @@
#define ecap_dis(e) (((e) >> 27) & 0x1)
#define ecap_nest(e) (((e) >> 26) & 0x1)
#define ecap_mts(e) (((e) >> 25) & 0x1)
-#define ecap_ecs(e) (((e) >> 24) & 0x1)
#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
#define ecap_coherent(e) ((e) & 0x1)
@@ -265,7 +264,6 @@
#define DMA_GSTS_CFIS (((u32)1) << 23)
/* DMA_RTADDR_REG */
-#define DMA_RTADDR_RTT (((u64)1) << 11)
#define DMA_RTADDR_SMT (((u64)1) << 10)
/* CCMD_REG */
@@ -579,6 +577,7 @@ struct intel_iommu {
#ifdef CONFIG_INTEL_IOMMU
unsigned long *domain_ids; /* bitmap of domains */
+ unsigned long *copied_tables; /* bitmap of copied tables */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
@@ -701,6 +700,11 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte)
(struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
}
+static inline bool context_present(struct context_entry *context)
+{
+ return (context->lo & 1);
+}
+
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
extern int dmar_enable_qi(struct intel_iommu *iommu);
@@ -784,7 +788,6 @@ static inline void intel_iommu_debugfs_init(void) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
extern const struct attribute_group *intel_iommu_groups[];
-bool context_present(struct context_entry *context);
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 5ee24dc6f5ae..da463db9f12a 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1005,7 +1005,18 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
}
+static bool viommu_capable(enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ default:
+ return false;
+ }
+}
+
static struct iommu_ops viommu_ops = {
+ .capable = viommu_capable,
.domain_alloc = viommu_domain_alloc,
.probe_device = viommu_probe_device,
.probe_finalize = viommu_probe_finalize,