summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/include/asm/pci.h1
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c291
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c159
-rw-r--r--arch/powerpc/platforms/powernv/pci.h7
4 files changed, 322 insertions, 136 deletions
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index baf2886d4d6e..0c72f1897063 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -132,5 +132,6 @@ extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
extern int pnv_npu2_init(struct pci_controller *hose);
extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
unsigned long msr);
+extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index ec52b970cefa..102983207734 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -327,31 +327,6 @@ static struct iommu_table_group_ops pnv_pci_npu_ops = {
.unset_window = pnv_npu_unset_window,
.take_ownership = pnv_npu_take_ownership,
};
-
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
-{
- struct pnv_phb *phb = npe->phb;
- struct pci_bus *pbus = phb->hose->bus;
- struct pci_dev *npdev, *gpdev = NULL, *gptmp;
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
-
- if (!gpe || !gpdev)
- return NULL;
-
- npe->table_group.ops = &pnv_pci_npu_ops;
-
- list_for_each_entry(npdev, &pbus->devices, bus_list) {
- gptmp = pnv_pci_get_gpu_dev(npdev);
-
- if (gptmp != gpdev)
- continue;
-
- pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
- iommu_group_add_device(gpe->table_group.group, &npdev->dev);
- }
-
- return gpe;
-}
#endif /* !CONFIG_IOMMU_API */
/*
@@ -359,6 +334,17 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
*/
/* Maximum possible number of ATSD MMIO registers per NPU */
#define NV_NMMU_ATSD_REGS 8
+#define NV_NPU_MAX_PE_NUM 16
+
+/*
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
+ */
+struct npu_comp {
+ struct iommu_table_group table_group;
+ int pe_num;
+ struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
+};
/* An NPU descriptor, valid for POWER9 only */
struct npu {
@@ -371,8 +357,263 @@ struct npu {
/* Do we need to explicitly flush the nest mmu? */
bool nmmu_flush;
+
+ struct npu_comp npucomp;
};
+#ifdef CONFIG_IOMMU_API
+static long pnv_npu_peers_create_table_userspace(
+ struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
+{
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ if (!npucomp->pe_num || !npucomp->pe[0] ||
+ !npucomp->pe[0]->table_group.ops ||
+ !npucomp->pe[0]->table_group.ops->create_table)
+ return -EFAULT;
+
+ return npucomp->pe[0]->table_group.ops->create_table(
+ &npucomp->pe[0]->table_group, num, page_shift,
+ window_size, levels, ptbl);
+}
+
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ int i, j;
+ long ret = 0;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->set_window)
+ continue;
+
+ ret = pe->table_group.ops->set_window(&pe->table_group,
+ num, tbl);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ for (j = 0; j < i; ++j) {
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+ if (!pe->table_group.ops->unset_window)
+ continue;
+
+ ret = pe->table_group.ops->unset_window(
+ &pe->table_group, num);
+ if (ret)
+ break;
+ }
+ } else {
+ table_group->tables[num] = iommu_tce_table_get(tbl);
+ }
+
+ return ret;
+}
+
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
+ int num)
+{
+ int i, j;
+ long ret = 0;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ WARN_ON(npucomp->table_group.tables[num] !=
+ table_group->tables[num]);
+ if (!npucomp->table_group.tables[num])
+ continue;
+
+ if (!pe->table_group.ops->unset_window)
+ continue;
+
+ ret = pe->table_group.ops->unset_window(&pe->table_group, num);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ for (j = 0; j < i; ++j) {
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+ if (!npucomp->table_group.tables[num])
+ continue;
+
+ if (!pe->table_group.ops->set_window)
+ continue;
+
+ ret = pe->table_group.ops->set_window(&pe->table_group,
+ num, table_group->tables[num]);
+ if (ret)
+ break;
+ }
+ } else if (table_group->tables[num]) {
+ iommu_tce_table_put(table_group->tables[num]);
+ table_group->tables[num] = NULL;
+ }
+
+ return ret;
+}
+
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
+{
+ int i;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->take_ownership)
+ continue;
+ pe->table_group.ops->take_ownership(&pe->table_group);
+ }
+}
+
+static void pnv_npu_peers_release_ownership(
+ struct iommu_table_group *table_group)
+{
+ int i;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->release_ownership)
+ continue;
+ pe->table_group.ops->release_ownership(&pe->table_group);
+ }
+}
+
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_npu_peers_create_table_userspace,
+ .set_window = pnv_npu_peers_set_window,
+ .unset_window = pnv_npu_peers_unset_window,
+ .take_ownership = pnv_npu_peers_take_ownership,
+ .release_ownership = pnv_npu_peers_release_ownership,
+};
+
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
+ struct pnv_ioda_pe *pe)
+{
+ if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
+ return;
+
+ npucomp->pe[npucomp->pe_num] = pe;
+ ++npucomp->pe_num;
+}
+
+struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+{
+ struct iommu_table_group *table_group;
+ struct npu_comp *npucomp;
+ struct pci_dev *gpdev = NULL;
+ struct pci_controller *hose;
+ struct pci_dev *npdev = NULL;
+
+ list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
+ npdev = pnv_pci_get_npu_dev(gpdev, 0);
+ if (npdev)
+ break;
+ }
+
+ if (!npdev)
+ /* It is not an NPU attached device, skip */
+ return NULL;
+
+ hose = pci_bus_to_host(npdev->bus);
+
+ if (hose->npu) {
+ table_group = &hose->npu->npucomp.table_group;
+
+ if (!table_group->group) {
+ table_group->ops = &pnv_npu_peers_ops;
+ iommu_register_group(table_group,
+ hose->global_number,
+ pe->pe_number);
+ }
+ } else {
+ /* Create a group for 1 GPU and attached NPUs for POWER8 */
+ pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
+ table_group = &pe->npucomp->table_group;
+ table_group->ops = &pnv_npu_peers_ops;
+ iommu_register_group(table_group, hose->global_number,
+ pe->pe_number);
+ }
+
+ /* Steal capabilities from a GPU PE */
+ table_group->max_dynamic_windows_supported =
+ pe->table_group.max_dynamic_windows_supported;
+ table_group->tce32_start = pe->table_group.tce32_start;
+ table_group->tce32_size = pe->table_group.tce32_size;
+ table_group->max_levels = pe->table_group.max_levels;
+ if (!table_group->pgsizes)
+ table_group->pgsizes = pe->table_group.pgsizes;
+
+ npucomp = container_of(table_group, struct npu_comp, table_group);
+ pnv_comp_attach_table_group(npucomp, pe);
+
+ return table_group;
+}
+
+struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+{
+ struct iommu_table_group *table_group;
+ struct npu_comp *npucomp;
+ struct pci_dev *gpdev = NULL;
+ struct pci_dev *npdev;
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
+
+ WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
+ if (!gpe)
+ return NULL;
+
+ /*
+ * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
+ * but NPU bridges do not have this hook defined so we do it here.
+ * We do not setup other table group parameters as they won't be used
+ * anyway - NVLink bridges are subordinate PEs.
+ */
+ pe->table_group.ops = &pnv_pci_npu_ops;
+
+ table_group = iommu_group_get_iommudata(
+ iommu_group_get(&gpdev->dev));
+
+ /*
+ * On P9 NPU PHB and PCI PHB support different page sizes,
+ * keep only matching. We expect here that NVLink bridge PE pgsizes is
+ * initialized by the caller.
+ */
+ table_group->pgsizes &= pe->table_group.pgsizes;
+ npucomp = container_of(table_group, struct npu_comp, table_group);
+ pnv_comp_attach_table_group(npucomp, pe);
+
+ list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
+ struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
+
+ if (gpdevtmp != gpdev)
+ continue;
+
+ iommu_add_device(table_group, &npdev->dev);
+ }
+
+ return table_group;
+}
+#endif /* CONFIG_IOMMU_API */
+
/* Maximum number of nvlinks per npu */
#define NV_MAX_LINKS 6
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 87cc10bccbfa..1d6406a051f1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -190,7 +190,8 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
-
+ WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+ kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
clear_bit(pe_num, phb->ioda.pe_alloc);
}
@@ -1534,7 +1535,9 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
#ifdef CONFIG_IOMMU_API
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe);
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus);
+
#endif
static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
@@ -1590,7 +1593,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
#ifdef CONFIG_IOMMU_API
- pnv_ioda_setup_bus_iommu_group(pe);
+ pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
#endif
}
}
@@ -2552,7 +2555,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
#endif
#ifdef CONFIG_IOMMU_API
-static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels)
{
unsigned long bytes = 0;
@@ -2626,127 +2629,40 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.release_ownership = pnv_ioda2_release_ownership,
};
-static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe **ptmppe = opaque;
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
- return 0;
-
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU_NVLINK)
- return 0;
-
- *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
-
- return 1;
-}
-
-/*
- * This returns PE of associated NPU.
- * This assumes that NPU is in the same IOMMU group with GPU and there is
- * no other PEs.
- */
-static struct pnv_ioda_pe *gpe_table_group_to_npe(
- struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = NULL;
- int ret = iommu_group_for_each_dev(table_group->group, &npe,
- gpe_table_group_to_npe_cb);
-
- BUG_ON(!ret || !npe);
-
- return npe;
-}
-
-static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
- int num, struct iommu_table *tbl)
-{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
-
- if (ret)
- return ret;
-
- ret = npe->table_group.ops->set_window(&npe->table_group, num, tbl);
- if (ret)
- pnv_pci_ioda2_unset_window(table_group, num);
-
- return ret;
-}
-
-static long pnv_pci_ioda2_npu_unset_window(
- struct iommu_table_group *table_group,
- int num)
-{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- long ret = pnv_pci_ioda2_unset_window(table_group, num);
-
- if (ret)
- return ret;
-
- return npe->table_group.ops->unset_window(&npe->table_group, num);
-}
-
-static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
-
- npe->table_group.ops->take_ownership(&npe->table_group);
- pnv_ioda2_take_ownership(table_group);
-}
-
-static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
- .get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_pci_ioda2_create_table_userspace,
- .set_window = pnv_pci_ioda2_npu_set_window,
- .unset_window = pnv_pci_ioda2_npu_unset_window,
- .take_ownership = pnv_ioda2_npu_take_ownership,
- .release_ownership = pnv_ioda2_release_ownership,
-};
-
static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group,
struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- iommu_add_device(&pe->table_group, &dev->dev);
+ iommu_add_device(table_group, &dev->dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_iommu_group_add_devices(pe,
- dev->subordinate);
+ table_group, dev->subordinate);
}
}
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe)
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus)
{
- if (!pnv_pci_ioda_pe_dma_weight(pe))
- return;
-
- iommu_register_group(&pe->table_group, pe->phb->hose->global_number,
- pe->pe_number);
- /*
- * set_iommu_table_base(&pe->pdev->dev, tbl) should have been called
- * by now
- */
if (pe->flags & PNV_IODA_PE_DEV)
- iommu_add_device(&pe->table_group, &pe->pdev->dev);
- else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_iommu_group_add_devices(pe, pe->pbus);
+ iommu_add_device(table_group, &pe->pdev->dev);
+
+ if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
+ pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
+ bus);
}
+static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
+
static void pnv_pci_ioda_setup_iommu_api(void)
{
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
struct pnv_phb *phb;
- struct pnv_ioda_pe *pe, *gpe;
+ struct pnv_ioda_pe *pe;
/*
* There are 4 types of PEs:
@@ -2768,24 +2684,45 @@ static void pnv_pci_ioda_setup_iommu_api(void)
if (phb->type == PNV_PHB_NPU_NVLINK)
continue;
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_ioda_setup_bus_iommu_group(pe);
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ struct iommu_table_group *table_group;
+
+ table_group = pnv_try_setup_npu_table_group(pe);
+ if (!table_group) {
+ if (!pnv_pci_ioda_pe_dma_weight(pe))
+ continue;
+
+ table_group = &pe->table_group;
+ iommu_register_group(&pe->table_group,
+ pe->phb->hose->global_number,
+ pe->pe_number);
+ }
+ pnv_ioda_setup_bus_iommu_group(pe, table_group,
+ pe->pbus);
+ }
}
/*
* Now we have all PHBs discovered, time to add NPU devices to
* the corresponding IOMMU groups.
*/
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
+ unsigned long pgsizes;
+
phb = hose->private_data;
if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
+ pgsizes = pnv_ioda_parse_tce_sizes(phb);
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- gpe = pnv_pci_npu_setup_iommu(pe);
- if (gpe)
- gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ /*
+ * IODA2 bridges get this set up from
+ * pci_controller_ops::setup_bridge but NPU bridges
+ * do not have this hook defined so we do it here.
+ */
+ pe->table_group.pgsizes = pgsizes;
+ pnv_npu_compound_attach(pe);
}
}
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 534b8cee081b..8e36da379252 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -62,6 +62,7 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
+ struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -199,6 +200,8 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -214,6 +217,10 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
+extern struct iommu_table_group *pnv_try_setup_npu_table_group(
+ struct pnv_ioda_pe *pe);
+extern struct iommu_table_group *pnv_npu_compound_attach(
+ struct pnv_ioda_pe *pe);
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 1