From cca87d303c85b257a7b0fd34f9d6fce1c59880a2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 17 Mar 2015 16:15:02 +1100 Subject: powerpc/pci: Refactor pci_dn Currently, the PCI config accessors are implemented based on device node. Unfortunately, SRIOV VFs won't have the corresponding device nodes. pci_dn will be used in replacement with device node for SRIOV VFs. So we have to use pci_dn in PCI config accessors. The patch refactors pci_dn in following aspects to make it ready to be used in PCI config accessors as we do in subsequent patch: * pci_dn is organized as a hierarchy tree. PCI device's pci_dn is put to the child list of pci_dn of its upstream bridge or PHB. VF's pci_dn will be put to the child list of pci_dn of PF's bridge. * For one particular PCI device (VF or not), its pci_dn can be found from pdev->dev.archdata.pci_data, PCI_DN(devnode), or parent's list. The fast path (fetching pci_dn through PCI device instance) is populated during early fixup time. [bhelgaas: changelog] Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_dn.c | 130 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 126 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel/pci_dn.c') diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 83df3075d3df..0ab2dadaf842 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -32,12 +32,108 @@ #include #include +/* + * The function is used to find the firmware data of one + * specific PCI device, which is attached to the indicated + * PCI bus. For VFs, their firmware data is linked to that + * one of PF's bridge. For other devices, their firmware + * data is linked to that of their bridge. + */ +static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus) +{ + struct pci_bus *pbus; + struct device_node *dn; + struct pci_dn *pdn; + + /* + * We probably have virtual bus which doesn't + * have associated bridge. + */ + pbus = bus; + while (pbus) { + if (pci_is_root_bus(pbus) || pbus->self) + break; + + pbus = pbus->parent; + } + + /* + * Except virtual bus, all PCI buses should + * have device nodes. + */ + dn = pci_bus_to_OF_node(pbus); + pdn = dn ? PCI_DN(dn) : NULL; + + return pdn; +} + +struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, + int devfn) +{ + struct device_node *dn = NULL; + struct pci_dn *parent, *pdn; + struct pci_dev *pdev = NULL; + + /* Fast path: fetch from PCI device */ + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (pdev->devfn == devfn) { + if (pdev->dev.archdata.pci_data) + return pdev->dev.archdata.pci_data; + + dn = pci_device_to_OF_node(pdev); + break; + } + } + + /* Fast path: fetch from device node */ + pdn = dn ? PCI_DN(dn) : NULL; + if (pdn) + return pdn; + + /* Slow path: fetch from firmware data hierarchy */ + parent = pci_bus_to_pdn(bus); + if (!parent) + return NULL; + + list_for_each_entry(pdn, &parent->child_list, list) { + if (pdn->busno == bus->number && + pdn->devfn == devfn) + return pdn; + } + + return NULL; +} + struct pci_dn *pci_get_pdn(struct pci_dev *pdev) { - struct device_node *dn = pci_device_to_OF_node(pdev); - if (!dn) + struct device_node *dn; + struct pci_dn *parent, *pdn; + + /* Search device directly */ + if (pdev->dev.archdata.pci_data) + return pdev->dev.archdata.pci_data; + + /* Check device node */ + dn = pci_device_to_OF_node(pdev); + pdn = dn ? PCI_DN(dn) : NULL; + if (pdn) + return pdn; + + /* + * VFs don't have device nodes. We hook their + * firmware data to PF's bridge. + */ + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) return NULL; - return PCI_DN(dn); + + list_for_each_entry(pdn, &parent->child_list, list) { + if (pdn->busno == pdev->bus->number && + pdn->devfn == pdev->devfn) + return pdn; + } + + return NULL; } /* @@ -49,6 +145,7 @@ void *update_dn_pci_info(struct device_node *dn, void *data) struct pci_controller *phb = data; const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL); const __be32 *regs; + struct device_node *parent; struct pci_dn *pdn; pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); @@ -70,6 +167,15 @@ void *update_dn_pci_info(struct device_node *dn, void *data) } pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); + + /* Attach to parent node */ + INIT_LIST_HEAD(&pdn->child_list); + INIT_LIST_HEAD(&pdn->list); + parent = of_get_parent(dn); + pdn->parent = parent ? PCI_DN(parent) : NULL; + if (pdn->parent) + list_add_tail(&pdn->list, &pdn->parent->child_list); + return NULL; } @@ -147,8 +253,11 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) /* PHB nodes themselves must not match */ update_dn_pci_info(dn, phb); pdn = dn->data; - if (pdn) + if (pdn) { pdn->devfn = pdn->busno = -1; + pdn->phb = phb; + phb->pci_data = pdn; + } /* Update dn->phb ptrs for new phb and children devices */ traverse_pci_devices(dn, update_dn_pci_info, phb); @@ -171,3 +280,16 @@ void __init pci_devs_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) pci_devs_phb_init_dynamic(phb); } + +static void pci_dev_pdn_setup(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + if (pdev->dev.archdata.pci_data) + return; + + /* Setup the fast path */ + pdn = pci_get_pdn(pdev); + pdev->dev.archdata.pci_data = pdn; +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup); -- cgit v1.2.3 From c035ff1d2eaa03ab40839041e955a86a8e412eb4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 17 Mar 2015 16:15:04 +1100 Subject: powerpc/pci: Trace more information from pci_dn Originally, EEH probes on device_node or pci_dev and populates the corresponding eeh_dev. In the subsequent patches, EEH will probes on pci_dn and populates the corresponding eeh_dev. So we have to cache some information in pci_dn, either from device_node or SRIOV PF's enablement platform hook, to populate the eeh_dev properly. The motivation to probe pci_dn, instead of device node or pci_dev, to populate eeh_dev is SRIOV VFs are dynamically created and we don't have the corresponding device nodes for them. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pci-bridge.h | 3 +++ arch/powerpc/kernel/pci_dn.c | 10 ++++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/kernel/pci_dn.c') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 706710b571c3..01b730a8a5a3 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -159,6 +159,9 @@ struct pci_dn { int busno; /* pci bus number */ int devfn; /* pci device and function number */ + int vendor_id; /* Vendor ID */ + int device_id; /* Device ID */ + int class_code; /* Device class code */ struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0ab2dadaf842..d139f72ff9d5 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -166,6 +166,15 @@ void *update_dn_pci_info(struct device_node *dn, void *data) pdn->devfn = (addr >> 8) & 0xff; } + /* vendor/device IDs and class code */ + regs = of_get_property(dn, "vendor-id", NULL); + pdn->vendor_id = regs ? of_read_number(regs, 1) : 0; + regs = of_get_property(dn, "device-id", NULL); + pdn->device_id = regs ? of_read_number(regs, 1) : 0; + regs = of_get_property(dn, "class-code", NULL); + pdn->class_code = regs ? of_read_number(regs, 1) : 0; + + /* Extended config space */ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); /* Attach to parent node */ @@ -255,6 +264,7 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) pdn = dn->data; if (pdn) { pdn->devfn = pdn->busno = -1; + pdn->vendor_id = pdn->device_id = pdn->class_code = 0; pdn->phb = phb; phb->pci_data = pdn; } -- cgit v1.2.3 From e8e9b34cef237d4d6fdc0d350cd8a95d1adb9ee9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 17 Mar 2015 16:15:05 +1100 Subject: powerpc/eeh: Create eeh_dev from pci_dn instead of device_node The patch adds function traverse_pci_dn(), which is similar to traverse_pci_devices() except it takes pci_dn, not device_node as parameter. The pci_dev.c has been reworked to create eeh_dev from pci_dn, instead of device_node. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 11 ++++++++-- arch/powerpc/include/asm/pci-bridge.h | 8 ++++++- arch/powerpc/include/asm/ppc-pci.h | 5 +++++ arch/powerpc/kernel/eeh_dev.c | 14 ++++++------ arch/powerpc/kernel/pci_dn.c | 40 ++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/setup.c | 2 +- 6 files changed, 69 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel/pci_dn.c') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 55abfd09e47f..2106f83da2d5 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -30,6 +30,7 @@ struct pci_dev; struct pci_bus; struct device_node; +struct pci_dn; #ifdef CONFIG_EEH @@ -137,6 +138,7 @@ struct eeh_dev { struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ struct device_node *dn; /* Associated device node */ + struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ struct pci_bus *bus; /* PCI bus for partial hotplug */ }; @@ -146,6 +148,11 @@ static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) return edev ? edev->dn : NULL; } +static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev) +{ + return edev ? edev->pdn : NULL; +} + static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) { return edev ? edev->pdev : NULL; @@ -272,7 +279,7 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -void *eeh_dev_init(struct device_node *dn, void *data); +void *eeh_dev_init(struct pci_dn *pdn, void *data); void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); @@ -323,7 +330,7 @@ static inline int eeh_init(void) return 0; } -static inline void *eeh_dev_init(struct device_node *dn, void *data) +static inline void *eeh_dev_init(struct pci_dn *pdn, void *data) { return NULL; } diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 01b730a8a5a3..7b74499b728c 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -213,8 +213,14 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) return PCI_DN(dn)->edev; } + +static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn) +{ + return pdn ? pdn->edev : NULL; +} #else -#define of_node_to_eeh_dev(x) (NULL) +#define of_node_to_eeh_dev(x) (NULL) +#define pdn_to_eeh_dev(x) (NULL) #endif /** Find the bus corresponding to the indicated device node */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index db1e2b8eff3c..ade75238ceb5 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -33,9 +33,14 @@ extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /* PCI device_node operations */ struct device_node; +struct pci_dn; + typedef void *(*traverse_func)(struct device_node *me, void *data); void *traverse_pci_devices(struct device_node *start, traverse_func pre, void *data); +void *traverse_pci_dn(struct pci_dn *root, + void *(*fn)(struct pci_dn *, void *), + void *data); extern void pci_devs_phb_init(void); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index e5274ee9a75f..aabba94ff9cb 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -43,13 +43,13 @@ /** * eeh_dev_init - Create EEH device according to OF node - * @dn: device node + * @pdn: PCI device node * @data: PHB * * It will create EEH device according to the given OF node. The function * might be called by PCI emunation, DR, PHB hotplug. */ -void *eeh_dev_init(struct device_node *dn, void *data) +void *eeh_dev_init(struct pci_dn *pdn, void *data) { struct pci_controller *phb = data; struct eeh_dev *edev; @@ -63,8 +63,8 @@ void *eeh_dev_init(struct device_node *dn, void *data) } /* Associate EEH device with OF node */ - PCI_DN(dn)->edev = edev; - edev->dn = dn; + pdn->edev = edev; + edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); @@ -80,16 +80,16 @@ void *eeh_dev_init(struct device_node *dn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct device_node *dn = phb->dn; + struct pci_dn *root = phb->pci_data; /* EEH PE for PHB */ eeh_phb_pe_create(phb); /* EEH device for PHB */ - eeh_dev_init(dn, phb); + eeh_dev_init(root, phb); /* EEH devices for children OF nodes */ - traverse_pci_devices(dn, eeh_dev_init, phb); + traverse_pci_dn(root, eeh_dev_init, phb); } /** diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d139f72ff9d5..65b98367005c 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -246,6 +246,46 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, return NULL; } +static struct pci_dn *pci_dn_next_one(struct pci_dn *root, + struct pci_dn *pdn) +{ + struct list_head *next = pdn->child_list.next; + + if (next != &pdn->child_list) + return list_entry(next, struct pci_dn, list); + + while (1) { + if (pdn == root) + return NULL; + + next = pdn->list.next; + if (next != &pdn->parent->child_list) + break; + + pdn = pdn->parent; + } + + return list_entry(next, struct pci_dn, list); +} + +void *traverse_pci_dn(struct pci_dn *root, + void *(*fn)(struct pci_dn *, void *), + void *data) +{ + struct pci_dn *pdn = root; + void *ret; + + /* Only scan the child nodes */ + for (pdn = pci_dn_next_one(root, pdn); pdn; + pdn = pci_dn_next_one(root, pdn)) { + ret = fn(pdn, data); + if (ret) + return ret; + } + + return NULL; +} + /** * pci_devs_phb_init_dynamic - setup pci devices under this PHB * phb: pci-to-host bridge (top-level bridge connecting to cpu) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e445b6701f50..70304070a260 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -265,7 +265,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act update_dn_pci_info(np, pci->phb); /* Create EEH device for the OF node */ - eeh_dev_init(np, pci->phb); + eeh_dev_init(PCI_DN(np), pci->phb); } break; default: -- cgit v1.2.3 From a8b2f8288a3fdef8d93efef2b1ead7563004275e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 25 Mar 2015 16:23:52 +0800 Subject: powerpc/pci: Create pci_dn for VFs pci_dn is the extension of PCI device node and is created from device node. Unfortunately, VFs are enabled dynamically by PF's driver and they don't have corresponding device nodes and pci_dn, which is required to access VFs' config spaces. The patch creates pci_dn for VFs in pcibios_sriov_enable() on their PF, and removes pci_dn for VFs in pcibios_sriov_disable() on their PF. When VF's pci_dn is created, it's put to the child list of the pci_dn of PF's upstream bridge. The pci_dn is linked to pci_dev during early fixup time to setup the fast path. [bhelgaas: add ifdef around add_one_dev_pci_info(), use dev_printk()] Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pci-bridge.h | 3 + arch/powerpc/kernel/pci_dn.c | 116 ++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 16 +++++ 3 files changed, 135 insertions(+) (limited to 'arch/powerpc/kernel/pci_dn.c') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 2c6dc2a3d14a..ece30f589398 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -156,6 +156,7 @@ struct iommu_table; struct pci_dn { int flags; +#define PCI_DN_FLAG_IOV_VF 0x01 int busno; /* pci bus number */ int devfn; /* pci device and function number */ @@ -188,6 +189,8 @@ struct pci_dn { extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, int devfn); extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev); +extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev); +extern void remove_dev_pci_data(struct pci_dev *pdev); extern void *update_dn_pci_info(struct device_node *dn, void *data); static inline int pci_device_from_OF_node(struct device_node *np, diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 65b98367005c..e5f1d78ef7cf 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -136,6 +136,122 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_PCI_IOV +static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, + struct pci_dev *pdev, + int busno, int devfn) +{ + struct pci_dn *pdn; + + /* Except PHB, we always have the parent */ + if (!parent) + return NULL; + + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (!pdn) { + dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__); + return NULL; + } + + pdn->phb = parent->phb; + pdn->parent = parent; + pdn->busno = busno; + pdn->devfn = devfn; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif + INIT_LIST_HEAD(&pdn->child_list); + INIT_LIST_HEAD(&pdn->list); + list_add_tail(&pdn->list, &parent->child_list); + + /* + * If we already have PCI device instance, lets + * bind them. + */ + if (pdev) + pdev->dev.archdata.pci_data = pdn; + + return pdn; +} +#endif + +struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) +{ +#ifdef CONFIG_PCI_IOV + struct pci_dn *parent, *pdn; + int i; + + /* Only support IOV for now */ + if (!pdev->is_physfn) + return pci_get_pdn(pdev); + + /* Check if VFs have been populated */ + pdn = pci_get_pdn(pdev); + if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF)) + return NULL; + + pdn->flags |= PCI_DN_FLAG_IOV_VF; + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) + return NULL; + + for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + pdn = add_one_dev_pci_data(parent, NULL, + pci_iov_virtfn_bus(pdev, i), + pci_iov_virtfn_devfn(pdev, i)); + if (!pdn) { + dev_warn(&pdev->dev, "%s: Cannot create firmware data for VF#%d\n", + __func__, i); + return NULL; + } + } +#endif /* CONFIG_PCI_IOV */ + + return pci_get_pdn(pdev); +} + +void remove_dev_pci_data(struct pci_dev *pdev) +{ +#ifdef CONFIG_PCI_IOV + struct pci_dn *parent; + struct pci_dn *pdn, *tmp; + int i; + + /* Only support IOV PF for now */ + if (!pdev->is_physfn) + return; + + /* Check if VFs have been populated */ + pdn = pci_get_pdn(pdev); + if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF)) + return; + + pdn->flags &= ~PCI_DN_FLAG_IOV_VF; + parent = pci_bus_to_pdn(pdev->bus); + if (!parent) + return; + + /* + * We might introduce flag to pci_dn in future + * so that we can release VF's firmware data in + * a batch mode. + */ + for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + list_for_each_entry_safe(pdn, tmp, + &parent->child_list, list) { + if (pdn->busno != pci_iov_virtfn_bus(pdev, i) || + pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) + continue; + + if (!list_empty(&pdn->list)) + list_del(&pdn->list); + + kfree(pdn); + } + } +#endif /* CONFIG_PCI_IOV */ +} + /* * Traverse_func that inits the PCI fields of the device node. * NOTE: this *must* be done before read/write config to the device. diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 26fe09936935..7f58f199f2c1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -974,6 +974,22 @@ static void pnv_pci_ioda_setup_PEs(void) } } +#ifdef CONFIG_PCI_IOV +int pcibios_sriov_disable(struct pci_dev *pdev) +{ + /* Release PCI data */ + remove_dev_pci_data(pdev); + return 0; +} + +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_dev_pci_data(pdev); + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); -- cgit v1.2.3 From 781a868f3136c6eb8e8c5c19d148416d7da86610 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Mar 2015 16:23:57 +0800 Subject: powerpc/powernv: Shift VF resource with an offset On PowerNV platform, resource position in M64 BAR implies the PE# the resource belongs to. In some cases, adjustment of a resource is necessary to locate it to a correct position in M64 BAR . This patch adds pnv_pci_vf_resource_shift() to shift the 'real' PF IOV BAR address according to an offset. Note: After doing so, there would be a "hole" in the /proc/iomem when offset is a positive value. It looks like the device return some mmio back to the system, which actually no one could use it. [bhelgaas: rework loops, rework overlap check, index resource[] conventionally, remove pci_regs.h include, squashed with next patch] Signed-off-by: Wei Yang Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pci-bridge.h | 4 + arch/powerpc/kernel/pci_dn.c | 13 + arch/powerpc/platforms/powernv/pci-ioda.c | 528 +++++++++++++++++++++++++++++- arch/powerpc/platforms/powernv/pci.c | 18 + arch/powerpc/platforms/powernv/pci.h | 7 + 5 files changed, 553 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel/pci_dn.c') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 7b8ebc5929ff..8716db48e946 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -180,6 +180,10 @@ struct pci_dn { int pe_number; #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ + u16 num_vfs; /* number of VFs enabled*/ + int offset; /* PE# for the first VF PE */ +#define IODA_INVALID_M64 (-1) + int m64_wins[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ #endif struct list_head child_list; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e5f1d78ef7cf..b3b4df91b792 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -217,6 +217,19 @@ void remove_dev_pci_data(struct pci_dev *pdev) struct pci_dn *pdn, *tmp; int i; + /* + * VF and VF PE are created/released dynamically, so we need to + * bind/unbind them. Otherwise the VF and VF PE would be mismatched + * when re-enabling SR-IOV. + */ + if (pdev->is_virtfn) { + pdn = pci_get_pdn(pdev); +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif + return; + } + /* Only support IOV PF for now */ if (!pdev->is_physfn) return; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 217eaad23cde..5187d164cfe1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -44,6 +44,9 @@ #include "powernv.h" #include "pci.h" +/* 256M DMA window, 4K TCE pages, 8 bytes TCE */ +#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) + static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { @@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, vaf.fmt = fmt; vaf.va = &args; - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); - else + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) sprintf(pfix, "%04x:%02x ", pci_domain_nr(pe->pbus), pe->pbus->number); +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + sprintf(pfix, "%04x:%02x:%2x.%d", + pci_domain_nr(pe->parent_dev->bus), + (pe->rid & 0xff00) >> 8, + PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); +#endif /* CONFIG_PCI_IOV*/ printk("%spci %s: [PE# %.3d] %pV", level, pfix, pe->pe_number, &vaf); @@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, bool is_add) { struct pnv_ioda_pe *slave; - struct pci_dev *pdev; + struct pci_dev *pdev = NULL; int ret; /* @@ -630,8 +640,12 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) pdev = pe->pbus->self; - else + else if (pe->flags & PNV_IODA_PE_DEV) pdev = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + pdev = pe->parent_dev->bus->self; +#endif /* CONFIG_PCI_IOV */ while (pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *parent; @@ -649,6 +663,87 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } +#ifdef CONFIG_PCI_IOV +static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + struct pci_dev *parent; + uint8_t bcomp, dcomp, fcomp; + int64_t rc; + long rid_end, rid; + + /* Currently, we just deconfigure VF PE. Bus PE will always there.*/ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + parent = pe->pbus->self; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else + parent = pe->pdev->bus->self; + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* Clear the reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = 0; + + /* Release from all parents PELT-V */ + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? */ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_UNMAP_PE); + if (rc) + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + + pe->pbus = NULL; + pe->pdev = NULL; + pe->parent_dev = NULL; + + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) case 16: bcomp = OpalPciBus4Bits; break; case 32: bcomp = OpalPciBus3Bits; break; default: - pr_err("%s: Number of subordinate busses %d" - " unsupported\n", - pci_name(pe->pbus->self), count); + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); /* Do an exact match only */ bcomp = OpalPciBusAll; } rid_end = pe->rid + (count << 8); } else { - parent = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else +#endif /* CONFIG_PCI_IOV */ + parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; @@ -774,6 +873,78 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) return 10; } +#ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct pci_dn *pdn = pci_get_pdn(dev); + int i; + struct resource *res, res2; + resource_size_t size; + u16 num_vfs; + + if (!dev->is_physfn) + return -EINVAL; + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = pdn->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * After doing so, there would be a "hole" in the /proc/iomem when + * offset is a positive value. It looks like the device return some + * mmio back to the system, which actually no one could use it. + */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #if 0 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { @@ -979,8 +1150,316 @@ static void pnv_pci_ioda_setup_PEs(void) } #ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int i; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + if (pdn->m64_wins[i] == IODA_INVALID_M64) + continue; + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0); + clear_bit(pdn->m64_wins[i], &phb->ioda.m64_bar_alloc); + pdn->m64_wins[i] = IODA_INVALID_M64; + } + + return 0; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + unsigned int win; + struct resource *res; + int i; + int64_t rc; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + /* Initialize the m64_wins to IODA_INVALID_M64 */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + pdn->m64_wins[i] = IODA_INVALID_M64; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + goto m64_failed; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + pdn->m64_wins[i] = win; + + /* Map the M64 here */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + pdn->m64_wins[i], + res->start, + 0, /* unused */ + resource_size(res)); + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", + win, rc); + goto m64_failed; + } + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1); + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", + win, rc); + goto m64_failed; + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev); + return -EBUSY; +} + +static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct iommu_table *tbl; + unsigned long addr; + int64_t rc; + + bus = dev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + tbl = pe->tce32_table; + addr = tbl->it_base; + + opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + pe->pe_number << 1, 1, __pa(addr), + 0, 0x1000); + + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + (pe->pe_number << 1) + 1, + pe->tce_bypass_base, + 0); + if (rc) + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + + iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); + free_pages(addr, get_order(TCE32_TABLE_SIZE)); + pe->tce32_table = NULL; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + struct pci_dn *pdn; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + + if (!pdev->is_physfn) + return; + + pdn = pci_get_pdn(pdev); + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_dma_pe(pdev, pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(phb, pe->pe_number); + } +} + +void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + struct pci_sriov *iov; + u16 num_vfs; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + iov = pdev->sriov; + num_vfs = pdn->num_vfs; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev); + + if (phb->type == PNV_PHB_IODA2) { + pnv_pci_vf_resource_shift(pdev, -pdn->offset); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev); + + /* Release PE numbers */ + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + } +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe); +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pci_dn *pdn; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (!pdev->is_physfn) + return; + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pe_num = pdn->offset + vf_index; + + pe = &phb->ioda.pe_array[pe_num]; + pe->pe_number = pe_num; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->tce32_seg = -1; + pe->mve_number = -1; + pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | + pci_iov_virtfn_devfn(pdev, vf_index); + + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", + hose->global_number, pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + if (pe_num) + pnv_ioda_free_pe(phb, pe_num); + pe->pdev = NULL; + continue; + } + + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), + GFP_KERNEL, hose->node); + pe->tce32_table->data = pe; + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } +} + +int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int ret; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (phb->type == PNV_PHB_IODA2) { + /* Calculate available PE for required VFs */ + mutex_lock(&phb->ioda.pe_alloc_mutex); + pdn->offset = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (pdn->offset >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + pdn->offset = 0; + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->num_vfs = num_vfs; + mutex_unlock(&phb->ioda.pe_alloc_mutex); + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + if (ret) + goto m64_failed; + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +m64_failed: + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + + return ret; +} + int pcibios_sriov_disable(struct pci_dev *pdev) { + pnv_pci_sriov_disable(pdev); + /* Release PCI data */ remove_dev_pci_data(pdev); return 0; @@ -990,6 +1469,8 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); + + pnv_pci_sriov_enable(pdev, num_vfs); return 0; } #endif /* CONFIG_PCI_IOV */ @@ -1186,9 +1667,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, int64_t rc; void *addr; - /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ -#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) - /* XXX FIXME: Handle 64-bit only DMA devices */ /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ /* XXX FIXME: Allocate multi-level tables on PHB3 */ @@ -1251,12 +1729,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - else + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } return; fail: @@ -1383,12 +1868,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - if (pe->pdev) + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - else + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) @@ -2068,6 +2560,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; + mutex_init(&phb->ioda.pe_alloc_mutex); /* Detect specific models for error handling */ if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) @@ -2127,6 +2620,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); + mutex_init(&phb->ioda.pe_list_mutex); /* Calculate how many 32-bit TCE segments we have */ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 946aa3d62c3c..02badcef5cea 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -714,6 +714,24 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; +#ifdef CONFIG_PCI_IOV + struct pnv_ioda_pe *pe; + struct pci_dn *pdn; + + /* Fix the VF pdn PE number */ + if (pdev->is_virtfn) { + pdn = pci_get_pdn(pdev); + WARN_ON(pdn->pe_number != IODA_INVALID_PE); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (pe->rid == ((pdev->bus->number << 8) | + (pdev->devfn & 0xff))) { + pdn->pe_number = pe->pe_number; + pe->pdev = pdev; + break; + } + } + } +#endif /* CONFIG_PCI_IOV */ /* If we have no phb structure, try to setup a fallback based on * the device-tree (RTAS PCI for example) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 84280474e18f..070ee888fc95 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -23,6 +23,7 @@ enum pnv_phb_model { #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ +#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -34,6 +35,9 @@ struct pnv_ioda_pe { * entire bus (& children). In the former case, pdev * is populated, in the later case, pbus is. */ +#ifdef CONFIG_PCI_IOV + struct pci_dev *parent_dev; +#endif struct pci_dev *pdev; struct pci_bus *pbus; @@ -145,6 +149,8 @@ struct pnv_phb { /* PE allocation bitmap */ unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; /* M32 & IO segment maps */ unsigned int *m32_segmap; @@ -159,6 +165,7 @@ struct pnv_phb { * on the sequence of creation */ struct list_head pe_list; + struct mutex pe_list_mutex; /* Reverse map of PEs, will have to extend if * we are to support more than 256 PEs, indexed -- cgit v1.2.3