diff options
Diffstat (limited to 'arch/powerpc/kernel/eeh_driver.c')
-rw-r--r-- | arch/powerpc/kernel/eeh_driver.c | 152 |
1 files changed, 122 insertions, 30 deletions
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 938742135ee0..fb6207d2c604 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -34,6 +34,11 @@ #include <asm/prom.h> #include <asm/rtas.h> +struct eeh_rmv_data { + struct list_head edev_list; + int removed; +}; + /** * eeh_pcid_name - Retrieve name of PCI device driver * @pdev: PCI device @@ -190,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_frozen; @@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; eeh_pcid_put(dev); return NULL; } @@ -231,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; driver = eeh_pcid_get(dev); @@ -271,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; @@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || + (!edev->in_error)) { eeh_pcid_put(dev); return NULL; } @@ -326,20 +333,23 @@ static void *eeh_report_resume(void *data, void *userdata) { struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + bool was_in_error; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); if (!driver) return NULL; + was_in_error = edev->in_error; + edev->in_error = false; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; @@ -365,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_perm_failure; @@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_add_virt_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!(edev->physfn)) { + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", + __func__, edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + return NULL; + } + + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + +#ifdef CONFIG_PPC_POWERNV + pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0); +#endif + return NULL; +} + static void *eeh_rmv_device(void *data, void *userdata) { struct pci_driver *driver; struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - int *removed = (int *)userdata; + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; + int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. @@ -416,10 +454,13 @@ static void *eeh_rmv_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); - if (driver->err_handler && + if (removed && + eeh_pe_passed(edev->pe)) + return NULL; + if (removed && + driver->err_handler && driver->err_handler->error_detected && - driver->err_handler->slot_reset && - driver->err_handler->resume) + driver->err_handler->slot_reset) return NULL; } @@ -428,11 +469,29 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_name(dev)); edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - (*removed)++; + if (removed) + (*removed)++; - pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(dev); - pci_unlock_rescan_remove(); + if (edev->physfn) { +#ifdef CONFIG_PPC_POWERNV + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); + edev->pdev = NULL; + + /* + * We have to set the VF PE number to invalid one, which is + * required to plug the VF successfully. + */ + pdn->pe_number = IODA_INVALID_PE; +#endif + if (rmv_data) + list_add(&edev->rmv_list, &rmv_data->edev_list); + } else { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); + } return NULL; } @@ -546,11 +605,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + struct eeh_rmv_data *rmv_data) { struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc, removed = 0; + int cnt, rc; + struct eeh_dev *edev; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -564,11 +625,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); + } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); } /* @@ -610,14 +676,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. */ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(bus); - } else if (frozen_bus && removed) { + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(bus); + } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(frozen_bus); + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -636,8 +710,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; + struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { @@ -692,7 +768,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus, NULL); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -744,7 +820,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL); + rc = eeh_reset_device(pe, NULL, &rmv_data); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -764,6 +840,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) goto hard_fail; } + /* + * For those hot removed VFs, we should add back them after PF get + * recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { + eeh_add_virt_device(edev, NULL); + list_del(&edev->rmv_list); + } + /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); @@ -803,11 +888,17 @@ perm_error: * the their PCI config any more. */ if (frozen_bus) { - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); - pci_unlock_rescan_remove(); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } } @@ -886,6 +977,7 @@ static void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); |