diff options
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/Kconfig | 21 | ||||
-rw-r--r-- | drivers/pci/Makefile | 7 | ||||
-rw-r--r-- | drivers/pci/bus.c | 82 | ||||
-rw-r--r-- | drivers/pci/hotplug/ibmphp_ebda.c | 6 | ||||
-rw-r--r-- | drivers/pci/hotplug/ibmphp_hpc.c | 4 | ||||
-rw-r--r-- | drivers/pci/msi.c | 14 | ||||
-rw-r--r-- | drivers/pci/msi.h | 4 | ||||
-rw-r--r-- | drivers/pci/pci-sysfs.c | 23 | ||||
-rw-r--r-- | drivers/pci/pci.c | 91 | ||||
-rw-r--r-- | drivers/pci/pci.h | 10 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.c | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.h | 3 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_acpi.c | 34 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_core.c | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv_acpi.c | 2 | ||||
-rw-r--r-- | drivers/pci/probe.c | 4 | ||||
-rw-r--r-- | drivers/pci/proc.c | 7 | ||||
-rw-r--r-- | drivers/pci/quirks.c | 49 | ||||
-rw-r--r-- | drivers/pci/setup-res.c | 2 | ||||
-rw-r--r-- | drivers/pci/xen-pcifront.c | 1148 |
20 files changed, 1479 insertions, 36 deletions
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34ef70d562b2..5b1630e4e9e3 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -40,6 +40,27 @@ config PCI_STUB When in doubt, say N. +config XEN_PCIDEV_FRONTEND + tristate "Xen PCI Frontend" + depends on PCI && X86 && XEN + select HOTPLUG + select PCI_XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend debugging" + depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG + help + Say Y here if you want the Xen PCI frontend to produce a bunch of debug + messages to the system log. Select this if you are having a + problem with Xen PCI frontend support and want to see more of what is + going on. + + When in doubt, say N. + config HT_IRQ bool "Interrupts on hypertransport devices" default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index dc1aa0922868..98e6fdf34d30 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o obj-$(CONFIG_MN10300) += setup-bus.o obj-$(CONFIG_MICROBLAZE) += setup-bus.o +obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o # # ACPI Related PCI FW Functions @@ -65,6 +66,6 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o -ifeq ($(CONFIG_PCI_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o + +ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 7f0af0e9b826..003170ea2e39 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -64,6 +64,77 @@ void pci_bus_remove_resources(struct pci_bus *bus) } } +static bool pci_bus_resource_better(struct resource *res1, bool pos1, + struct resource *res2, bool pos2) +{ + /* If exactly one is positive decode, always prefer that one */ + if (pos1 != pos2) + return pos1 ? true : false; + + /* Prefer the one that contains the highest address */ + if (res1->end != res2->end) + return (res1->end > res2->end) ? true : false; + + /* Otherwise, prefer the one with highest "center of gravity" */ + if (res1->start != res2->start) + return (res1->start > res2->start) ? true : false; + + /* Otherwise, choose one arbitrarily (but consistently) */ + return (res1 > res2) ? true : false; +} + +static bool pci_bus_resource_positive(struct pci_bus *bus, struct resource *res) +{ + struct pci_bus_resource *bus_res; + + /* + * This relies on the fact that pci_bus.resource[] refers to P2P or + * CardBus bridge base/limit registers, which are always positively + * decoded. The pci_bus.resources list contains host bridge or + * subtractively decoded resources. + */ + list_for_each_entry(bus_res, &bus->resources, list) { + if (bus_res->res == res) + return (bus_res->flags & PCI_SUBTRACTIVE_DECODE) ? + false : true; + } + return true; +} + +/* + * Find the next-best bus resource after the cursor "res". If the cursor is + * NULL, return the best resource. "Best" means that we prefer positive + * decode regions over subtractive decode, then those at higher addresses. + */ +static struct resource *pci_bus_find_resource_prev(struct pci_bus *bus, + unsigned int type, + struct resource *res) +{ + bool res_pos, r_pos, prev_pos = false; + struct resource *r, *prev = NULL; + int i; + + res_pos = pci_bus_resource_positive(bus, res); + pci_bus_for_each_resource(bus, r, i) { + if (!r) + continue; + + if ((r->flags & IORESOURCE_TYPE_BITS) != type) + continue; + + r_pos = pci_bus_resource_positive(bus, r); + if (!res || pci_bus_resource_better(res, res_pos, r, r_pos)) { + if (!prev || pci_bus_resource_better(r, r_pos, + prev, prev_pos)) { + prev = r; + prev_pos = r_pos; + } + } + } + + return prev; +} + /** * pci_bus_alloc_resource - allocate a resource from a parent bus * @bus: PCI bus @@ -89,9 +160,10 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t), void *alignf_data) { - int i, ret = -ENOMEM; + int ret = -ENOMEM; struct resource *r; resource_size_t max = -1; + unsigned int type = res->flags & IORESOURCE_TYPE_BITS; type_mask |= IORESOURCE_IO | IORESOURCE_MEM; @@ -99,10 +171,9 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, if (!(res->flags & IORESOURCE_MEM_64)) max = PCIBIOS_MAX_MEM_32; - pci_bus_for_each_resource(bus, r, i) { - if (!r) - continue; - + /* Look for space at highest addresses first */ + r = pci_bus_find_resource_prev(bus, type, NULL); + for ( ; r; r = pci_bus_find_resource_prev(bus, type, r)) { /* type_mask must match */ if ((res->flags ^ r->flags) & type_mask) continue; @@ -299,6 +370,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), } up_read(&pci_bus_sem); } +EXPORT_SYMBOL_GPL(pci_walk_bus); EXPORT_SYMBOL(pci_bus_alloc_resource); EXPORT_SYMBOL_GPL(pci_bus_add_device); diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 5becbdee4027..2850e64dedae 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -276,6 +276,12 @@ int __init ibmphp_access_ebda (void) for (;;) { offset = next_offset; + + /* Make sure what we read is still in the mapped section */ + if (WARN(offset > (ebda_sz * 1024 - 4), + "ibmphp_ebda: next read is beyond ebda_sz\n")) + break; + next_offset = readw (io_mem + offset); /* offset of next blk */ offset += 2; diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index 1aaf3f32d3cd..f59ed30512b5 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -133,8 +133,8 @@ void __init ibmphp_hpc_initvars (void) debug ("%s - Entry\n", __func__); mutex_init(&sem_hpcaccess); - init_MUTEX (&semOperations); - init_MUTEX_LOCKED (&sem_exit); + sema_init(&semOperations, 1); + sema_init(&sem_exit, 0); to_debug = 0; debug ("%s - Exit\n", __func__); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5fcf5aec680f..7c24dcef2989 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_setup_msi_irqs -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +# define arch_setup_msi_irqs default_setup_msi_irqs +# define HAVE_DEFAULT_MSI_SETUP_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS +int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; @@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_teardown_msi_irqs -void arch_teardown_msi_irqs(struct pci_dev *dev) +# define arch_teardown_msi_irqs default_teardown_msi_irqs +# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS +void default_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index de27c1cb5a2b..feff3bee6fe5 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -22,8 +22,8 @@ #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) -#define msix_table_offset_reg(base) (base + 0x04) -#define msix_pba_offset_reg(base) (base + 0x08) +#define msix_table_offset_reg(base) (base + PCI_MSIX_TABLE) +#define msix_pba_offset_reg(base) (base + PCI_MSIX_PBA) #define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) #define multi_msix_capable(control) msix_table_size((control)) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index b5a7d9bfcb24..63d5042f2079 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -705,17 +705,21 @@ void pci_remove_legacy_files(struct pci_bus *b) #ifdef HAVE_PCI_MMAP -int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma, + enum pci_mmap_api mmap_api) { - unsigned long nr, start, size; + unsigned long nr, start, size, pci_start; + if (pci_resource_len(pdev, resno) == 0) + return 0; nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; start = vma->vm_pgoff; size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; - if (start < size && size - start >= nr) + pci_start = (mmap_api == PCI_MMAP_PROCFS) ? + pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; + if (start >= pci_start && start < pci_start + size && + start + nr <= pci_start + size) return 1; - WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", - current->comm, start, start+nr, pci_name(pdev), resno, size); return 0; } @@ -745,8 +749,15 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, if (i >= PCI_ROM_RESOURCE) return -ENODEV; - if (!pci_mmap_fits(pdev, i, vma)) + if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) { + WARN(1, "process \"%s\" tried to map 0x%08lx bytes " + "at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n", + current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff, + pci_name(pdev), i, + (u64)pci_resource_start(pdev, i), + (u64)pci_resource_len(pdev, i)); return -EINVAL; + } /* pci_mmap_page_range() expects the same kind of entry as coming * from /proc/bus/pci/ which is a "user visible" value. If this is diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7fa3cbd742c5..710c8a29be0d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -38,6 +38,19 @@ EXPORT_SYMBOL(pci_pci_problems); unsigned int pci_pm_d3_delay; +static void pci_pme_list_scan(struct work_struct *work); + +static LIST_HEAD(pci_pme_list); +static DEFINE_MUTEX(pci_pme_list_mutex); +static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan); + +struct pci_pme_device { + struct list_head list; + struct pci_dev *dev; +}; + +#define PME_TIMEOUT 1000 /* How long between PME checks */ + static void pci_dev_d3_sleep(struct pci_dev *dev) { unsigned int delay = dev->d3_delay; @@ -994,6 +1007,18 @@ static int __pci_enable_device_flags(struct pci_dev *dev, int err; int i, bars = 0; + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). + */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } + if (atomic_add_return(1, &dev->enable_cnt) > 1) return 0; /* already enabled */ @@ -1331,6 +1356,32 @@ bool pci_pme_capable(struct pci_dev *dev, pci_power_t state) return !!(dev->pme_support & (1 << state)); } +static void pci_pme_list_scan(struct work_struct *work) +{ + struct pci_pme_device *pme_dev; + + mutex_lock(&pci_pme_list_mutex); + if (!list_empty(&pci_pme_list)) { + list_for_each_entry(pme_dev, &pci_pme_list, list) + pci_pme_wakeup(pme_dev->dev, NULL); + schedule_delayed_work(&pci_pme_work, msecs_to_jiffies(PME_TIMEOUT)); + } + mutex_unlock(&pci_pme_list_mutex); +} + +/** + * pci_external_pme - is a device an external PCI PME source? + * @dev: PCI device to check + * + */ + +static bool pci_external_pme(struct pci_dev *dev) +{ + if (pci_is_pcie(dev) || dev->bus->number == 0) + return false; + return true; +} + /** * pci_pme_active - enable or disable PCI device's PME# function * @dev: PCI device to handle. @@ -1354,6 +1405,44 @@ void pci_pme_active(struct pci_dev *dev, bool enable) pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); + /* PCI (as opposed to PCIe) PME requires that the device have + its PME# line hooked up correctly. Not all hardware vendors + do this, so the PME never gets delivered and the device + remains asleep. The easiest way around this is to + periodically walk the list of suspended devices and check + whether any have their PME flag set. The assumption is that + we'll wake up often enough anyway that this won't be a huge + hit, and the power savings from the devices will still be a + win. */ + + if (pci_external_pme(dev)) { + struct pci_pme_device *pme_dev; + if (enable) { + pme_dev = kmalloc(sizeof(struct pci_pme_device), + GFP_KERNEL); + if (!pme_dev) + goto out; + pme_dev->dev = dev; + mutex_lock(&pci_pme_list_mutex); + list_add(&pme_dev->list, &pci_pme_list); + if (list_is_singular(&pci_pme_list)) + schedule_delayed_work(&pci_pme_work, + msecs_to_jiffies(PME_TIMEOUT)); + mutex_unlock(&pci_pme_list_mutex); + } else { + mutex_lock(&pci_pme_list_mutex); + list_for_each_entry(pme_dev, &pci_pme_list, list) { + if (pme_dev->dev == dev) { + list_del(&pme_dev->list); + kfree(pme_dev); + break; + } + } + mutex_unlock(&pci_pme_list_mutex); + } + } + +out: dev_printk(KERN_DEBUG, &dev->dev, "PME# %s\n", enable ? "enabled" : "disabled"); } @@ -2689,7 +2778,7 @@ int pcie_get_readrq(struct pci_dev *dev) ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); if (!ret) - ret = 128 << ((ctl & PCI_EXP_DEVCTL_READRQ) >> 12); + ret = 128 << ((ctl & PCI_EXP_DEVCTL_READRQ) >> 12); return ret; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6beb11b617a9..7d33f6673868 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -22,8 +22,13 @@ extern void pci_remove_firmware_label_files(struct pci_dev *pdev); #endif extern void pci_cleanup_rom(struct pci_dev *dev); #ifdef HAVE_PCI_MMAP +enum pci_mmap_api { + PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */ + PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */ +}; extern int pci_mmap_fits(struct pci_dev *pdev, int resno, - struct vm_area_struct *vma); + struct vm_area_struct *vmai, + enum pci_mmap_api mmap_api); #endif int pci_probe_reset_function(struct pci_dev *dev); @@ -63,11 +68,8 @@ struct pci_platform_pm_ops { extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state); extern void pci_disable_enabled_device(struct pci_dev *dev); -extern bool pci_check_pme_status(struct pci_dev *dev); extern int pci_finish_runtime_suspend(struct pci_dev *dev); -extern void pci_wakeup_event(struct pci_dev *dev); extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); -extern void pci_pme_wakeup_bus(struct pci_bus *bus); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index f409948e1a9b..2b2b6508efde 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -416,7 +416,7 @@ static void aer_error_resume(struct pci_dev *dev) */ static int __init aer_service_init(void) { - if (!pci_aer_available()) + if (!pci_aer_available() || aer_acpi_firmware_first()) return -ENXIO; return pcie_port_service_register(&aerdriver); } diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 80c11d131499..9656e3060412 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -132,6 +132,7 @@ static inline int aer_osc_setup(struct pcie_device *pciedev) #ifdef CONFIG_ACPI_APEI extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); +extern bool aer_acpi_firmware_first(void); #else static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) { @@ -139,6 +140,8 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) return pci_dev->__aer_firmware_first; return 0; } + +static inline bool aer_acpi_firmware_first(void) { return false; } #endif static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev, diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index 2bb9b8972211..275bf158ffa7 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -93,4 +93,38 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev) aer_set_firmware_first(dev); return dev->__aer_firmware_first; } + +static bool aer_firmware_first; + +static int aer_hest_parse_aff(struct acpi_hest_header *hest_hdr, void *data) +{ + struct acpi_hest_aer_common *p; + + if (aer_firmware_first) + return 0; + + switch (hest_hdr->type) { + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: + p = (struct acpi_hest_aer_common *)(hest_hdr + 1); + aer_firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + default: + return 0; + } +} + +/** + * aer_acpi_firmware_first - Check if APEI should control AER. + */ +bool aer_acpi_firmware_first(void) +{ + static bool parsed = false; + + if (!parsed) { + apei_hest_parse(aer_hest_parse_aff, NULL); + parsed = true; + } + return aer_firmware_first; +} #endif diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 29e268fadf14..43421fbe080a 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -754,7 +754,7 @@ void aer_isr(struct work_struct *work) { struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); struct pcie_device *p_device = rpc->rpd; - struct aer_err_source e_src; + struct aer_err_source uninitialized_var(e_src); mutex_lock(&rpc->rpc_mutex); while (get_e_source(rpc, &e_src)) diff --git a/drivers/pci/pcie/portdrv_acpi.c b/drivers/pci/pcie/portdrv_acpi.c index b7c4cb1ccb23..5982b6a63b89 100644 --- a/drivers/pci/pcie/portdrv_acpi.c +++ b/drivers/pci/pcie/portdrv_acpi.c @@ -49,7 +49,7 @@ int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask) | OSC_PCI_EXPRESS_PME_CONTROL; if (pci_aer_available()) { - if (pcie_aer_get_firmware_first(port)) + if (aer_acpi_firmware_first()) dev_dbg(&port->dev, "PCIe errors handled by BIOS.\n"); else flags |= OSC_PCI_EXPRESS_AER_CONTROL; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 12625d90f8b5..c84900da3c59 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -961,8 +961,8 @@ int pci_setup_device(struct pci_dev *dev) dev->class = class; class >>= 8; - dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n", - dev->vendor, dev->device, class, dev->hdr_type); + dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %d class %#08x\n", + dev->vendor, dev->device, dev->hdr_type, class); /* need to have dev->class ready */ dev->cfg_size = pci_cfg_space_size(dev); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 01f0306525a5..27911b55c2a5 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -10,7 +10,6 @@ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/smp_lock.h> #include <linux/capability.h> #include <asm/uaccess.h> #include <asm/byteorder.h> @@ -212,8 +211,6 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, #endif /* HAVE_PCI_MMAP */ int ret = 0; - lock_kernel(); - switch (cmd) { case PCIIOC_CONTROLLER: ret = pci_domain_nr(dev->bus); @@ -242,7 +239,6 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, break; }; - unlock_kernel(); return ret; } @@ -260,7 +256,7 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) /* Make sure the caller is mapping a real resource for this device */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (pci_mmap_fits(dev, i, vma)) + if (pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS)) break; } @@ -306,6 +302,7 @@ static const struct file_operations proc_bus_pci_operations = { .read = proc_bus_pci_read, .write = proc_bus_pci_write, .unlocked_ioctl = proc_bus_pci_ioctl, + .compat_ioctl = proc_bus_pci_ioctl, #ifdef HAVE_PCI_MMAP .open = proc_bus_pci_open, .release = proc_bus_pci_release, diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index cc96c7142dac..6f9350cabbd5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, quirk_unhide_mch_dev6); +#ifdef CONFIG_TILE +/* + * The Tilera TILEmpower platform needs to set the link speed + * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed + * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe + * capability register of the PEX8624 PCIe switch. The switch + * supports link speed auto negotiation, but falsely sets + * the link speed to 5GT/s. + */ +static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev) +{ + if (tile_plx_gen1) { + pci_write_config_dword(dev, 0x98, 0x1); + mdelay(50); + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1); +#endif /* CONFIG_TILE */ #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. We cannot easily rely on setting @@ -2297,6 +2315,37 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15, nvenet_msi_disable); +/* + * Some versions of the MCP55 bridge from nvidia have a legacy irq routing + * config register. This register controls the routing of legacy interrupts + * from devices that route through the MCP55. If this register is misprogramed + * interrupts are only sent to the bsp, unlike conventional systems where the + * irq is broadxast to all online cpus. Not having this register set + * properly prevents kdump from booting up properly, so lets make sure that + * we have it set correctly. + * Note this is an undocumented register. + */ +static void __devinit nvbridge_check_legacy_irq_routing(struct pci_dev *dev) +{ + u32 cfg; + + pci_read_config_dword(dev, 0x74, &cfg); + + if (cfg & ((1 << 2) | (1 << 15))) { + printk(KERN_INFO "Rewriting irq routing register on MCP55\n"); + cfg &= ~((1 << 2) | (1 << 15)); + pci_write_config_dword(dev, 0x74, cfg); + } +} + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, + PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V0, + nvbridge_check_legacy_irq_routing); + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, + PCI_DEVICE_ID_NVIDIA_MCP55_BRIDGE_V4, + nvbridge_check_legacy_irq_routing); + static int __devinit ht_check_msi_mapping(struct pci_dev *dev) { int pos, ttl = 48; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 2aaa13150de3..bc0e6eea0fff 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -85,7 +85,7 @@ void pci_update_resource(struct pci_dev *dev, int resno) } } res->flags &= ~IORESOURCE_UNSET; - dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx]\n", + dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx])\n", resno, res, (unsigned long long)region.start, (unsigned long long)region.end); } diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c new file mode 100644 index 000000000000..3a5a6fcc0ead --- /dev/null +++ b/drivers/pci/xen-pcifront.c @@ -0,0 +1,1148 @@ +/* + * Xen PCI Frontend. + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/page.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <xen/interface/io/pciif.h> +#include <asm/xen/pci.h> +#include <linux/interrupt.h> +#include <asm/atomic.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> +#include <linux/time.h> + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) + +struct pci_bus_entry { + struct list_head list; + struct pci_bus *bus; +}; + +#define _PDEVB_op_active (0) +#define PDEVB_op_active (1 << (_PDEVB_op_active)) + +struct pcifront_device { + struct xenbus_device *xdev; + struct list_head root_buses; + + int evtchn; + int gnt_ref; + + int irq; + + /* Lock this when doing any operations in sh_info */ + spinlock_t sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + struct work_struct op_work; + unsigned long flags; + +}; + +struct pcifront_sd { + int domain; + struct pcifront_device *pdev; +}; + +static inline struct pcifront_device * +pcifront_get_pdev(struct pcifront_sd *sd) +{ + return sd->pdev; +} + +static inline void pcifront_init_sd(struct pcifront_sd *sd, + unsigned int domain, unsigned int bus, + struct pcifront_device *pdev) +{ + sd->domain = domain; + sd->pdev = pdev; +} + +static DEFINE_SPINLOCK(pcifront_dev_lock); +static struct pcifront_device *pcifront_dev; + +static int verbose_request; +module_param(verbose_request, int, 0644); + +static int errno_to_pcibios_err(int errno) +{ + switch (errno) { + case XEN_PCI_ERR_success: + return PCIBIOS_SUCCESSFUL; + + case XEN_PCI_ERR_dev_not_found: + return PCIBIOS_DEVICE_NOT_FOUND; + + case XEN_PCI_ERR_invalid_offset: + case XEN_PCI_ERR_op_failed: + return PCIBIOS_BAD_REGISTER_NUMBER; + + case XEN_PCI_ERR_not_implemented: + return PCIBIOS_FUNC_NOT_SUPPORTED; + + case XEN_PCI_ERR_access_denied: + return PCIBIOS_SET_FAILED; + } + return errno; +} + +static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) +{ + if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) { + dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n"); + schedule_work(&pdev->op_work); + } +} + +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + unsigned long irq_flags; + evtchn_port_t port = pdev->evtchn; + unsigned irq = pdev->irq; + s64 ns, ns_timeout; + struct timeval tv; + + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + /* + * We set a poll timeout of 3 seconds but give up on return after + * 2 seconds. It is better to time out too late rather than too early + * (in the latter case we end up continually re-executing poll() with a + * timeout in the past). 1s difference gives plenty of slack for error. + */ + do_gettimeofday(&tv); + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; + + xen_clear_irq_pending(irq); + + while (test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags)) { + xen_poll_irq_timeout(irq, jiffies + 3*HZ); + xen_clear_irq_pending(irq); + do_gettimeofday(&tv); + ns = timeval_to_ns(&tv); + if (ns > ns_timeout) { + dev_err(&pdev->xdev->dev, + "pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + /* + * We might lose backend service request since we + * reuse same evtchn with pci_conf backend response. So re-schedule + * aer pcifront service. + */ + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&pdev->sh_info->flags)) { + dev_err(&pdev->xdev->dev, + "schedule aer pcifront service\n"); + schedule_pcifront_aer_op(pdev); + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; +out: + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); + return err; +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + int err = 0; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where, size); + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (verbose_request) + dev_info(&pdev->xdev->dev, "read got back value %x\n", + op.value); + + *val = op.value; + } else if (err == -ENODEV) { + /* No device here, pretend that it just returned 0 */ + err = 0; + *val = 0; + } + + return errno_to_pcibios_err(err); +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + .value = val, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "write dev=%04x:%02x:%02x.%01x - " + "offset %x size %d val %x\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + + return errno_to_pcibios_err(do_pci_op(pdev, &op)); +} + +struct pci_ops pcifront_bus_ops = { + .read = pcifront_bus_read, + .write = pcifront_bus_write, +}; + +#ifdef CONFIG_PCI_MSI +static int pci_frontend_enable_msix(struct pci_dev *dev, + int **vector, int nvec) +{ + int err; + int i; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + .value = nvec, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + struct msi_desc *entry; + + if (nvec > SH_INFO_MAX_VEC) { + dev_err(&dev->dev, "too much vector for pci frontend: %x." + " Increase SH_INFO_MAX_VEC.\n", nvec); + return -EINVAL; + } + + i = 0; + list_for_each_entry(entry, &dev->msi_list, list) { + op.msix_entries[i].entry = entry->msi_attrib.entry_nr; + /* Vector is useless at this point. */ + op.msix_entries[i].vector = -1; + i++; + } + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (likely(!op.value)) { + /* we get the result */ + for (i = 0; i < nvec; i++) + *(*vector+i) = op.msix_entries[i].vector; + return 0; + } else { + printk(KERN_DEBUG "enable msix get value %x\n", + op.value); + return op.value; + } + } else { + dev_err(&dev->dev, "enable msix get err %x\n", err); + return err; + } +} + +static void pci_frontend_disable_msix(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + + /* What should do for error ? */ + if (err) + dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); +} + +static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (likely(!err)) { + *(*vector) = op.value; + } else { + dev_err(&dev->dev, "pci frontend enable msi failed for dev " + "%x:%x\n", op.bus, op.devfn); + err = -EINVAL; + } + return err; +} + +static void pci_frontend_disable_msi(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (err == XEN_PCI_ERR_dev_not_found) { + /* XXX No response from backend, what shall we do? */ + printk(KERN_DEBUG "get no response from backend for disable MSI\n"); + return; + } + if (err) + /* how can pciback notify us fail? */ + printk(KERN_DEBUG "get fake response frombackend\n"); +} + +static struct xen_pci_frontend_ops pci_frontend_ops = { + .enable_msi = pci_frontend_enable_msi, + .disable_msi = pci_frontend_disable_msi, + .enable_msix = pci_frontend_enable_msix, + .disable_msix = pci_frontend_disable_msix, +}; + +static void pci_frontend_registrar(int enable) +{ + if (enable) + xen_pci_frontend = &pci_frontend_ops; + else + xen_pci_frontend = NULL; +}; +#else +static inline void pci_frontend_registrar(int enable) { }; +#endif /* CONFIG_PCI_MSI */ + +/* Claim resources for the PCI frontend as-is, backend won't allow changes */ +static int pcifront_claim_resource(struct pci_dev *dev, void *data) +{ + struct pcifront_device *pdev = data; + int i; + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + + if (!r->parent && r->start && r->flags) { + dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", + pci_name(dev), i); + if (pci_claim_resource(dev, i)) { + dev_err(&pdev->xdev->dev, "Could not claim " + "resource %s/%d! Device offline. Try " + "giving less than 4GB to domain.\n", + pci_name(dev), i); + } + } + } + + return 0; +} + +static int __devinit pcifront_scan_bus(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus, + struct pci_bus *b) +{ + struct pci_dev *d; + unsigned int devfn; + + /* Scan the bus for functions and add. + * We omit handling of PCI bridge attachment because pciback prevents + * bridges from being exported. + */ + for (devfn = 0; devfn < 0x100; devfn++) { + d = pci_get_slot(b, devfn); + if (d) { + /* Device is already known. */ + pci_dev_put(d); + continue; + } + + d = pci_scan_single_device(b, devfn); + if (d) + dev_info(&pdev->xdev->dev, "New device on " + "%04x:%02x:%02x.%02x found.\n", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + } + + return 0; +} + +static int __devinit pcifront_scan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + struct pci_bus *b; + struct pcifront_sd *sd = NULL; + struct pci_bus_entry *bus_entry = NULL; + int err = 0; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + err = -EINVAL; + goto err_out; + } +#endif + + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", + domain, bus); + + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!bus_entry || !sd) { + err = -ENOMEM; + goto err_out; + } + pcifront_init_sd(sd, domain, bus, pdev); + + b = pci_scan_bus_parented(&pdev->xdev->dev, bus, + &pcifront_bus_ops, sd); + if (!b) { + dev_err(&pdev->xdev->dev, + "Error creating PCI Frontend Bus!\n"); + err = -ENOMEM; + goto err_out; + } + + bus_entry->bus = b; + + list_add(&bus_entry->list, &pdev->root_buses); + + /* pci_scan_bus_parented skips devices which do not have a have + * devfn==0. The pcifront_scan_bus enumerates all devfn. */ + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; + +err_out: + kfree(bus_entry); + kfree(sd); + + return err; +} + +static int __devinit pcifront_rescan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + int err; + struct pci_bus *b; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + return -EINVAL; + } +#endif + + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", + domain, bus); + + b = pci_find_bus(domain, bus); + if (!b) + /* If the bus is unknown, create it. */ + return pcifront_scan_root(pdev, domain, bus); + + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; +} + +static void free_root_bus_devs(struct pci_bus *bus) +{ + struct pci_dev *dev; + + while (!list_empty(&bus->devices)) { + dev = container_of(bus->devices.next, struct pci_dev, + bus_list); + dev_dbg(&dev->dev, "removing device\n"); + pci_remove_bus_device(dev); + } +} + +static void pcifront_free_roots(struct pcifront_device *pdev) +{ + struct pci_bus_entry *bus_entry, *t; + + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); + + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { + list_del(&bus_entry->list); + + free_root_bus_devs(bus_entry->bus); + + kfree(bus_entry->bus->sysdata); + + device_unregister(bus_entry->bus->bridge); + pci_remove_bus(bus_entry->bus); + + kfree(bus_entry); + } +} + +static pci_ers_result_t pcifront_common_process(int cmd, + struct pcifront_device *pdev, + pci_channel_state_t state) +{ + pci_ers_result_t result; + struct pci_driver *pdrv; + int bus = pdev->sh_info->aer_op.bus; + int devfn = pdev->sh_info->aer_op.devfn; + struct pci_dev *pcidev; + int flag = 0; + + dev_dbg(&pdev->xdev->dev, + "pcifront AER process: cmd %x (bus:%x, devfn%x)", + cmd, bus, devfn); + result = PCI_ERS_RESULT_NONE; + + pcidev = pci_get_bus_and_slot(bus, devfn); + if (!pcidev || !pcidev->driver) { + dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n"); + if (pcidev) + pci_dev_put(pcidev); + return result; + } + pdrv = pcidev->driver; + + if (get_driver(&pdrv->driver)) { + if (pdrv->err_handler && pdrv->err_handler->error_detected) { + dev_dbg(&pcidev->dev, + "trying to call AER service\n"); + if (pcidev) { + flag = 1; + switch (cmd) { + case XEN_PCI_OP_aer_detected: + result = pdrv->err_handler-> + error_detected(pcidev, state); + break; + case XEN_PCI_OP_aer_mmio: + result = pdrv->err_handler-> + mmio_enabled(pcidev); + break; + case XEN_PCI_OP_aer_slotreset: + result = pdrv->err_handler-> + slot_reset(pcidev); + break; + case XEN_PCI_OP_aer_resume: + pdrv->err_handler->resume(pcidev); + break; + default: + dev_err(&pdev->xdev->dev, + "bad request in aer recovery " + "operation!\n"); + + } + } + } + put_driver(&pdrv->driver); + } + if (!flag) + result = PCI_ERS_RESULT_NONE; + + return result; +} + + +static void pcifront_do_aer(struct work_struct *data) +{ + struct pcifront_device *pdev = + container_of(data, struct pcifront_device, op_work); + int cmd = pdev->sh_info->aer_op.cmd; + pci_channel_state_t state = + (pci_channel_state_t)pdev->sh_info->aer_op.err; + + /*If a pci_conf op is in progress, + we have to wait until it is done before service aer op*/ + dev_dbg(&pdev->xdev->dev, + "pcifront service aer bus %x devfn %x\n", + pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); + + pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); + + /* Post the operation to the guest. */ + wmb(); + clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(pdev->evtchn); + + /*in case of we lost an aer request in four lines time_window*/ + smp_mb__before_clear_bit(); + clear_bit(_PDEVB_op_active, &pdev->flags); + smp_mb__after_clear_bit(); + + schedule_pcifront_aer_op(pdev); + +} + +static irqreturn_t pcifront_handler_aer(int irq, void *dev) +{ + struct pcifront_device *pdev = dev; + schedule_pcifront_aer_op(pdev); + return IRQ_HANDLED; +} +static int pcifront_connect(struct pcifront_device *pdev) +{ + int err = 0; + + spin_lock(&pcifront_dev_lock); + + if (!pcifront_dev) { + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); + pcifront_dev = pdev; + } else { + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); + err = -EEXIST; + } + + spin_unlock(&pcifront_dev_lock); + + return err; +} + +static void pcifront_disconnect(struct pcifront_device *pdev) +{ + spin_lock(&pcifront_dev_lock); + + if (pdev == pcifront_dev) { + dev_info(&pdev->xdev->dev, + "Disconnecting PCI Frontend Buses\n"); + pcifront_dev = NULL; + } + + spin_unlock(&pcifront_dev_lock); +} +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev; + + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + + pdev->sh_info = + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); + if (pdev->sh_info == NULL) { + kfree(pdev); + pdev = NULL; + goto out; + } + pdev->sh_info->flags = 0; + + /*Flag for registering PV AER handler*/ + set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags); + + dev_set_drvdata(&xdev->dev, pdev); + pdev->xdev = xdev; + + INIT_LIST_HEAD(&pdev->root_buses); + + spin_lock_init(&pdev->sh_info_lock); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + pdev->irq = -1; + + INIT_WORK(&pdev->op_work, pcifront_do_aer); + + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", + pdev, pdev->sh_info); +out: + return pdev; +} + +static void free_pdev(struct pcifront_device *pdev) +{ + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); + + pcifront_free_roots(pdev); + + /*For PCIE_AER error handling job*/ + flush_scheduled_work(); + + if (pdev->irq >= 0) + unbind_from_irqhandler(pdev->irq, pdev); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, + (unsigned long)pdev->sh_info); + else + free_page((unsigned long)pdev->sh_info); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + + kfree(pdev); +} + +static int pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) + goto out; + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer, + 0, "pcifront", pdev); + + if (err < 0) + return err; + + pdev->irq = err; + +do_publish: + err = xenbus_transaction_start(&trans); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction " + "for backend"); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + + dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); + +out: + return err; +} + +static int __devinit pcifront_try_connect(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + char str[64]; + unsigned int domain, bus; + + + /* Only connect once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + err = pcifront_connect(pdev); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error connecting PCI Frontend"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_scan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_scan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_try_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + enum xenbus_state prev_state; + + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state >= XenbusStateClosing) + goto out; + + if (prev_state == XenbusStateConnected) { + pcifront_free_roots(pdev); + pcifront_disconnect(pdev); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); + +out: + + return err; +} + +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + unsigned int domain, bus; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_rescan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_rescan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_detach_devices(struct pcifront_device *pdev) +{ + int err = 0; + int i, num_devs; + unsigned int domain, bus, slot, func; + struct pci_bus *pci_bus; + struct pci_dev *pci_dev; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateConnected) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI devices"); + goto out; + } + + /* Find devices being detached and remove them. */ + for (i = 0; i < num_devs; i++) { + int l, state; + l = snprintf(str, sizeof(str), "state-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", + &state); + if (err != 1) + state = XenbusStateUnknown; + + if (state != XenbusStateClosing) + continue; + + /* Remove device. */ + l = snprintf(str, sizeof(str), "vdev-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err != 4) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI device %d", i); + goto out; + } + + pci_bus = pci_find_bus(domain, bus); + if (!pci_bus) { + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", + domain, bus); + continue; + } + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); + if (!pci_dev) { + dev_dbg(&pdev->xdev->dev, + "Cannot get PCI device %04x:%02x:%02x.%02x\n", + domain, bus, slot, func); + continue; + } + pci_remove_bus_device(pci_dev); + pci_dev_put(pci_dev); + + dev_dbg(&pdev->xdev->dev, + "PCI device %04x:%02x:%02x.%02x removed.\n", + domain, bus, slot, func); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); + +out: + return err; +} + +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, + enum xenbus_state be_state) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + + switch (be_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + pcifront_try_connect(pdev); + break; + + case XenbusStateClosing: + dev_warn(&xdev->dev, "backend going away!\n"); + pcifront_try_disconnect(pdev); + break; + + case XenbusStateReconfiguring: + pcifront_detach_devices(pdev); + break; + + case XenbusStateReconfigured: + pcifront_attach_devices(pdev); + break; + } +} + +static int pcifront_xenbus_probe(struct xenbus_device *xdev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev = alloc_pdev(xdev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, + "Error allocating pcifront_device struct"); + goto out; + } + + err = pcifront_publish_info(pdev); + if (err) + free_pdev(pdev); + +out: + return err; +} + +static int pcifront_xenbus_remove(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + if (pdev) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_pcifront_driver = { + .name = "pcifront", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pcifront_xenbus_probe, + .remove = pcifront_xenbus_remove, + .otherend_changed = pcifront_backend_changed, +}; + +static int __init pcifront_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + pci_frontend_registrar(1 /* enable */); + + return xenbus_register_frontend(&xenbus_pcifront_driver); +} + +static void __exit pcifront_cleanup(void) +{ + xenbus_unregister_driver(&xenbus_pcifront_driver); + pci_frontend_registrar(0 /* disable */); +} +module_init(pcifront_init); +module_exit(pcifront_cleanup); + +MODULE_DESCRIPTION("Xen PCI passthrough frontend."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:pci"); |