Diffstat (limited to 'drivers/pci')
-rw-r--r--  drivers/pci/access.c                |  18
-rw-r--r--  drivers/pci/bus.c                   |   6
-rw-r--r--  drivers/pci/dmar.c                  |   7
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c  |   7
-rw-r--r--  drivers/pci/hotplug/pcihp_slot.c    |  45
-rw-r--r--  drivers/pci/intel-iommu.c           | 240
-rw-r--r--  drivers/pci/iova.c                  |  12
-rw-r--r--  drivers/pci/pci-acpi.c              |   2
-rw-r--r--  drivers/pci/pci-sysfs.c             |  62
-rw-r--r--  drivers/pci/pci.c                   | 444
-rw-r--r--  drivers/pci/pci.h                   |   3
-rw-r--r--  drivers/pci/pcie/aer/aer_inject.c   |   2
-rw-r--r--  drivers/pci/pcie/aer/aerdrv.h       |   9
-rw-r--r--  drivers/pci/pcie/aspm.c             |  21
-rw-r--r--  drivers/pci/probe.c                 |  43
-rw-r--r--  drivers/pci/quirks.c                |  21
-rw-r--r--  drivers/pci/remove.c                |   2
-rw-r--r--  drivers/pci/setup-bus.c             | 125
18 files changed, 925 insertions, 144 deletions
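
The access.c hunks below change the pci_user_*_config_* accessors to return 0 or a negative errno instead of leaking positive PCIBIOS_* codes to callers. A minimal standalone sketch of the convention being adopted; the helper name is hypothetical and not part of the patch:

	/* Hypothetical illustration of the new return convention:
	 * collapse positive PCIBIOS_* codes (e.g.
	 * PCIBIOS_BAD_REGISTER_NUMBER) into a negative errno. */
	static int user_cfg_ret_to_errno(int ret)
	{
		if (ret > 0)		/* positive PCIBIOS_* error code */
			return -EINVAL;
		return ret;		/* 0 on success, or an errno such as -EIO */
	}

Callers such as pci_vpd_pci22_wait() can then test for ret < 0 uniformly, which is exactly what the hunks below switch to.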
diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 531bc697d800..fdaa42aac7c6 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -143,33 +143,41 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev) __remove_wait_queue(&pci_ucfg_wait, &wait); } +/* Returns 0 on success, negative values indicate error. */ #define PCI_USER_READ_CONFIG(size,type) \ int pci_user_read_config_##size \ (struct pci_dev *dev, int pos, type *val) \ { \ int ret = 0; \ u32 data = -1; \ - if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ + if (PCI_##size##_BAD) \ + return -EINVAL; \ raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->read(dev->bus, dev->devfn, \ pos, sizeof(type), &data); \ raw_spin_unlock_irq(&pci_lock); \ *val = (type)data; \ + if (ret > 0) \ + ret = -EINVAL; \ return ret; \ } +/* Returns 0 on success, negative values indicate error. */ #define PCI_USER_WRITE_CONFIG(size,type) \ int pci_user_write_config_##size \ (struct pci_dev *dev, int pos, type val) \ { \ int ret = -EIO; \ - if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ + if (PCI_##size##_BAD) \ + return -EINVAL; \ raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->write(dev->bus, dev->devfn, \ pos, sizeof(type), val); \ raw_spin_unlock_irq(&pci_lock); \ + if (ret > 0) \ + ret = -EINVAL; \ return ret; \ } @@ -197,6 +205,8 @@ struct pci_vpd_pci22 { * This code has to spin since there is no other notification from the PCI * hardware. Since the VPD is often implemented by serial attachment to an * EEPROM, it may take many milliseconds to complete. + * + * Returns 0 on success, negative values indicate error. */ static int pci_vpd_pci22_wait(struct pci_dev *dev) { @@ -212,7 +222,7 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev) for (;;) { ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR, &status); - if (ret) + if (ret < 0) return ret; if ((status & PCI_VPD_ADDR_F) == vpd->flag) { @@ -324,6 +334,8 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count vpd->busy = true; vpd->flag = 0; ret = pci_vpd_pci22_wait(dev); + if (ret < 0) + break; pos += sizeof(u32); } diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 69546e9213dd..1e2ad92a4752 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -163,12 +163,6 @@ int pci_bus_add_child(struct pci_bus *bus) bus->is_added = 1; - retval = device_create_file(&bus->dev, &dev_attr_cpuaffinity); - if (retval) - return retval; - - retval = device_create_file(&bus->dev, &dev_attr_cpulistaffinity); - /* Create legacy_io and legacy_mem files for this bus */ pci_create_legacy_files(bus); diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf92c4a..3dc9befa5aec 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. 
- */ + dmar = (struct acpi_table_dmar *) dmar_tbl; if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 2f67e9bc2f96..a70fa89f76fd 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -827,6 +827,13 @@ static int __ref enable_device(struct acpiphp_slot *slot) acpiphp_set_hpp_values(bus); acpiphp_set_acpi_region(slot); pci_enable_bridges(bus); + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Assume that newly added devices are powered on already. */ + if (!dev->is_added) + dev->current_state = PCI_D0; + } + pci_bus_add_devices(bus); list_for_each_entry(func, &slot->funcs, sibling) { diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 80b461c98557..749fdf070319 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -158,6 +158,47 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } +/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ +static int pci_set_payload(struct pci_dev *dev) +{ + int pos, ppos; + u16 pctl, psz; + u16 dctl, dsz, dcap, dmax; + struct pci_dev *parent; + + parent = dev->bus->self; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + + /* Read Device MaxPayload capability and setting */ + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); + pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); + dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; + dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); + + /* Read Parent MaxPayload setting */ + ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (!ppos) + return 0; + pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); + psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; + + /* If parent payload > device max payload -> error + * If parent payload > device payload -> set speed + * If parent payload <= device payload -> do nothing + */ + if (psz > dmax) + return -1; + else if (psz > dsz) { + dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, + (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + + (psz << 5)); + } + return 0; +} + void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -169,6 +210,10 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; + ret = pci_set_payload(dev); + if (ret) + dev_warn(&dev->dev, "could not set device max payload\n"); + memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); if (ret) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6af6b628175b..59f17acf7f68 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -47,6 +47,8 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE +#define IS_BRIDGE_HOST_DEVICE(pdev) \ + ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. 
Otherwise things are never going to work. */ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void); static int rwbf_quirk; /* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + +/* * 0: Present * 1-11: Reserved * 12-63: Context Ptr (12 - (haw-1)) @@ -338,6 +351,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -387,6 +403,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -689,23 +731,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if 
(!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); @@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); @@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. 
*/ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). 
*/ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address + 1); + rmrr->end_address); } #ifdef CONFIG_DMAR_FLOPPY_WA @@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev) if (likely(!iommu_identity_mapping)) return 0; + info = pdev->dev.archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); - list_for_each_entry(info, &si_domain->devices, link) - if (info->dev == pdev) - return 1; return 0; } @@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * Assume that they will -- if they turn out not to be, then we can * take them out of the 1:1 domain later. */ - if (!startup) - return pdev->dma_mask > DMA_BIT_MASK(32); + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. + */ + u64 dma_mask = pdev->dma_mask; + + if (pdev->dev.coherent_dma_mask && + pdev->dev.coherent_dma_mask < dma_mask) + dma_mask = pdev->dev.coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(&pdev->dev); + } return 1; } @@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { + /* Skip Host/PCI Bridge devices */ + if (IS_BRIDGE_HOST_DEVICE(pdev)) + continue; if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? 
"hardware" : "software", pci_name(pdev)); @@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return 0; } -static int __init init_dmars(int force_on) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; @@ -3118,7 +3263,17 @@ static int init_iommu_hw(void) if (iommu->qi) dmar_reenable_qi(iommu); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); @@ -3127,7 +3282,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_enable_translation(iommu); + if (iommu_enable_translation(iommu)) + return 1; iommu_disable_protect_mem_regions(iommu); } @@ -3194,7 +3350,10 @@ static void iommu_resume(void) unsigned long flag; if (init_iommu_hw()) { - WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); return; } @@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; - int force_on = 0; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void) init_no_remapping_devices(); - ret = init_dmars(force_on); + ret = init_dmars(); if (ret) { if (force_on) panic("tboot: Failed to initialize DMARs\n"); @@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); - /* No need to compare PCI domain; it has to be the same */ - if (info->bus == pdev->bus->number && + if (info->segment == pci_domain_nr(pdev->bus) && + info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); list_del(&info->global); @@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { + spin_lock_irqsave(&iommu->lock, tmp_flags); + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + spin_unlock_irqrestore(&iommu->lock, tmp_flags); + } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3720,7 +3882,7 @@ static phys_addr_t 
intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e599a475..c5c274ab5c5a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) curr = iovad->cached32_node; cached_iova = container_of(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) - iovad->cached32_node = rb_next(&free->node); + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } } /* Computes the padding size required, to make the diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7c3b18e78cee..d36f41ea8cbf 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) return PCI_D2; case ACPI_STATE_D3: return PCI_D3hot; + case ACPI_STATE_D3_COLD: + return PCI_D3cold; } return PCI_POWER_ERROR; } diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index f8deb3e380a2..7bcf12adced7 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -108,6 +108,40 @@ static ssize_t local_cpulist_show(struct device *dev, return len; } +/* + * PCI Bus Class Devices + */ +static ssize_t pci_bus_show_cpuaffinity(struct device *dev, + int type, + struct device_attribute *attr, + char *buf) +{ + int ret; + const struct cpumask *cpumask; + + cpumask = cpumask_of_pcibus(to_pci_bus(dev)); + ret = type ? 
+		cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+	buf[ret++] = '\n';
+	buf[ret] = '\0';
+	return ret;
+}
+
+static inline ssize_t pci_bus_show_cpumaskaffinity(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
+}
+
+static inline ssize_t pci_bus_show_cpulistaffinity(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
+}
+
 /* show resources */
 static ssize_t
 resource_show(struct device * dev, struct device_attribute *attr, char * buf)
@@ -318,6 +352,25 @@ remove_store(struct device *dev, struct device_attribute *dummy,
 		count = ret;
 	return count;
 }
+
+static ssize_t
+dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	unsigned long val;
+	struct pci_bus *bus = to_pci_bus(dev);
+
+	if (strict_strtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		mutex_lock(&pci_remove_rescan_mutex);
+		pci_rescan_bus(bus);
+		mutex_unlock(&pci_remove_rescan_mutex);
+	}
+	return count;
+}
+
 #endif

 struct device_attribute pci_dev_attrs[] = {
@@ -347,6 +400,15 @@ struct device_attribute pci_dev_attrs[] = {
 	__ATTR_NULL,
 };

+struct device_attribute pcibus_dev_attrs[] = {
+#ifdef CONFIG_HOTPLUG
+	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store),
+#endif
+	__ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL),
+	__ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL),
+	__ATTR_NULL,
+};
+
 static ssize_t
 boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2472e7177b4b..56098b3e17c0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
 		dev_err(&dev->dev, "buffer not found in %s\n", __func__);
 		return -ENOMEM;
 	}
-	cap = (u16 *)&save_state->data[0];
+	cap = (u16 *)&save_state->cap.data[0];

 	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
@@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!save_state || pos <= 0)
 		return;
-	cap = (u16 *)&save_state->data[0];
+	cap = (u16 *)&save_state->cap.data[0];

 	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
@@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev)
 		return -ENOMEM;
 	}

-	pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data);
+	pci_read_config_word(dev, pos + PCI_X_CMD,
+			     (u16 *)save_state->cap.data);

 	return 0;
 }
@@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
 	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
 	if (!save_state || pos <= 0)
 		return;
-	cap = (u16 *)&save_state->data[0];
+	cap = (u16 *)&save_state->cap.data[0];

 	pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
 }
@@ -975,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev)
 	dev->state_saved = false;
 }

+struct pci_saved_state {
+	u32 config_space[16];
+	struct pci_cap_saved_data cap[0];
+};
+
+/**
+ * pci_store_saved_state - Allocate and return an opaque struct containing
+ *			   the device saved state.
+ * @dev: PCI device that we're dealing with
+ *
+ * Return NULL if no state or error.
+ */ +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) +{ + struct pci_saved_state *state; + struct pci_cap_saved_state *tmp; + struct pci_cap_saved_data *cap; + struct hlist_node *pos; + size_t size; + + if (!dev->state_saved) + return NULL; + + size = sizeof(*state) + sizeof(struct pci_cap_saved_data); + + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; + + state = kzalloc(size, GFP_KERNEL); + if (!state) + return NULL; + + memcpy(state->config_space, dev->saved_config_space, + sizeof(state->config_space)); + + cap = state->cap; + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; + memcpy(cap, &tmp->cap, len); + cap = (struct pci_cap_saved_data *)((u8 *)cap + len); + } + /* Empty cap_save terminates list */ + + return state; +} +EXPORT_SYMBOL_GPL(pci_store_saved_state); + +/** + * pci_load_saved_state - Reload the provided save state into struct pci_dev. + * @dev: PCI device that we're dealing with + * @state: Saved state returned from pci_store_saved_state() + */ +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state) +{ + struct pci_cap_saved_data *cap; + + dev->state_saved = false; + + if (!state) + return 0; + + memcpy(dev->saved_config_space, state->config_space, + sizeof(state->config_space)); + + cap = state->cap; + while (cap->size) { + struct pci_cap_saved_state *tmp; + + tmp = pci_find_saved_cap(dev, cap->cap_nr); + if (!tmp || tmp->cap.size != cap->size) + return -EINVAL; + + memcpy(tmp->cap.data, cap->data, tmp->cap.size); + cap = (struct pci_cap_saved_data *)((u8 *)cap + + sizeof(struct pci_cap_saved_data) + cap->size); + } + + dev->state_saved = true; + return 0; +} +EXPORT_SYMBOL_GPL(pci_load_saved_state); + +/** + * pci_load_and_free_saved_state - Reload the save state pointed to by state, + * and free the memory allocated for it. + * @dev: PCI device that we're dealing with + * @state: Pointer to saved state returned from pci_store_saved_state() + */ +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state) +{ + int ret = pci_load_saved_state(dev, *state); + kfree(*state); + *state = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); + static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; @@ -1771,7 +1870,8 @@ static int pci_add_cap_save_buffer( if (!save_state) return -ENOMEM; - save_state->cap_nr = cap; + save_state->cap.cap_nr = cap; + save_state->cap.size = size; pci_add_saved_cap(dev, save_state); return 0; @@ -1834,6 +1934,300 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_enable_ido - enable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to enable + * + * Enable ID-based ordering on @dev. @type can contain the bits + * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate + * which types of transactions are allowed to be re-ordered. 
+ */ +void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl |= PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl |= PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_enable_ido); + +/** + * pci_disable_ido - disable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to disable + */ +void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl &= ~PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl &= ~PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ido); + +/** + * pci_enable_obff - enable optimized buffer flush/fill + * @dev: PCI device + * @type: type of signaling to use + * + * Try to enable @type OBFF signaling on @dev. It will try using WAKE# + * signaling if possible, falling back to message signaling only if + * WAKE# isn't supported. @type should indicate whether the PCIe link + * be brought out of L0s or L1 to send the message. It should be either + * %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_OBFF_SIGNAL_L0. + * + * If your device can benefit from receiving all messages, even at the + * power cost of bringing the link back up from a low power state, use + * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_OBFF_SIGNAL_L0 (the + * preferred type). + * + * RETURNS: + * Zero on success, appropriate error number on failure. + */ +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) +{ + int pos; + u32 cap; + u16 ctrl; + int ret; + + if (!pci_is_pcie(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_OBFF_MASK)) + return -ENOTSUPP; /* no OBFF support at all */ + + /* Make sure the topology supports OBFF as well */ + if (dev->bus) { + ret = pci_enable_obff(dev->bus->self, type); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (cap & PCI_EXP_OBFF_WAKE) + ctrl |= PCI_EXP_OBFF_WAKE_EN; + else { + switch (type) { + case PCI_EXP_OBFF_SIGNAL_L0: + if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_OBFF_MSGA_EN; + break; + case PCI_EXP_OBFF_SIGNAL_ALWAYS: + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + ctrl |= PCI_EXP_OBFF_MSGB_EN; + break; + default: + WARN(1, "bad OBFF signal type\n"); + return -ENOTSUPP; + } + } + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_obff); + +/** + * pci_disable_obff - disable optimized buffer flush/fill + * @dev: PCI device + * + * Disable OBFF on @dev. + */ +void pci_disable_obff(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_obff); + +/** + * pci_ltr_supported - check whether a device supports LTR + * @dev: PCI device + * + * RETURNS: + * True if @dev supports latency tolerance reporting, false otherwise. 
+ */ +bool pci_ltr_supported(struct pci_dev *dev) +{ + int pos; + u32 cap; + + if (!pci_is_pcie(dev)) + return false; + + pos = pci_pcie_cap(dev); + if (!pos) + return false; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + + return cap & PCI_EXP_DEVCAP2_LTR; +} +EXPORT_SYMBOL(pci_ltr_supported); + +/** + * pci_enable_ltr - enable latency tolerance reporting + * @dev: PCI device + * + * Enable LTR on @dev if possible, which means enabling it first on + * upstream ports. + * + * RETURNS: + * Zero on success, errno on failure. + */ +int pci_enable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + int ret; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return -EINVAL; + + /* Enable upstream ports first */ + if (dev->bus) { + ret = pci_enable_ltr(dev->bus->self); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl |= PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ltr); + +/** + * pci_disable_ltr - disable latency tolerance reporting + * @dev: PCI device + */ +void pci_disable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_ltr_supported(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ltr); + +static int __pci_ltr_scale(int *val) +{ + int scale = 0; + + while (*val > 1023) { + *val = (*val + 31) / 32; + scale++; + } + return scale; +} + +/** + * pci_set_ltr - set LTR latency values + * @dev: PCI device + * @snoop_lat_ns: snoop latency in nanoseconds + * @nosnoop_lat_ns: nosnoop latency in nanoseconds + * + * Figure out the scale and set the LTR values accordingly. + */ +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns) +{ + int pos, ret, snoop_scale, nosnoop_scale; + u16 val; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + snoop_scale = __pci_ltr_scale(&snoop_lat_ns); + nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns); + + if (snoop_lat_ns > PCI_LTR_VALUE_MASK || + nosnoop_lat_ns > PCI_LTR_VALUE_MASK) + return -EINVAL; + + if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) || + (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT))) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!pos) + return -ENOTSUPP; + + val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val); + if (ret != 4) + return -EIO; + + val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val); + if (ret != 4) + return -EIO; + + return 0; +} +EXPORT_SYMBOL(pci_set_ltr); + static int pci_acs_enable; /** @@ -2479,6 +2873,21 @@ clear: return 0; } +/** + * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0. + * @dev: Device to reset. + * @probe: If set, only check if the device can be reset this way. + * + * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is + * unset, it will be reinitialized internally when going from PCI_D3hot to + * PCI_D0. 
If that's the case and the device is not in a low-power state
+ * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset.
+ *
+ * NOTE: This causes the caller to sleep for twice the device power transition
+ * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
+ * by default (i.e. unless the @dev's d3_delay field has a different value).
+ * Moreover, only devices in D0 can be reset by this function.
+ */
 static int pci_pm_reset(struct pci_dev *dev, int probe)
 {
 	u16 csr;
@@ -2875,31 +3284,34 @@ static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
  * @dev: the PCI device
  * @decode: true = enable decoding, false = disable decoding
  * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY
- * @change_bridge: traverse ancestors and change bridges
+ * @change_bridge_flags: traverse ancestors and change bridges
+ *			 CHANGE_BRIDGE_ONLY / CHANGE_BRIDGE
  */
 int pci_set_vga_state(struct pci_dev *dev, bool decode,
-		      unsigned int command_bits, bool change_bridge)
+		      unsigned int command_bits, u32 flags)
 {
 	struct pci_bus *bus;
 	struct pci_dev *bridge;
 	u16 cmd;
 	int rc;

-	WARN_ON(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
+	WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) &
+		(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));

 	/* ARCH specific VGA enables */
-	rc = pci_set_vga_state_arch(dev, decode, command_bits, change_bridge);
+	rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
 	if (rc)
 		return rc;

-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	if (decode == true)
-		cmd |= command_bits;
-	else
-		cmd &= ~command_bits;
-	pci_write_config_word(dev, PCI_COMMAND, cmd);
+	if (flags & PCI_VGA_STATE_CHANGE_DECODES) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		if (decode == true)
+			cmd |= command_bits;
+		else
+			cmd &= ~command_bits;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}

-	if (change_bridge == false)
+	if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
 		return 0;

 	bus = dev->bus;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4020025f854e..731e20265ace 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -156,8 +156,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
 }

 extern struct device_attribute pci_dev_attrs[];
-extern struct device_attribute dev_attr_cpuaffinity;
-extern struct device_attribute dev_attr_cpulistaffinity;
+extern struct device_attribute pcibus_dev_attrs[];
 #ifdef CONFIG_HOTPLUG
 extern struct bus_attribute pci_bus_attrs[];
 #else
diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index f62079ff06dd..95489cd9a555 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c
@@ -326,7 +326,7 @@ static int aer_inject(struct aer_error_inj *einj)
 	unsigned long flags;
 	unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
 	int pos_cap_err, rp_pos_cap_err;
-	u32 sever, cor_mask, uncor_mask, cor_mask_orig, uncor_mask_orig;
+	u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
 	int ret = 0;

 	dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 3eb77080366a..94a7598eb262 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -114,15 +114,6 @@ extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
 extern irqreturn_t aer_irq(int irq, void *context);
-#ifdef CONFIG_ACPI
-extern int
aer_osc_setup(struct pcie_device *pciedev); -#else -static inline int aer_osc_setup(struct pcie_device *pciedev) -{ - return 0; -} -#endif - #ifdef CONFIG_ACPI_APEI extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); #else diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index eee09f756ec9..6892601fc76f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -608,7 +608,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) * the BIOS's expectation, we'll do so once pci_enable_device() is * called. */ - if (aspm_policy != POLICY_POWERSAVE) { + if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) { pcie_config_aspm_path(link); pcie_set_clkpm(link, policy_to_clkpm_state(link)); } @@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) * pci_disable_link_state - disable pci device's link state, so the link will * never enter specific states */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; @@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) if (!parent || !parent->link_state) return; - down_read(&pci_bus_sem); + if (sem) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link = parent->link_state; if (state & PCIE_LINK_STATE_L0S) @@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) pcie_set_clkpm(link, 0); } mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 44cbbbaa499d..48849ffdd672 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -43,43 +43,6 @@ int no_pci_devices(void) EXPORT_SYMBOL(no_pci_devices); /* - * PCI Bus Class Devices - */ -static ssize_t pci_bus_show_cpuaffinity(struct device *dev, - int type, - struct device_attribute *attr, - char *buf) -{ - int ret; - const struct cpumask *cpumask; - - cpumask = cpumask_of_pcibus(to_pci_bus(dev)); - ret = type? 
- cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; -} - -static ssize_t inline pci_bus_show_cpumaskaffinity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return pci_bus_show_cpuaffinity(dev, 0, attr, buf); -} - -static ssize_t inline pci_bus_show_cpulistaffinity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return pci_bus_show_cpuaffinity(dev, 1, attr, buf); -} - -DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL); -DEVICE_ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL); - -/* * PCI Bus Class */ static void release_pcibus_dev(struct device *dev) @@ -95,6 +58,7 @@ static void release_pcibus_dev(struct device *dev) static struct class pcibus_class = { .name = "pci_bus", .dev_release = &release_pcibus_dev, + .dev_attrs = pcibus_dev_attrs, }; static int __init pcibus_class_init(void) @@ -1455,9 +1419,6 @@ struct pci_bus * pci_create_bus(struct device *parent, error = device_register(&b->dev); if (error) goto class_dev_reg_err; - error = device_create_file(&b->dev, &dev_attr_cpuaffinity); - if (error) - goto dev_create_file_err; /* Create legacy_io and legacy_mem files for this bus */ pci_create_legacy_files(b); @@ -1468,8 +1429,6 @@ struct pci_bus * pci_create_bus(struct device *parent, return b; -dev_create_file_err: - device_unregister(&b->dev); class_dev_reg_err: device_unregister(dev); dev_reg_err: diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5129ed6d8fa7..e8a140669f90 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -606,7 +606,7 @@ static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev) } pci_read_config_byte(dev, ICH6_GPIO_CNTL, &enable); - if (enable & ICH4_GPIO_EN) { + if (enable & ICH6_GPIO_EN) { pci_read_config_dword(dev, ICH6_GPIOBASE, ®ion); region &= PCI_BASE_ADDRESS_IO_MASK; if (region >= PCIBIOS_MIN_IO) @@ -681,7 +681,7 @@ static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg, /* ICH7-10 has the same common LPC generic IO decode registers */ static void __devinit quirk_ich7_lpc(struct pci_dev *dev) { - /* We share the common ACPI/DPIO decode with ICH6 */ + /* We share the common ACPI/GPIO decode with ICH6 */ ich6_lpc_acpi_gpio(dev); /* And have 4 ICH7+ generic decodes */ @@ -2349,8 +2349,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, */ static void __devinit nvenet_msi_disable(struct pci_dev *dev) { - if (dmi_name_in_vendors("P5N32-SLI PREMIUM") || - dmi_name_in_vendors("P5N32-E SLI")) { + const char *board_name = dmi_get_system_info(DMI_BOARD_NAME); + + if (board_name && + (strstr(board_name, "P5N32-SLI PREMIUM") || + strstr(board_name, "P5N32-E SLI"))) { dev_info(&dev->dev, "Disabling msi for MCP55 NIC on P5N32-SLI\n"); dev->no_msi = 1; @@ -2784,6 +2787,16 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); #endif +static void __devinit fixup_ti816x_class(struct pci_dev* dev) +{ + /* TI 816x devices do not have class code set when in PCIe boot mode */ + if (dev->class == PCI_CLASS_NOT_DEFINED) { + dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); + dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git 
a/drivers/pci/remove.c b/drivers/pci/remove.c
index 176615e7231f..7f87beed35ac 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,8 +73,6 @@ void pci_remove_bus(struct pci_bus *pci_bus)
 		return;
 	pci_remove_legacy_files(pci_bus);
-	device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
-	device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
 	device_unregister(&pci_bus->dev);
 }
 EXPORT_SYMBOL(pci_remove_bus);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index a806cb321d2e..1e9e5a5b8c81 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -991,30 +991,139 @@ static void pci_bus_dump_resources(struct pci_bus *bus)
 	}
 }

+static int __init pci_bus_get_depth(struct pci_bus *bus)
+{
+	int depth = 0;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		int ret;
+		struct pci_bus *b = dev->subordinate;
+		if (!b)
+			continue;
+
+		ret = pci_bus_get_depth(b);
+		if (ret + 1 > depth)
+			depth = ret + 1;
+	}
+
+	return depth;
+}
+static int __init pci_get_max_depth(void)
+{
+	int depth = 0;
+	struct pci_bus *bus;
+
+	list_for_each_entry(bus, &pci_root_buses, node) {
+		int ret;
+
+		ret = pci_bus_get_depth(bus);
+		if (ret > depth)
+			depth = ret;
+	}
+
+	return depth;
+}
+
+/*
+ * The first try does not touch PCI bridge resources.
+ * The second and later tries also clear small leaf bridge resources.
+ * Stop once the maximum depth is reached if no good assignment can be found.
+ */
 void __init pci_assign_unassigned_resources(void)
 {
 	struct pci_bus *bus;
 	struct resource_list_x add_list; /* list of resources that want additional resources */
+	int tried_times = 0;
+	enum release_type rel_type = leaf_only;
+	struct resource_list_x head, *list;
+	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+				  IORESOURCE_PREFETCH;
+	unsigned long failed_type;
+	int max_depth = pci_get_max_depth();
+	int pci_try_num;
+
+
+	head.next = NULL;
 	add_list.next = NULL;
+
+	pci_try_num = max_depth + 1;
+	printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
+	       max_depth, pci_try_num);
+
+again:
 	/* Depth first, calculate sizes and alignments of all
 	   subordinate buses. */
-	list_for_each_entry(bus, &pci_root_buses, node) {
+	list_for_each_entry(bus, &pci_root_buses, node)
 		__pci_bus_size_bridges(bus, &add_list);
-	}

 	/* Depth last, allocate resources and update the hardware. */
-	list_for_each_entry(bus, &pci_root_buses, node) {
-		__pci_bus_assign_resources(bus, &add_list, NULL);
-		pci_enable_bridges(bus);
-	}
+	list_for_each_entry(bus, &pci_root_buses, node)
+		__pci_bus_assign_resources(bus, &add_list, &head);
 	BUG_ON(add_list.next);
+	tried_times++;
+
+	/* Did any device complain? */
+	if (!head.next)
+		goto enable_and_dump;
+	failed_type = 0;
+	for (list = head.next; list;) {
+		failed_type |= list->flags;
+		list = list->next;
+	}
+	/*
+	 * I/O ports are tight, so don't retry for them alone; also stop
+	 * once the retry limit has been reached.
+	 */
+	failed_type &= type_mask;
+	if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
+		free_list(resource_list_x, &head);
+		goto enable_and_dump;
+	}
+
+	printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
+	       tried_times + 1);
+
+	/* From the third try on, release whole subtrees, not just leaves */
+	if ((tried_times + 1) > 2)
+		rel_type = whole_subtree;
+
+	/*
+	 * Try to release leaf bridge resources that don't fit the resources
+	 * of the child devices under that bridge.
+	 */
+	for (list = head.next; list;) {
+		bus = list->dev->bus;
+		pci_bus_release_bridge_resources(bus, list->flags & type_mask,
+						 rel_type);
+		list = list->next;
+	}
+	/* restore size and flags */
+	for (list = head.next; list;) {
+		struct resource *res = list->res;
+
+		res->start = list->start;
+		res->end = list->end;
+		res->flags = list->flags;
+		if (list->dev->subordinate)
+			res->flags = 0;
+
+		list = list->next;
+	}
+	free_list(resource_list_x, &head);
+
+	goto again;
+
+enable_and_dump:
+	/* Depth last, update the hardware. */
+	list_for_each_entry(bus, &pci_root_buses, node)
+		pci_enable_bridges(bus);

 	/* dump the resource on buses */
-	list_for_each_entry(bus, &pci_root_buses, node) {
+	list_for_each_entry(bus, &pci_root_buses, node)
 		pci_bus_dump_resources(bus);
-	}
 }

 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
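
The pci_set_ltr() addition in the pci.c diff encodes each latency as a 10-bit value plus a scale field, where the effective latency is value * 2^(5 * scale) nanoseconds. Below is a self-contained user-space sketch of that encoding, mirroring the rounding loop in __pci_ltr_scale(); the 1023 limit corresponds to PCI_LTR_VALUE_MASK and is hard-coded here purely for illustration:

	#include <stdio.h>

	/* Mirror of __pci_ltr_scale(): divide by 32 (rounding up) until
	 * the latency fits in the 10-bit LTR value field. */
	static int ltr_scale(int *val)
	{
		int scale = 0;

		while (*val > 1023) {
			*val = (*val + 31) / 32;
			scale++;
		}
		return scale;
	}

	int main(void)
	{
		int lat = 300000;	/* e.g. a 300 us snoop latency */
		int scale = ltr_scale(&lat);

		/* 300000 ns -> value 293, scale 2; 293 * 1024 ns is ~300 us */
		printf("value=%d scale=%d (~%d ns)\n",
		       lat, scale, lat << (5 * scale));
		return 0;
	}

With this encoding, the register write in pci_set_ltr() is simply (scale << PCI_LTR_SCALE_SHIFT) | value for each of the snoop and no-snoop latencies.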