Diffstat (limited to 'drivers/pci')
-rw-r--r--  drivers/pci/access.c                  18
-rw-r--r--  drivers/pci/bus.c                      6
-rw-r--r--  drivers/pci/dmar.c                     7
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c     7
-rw-r--r--  drivers/pci/hotplug/pcihp_slot.c      45
-rw-r--r--  drivers/pci/intel-iommu.c            240
-rw-r--r--  drivers/pci/iova.c                    12
-rw-r--r--  drivers/pci/pci-acpi.c                 2
-rw-r--r--  drivers/pci/pci-sysfs.c               62
-rw-r--r--  drivers/pci/pci.c                    444
-rw-r--r--  drivers/pci/pci.h                      3
-rw-r--r--  drivers/pci/pcie/aer/aer_inject.c      2
-rw-r--r--  drivers/pci/pcie/aer/aerdrv.h          9
-rw-r--r--  drivers/pci/pcie/aspm.c               21
-rw-r--r--  drivers/pci/probe.c                   43
-rw-r--r--  drivers/pci/quirks.c                  21
-rw-r--r--  drivers/pci/remove.c                   2
-rw-r--r--  drivers/pci/setup-bus.c              125
18 files changed, 925 insertions, 144 deletions
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 531bc697d800..fdaa42aac7c6 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -143,33 +143,41 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev)
__remove_wait_queue(&pci_ucfg_wait, &wait);
}
+/* Returns 0 on success; negative values indicate error. */
#define PCI_USER_READ_CONFIG(size,type) \
int pci_user_read_config_##size \
(struct pci_dev *dev, int pos, type *val) \
{ \
int ret = 0; \
u32 data = -1; \
- if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ if (PCI_##size##_BAD) \
+ return -EINVAL; \
raw_spin_lock_irq(&pci_lock); \
if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \
ret = dev->bus->ops->read(dev->bus, dev->devfn, \
pos, sizeof(type), &data); \
raw_spin_unlock_irq(&pci_lock); \
*val = (type)data; \
+ if (ret > 0) \
+ ret = -EINVAL; \
return ret; \
}
+/* Returns 0 on success; negative values indicate error. */
#define PCI_USER_WRITE_CONFIG(size,type) \
int pci_user_write_config_##size \
(struct pci_dev *dev, int pos, type val) \
{ \
int ret = -EIO; \
- if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ if (PCI_##size##_BAD) \
+ return -EINVAL; \
raw_spin_lock_irq(&pci_lock); \
if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \
ret = dev->bus->ops->write(dev->bus, dev->devfn, \
pos, sizeof(type), val); \
raw_spin_unlock_irq(&pci_lock); \
+ if (ret > 0) \
+ ret = -EINVAL; \
return ret; \
}
@@ -197,6 +205,8 @@ struct pci_vpd_pci22 {
* This code has to spin since there is no other notification from the PCI
* hardware. Since the VPD is often implemented by serial attachment to an
* EEPROM, it may take many milliseconds to complete.
+ *
+ * Returns 0 on success; negative values indicate error.
*/
static int pci_vpd_pci22_wait(struct pci_dev *dev)
{
@@ -212,7 +222,7 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev)
for (;;) {
ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR,
&status);
- if (ret)
+ if (ret < 0)
return ret;
if ((status & PCI_VPD_ADDR_F) == vpd->flag) {
@@ -324,6 +334,8 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count
vpd->busy = true;
vpd->flag = 0;
ret = pci_vpd_pci22_wait(dev);
+ if (ret < 0)
+ break;
pos += sizeof(u32);
}
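With this change the user-space config accessors return 0 or a negative errno instead of the positive PCIBIOS_* codes, matching the usual kernel convention. A minimal sketch of a caller under the new convention (dev and pos as in the VPD code above):

	u16 status;
	int ret;

	ret = pci_user_read_config_word(dev, pos, &status);
	if (ret < 0)	/* e.g. -EINVAL; never a positive PCIBIOS code */
		return ret;
	/* status is valid here */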
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 69546e9213dd..1e2ad92a4752 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -163,12 +163,6 @@ int pci_bus_add_child(struct pci_bus *bus)
bus->is_added = 1;
- retval = device_create_file(&bus->dev, &dev_attr_cpuaffinity);
- if (retval)
- return retval;
-
- retval = device_create_file(&bus->dev, &dev_attr_cpulistaffinity);
-
/* Create legacy_io and legacy_mem files for this bus */
pci_create_legacy_files(bus);
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 12e02bf92c4a..3dc9befa5aec 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
{
#ifdef CONFIG_INTR_REMAP
struct acpi_table_dmar *dmar;
- /*
- * for now we will disable dma-remapping when interrupt
- * remapping is enabled.
- * When support for queued invalidation for IOTLB invalidation
- * is added, we will not need this any more.
- */
+
dmar = (struct acpi_table_dmar *) dmar_tbl;
if (ret && cpu_has_x2apic && dmar->flags & 0x1)
printk(KERN_INFO
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 2f67e9bc2f96..a70fa89f76fd 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -827,6 +827,13 @@ static int __ref enable_device(struct acpiphp_slot *slot)
acpiphp_set_hpp_values(bus);
acpiphp_set_acpi_region(slot);
pci_enable_bridges(bus);
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ /* Assume that newly added devices are powered on already. */
+ if (!dev->is_added)
+ dev->current_state = PCI_D0;
+ }
+
pci_bus_add_devices(bus);
list_for_each_entry(func, &slot->funcs, sibling) {
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c
index 80b461c98557..749fdf070319 100644
--- a/drivers/pci/hotplug/pcihp_slot.c
+++ b/drivers/pci/hotplug/pcihp_slot.c
@@ -158,6 +158,47 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
*/
}
+/* Program PCIe MaxPayload setting on device: ensure parent's MaxPayload <= device's */
+static int pci_set_payload(struct pci_dev *dev)
+{
+ int pos, ppos;
+ u16 pctl, psz;
+ u16 dctl, dsz, dcap, dmax;
+ struct pci_dev *parent;
+
+ parent = dev->bus->self;
+ pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (!pos)
+ return 0;
+
+ /* Read Device MaxPayload capability and setting */
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl);
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap);
+ dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+ dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD);
+
+ /* Read Parent MaxPayload setting */
+ ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+ if (!ppos)
+ return 0;
+ pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
+ psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
+
+	/* If parent payload > device max payload -> error
+	 * If parent payload > device payload -> raise device payload
+	 * If parent payload <= device payload -> do nothing
+	 */
+ if (psz > dmax)
+ return -1;
+ else if (psz > dsz) {
+ dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz);
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
+ (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) +
+ (psz << 5));
+ }
+ return 0;
+}
+
void pci_configure_slot(struct pci_dev *dev)
{
struct pci_dev *cdev;
@@ -169,6 +210,10 @@ void pci_configure_slot(struct pci_dev *dev)
(dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
return;
+ ret = pci_set_payload(dev);
+ if (ret)
+ dev_warn(&dev->dev, "could not set device max payload\n");
+
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
if (ret)
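For reference, the MaxPayload field occupies bits 7:5 of PCI_EXP_DEVCTL and encodes the payload size as 128 << field, which is why pci_set_payload() shifts by 5 and logs 128 << psz. A small illustrative helper (not part of the patch):

	/* field 0 -> 128 bytes, 1 -> 256, ..., 5 -> 4096 */
	static unsigned int mps_bytes(u16 devctl)
	{
		return 128 << ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
	}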
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6af6b628175b..59f17acf7f68 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -47,6 +47,8 @@
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+ ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
return (pfn + level_size(level) - 1) & level_mask(level);
}
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+ return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
/*
+ * Set to 1 to panic the kernel if VT-d cannot be enabled
+ * (used when the kernel is launched with TXT).
+ */
+static int force_on = 0;
+
+/*
* 0: Present
* 1-11: Reserved
* 12-63: Context Ptr (12 - (haw-1))
@@ -338,6 +351,9 @@ struct dmar_domain {
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
+ int iommu_superpage;/* Level of superpages supported:
+ 0 == 4KiB (no superpages), 1 == 2MiB,
+ 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
};
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable batched IOTLB flush\n");
intel_iommu_strict = 1;
+ } else if (!strncmp(str, "sp_off", 6)) {
+		printk(KERN_INFO
+			"Intel-IOMMU: disabling superpage support\n");
+ intel_iommu_superpage = 0;
}
str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
}
}
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+ int i, mask = 0xf;
+
+ if (!intel_iommu_superpage) {
+ domain->iommu_superpage = 0;
+ return;
+ }
+
+ domain->iommu_superpage = 4; /* 1TiB */
+
+ for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+		mask &= cap_super_page_val(g_iommus[i]->cap);
+		if (!mask)
+			break;
+ }
+ domain->iommu_superpage = fls(mask);
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain_update_iommu_snooping(domain);
+ domain_update_iommu_superpage(domain);
}
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
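cap_super_page_val() yields a 4-bit mask per IOMMU: bit 0 set means 2MiB pages are supported, bit 1 1GiB, and so on. Starting from 0xf and AND-ing in each unit's mask keeps only the levels every IOMMU supports, and fls() turns that into the highest common level. Worked example: IOMMU A advertises 0x3 (2MiB and 1GiB), IOMMU B only 0x1 (2MiB); 0xf & 0x3 & 0x1 = 0x1 and fls(0x1) = 1, so the domain is limited to 2MiB superpages.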
@@ -689,23 +731,31 @@ out:
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn)
+ unsigned long pfn, int large_level)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
- int offset;
+ int offset, target_level;
BUG_ON(!domain->pgd);
BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
parent = domain->pgd;
+ /* Search pte */
+ if (!large_level)
+ target_level = 1;
+ else
+ target_level = large_level;
+
while (level > 0) {
void *tmp_page;
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
- if (level == 1)
+ if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+ break;
+ if (level == target_level)
break;
if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return pte;
}
+
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
- int level)
+ int level, int *large_page)
{
struct dma_pte *parent, *pte = NULL;
int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
if (level == total)
return pte;
- if (!dma_pte_present(pte))
+ if (!dma_pte_present(pte)) {
+ *large_page = total;
break;
+ }
+
+ if (pte->val & DMA_PTE_LARGE_PAGE) {
+ *large_page = total;
+ return pte;
+ }
+
parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long last_pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+ unsigned int large_page = 1;
struct dma_pte *first_pte, *pte;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
/* we don't need lock here; nobody else touches the iova range */
do {
- first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+ large_page = 1;
+ first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
if (!pte) {
- start_pfn = align_to_level(start_pfn + 1, 2);
+ start_pfn = align_to_level(start_pfn + 1, large_page + 1);
continue;
}
- do {
+ do {
dma_clear_pte(pte);
- start_pfn++;
+ start_pfn += lvl_to_nr_pages(large_page);
pte++;
} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
int total = agaw_to_level(domain->agaw);
int level;
unsigned long tmp;
+ int large_page = 2;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
return;
do {
- first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+ large_page = level;
+ first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+ if (large_page > level)
+ level = large_page + 1;
if (!pte) {
tmp = align_to_level(tmp + 1, level + 1);
continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_snooping = 0;
+ domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
domain->iommu_count = 1;
domain->nid = iommu->node;
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
+ /* Flush any lazy unmaps that may reference this domain */
+ if (!intel_iommu_strict)
+ flush_unmaps_timeout(0);
+
domain_remove_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
+/* Return largest possible superpage level for a given mapping */
+static inline int hardware_largepage_caps(struct dmar_domain *domain,
+ unsigned long iov_pfn,
+ unsigned long phy_pfn,
+ unsigned long pages)
+{
+ int support, level = 1;
+ unsigned long pfnmerge;
+
+ support = domain->iommu_superpage;
+
+ /* To use a large page, the virtual *and* physical addresses
+ must be aligned to 2MiB/1GiB/etc. Lower bits set in either
+ of them will mean we have to use smaller pages. So just
+ merge them and check both at once. */
+ pfnmerge = iov_pfn | phy_pfn;
+
+ while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
+ pages >>= VTD_STRIDE_SHIFT;
+ if (!pages)
+ break;
+ pfnmerge >>= VTD_STRIDE_SHIFT;
+ level++;
+ support--;
+ }
+ return level;
+}
+
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long phys_pfn,
unsigned long nr_pages, int prot)
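hardware_largepage_caps() above ORs the IOVA and physical pfns because a superpage needs both aligned; with a 9-bit stride each level covers 512 times more pages. Worked example: iov_pfn 0x200, phys_pfn 0x400 and 1024 remaining pages give pfnmerge 0x600, whose low 9 bits are clear, so level 2 (2MiB) is chosen and __domain_mapping() below maps the first 512 pages with a single large PTE.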
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
phys_addr_t uninitialized_var(pteval);
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned long sg_res;
+ unsigned int largepage_lvl = 0;
+ unsigned long lvl_pages = 0;
BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
}
- while (nr_pages--) {
+ while (nr_pages > 0) {
uint64_t tmp;
if (!sg_res) {
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
pteval = page_to_phys(sg_page(sg)) | prot;
+ phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
+
if (!pte) {
- first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+ largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
+
+ first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
if (!pte)
return -ENOMEM;
+			/* It is a large page */
+ if (largepage_lvl > 1)
+ pteval |= DMA_PTE_LARGE_PAGE;
+ else
+ pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
+
}
/* We don't need lock here, nobody else
* touches the iova range
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
}
WARN_ON(1);
}
+
+ lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
+ BUG_ON(nr_pages < lvl_pages);
+ BUG_ON(sg_res < lvl_pages);
+
+ nr_pages -= lvl_pages;
+ iov_pfn += lvl_pages;
+ phys_pfn += lvl_pages;
+ pteval += lvl_pages * VTD_PAGE_SIZE;
+ sg_res -= lvl_pages;
+
+ /* If the next PTE would be the first in a new page, then we
+ need to flush the cache on the entries we've just written.
+ And then we'll need to recalculate 'pte', so clear it and
+ let it get set again in the if (!pte) block above.
+
+ If we're done (!nr_pages) we need to flush the cache too.
+
+ Also if we've been setting superpages, we may need to
+ recalculate 'pte' and switch back to smaller pages for the
+ end of the mapping, if the trailing size is not enough to
+ use another superpage (i.e. sg_res < lvl_pages). */
pte++;
- if (!nr_pages || first_pte_in_page(pte)) {
+ if (!nr_pages || first_pte_in_page(pte) ||
+ (largepage_lvl > 1 && sg_res < lvl_pages)) {
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
pte = NULL;
}
- iov_pfn++;
- pteval += VTD_PAGE_SIZE;
- sg_res--;
- if (!sg_res)
+
+ if (!sg_res && nr_pages)
sg = sg_next(sg);
}
return 0;
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
- rmrr->end_address + 1);
+ rmrr->end_address);
}
#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
return;
printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
- ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
+ ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
if (ret)
printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
if (likely(!iommu_identity_mapping))
return 0;
+ info = pdev->dev.archdata.iommu;
+ if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+ return (info->domain == si_domain);
- list_for_each_entry(info, &si_domain->devices, link)
- if (info->dev == pdev)
- return 1;
return 0;
}
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
* Assume that they will -- if they turn out not to be, then we can
* take them out of the 1:1 domain later.
*/
- if (!startup)
- return pdev->dma_mask > DMA_BIT_MASK(32);
+ if (!startup) {
+ /*
+ * If the device's dma_mask is less than the system's memory
+ * size then this is not a candidate for identity mapping.
+ */
+ u64 dma_mask = pdev->dma_mask;
+
+ if (pdev->dev.coherent_dma_mask &&
+ pdev->dev.coherent_dma_mask < dma_mask)
+ dma_mask = pdev->dev.coherent_dma_mask;
+
+ return dma_mask >= dma_get_required_mask(&pdev->dev);
+ }
return 1;
}
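The runtime check now compares the device's effective DMA mask against dma_get_required_mask(), which reflects installed memory. Worked example: a device with a 32-bit dma_mask on a machine with 8GiB of RAM sees a required mask of roughly DMA_BIT_MASK(33), so it is dropped from the 1:1 domain rather than being handed addresses it cannot reach.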
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return -EFAULT;
for_each_pci_dev(pdev) {
+ /* Skip Host/PCI Bridge devices */
+ if (IS_BRIDGE_HOST_DEVICE(pdev))
+ continue;
if (iommu_should_identity_map(pdev, 1)) {
printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
hw ? "hardware" : "software", pci_name(pdev));
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return 0;
}
-static int __init init_dmars(int force_on)
+static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
struct dmar_rmrr_unit *rmrr;
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
iommu = domain_get_iommu(domain);
size = aligned_nrpages(paddr, size);
- iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
- pdev->dma_mask);
+ iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
if (!iova)
goto error;
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
if (iommu->qi)
dmar_reenable_qi(iommu);
- for_each_active_iommu(iommu, drhd) {
+ for_each_iommu(iommu, drhd) {
+ if (drhd->ignored) {
+ /*
+ * we always have to disable PMRs or DMA may fail on
+ * this device
+ */
+ if (force_on)
+ iommu_disable_protect_mem_regions(iommu);
+ continue;
+ }
+
iommu_flush_write_buffer(iommu);
iommu_set_root_entry(iommu);
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH);
- iommu_enable_translation(iommu);
+ if (iommu_enable_translation(iommu))
+ return 1;
iommu_disable_protect_mem_regions(iommu);
}
@@ -3194,7 +3350,10 @@ static void iommu_resume(void)
unsigned long flag;
if (init_iommu_hw()) {
- WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+ if (force_on)
+ panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
+ else
+ WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
return;
}
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
int __init intel_iommu_init(void)
{
int ret = 0;
- int force_on = 0;
/* VT-d is required for a TXT/tboot launch, so enforce that */
force_on = tboot_force_iommu();
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
init_no_remapping_devices();
- ret = init_dmars(force_on);
+ ret = init_dmars();
if (ret) {
if (force_on)
panic("tboot: Failed to initialize DMARs\n");
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
- /* No need to compare PCI domain; it has to be the same */
- if (info->bus == pdev->bus->number &&
+ if (info->segment == pci_domain_nr(pdev->bus) &&
+ info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
list_del(&info->global);
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
- spin_lock_irqsave(&iommu->lock, tmp_flags);
- clear_bit(domain->id, iommu->domain_ids);
- iommu->domains[domain->id] = NULL;
- spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+ if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+ !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
+ spin_lock_irqsave(&iommu->lock, tmp_flags);
+ clear_bit(domain->id, iommu->domain_ids);
+ iommu->domains[domain->id] = NULL;
+ spin_unlock_irqrestore(&iommu->lock, tmp_flags);
+ }
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_count = 0;
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
+ domain->iommu_superpage = 0;
domain->max_addr = 0;
domain->nid = -1;
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
struct dma_pte *pte;
u64 phys = 0;
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
if (pte)
phys = dma_pte_addr(pte);
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 9606e599a475..c5c274ab5c5a 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
curr = iovad->cached32_node;
cached_iova = container_of(curr, struct iova, node);
- if (free->pfn_lo >= cached_iova->pfn_lo)
- iovad->cached32_node = rb_next(&free->node);
+ if (free->pfn_lo >= cached_iova->pfn_lo) {
+ struct rb_node *node = rb_next(&free->node);
+ struct iova *iova = container_of(node, struct iova, node);
+
+		/* only cache if it's below the 32-bit pfn boundary */
+ if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
+ iovad->cached32_node = node;
+ else
+ iovad->cached32_node = NULL;
+ }
}
/* Computes the padding size required, to make the
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7c3b18e78cee..d36f41ea8cbf 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
return PCI_D2;
case ACPI_STATE_D3:
return PCI_D3hot;
+ case ACPI_STATE_D3_COLD:
+ return PCI_D3cold;
}
return PCI_POWER_ERROR;
}
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f8deb3e380a2..7bcf12adced7 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -108,6 +108,40 @@ static ssize_t local_cpulist_show(struct device *dev,
return len;
}
+/*
+ * PCI Bus Class Devices
+ */
+static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
+ int type,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int ret;
+ const struct cpumask *cpumask;
+
+ cpumask = cpumask_of_pcibus(to_pci_bus(dev));
+ ret = type ?
+ cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) :
+ cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+ buf[ret++] = '\n';
+ buf[ret] = '\0';
+ return ret;
+}
+
+static inline ssize_t pci_bus_show_cpumaskaffinity(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
+}
+
+static inline ssize_t pci_bus_show_cpulistaffinity(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
+}
+
/* show resources */
static ssize_t
resource_show(struct device * dev, struct device_attribute *attr, char * buf)
@@ -318,6 +352,25 @@ remove_store(struct device *dev, struct device_attribute *dummy,
count = ret;
return count;
}
+
+static ssize_t
+dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ struct pci_bus *bus = to_pci_bus(dev);
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val) {
+ mutex_lock(&pci_remove_rescan_mutex);
+ pci_rescan_bus(bus);
+ mutex_unlock(&pci_remove_rescan_mutex);
+ }
+ return count;
+}
+
#endif
struct device_attribute pci_dev_attrs[] = {
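The rescan attribute added above makes bus re-enumeration reachable from user space: writing any non-zero value, e.g. echo 1 > /sys/class/pci_bus/0000:00/rescan (path illustrative), calls pci_rescan_bus() under pci_remove_rescan_mutex.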
@@ -347,6 +400,15 @@ struct device_attribute pci_dev_attrs[] = {
__ATTR_NULL,
};
+struct device_attribute pcibus_dev_attrs[] = {
+#ifdef CONFIG_HOTPLUG
+ __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_bus_rescan_store),
+#endif
+ __ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL),
+ __ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL),
+ __ATTR_NULL,
+};
+
static ssize_t
boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
{
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2472e7177b4b..56098b3e17c0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
dev_err(&dev->dev, "buffer not found in %s\n", __func__);
return -ENOMEM;
}
- cap = (u16 *)&save_state->data[0];
+ cap = (u16 *)&save_state->cap.data[0];
pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
@@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!save_state || pos <= 0)
return;
- cap = (u16 *)&save_state->data[0];
+ cap = (u16 *)&save_state->cap.data[0];
pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags);
@@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev)
return -ENOMEM;
}
- pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data);
+ pci_read_config_word(dev, pos + PCI_X_CMD,
+ (u16 *)save_state->cap.data);
return 0;
}
@@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
if (!save_state || pos <= 0)
return;
- cap = (u16 *)&save_state->data[0];
+ cap = (u16 *)&save_state->cap.data[0];
pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
}
@@ -975,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev)
dev->state_saved = false;
}
+struct pci_saved_state {
+ u32 config_space[16];
+ struct pci_cap_saved_data cap[0];
+};
+
+/**
+ * pci_store_saved_state - Allocate and return an opaque struct containing
+ * the device saved state.
+ * @dev: PCI device that we're dealing with
+ *
+ * Returns NULL if the device has no saved state or on allocation failure.
+ */
+struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
+{
+ struct pci_saved_state *state;
+ struct pci_cap_saved_state *tmp;
+ struct pci_cap_saved_data *cap;
+ struct hlist_node *pos;
+ size_t size;
+
+ if (!dev->state_saved)
+ return NULL;
+
+ size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
+
+ hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next)
+ size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
+
+ state = kzalloc(size, GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ memcpy(state->config_space, dev->saved_config_space,
+ sizeof(state->config_space));
+
+ cap = state->cap;
+ hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) {
+ size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
+ memcpy(cap, &tmp->cap, len);
+ cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
+ }
+ /* Empty cap_save terminates list */
+
+ return state;
+}
+EXPORT_SYMBOL_GPL(pci_store_saved_state);
+
+/**
+ * pci_load_saved_state - Reload the provided save state into struct pci_dev.
+ * @dev: PCI device that we're dealing with
+ * @state: Saved state returned from pci_store_saved_state()
+ */
+int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state)
+{
+ struct pci_cap_saved_data *cap;
+
+ dev->state_saved = false;
+
+ if (!state)
+ return 0;
+
+ memcpy(dev->saved_config_space, state->config_space,
+ sizeof(state->config_space));
+
+ cap = state->cap;
+ while (cap->size) {
+ struct pci_cap_saved_state *tmp;
+
+ tmp = pci_find_saved_cap(dev, cap->cap_nr);
+ if (!tmp || tmp->cap.size != cap->size)
+ return -EINVAL;
+
+ memcpy(tmp->cap.data, cap->data, tmp->cap.size);
+ cap = (struct pci_cap_saved_data *)((u8 *)cap +
+ sizeof(struct pci_cap_saved_data) + cap->size);
+ }
+
+ dev->state_saved = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_load_saved_state);
+
+/**
+ * pci_load_and_free_saved_state - Reload the save state pointed to by state,
+ * and free the memory allocated for it.
+ * @dev: PCI device that we're dealing with
+ * @state: Pointer to saved state returned from pci_store_saved_state()
+ */
+int pci_load_and_free_saved_state(struct pci_dev *dev,
+ struct pci_saved_state **state)
+{
+ int ret = pci_load_saved_state(dev, *state);
+ kfree(*state);
+ *state = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state);
+
static int do_pci_enable_device(struct pci_dev *dev, int bars)
{
int err;
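The saved-state helpers above give a caller (device assignment is the motivating user) a private snapshot that survives later overwrites of the pci_dev-internal save area. A minimal usage sketch, assuming the state was saved first:

	struct pci_saved_state *state;

	pci_save_state(pdev);
	state = pci_store_saved_state(pdev);	/* private, kzalloc'd copy */
	/* ... the internal saved state may be clobbered here ... */
	if (pci_load_and_free_saved_state(pdev, &state) == 0)
		pci_restore_state(pdev);	/* restore from the snapshot */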
@@ -1771,7 +1870,8 @@ static int pci_add_cap_save_buffer(
if (!save_state)
return -ENOMEM;
- save_state->cap_nr = cap;
+ save_state->cap.cap_nr = cap;
+ save_state->cap.size = size;
pci_add_saved_cap(dev, save_state);
return 0;
@@ -1834,6 +1934,300 @@ void pci_enable_ari(struct pci_dev *dev)
bridge->ari_enabled = 1;
}
+/**
+ * pci_enable_ido - enable ID-based ordering on a device
+ * @dev: the PCI device
+ * @type: which types of IDO to enable
+ *
+ * Enable ID-based ordering on @dev. @type can contain the bits
+ * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate
+ * which types of transactions are allowed to be re-ordered.
+ */
+void pci_enable_ido(struct pci_dev *dev, unsigned long type)
+{
+ int pos;
+ u16 ctrl;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return;
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ if (type & PCI_EXP_IDO_REQUEST)
+ ctrl |= PCI_EXP_IDO_REQ_EN;
+ if (type & PCI_EXP_IDO_COMPLETION)
+ ctrl |= PCI_EXP_IDO_CMP_EN;
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+}
+EXPORT_SYMBOL(pci_enable_ido);
+
+/**
+ * pci_disable_ido - disable ID-based ordering on a device
+ * @dev: the PCI device
+ * @type: which types of IDO to disable
+ */
+void pci_disable_ido(struct pci_dev *dev, unsigned long type)
+{
+ int pos;
+ u16 ctrl;
+
+ if (!pci_is_pcie(dev))
+ return;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return;
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ if (type & PCI_EXP_IDO_REQUEST)
+ ctrl &= ~PCI_EXP_IDO_REQ_EN;
+ if (type & PCI_EXP_IDO_COMPLETION)
+ ctrl &= ~PCI_EXP_IDO_CMP_EN;
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+}
+EXPORT_SYMBOL(pci_disable_ido);
+
+/**
+ * pci_enable_obff - enable optimized buffer flush/fill
+ * @dev: PCI device
+ * @type: type of signaling to use
+ *
+ * Try to enable @type OBFF signaling on @dev. It will try using WAKE#
+ * signaling if possible, falling back to message signaling only if
+ * WAKE# isn't supported. @type should indicate whether the PCIe link
+ * should be brought out of L0s or L1 to send the message. It should be
+ * either %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_EXP_OBFF_SIGNAL_L0.
+ *
+ * If your device can benefit from receiving all messages, even at the
+ * power cost of bringing the link back up from a low power state, use
+ * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_EXP_OBFF_SIGNAL_L0 (the
+ * preferred type).
+ *
+ * RETURNS:
+ * Zero on success, appropriate error number on failure.
+ */
+int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type)
+{
+ int pos;
+ u32 cap;
+ u16 ctrl;
+ int ret;
+
+ if (!pci_is_pcie(dev))
+ return -ENOTSUPP;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return -ENOTSUPP;
+
+ pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+ if (!(cap & PCI_EXP_OBFF_MASK))
+ return -ENOTSUPP; /* no OBFF support at all */
+
+ /* Make sure the topology supports OBFF as well */
+	if (dev->bus && dev->bus->self) {
+ ret = pci_enable_obff(dev->bus->self, type);
+ if (ret)
+ return ret;
+ }
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ if (cap & PCI_EXP_OBFF_WAKE)
+ ctrl |= PCI_EXP_OBFF_WAKE_EN;
+ else {
+ switch (type) {
+ case PCI_EXP_OBFF_SIGNAL_L0:
+ if (!(ctrl & PCI_EXP_OBFF_WAKE_EN))
+ ctrl |= PCI_EXP_OBFF_MSGA_EN;
+ break;
+ case PCI_EXP_OBFF_SIGNAL_ALWAYS:
+ ctrl &= ~PCI_EXP_OBFF_WAKE_EN;
+ ctrl |= PCI_EXP_OBFF_MSGB_EN;
+ break;
+ default:
+ WARN(1, "bad OBFF signal type\n");
+ return -ENOTSUPP;
+ }
+ }
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_enable_obff);
+
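A sketch of how a driver might opt in from probe, preferring the lighter L0 signaling and treating missing OBFF support as non-fatal (an assumption, not part of the patch):

	if (pci_enable_obff(pdev, PCI_EXP_OBFF_SIGNAL_L0))
		dev_dbg(&pdev->dev, "OBFF unavailable, continuing without it\n");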
+/**
+ * pci_disable_obff - disable optimized buffer flush/fill
+ * @dev: PCI device
+ *
+ * Disable OBFF on @dev.
+ */
+void pci_disable_obff(struct pci_dev *dev)
+{
+ int pos;
+ u16 ctrl;
+
+ if (!pci_is_pcie(dev))
+ return;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return;
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ ctrl &= ~PCI_EXP_OBFF_WAKE_EN;
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+}
+EXPORT_SYMBOL(pci_disable_obff);
+
+/**
+ * pci_ltr_supported - check whether a device supports LTR
+ * @dev: PCI device
+ *
+ * RETURNS:
+ * True if @dev supports latency tolerance reporting, false otherwise.
+ */
+bool pci_ltr_supported(struct pci_dev *dev)
+{
+ int pos;
+ u32 cap;
+
+ if (!pci_is_pcie(dev))
+ return false;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return false;
+
+ pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
+
+ return cap & PCI_EXP_DEVCAP2_LTR;
+}
+EXPORT_SYMBOL(pci_ltr_supported);
+
+/**
+ * pci_enable_ltr - enable latency tolerance reporting
+ * @dev: PCI device
+ *
+ * Enable LTR on @dev if possible, which means enabling it first on
+ * upstream ports.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int pci_enable_ltr(struct pci_dev *dev)
+{
+ int pos;
+ u16 ctrl;
+ int ret;
+
+ if (!pci_ltr_supported(dev))
+ return -ENOTSUPP;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return -ENOTSUPP;
+
+ /* Only primary function can enable/disable LTR */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return -EINVAL;
+
+ /* Enable upstream ports first */
+	if (dev->bus && dev->bus->self) {
+ ret = pci_enable_ltr(dev->bus->self);
+ if (ret)
+ return ret;
+ }
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ ctrl |= PCI_EXP_LTR_EN;
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_enable_ltr);
+
+/**
+ * pci_disable_ltr - disable latency tolerance reporting
+ * @dev: PCI device
+ */
+void pci_disable_ltr(struct pci_dev *dev)
+{
+ int pos;
+ u16 ctrl;
+
+ if (!pci_ltr_supported(dev))
+ return;
+
+ pos = pci_pcie_cap(dev);
+ if (!pos)
+ return;
+
+ /* Only primary function can enable/disable LTR */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return;
+
+ pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
+ ctrl &= ~PCI_EXP_LTR_EN;
+ pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
+}
+EXPORT_SYMBOL(pci_disable_ltr);
+
+static int __pci_ltr_scale(int *val)
+{
+ int scale = 0;
+
+ while (*val > 1023) {
+ *val = (*val + 31) / 32;
+ scale++;
+ }
+ return scale;
+}
+
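__pci_ltr_scale() folds a nanosecond latency into the 10-bit LTR value plus a power-of-32 scale. Worked example: 5000 ns exceeds 1023, so one pass gives *val = (5000 + 31) / 32 = 157 with scale 1, i.e. an encoded tolerance of 157 * 32 = 5024 ns; the +31 rounds up so the encoded value never drops below the requested one.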
+/**
+ * pci_set_ltr - set LTR latency values
+ * @dev: PCI device
+ * @snoop_lat_ns: snoop latency in nanoseconds
+ * @nosnoop_lat_ns: nosnoop latency in nanoseconds
+ *
+ * Figure out the scale and set the LTR values accordingly.
+ */
+int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns)
+{
+ int pos, ret, snoop_scale, nosnoop_scale;
+ u16 val;
+
+ if (!pci_ltr_supported(dev))
+ return -ENOTSUPP;
+
+ snoop_scale = __pci_ltr_scale(&snoop_lat_ns);
+ nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns);
+
+ if (snoop_lat_ns > PCI_LTR_VALUE_MASK ||
+ nosnoop_lat_ns > PCI_LTR_VALUE_MASK)
+ return -EINVAL;
+
+ if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) ||
+ (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)))
+ return -EINVAL;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR);
+ if (!pos)
+ return -ENOTSUPP;
+
+ val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns;
+ ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val);
+	if (ret)
+		return -EIO;
+
+ val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns;
+ ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val);
+	if (ret)
+		return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_set_ltr);
+
static int pci_acs_enable;
/**
@@ -2479,6 +2873,21 @@ clear:
return 0;
}
+/**
+ * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0.
+ * @dev: Device to reset.
+ * @probe: If set, only check if the device can be reset this way.
+ *
+ * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is
+ * unset, it will be reinitialized internally when going from PCI_D3hot to
+ * PCI_D0. If that's the case and the device is not in a low-power state
+ * already, force it into PCI_D3hot and back to PCI_D0, causing it to be reset.
+ *
+ * NOTE: This causes the caller to sleep for twice the device power transition
+ * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
+ * by default (i.e. unless the @dev's d3_delay field has a different value).
+ * Moreover, only devices in D0 can be reset by this function.
+ */
static int pci_pm_reset(struct pci_dev *dev, int probe)
{
u16 csr;
@@ -2875,31 +3284,34 @@ static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
* @dev: the PCI device
* @decode: true = enable decoding, false = disable decoding
* @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY
- * @change_bridge: traverse ancestors and change bridges
+ * @flags: %PCI_VGA_STATE_CHANGE_DECODES and/or %PCI_VGA_STATE_CHANGE_BRIDGE;
+ *	with CHANGE_BRIDGE set, ancestors are traversed and bridges changed
*/
int pci_set_vga_state(struct pci_dev *dev, bool decode,
- unsigned int command_bits, bool change_bridge)
+ unsigned int command_bits, u32 flags)
{
struct pci_bus *bus;
struct pci_dev *bridge;
u16 cmd;
int rc;
- WARN_ON(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
+	WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) &&
+		(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
/* ARCH specific VGA enables */
- rc = pci_set_vga_state_arch(dev, decode, command_bits, change_bridge);
+ rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
if (rc)
return rc;
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- if (decode == true)
- cmd |= command_bits;
- else
- cmd &= ~command_bits;
- pci_write_config_word(dev, PCI_COMMAND, cmd);
+ if (flags & PCI_VGA_STATE_CHANGE_DECODES) {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (decode == true)
+ cmd |= command_bits;
+ else
+ cmd &= ~command_bits;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
- if (change_bridge == false)
+ if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
return 0;
bus = dev->bus;
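With the bool parameter replaced by a flags word, the old change_bridge=true behavior maps onto an explicit combination. A sketch of an equivalent call, assuming the caller also wants the command bits updated:

	pci_set_vga_state(pdev, true, PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
			  PCI_VGA_STATE_CHANGE_DECODES |
			  PCI_VGA_STATE_CHANGE_BRIDGE);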
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4020025f854e..731e20265ace 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -156,8 +156,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
}
extern struct device_attribute pci_dev_attrs[];
-extern struct device_attribute dev_attr_cpuaffinity;
-extern struct device_attribute dev_attr_cpulistaffinity;
+extern struct device_attribute pcibus_dev_attrs[];
#ifdef CONFIG_HOTPLUG
extern struct bus_attribute pci_bus_attrs[];
#else
diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index f62079ff06dd..95489cd9a555 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c
@@ -326,7 +326,7 @@ static int aer_inject(struct aer_error_inj *einj)
unsigned long flags;
unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
int pos_cap_err, rp_pos_cap_err;
- u32 sever, cor_mask, uncor_mask, cor_mask_orig, uncor_mask_orig;
+ u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
int ret = 0;
dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 3eb77080366a..94a7598eb262 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -114,15 +114,6 @@ extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
extern irqreturn_t aer_irq(int irq, void *context);
-#ifdef CONFIG_ACPI
-extern int aer_osc_setup(struct pcie_device *pciedev);
-#else
-static inline int aer_osc_setup(struct pcie_device *pciedev)
-{
- return 0;
-}
-#endif
-
#ifdef CONFIG_ACPI_APEI
extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev);
#else
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index eee09f756ec9..6892601fc76f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -608,7 +608,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
* the BIOS's expectation, we'll do so once pci_enable_device() is
* called.
*/
- if (aspm_policy != POLICY_POWERSAVE) {
+ if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
pcie_config_aspm_path(link);
pcie_set_clkpm(link, policy_to_clkpm_state(link));
}
@@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
* pci_disable_link_state - disable pci device's link state, so the link will
* never enter specific states
*/
-void pci_disable_link_state(struct pci_dev *pdev, int state)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
{
struct pci_dev *parent = pdev->bus->self;
struct pcie_link_state *link;
@@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
if (!parent || !parent->link_state)
return;
- down_read(&pci_bus_sem);
+ if (sem)
+ down_read(&pci_bus_sem);
mutex_lock(&aspm_lock);
link = parent->link_state;
if (state & PCIE_LINK_STATE_L0S)
@@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
pcie_set_clkpm(link, 0);
}
mutex_unlock(&aspm_lock);
- up_read(&pci_bus_sem);
+ if (sem)
+ up_read(&pci_bus_sem);
+}
+
+void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
+{
+ __pci_disable_link_state(pdev, state, false);
+}
+EXPORT_SYMBOL(pci_disable_link_state_locked);
+
+void pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+ __pci_disable_link_state(pdev, state, true);
}
EXPORT_SYMBOL(pci_disable_link_state);
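pci_disable_link_state_locked() is for callers that already hold pci_bus_sem, where taking the read lock again could deadlock behind a queued writer. A sketch, assuming the call happens inside a pci_walk_bus() callback (pci_walk_bus() holds pci_bus_sem while iterating):

	static int disable_l0s_cb(struct pci_dev *pdev, void *data)
	{
		/* pci_bus_sem is already held by pci_walk_bus() */
		pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L0S);
		return 0;
	}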
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 44cbbbaa499d..48849ffdd672 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -43,43 +43,6 @@ int no_pci_devices(void)
EXPORT_SYMBOL(no_pci_devices);
/*
- * PCI Bus Class Devices
- */
-static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
- int type,
- struct device_attribute *attr,
- char *buf)
-{
- int ret;
- const struct cpumask *cpumask;
-
- cpumask = cpumask_of_pcibus(to_pci_bus(dev));
- ret = type?
- cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask) :
- cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
- buf[ret++] = '\n';
- buf[ret] = '\0';
- return ret;
-}
-
-static ssize_t inline pci_bus_show_cpumaskaffinity(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
-}
-
-static ssize_t inline pci_bus_show_cpulistaffinity(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
-}
-
-DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL);
-DEVICE_ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL);
-
-/*
* PCI Bus Class
*/
static void release_pcibus_dev(struct device *dev)
@@ -95,6 +58,7 @@ static void release_pcibus_dev(struct device *dev)
static struct class pcibus_class = {
.name = "pci_bus",
.dev_release = &release_pcibus_dev,
+ .dev_attrs = pcibus_dev_attrs,
};
static int __init pcibus_class_init(void)
@@ -1455,9 +1419,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
error = device_register(&b->dev);
if (error)
goto class_dev_reg_err;
- error = device_create_file(&b->dev, &dev_attr_cpuaffinity);
- if (error)
- goto dev_create_file_err;
/* Create legacy_io and legacy_mem files for this bus */
pci_create_legacy_files(b);
@@ -1468,8 +1429,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
return b;
-dev_create_file_err:
- device_unregister(&b->dev);
class_dev_reg_err:
device_unregister(dev);
dev_reg_err:
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 5129ed6d8fa7..e8a140669f90 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -606,7 +606,7 @@ static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev)
}
pci_read_config_byte(dev, ICH6_GPIO_CNTL, &enable);
- if (enable & ICH4_GPIO_EN) {
+ if (enable & ICH6_GPIO_EN) {
pci_read_config_dword(dev, ICH6_GPIOBASE, &region);
region &= PCI_BASE_ADDRESS_IO_MASK;
if (region >= PCIBIOS_MIN_IO)
@@ -681,7 +681,7 @@ static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg,
/* ICH7-10 has the same common LPC generic IO decode registers */
static void __devinit quirk_ich7_lpc(struct pci_dev *dev)
{
- /* We share the common ACPI/DPIO decode with ICH6 */
+ /* We share the common ACPI/GPIO decode with ICH6 */
ich6_lpc_acpi_gpio(dev);
/* And have 4 ICH7+ generic decodes */
@@ -2349,8 +2349,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE,
*/
static void __devinit nvenet_msi_disable(struct pci_dev *dev)
{
- if (dmi_name_in_vendors("P5N32-SLI PREMIUM") ||
- dmi_name_in_vendors("P5N32-E SLI")) {
+ const char *board_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+ if (board_name &&
+ (strstr(board_name, "P5N32-SLI PREMIUM") ||
+ strstr(board_name, "P5N32-E SLI"))) {
dev_info(&dev->dev,
"Disabling msi for MCP55 NIC on P5N32-SLI\n");
dev->no_msi = 1;
@@ -2784,6 +2787,16 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
#endif
+static void __devinit fixup_ti816x_class(struct pci_dev *dev)
+{
+ /* TI 816x devices do not have class code set when in PCIe boot mode */
+ if (dev->class == PCI_CLASS_NOT_DEFINED) {
+ dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n");
+ dev->class = PCI_CLASS_MULTIMEDIA_VIDEO;
+ }
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class);
+
static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
struct pci_fixup *end)
{
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 176615e7231f..7f87beed35ac 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,8 +73,6 @@ void pci_remove_bus(struct pci_bus *pci_bus)
return;
pci_remove_legacy_files(pci_bus);
- device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
- device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
device_unregister(&pci_bus->dev);
}
EXPORT_SYMBOL(pci_remove_bus);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index a806cb321d2e..1e9e5a5b8c81 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -991,30 +991,139 @@ static void pci_bus_dump_resources(struct pci_bus *bus)
}
}
+static int __init pci_bus_get_depth(struct pci_bus *bus)
+{
+ int depth = 0;
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ int ret;
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ ret = pci_bus_get_depth(b);
+ if (ret + 1 > depth)
+ depth = ret + 1;
+ }
+
+ return depth;
+}
+static int __init pci_get_max_depth(void)
+{
+ int depth = 0;
+ struct pci_bus *bus;
+
+ list_for_each_entry(bus, &pci_root_buses, node) {
+ int ret;
+
+ ret = pci_bus_get_depth(bus);
+ if (ret > depth)
+ depth = ret;
+ }
+
+ return depth;
+}
+
+/*
+ * The first try will not touch PCI bridge resources.
+ * The second and later tries will clear small leaf bridge resources.
+ * Give up once the maximum bus depth has been tried without success.
+ */
void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;
struct resource_list_x add_list; /* list of resources that
want additional resources */
+ int tried_times = 0;
+ enum release_type rel_type = leaf_only;
+ struct resource_list_x head, *list;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+ unsigned long failed_type;
+ int max_depth = pci_get_max_depth();
+ int pci_try_num;
+
+
+ head.next = NULL;
add_list.next = NULL;
+
+ pci_try_num = max_depth + 1;
+ printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
+ max_depth, pci_try_num);
+
+again:
/* Depth first, calculate sizes and alignments of all
subordinate buses. */
- list_for_each_entry(bus, &pci_root_buses, node) {
+ list_for_each_entry(bus, &pci_root_buses, node)
__pci_bus_size_bridges(bus, &add_list);
- }
/* Depth last, allocate resources and update the hardware. */
- list_for_each_entry(bus, &pci_root_buses, node) {
- __pci_bus_assign_resources(bus, &add_list, NULL);
- pci_enable_bridges(bus);
- }
+ list_for_each_entry(bus, &pci_root_buses, node)
+ __pci_bus_assign_resources(bus, &add_list, &head);
BUG_ON(add_list.next);
+ tried_times++;
+
+	/* did any device complain? */
+ if (!head.next)
+ goto enable_and_dump;
+ failed_type = 0;
+ for (list = head.next; list;) {
+ failed_type |= list->flags;
+ list = list->next;
+ }
+ /*
+	 * I/O ports are tight, so don't try to allocate extra,
+	 * and don't retry once we have reached the limit
+ */
+ failed_type &= type_mask;
+ if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
+ free_list(resource_list_x, &head);
+ goto enable_and_dump;
+ }
+
+ printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
+ tried_times + 1);
+
+	/* on the third and later tries, release whole subtrees, not just leaves */
+ if ((tried_times + 1) > 2)
+ rel_type = whole_subtree;
+
+ /*
+	 * Try to release the leaf bridge resources that don't fit the
+	 * resources of the child devices under that bridge
+ */
+ for (list = head.next; list;) {
+ bus = list->dev->bus;
+ pci_bus_release_bridge_resources(bus, list->flags & type_mask,
+ rel_type);
+ list = list->next;
+ }
+ /* restore size and flags */
+ for (list = head.next; list;) {
+ struct resource *res = list->res;
+
+ res->start = list->start;
+ res->end = list->end;
+ res->flags = list->flags;
+ if (list->dev->subordinate)
+ res->flags = 0;
+
+ list = list->next;
+ }
+ free_list(resource_list_x, &head);
+
+ goto again;
+
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node)
+ pci_enable_bridges(bus);
/* dump the resource on buses */
- list_for_each_entry(bus, &pci_root_buses, node) {
+ list_for_each_entry(bus, &pci_root_buses, node)
pci_bus_dump_resources(bus);
- }
}
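Net effect of the retry loop above: with a maximum bus depth of 2, pci_try_num is 3; pass 1 leaves bridge windows untouched, pass 2 releases the windows of leaf bridges whose children failed, and pass 3 releases whole subtrees. The loop also exits early when only I/O resources failed, on the code's own rationale that I/O ports are too tight to win back by releasing windows.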
void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)