From 7752d5cfe3d11ca0bb9c673ec38bd78ba6578f8e Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Fri, 15 Feb 2008 01:27:20 -0800 Subject: x86: validate against acpi motherboard resources This path adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table. The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional, the existing check needlessly disables MMCONFIG in these cases. In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here: - Validate all MMCONFIG configurations provided, not just the first one. - Validate the entire required length of each configuration according to the provided ending bus number is reserved, not just the minimum required allocation. - Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have. This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. [akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Greg KH Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- arch/x86/pci/init.c | 4 +- arch/x86/pci/mmconfig-shared.c | 149 ++++++++++++++++++++++++++++++++++++----- arch/x86/pci/pci.h | 1 - 3 files changed, 133 insertions(+), 21 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 3de9f9ba2da6..2080b04b3bcc 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -11,9 +11,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT type = pci_direct_probe(); #endif -#ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(type); -#endif + pci_mmcfg_early_init(type); if (raw_pci_ops) return 0; #ifdef CONFIG_PCI_BIOS diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8d54df4dfaad..498e35ee428e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -173,9 +173,78 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) pci_mmcfg_resources_inserted = 1; } -static void __init pci_mmcfg_reject_broken(int type) +static acpi_status __init check_mcfg_resource(struct acpi_resource *res, + void *data) +{ + struct resource *mcfg_res = data; + struct acpi_resource_address64 address; + acpi_status status; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) { + struct acpi_resource_fixed_memory32 *fixmem32 = + &res->data.fixed_memory32; + if (!fixmem32) + return AE_OK; + if ((mcfg_res->start >= fixmem32->address) && + (mcfg_res->end < (fixmem32->address + + fixmem32->address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + } + if ((res->type != ACPI_RESOURCE_TYPE_ADDRESS32) && + (res->type != ACPI_RESOURCE_TYPE_ADDRESS64)) + return AE_OK; + + status = acpi_resource_to_address64(res, &address); + if (ACPI_FAILURE(status) || + (address.address_length <= 0) || + (address.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + if ((mcfg_res->start >= address.minimum) && + (mcfg_res->end < (address.minimum + address.address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, + void *context, void **rv) +{ + struct resource *mcfg_res = context; + + acpi_walk_resources(handle, METHOD_NAME__CRS, + check_mcfg_resource, context); + + if (mcfg_res->flags) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static int __init is_acpi_reserved(unsigned long start, unsigned long end) +{ + struct resource mcfg_res; + + mcfg_res.start = start; + mcfg_res.end = end; + mcfg_res.flags = 0; + + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); + + if (!mcfg_res.flags) + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, + NULL); + + return mcfg_res.flags; +} + +static void __init pci_mmcfg_reject_broken(void) { typeof(pci_mmcfg_config[0]) *cfg; + int i; if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || @@ -196,17 +265,37 @@ static void __init pci_mmcfg_reject_broken(int type) goto reject; } - /* - * Only do this check when type 1 works. If it doesn't work - * assume we run on a Mac and always use MCFG - */ - if (type == 1 && !e820_all_mapped(cfg->address, - cfg->address + MMCONFIG_APER_MIN, - E820_RESERVED)) { - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " E820-reserved\n", cfg->address); - goto reject; + for (i = 0; i < pci_mmcfg_config_num; i++) { + u32 size = (cfg->end_bus_number + 1) << 20; + cfg = &pci_mmcfg_config[i]; + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lu " + "segment %hu buses %u - %u\n", + i, (unsigned long)cfg->address, cfg->pci_segment, + (unsigned int)cfg->start_bus_number, + (unsigned int)cfg->end_bus_number); + if (is_acpi_reserved(cfg->address, cfg->address + size - 1)) { + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " + "in ACPI motherboard resources\n", + cfg->address); + } else { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" + " reserved in ACPI motherboard resources\n", + cfg->address); + /* Don't try to do this check unless configuration + type 1 is available. */ + if ((pci_probe & PCI_PROBE_CONF1) && + e820_all_mapped(cfg->address, + cfg->address + size - 1, + E820_RESERVED)) + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in " + "E820\n", + cfg->address); + else + goto reject; + } } + return; reject: @@ -216,20 +305,46 @@ reject: pci_mmcfg_config_num = 0; } -void __init pci_mmcfg_init(int type) +void __init pci_mmcfg_early_init(int type) +{ + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + return; + + /* If type 1 access is available, no need to enable MMCONFIG yet, we can + defer until later when the ACPI interpreter is available to better + validate things. */ + if (type == 1) + return; + + acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + + if ((pci_mmcfg_config_num == 0) || + (pci_mmcfg_config == NULL) || + (pci_mmcfg_config[0].address == 0)) + return; + + if (pci_mmcfg_arch_init()) + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; +} + +void __init pci_mmcfg_late_init(void) { int known_bridge = 0; + /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; - if (type == 1 && pci_mmcfg_check_hostbridge()) - known_bridge = 1; + /* MMCONFIG already enabled */ + if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) + return; - if (!known_bridge) { + if ((pci_probe & PCI_PROBE_CONF1) && pci_mmcfg_check_hostbridge()) + known_bridge = 1; + else acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type); - } + + pci_mmcfg_reject_broken(); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c4bddaeff619..28b9b72ce7c7 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -97,7 +97,6 @@ extern struct pci_raw_ops pci_direct_conf1; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(int type); /* pci-mmconfig.c */ -- cgit v1.2.3 From 0b64ad7123eb013c3de26750f2d4c356cd566231 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:28:41 -0800 Subject: x86: clear pci_mmcfg_virt when mmcfg get rejected For x86_64, need to free pci_mmcfg_virt, and iounmap some pointers when MMCONF is not reserved in E820 or acpi _CRS and get rejected. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Greg KH Cc: Greg KH Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/mmconfig-shared.c | 1 + arch/x86/pci/mmconfig_32.c | 4 ++++ arch/x86/pci/mmconfig_64.c | 22 +++++++++++++++++++++- arch/x86/pci/pci.h | 1 + 4 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 498e35ee428e..8f204955427c 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -300,6 +300,7 @@ static void __init pci_mmcfg_reject_broken(void) reject: printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + pci_mmcfg_arch_free(); kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; pci_mmcfg_config_num = 0; diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 081816ada057..f3c761dce695 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -136,3 +136,7 @@ int __init pci_mmcfg_arch_init(void) raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ +} diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 9207fd49233c..a1994163c99d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -127,7 +127,7 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) int __init pci_mmcfg_arch_init(void) { int i; - pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * + pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); @@ -141,9 +141,29 @@ int __init pci_mmcfg_arch_init(void) printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", pci_mmcfg_config[i].pci_segment); + pci_mmcfg_arch_free(); return 0; } } raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ + int i; + + if (pci_mmcfg_virt == NULL) + return; + + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if (pci_mmcfg_virt[i].virt) { + iounmap(pci_mmcfg_virt[i].virt); + pci_mmcfg_virt[i].virt = NULL; + pci_mmcfg_virt[i].cfg = NULL; + } + } + + kfree(pci_mmcfg_virt); + pci_mmcfg_virt = NULL; +} diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 28b9b72ce7c7..c8b89a832c66 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -101,6 +101,7 @@ extern void pci_pcbios_init(void); /* pci-mmconfig.c */ extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space -- cgit v1.2.3 From 05c58b8ac77639c17205f0b2a2d9eb1971dc47ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:30:14 -0800 Subject: x86: mmconf enable mcfg early Patch "x86: validate against ACPI motherboard resources" changed the mmconf init sequence, and init MMCONF late in acpi_init. here change it back to old sequence: 1. check hostbridge in early 2. check MCFG with e820 in early 3. if all fail, will check MCFg with acpi _CRS in acpi_init So we can make MCONF working again when acpi=off is set if hostbridge support that. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Greg KH Cc: Greg KH Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/mmconfig-shared.c | 101 ++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 46 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8f204955427c..36a4a7514b05 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -241,7 +241,7 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) return mcfg_res.flags; } -static void __init pci_mmcfg_reject_broken(void) +static void __init pci_mmcfg_reject_broken(int type, int early) { typeof(pci_mmcfg_config[0]) *cfg; int i; @@ -266,34 +266,43 @@ static void __init pci_mmcfg_reject_broken(void) } for (i = 0; i < pci_mmcfg_config_num; i++) { + int valid = 0; u32 size = (cfg->end_bus_number + 1) << 20; cfg = &pci_mmcfg_config[i]; - printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lu " + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->pci_segment, (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (is_acpi_reserved(cfg->address, cfg->address + size - 1)) { + + if (!early && + is_acpi_reserved(cfg->address, cfg->address + size - 1)) { printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " "in ACPI motherboard resources\n", cfg->address); - } else { + valid = 1; + } + + if (valid) + continue; + + if (!early) printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" " reserved in ACPI motherboard resources\n", cfg->address); - /* Don't try to do this check unless configuration - type 1 is available. */ - if ((pci_probe & PCI_PROBE_CONF1) && - e820_all_mapped(cfg->address, - cfg->address + size - 1, - E820_RESERVED)) - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in " - "E820\n", - cfg->address); - else - goto reject; + /* Don't try to do this check unless configuration + type 1 is available. */ + if (type == 1 && e820_all_mapped(cfg->address, + cfg->address + size - 1, + E820_RESERVED)) { + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in E820\n", + cfg->address); + valid = 1; } + + if (!valid) + goto reject; } return; @@ -306,46 +315,31 @@ reject: pci_mmcfg_config_num = 0; } -void __init pci_mmcfg_early_init(int type) -{ - if ((pci_probe & PCI_PROBE_MMCONF) == 0) - return; - - /* If type 1 access is available, no need to enable MMCONFIG yet, we can - defer until later when the ACPI interpreter is available to better - validate things. */ - if (type == 1) - return; - - acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return; +static int __initdata known_bridge; - if (pci_mmcfg_arch_init()) - pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; -} - -void __init pci_mmcfg_late_init(void) +void __init __pci_mmcfg_init(int type, int early) { - int known_bridge = 0; - /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; /* MMCONFIG already enabled */ - if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) + if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) return; - if ((pci_probe & PCI_PROBE_CONF1) && pci_mmcfg_check_hostbridge()) - known_bridge = 1; - else - acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + /* for late to exit */ + if (known_bridge) + return; - pci_mmcfg_reject_broken(); + if (early && type == 1) { + if (pci_mmcfg_check_hostbridge()) + known_bridge = 1; + } + + if (!known_bridge) { + acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + pci_mmcfg_reject_broken(type, early); + } if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || @@ -365,6 +359,21 @@ void __init pci_mmcfg_late_init(void) } } +void __init pci_mmcfg_early_init(int type) +{ + __pci_mmcfg_init(type, 1); +} + +void __init pci_mmcfg_late_init(void) +{ + int type = 0; + + if (pci_probe & PCI_PROBE_CONF1) + type = 1; + + __pci_mmcfg_init(type, 0); +} + static int __init pci_mmcfg_late_insert_resources(void) { /* -- cgit v1.2.3 From 57741a779070e0b141b6148136b420c8d35ccbce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:32:50 -0800 Subject: x86_64: set cfg_size for AMD Family 10h in case MMCONFIG reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID. Signed-off-by: Yinghai Lu Cc: Andrew Morton Acked-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/fixup.c | 17 +++++++++++++++++ drivers/pci/probe.c | 11 ++++++++++- include/linux/pci.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index a5ef5f551373..b60b2abd480c 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -493,3 +493,20 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); + +/* + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. Maybe we don't have a way to generate extended config space + * accesses. So check it + */ +static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +{ + dev->cfg_size = pci_cfg_space_size_ext(dev, 0); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f991359f0c36..a8efdaef1870 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,11 +842,14 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) { int pos; u32 status; + if (!check_exp_pcix) + goto skip; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); @@ -858,6 +861,7 @@ int pci_cfg_space_size(struct pci_dev *dev) goto fail; } + skip: if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) goto fail; if (status == 0xffffffff) @@ -869,6 +873,11 @@ int pci_cfg_space_size(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +int pci_cfg_space_size(struct pci_dev *dev) +{ + return pci_cfg_space_size_ext(dev, 1); +} + static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 43a4f9cae67d..2b8f74522f8f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -666,6 +666,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3 From 7fd0da4085d5b012a6bdcbbd63da7ead9fc69ad4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:13:02 -0800 Subject: x86_64: check MSR to get MMCONFIG for AMD Family 10h so even booting kernel with acpi=off or even MCFG is not there, we still can use MMCONFIG. Signed-off-by: Yinghai Lu Cc: Andi Kleen Cc: Greg KH Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/pci/mmconfig-shared.c | 75 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 6 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 36a4a7514b05..8707e24e625e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -100,33 +100,96 @@ static const char __init *pci_mmcfg_intel_945(void) return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } +static const char __init *pci_mmcfg_amd_fam10h(void) +{ + u32 low, high, address; + u64 base, msr; + int i; + unsigned segnbits = 0, busnbits; + + address = MSR_FAM10H_MMIO_CONF_BASE; + if (rdmsr_safe(address, &low, &high)) + return NULL; + + msr = high; + msr <<= 32; + msr |= low; + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* + * only handle bus 0 ? + * need to skip it + */ + if (!busnbits) + return NULL; + + if (busnbits > 8) { + segnbits = busnbits - 8; + busnbits = 8; + } + + pci_mmcfg_config_num = (1 << segnbits); + pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) * + pci_mmcfg_config_num, GFP_KERNEL); + if (!pci_mmcfg_config) + return NULL; + + for (i = 0; i < (1 << segnbits); i++) { + pci_mmcfg_config[i].address = base + (1<<28) * i; + pci_mmcfg_config[i].pci_segment = i; + pci_mmcfg_config[i].start_bus_number = 0; + pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; + } + + return "AMD Family 10h NB"; +} + struct pci_mmcfg_hostbridge_probe { + u32 bus; + u32 devfn; u32 vendor; u32 device; const char *(*probe)(void); }; static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0x18, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, + { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, }; static int __init pci_mmcfg_check_hostbridge(void) { u32 l; + u32 bus, devfn; u16 vendor, device; int i; const char *name; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); - vendor = l & 0xffff; - device = (l >> 16) & 0xffff; - pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + bus = pci_mmcfg_probes[i].bus; + devfn = pci_mmcfg_probes[i].devfn; + pci_direct_conf1.read(0, bus, devfn, 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + if (pci_mmcfg_probes[i].vendor == vendor && pci_mmcfg_probes[i].device == device) name = pci_mmcfg_probes[i].probe(); -- cgit v1.2.3 From d2ebdf4bae4f1d7c30e71fd74f270ca4cda024fc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 20 Feb 2008 22:21:57 -0800 Subject: x86: remove unneeded check in mmconf reject mmconfig is only used to access extended configuration space. so don't need to reject MFG that only have one entry and only handle bus0. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/mmconfig-shared.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8707e24e625e..6f68658b519d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -316,18 +316,6 @@ static void __init pci_mmcfg_reject_broken(int type, int early) cfg = &pci_mmcfg_config[0]; - /* - * Handle more broken MCFG tables on Asus etc. - * They only contain a single entry for bus 0-0. - */ - if (pci_mmcfg_config_num == 1 && - cfg->pci_segment == 0 && - (cfg->start_bus_number | cfg->end_bus_number) == 0) { - printk(KERN_ERR "PCI: start and end of bus number is 0. " - "Rejected as broken MCFG.\n"); - goto reject; - } - for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; u32 size = (cfg->end_bus_number + 1) << 20; -- cgit v1.2.3 From bb63b4219976d48ed6d22ac33c18be334fb5a78c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Feb 2008 23:56:50 -0800 Subject: x86 pci: remove checking type for mmconfig probe doesn't need to check if it is type1 or type2, we can use raw_pci_ops directly. also make pci_direct_conf1 static again. anyway is there system with type 2 and mmconf support? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/direct.c | 8 +++++--- arch/x86/pci/init.c | 11 +++++------ arch/x86/pci/mmconfig-shared.c | 32 +++++++++++++++----------------- include/linux/pci.h | 4 ++-- 4 files changed, 27 insertions(+), 28 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 42f3e4cad179..21d1e0e0d535 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -258,7 +258,8 @@ void __init pci_direct_init(int type) { if (type == 0) return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); if (type == 1) raw_pci_ops = &pci_direct_conf1; else @@ -275,8 +276,10 @@ int __init pci_direct_probe(void) if (!region) goto type2; - if (pci_check_type1()) + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; return 1; + } release_resource(region); type2: @@ -290,7 +293,6 @@ int __init pci_direct_probe(void) goto fail2; if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; return 2; } diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 2080b04b3bcc..343c36337e69 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -6,14 +6,13 @@ in the right sequence from here. */ static __init int pci_access_init(void) { - int type __maybe_unused = 0; - #ifdef CONFIG_PCI_DIRECT + int type = 0; + type = pci_direct_probe(); #endif - pci_mmcfg_early_init(type); - if (raw_pci_ops) - return 0; + pci_mmcfg_early_init(); + #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif @@ -26,7 +25,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); #endif - if (!raw_pci_ops) + if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6f68658b519d..bdf62243186a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted; static const char __init *pci_mmcfg_e7520(void) { u32 win; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; if(win == 0x0000 || win == 0xf000) @@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void) pci_mmcfg_config_num = 1; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) @@ -179,6 +179,9 @@ static int __init pci_mmcfg_check_hostbridge(void) int i; const char *name; + if (!raw_pci_ops) + return 0; + pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; @@ -186,7 +189,7 @@ static int __init pci_mmcfg_check_hostbridge(void) for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; devfn = pci_mmcfg_probes[i].devfn; - pci_direct_conf1.read(0, bus, devfn, 0, 4, &l); + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); vendor = l & 0xffff; device = (l >> 16) & 0xffff; @@ -304,7 +307,7 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) return mcfg_res.flags; } -static void __init pci_mmcfg_reject_broken(int type, int early) +static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; int i; @@ -342,8 +345,8 @@ static void __init pci_mmcfg_reject_broken(int type, int early) " reserved in ACPI motherboard resources\n", cfg->address); /* Don't try to do this check unless configuration - type 1 is available. */ - if (type == 1 && e820_all_mapped(cfg->address, + type 1 is available. how about type 2 ?*/ + if (raw_pci_ops && e820_all_mapped(cfg->address, cfg->address + size - 1, E820_RESERVED)) { printk(KERN_NOTICE @@ -368,7 +371,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int type, int early) +void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) @@ -382,14 +385,14 @@ void __init __pci_mmcfg_init(int type, int early) if (known_bridge) return; - if (early && type == 1) { + if (early) { if (pci_mmcfg_check_hostbridge()) known_bridge = 1; } if (!known_bridge) { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type, early); + pci_mmcfg_reject_broken(early); } if ((pci_mmcfg_config_num == 0) || @@ -410,19 +413,14 @@ void __init __pci_mmcfg_init(int type, int early) } } -void __init pci_mmcfg_early_init(int type) +void __init pci_mmcfg_early_init(void) { - __pci_mmcfg_init(type, 1); + __pci_mmcfg_init(1); } void __init pci_mmcfg_late_init(void) { - int type = 0; - - if (pci_probe & PCI_PROBE_CONF1) - type = 1; - - __pci_mmcfg_init(type, 0); + __pci_mmcfg_init(0); } static int __init pci_mmcfg_late_insert_resources(void) diff --git a/include/linux/pci.h b/include/linux/pci.h index 2b8f74522f8f..a71954a38932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1055,10 +1055,10 @@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); #ifdef CONFIG_PCI_MMCONFIG -extern void __init pci_mmcfg_early_init(int type); +extern void __init pci_mmcfg_early_init(void); extern void __init pci_mmcfg_late_init(void); #else -static inline void pci_mmcfg_early_init(int type) { } +static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif -- cgit v1.2.3 From 871d5f8dd0f7647f03facd4cb79485938d1b61ab Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:20:09 -0800 Subject: x86: get mp_bus_to_node early Currently, on an amd k8 system with multi ht chains, the numa_node of pci devices under /sys/devices/pci0000:80/* is always 0, even if that chain is on node 1 or 2 or 3. Workaround: pcibus_to_node(bus) is used when we want to get the node that pci_device is on. In struct device, we already have numa_node member, and we could use dev_to_node()/set_dev_node() to get and set numa_node in the device. set_dev_node is called in pci_device_add() with pcibus_to_node(bus), and pcibus_to_node uses bus->sysdata for nodeid. The problem is when pci_add_device is called, bus->sysdata is not assigned correct nodeid yet. The result is that numa_node will always be 0. pcibios_scan_root and pci_scan_root could take sysdata. So we need to get mp_bus_to_node mapping before these two are called, and thus get_mp_bus_to_node could get correct node for sysdata in root bus. In scanning of the root bus, all child busses will take parent bus sysdata. So all pci_device->dev.numa_node will be assigned correctly and automatically. Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we could also could make other bus specific device get the correct numa_node too. This is an updated version of pci_sysdata and Jeff's pci_domain patch. [ mingo@elte.hu: build fix ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_32 | 1 + arch/x86/pci/acpi.c | 27 ++++++++----- arch/x86/pci/common.c | 18 +++++++-- arch/x86/pci/irq.c | 4 +- arch/x86/pci/k8-bus_64.c | 92 +++++++++++++++++++++++++++++++------------ arch/x86/pci/legacy.c | 4 +- arch/x86/pci/mp_bus_to_node.c | 23 +++++++++++ include/asm-x86/pci.h | 2 + include/asm-x86/topology.h | 13 ++++++ 9 files changed, 142 insertions(+), 42 deletions(-) create mode 100644 arch/x86/pci/mp_bus_to_node.c (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index cdd6828b5abb..e9c5caf54e59 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -10,5 +10,6 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o +pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2664cb3fc96c..1a9c0c6a1a18 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do { struct pci_bus *bus; struct pci_sysdata *sd; + int node; +#ifdef CONFIG_ACPI_NUMA int pxm; +#endif dmi_check_system(acpi_pciprobe_dmi_table); @@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do return NULL; } + node = -1; +#ifdef CONFIG_ACPI_NUMA + pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) + node = pxm_to_node(pxm); + if (node != -1) + set_mp_bus_to_node(busnum, node); + else + node = get_mp_bus_to_node(busnum); +#endif + /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. * It's arguable whether it's worth the trouble to care. @@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } sd->domain = domain; - sd->node = -1; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_ACPI_NUMA - if (pxm >= 0) - sd->node = pxm_to_node(pxm); -#endif + sd->node = node; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. @@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do kfree(sd); #ifdef CONFIG_ACPI_NUMA - if (bus != NULL) { + if (bus) { if (pxm >= 0) { - printk("bus %d -> pxm %d -> node %d\n", + printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", busnum, pxm, pxm_to_node(pxm)); } } @@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); - return bus; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 75fcc29ecf52..07d53184f7a4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return NULL; } + sd->node = get_mp_bus_to_node(busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + return bus; } extern u8 pci_cache_line_size; @@ -480,7 +485,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) +struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -495,10 +500,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); return NULL; } - sd->node = -1; - bus = pci_scan_bus(busno, &pci_root_ops, sd); + sd->node = node; + bus = pci_scan_bus(busno, ops, sd); if (!bus) kfree(sd); return bus; } + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + return pci_scan_bus_on_node(busno, &pci_root_ops, -1); +} diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 579745ca6b66..0908fca901bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void) busmap[e->bus] = 1; } for(i = 1; i < 256; i++) { + int node; if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus_with_sysdata(i)) + node = get_mp_bus_to_node(i); + if (pci_scan_bus_on_node(i, &pci_root_ops, node)) printk(KERN_INFO "PCI: Discovered primary peer " "bus %02x [IRQ]\n", i); } diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 9cc813e29706..3903efbca535 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include /* * This discovers the pcibus <-> node mapping on AMD K8. @@ -20,64 +22,102 @@ #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 +#ifdef CONFIG_NUMA + +#define BUS_NR 256 + +static int mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node = -1; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return node; + + node = mp_bus_to_node[busnum]; + + /* + * let numa_node_id to decide it later in dma_alloc_pages + * if there is no ram on that node + */ + if (node != -1 && !node_online(node)) + node = -1; + + return node; +} + +#endif + /** - * fill_mp_bus_to_cpumask() + * early_fill_mp_bus_to_node() + * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ __init static int -fill_mp_bus_to_cpumask(void) +early_fill_mp_bus_to_node(void) { - struct pci_dev *nb_dev = NULL; +#ifdef CONFIG_NUMA int i, j; + unsigned slot; u32 ldtbus, nid; + u32 id; static int lbnr[3] = { LDT_BUS_NUMBER_REGISTER_0, LDT_BUS_NUMBER_REGISTER_1, LDT_BUS_NUMBER_REGISTER_2 }; - while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { - pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); + for (i = 0; i < BUS_NR; i++) + mp_bus_to_node[i] = -1; + + if (!early_pci_allowed()) + return -1; + + for (slot = 0x18; slot < 0x20; slot++) { + id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + if (id != (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_K8HTCONFIG<<16))) + break; + nid = read_pci_config(0, slot, 0, NODE_ID_REGISTER); for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); + ldtbus = read_pci_config(0, slot, 0, lbnr[i]); /* * if there are no busses hanging off of the current * ldt link then both the secondary and subordinate * bus number fields are set to 0. - * + * * RED-PEN * This is slightly broken because it assumes - * HT node IDs == Linux node ids, which is not always + * HT node IDs == Linux node ids, which is not always * true. However it is probably mostly true. */ if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - j++) { - struct pci_bus *bus; - struct pci_sysdata *sd; - - long node = NODE_ID(nid); - /* Algorithm a bit dumb, but - it shouldn't matter here */ - bus = pci_find_bus(0, j); - if (!bus) - continue; - if (!node_online(node)) - node = 0; - - sd = bus->sysdata; - sd->node = node; - } + j++) { + int node = NODE_ID(nid); + mp_bus_to_node[j] = (unsigned char)node; + } } } } + for (i = 0; i < BUS_NR; i++) { + int node = mp_bus_to_node[i]; + if (node >= 0) + printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); + } +#endif return 0; } -fs_initcall(fill_mp_bus_to_cpumask); +postcore_initcall(early_fill_mp_bus_to_node); diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index e041ced0ce13..a67921ce60af 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -12,6 +12,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) { int n, devfn; + long node; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; @@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) u32 l; if (pci_find_bus(0, n)) continue; + node = get_mp_bus_to_node(n); for (devfn = 0; devfn < 256; devfn += 8) { if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_with_sysdata(n); + pci_scan_bus_on_node(n, &pci_root_ops, node); break; } } diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c new file mode 100644 index 000000000000..022943999b84 --- /dev/null +++ b/arch/x86/pci/mp_bus_to_node.c @@ -0,0 +1,23 @@ +#include +#include +#include + +#define BUS_NR 256 + +static unsigned char mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = (unsigned char) node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return 0; + node = mp_bus_to_node[busnum]; + return node; +} diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index ddd8e248fc0a..30bbde0cb34b 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -19,6 +19,8 @@ struct pci_sysdata { }; /* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, + int node); extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); static inline int pci_domain_nr(struct pci_bus *bus) diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 22073268b481..4793ae745a78 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -198,4 +198,17 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define smt_capable() (smp_num_siblings > 1) #endif +#ifdef CONFIG_NUMA +extern int get_mp_bus_to_node(int busnum); +extern void set_mp_bus_to_node(int busnum, int node); +#else +static inline int get_mp_bus_to_node(int busnum) +{ + return 0; +} +static inline void set_mp_bus_to_node(int busnum, int node) +{ +} +#endif + #endif -- cgit v1.2.3 From 35ddd068fb94b187e94a3fc497ccecf27bdda9ae Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:15:08 -0800 Subject: x86: use bus conf in NB conf fun1 to get bus range on, on 64-bit ... so we use the same code with Quad core cpu as old opteron. This patch is useful when acpi=off or _PXM is not there in DSDT. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/k8-bus_64.c | 88 ++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 40 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 3903efbca535..dab38310ee97 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -12,15 +12,18 @@ * RED-PEN empty cpus get reported wrong */ -#define NODE_ID_REGISTER 0x60 -#define NODE_ID(dword) (dword & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0x94 -#define LDT_BUS_NUMBER_REGISTER_1 0xB4 -#define LDT_BUS_NUMBER_REGISTER_2 0xD4 -#define NR_LDT_BUS_NUMBER_REGISTERS 3 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) +#define NODE_ID(dword) ((dword>>4) & 0x07) +#define LDT_BUS_NUMBER_REGISTER_0 0xE0 +#define LDT_BUS_NUMBER_REGISTER_1 0xE4 +#define LDT_BUS_NUMBER_REGISTER_2 0xE8 +#define LDT_BUS_NUMBER_REGISTER_3 0xEC +#define NR_LDT_BUS_NUMBER_REGISTERS 4 +#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) +#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 24) & 0xFF) + #define PCI_DEVICE_ID_K8HTCONFIG 0x1100 +#define PCI_DEVICE_ID_K8_10H_HTCONFIG 0x1200 +#define PCI_DEVICE_ID_K8_11H_HTCONFIG 0x1300 #ifdef CONFIG_NUMA @@ -67,12 +70,19 @@ early_fill_mp_bus_to_node(void) #ifdef CONFIG_NUMA int i, j; unsigned slot; - u32 ldtbus, nid; + u32 ldtbus; u32 id; - static int lbnr[3] = { + int node; + u16 deviceid; + u16 vendorid; + int min_bus; + int max_bus; + + static int lbnr[NR_LDT_BUS_NUMBER_REGISTERS] = { LDT_BUS_NUMBER_REGISTER_0, LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2 + LDT_BUS_NUMBER_REGISTER_2, + LDT_BUS_NUMBER_REGISTER_3 }; for (i = 0; i < BUS_NR; i++) @@ -81,38 +91,36 @@ early_fill_mp_bus_to_node(void) if (!early_pci_allowed()) return -1; - for (slot = 0x18; slot < 0x20; slot++) { - id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); - if (id != (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_K8HTCONFIG<<16))) - break; - nid = read_pci_config(0, slot, 0, NODE_ID_REGISTER); - - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - ldtbus = read_pci_config(0, slot, 0, lbnr[i]); - /* - * if there are no busses hanging off of the current - * ldt link then both the secondary and subordinate - * bus number fields are set to 0. - * - * RED-PEN - * This is slightly broken because it assumes - * HT node IDs == Linux node ids, which is not always - * true. However it is probably mostly true. - */ - if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 - && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { - for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); - j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - j++) { - int node = NODE_ID(nid); - mp_bus_to_node[j] = (unsigned char)node; - } - } - } + slot = 0x18; + id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + + vendorid = id & 0xffff; + if (vendorid != PCI_VENDOR_ID_AMD) + goto out; + + deviceid = (id>>16) & 0xffff; + if ((deviceid != PCI_DEVICE_ID_K8HTCONFIG) && + (deviceid != PCI_DEVICE_ID_K8_10H_HTCONFIG) && + (deviceid != PCI_DEVICE_ID_K8_11H_HTCONFIG)) + goto out; + + for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { + ldtbus = read_pci_config(0, slot, 1, lbnr[i]); + + /* Check if that register is enabled for bus range */ + if ((ldtbus & 7) != 3) + continue; + + min_bus = SECONDARY_LDT_BUS_NUMBER(ldtbus); + max_bus = SUBORDINATE_LDT_BUS_NUMBER(ldtbus); + node = NODE_ID(ldtbus); + for (j = min_bus; j <= max_bus; j++) + mp_bus_to_node[j] = (unsigned char) node; } +out: for (i = 0; i < BUS_NR; i++) { - int node = mp_bus_to_node[i]; + node = mp_bus_to_node[i]; if (node >= 0) printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); } -- cgit v1.2.3 From 30a18d6c3f1e774de656ebd8ff219d53e2ba4029 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:20 -0800 Subject: x86: multi pci root bus with different io resource range, on 64-bit scan AMD opteron io/mmio routing to make sure every pci root bus get correct resource range. Thus later pci scan could assign correct resource to device with unassigned resource. this can fix a system without _CRS for multi pci root bus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_64 | 2 +- arch/x86/pci/k8-bus_64.c | 404 +++++++++++++++++++++++++++++++++++++++------ drivers/pci/probe.c | 6 + include/asm-x86/topology.h | 3 + include/linux/pci.h | 2 +- 5 files changed, 365 insertions(+), 52 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf143..8fbd19832cf6 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index dab38310ee97..5e8a9d105edd 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -7,23 +7,29 @@ /* * This discovers the pcibus <-> node mapping on AMD K8. - * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID(dword) ((dword>>4) & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0xE0 -#define LDT_BUS_NUMBER_REGISTER_1 0xE4 -#define LDT_BUS_NUMBER_REGISTER_2 0xE8 -#define LDT_BUS_NUMBER_REGISTER_3 0xEC -#define NR_LDT_BUS_NUMBER_REGISTERS 4 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 24) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 -#define PCI_DEVICE_ID_K8_10H_HTCONFIG 0x1200 -#define PCI_DEVICE_ID_K8_11H_HTCONFIG 0x1300 +/* + * sub bus (transparent) will use entres from 3 to store extra from root, + * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; #ifdef CONFIG_NUMA @@ -55,77 +61,375 @@ int get_mp_bus_to_node(int busnum) return node; } - #endif +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + if (!pci_root_num) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + if (start == range[j].start && end < range[j].end) { + range[j].start = end + 1; + break; + } else if (start == range[j].start && end == range[j].end) { + range[j].start = 0; + range[j].end = 0; + break; + } else if (start > range[j].start && end == range[j].end) { + range[j].end = start - 1; + break; + } else if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + break; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + if (res->flags != flags) + continue; + if (res->end + 1 == start) { + res->end = end; + return; + } else if (end + 1 == res->start) { + res->start = start; + return; + } + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init static int -early_fill_mp_bus_to_node(void) +static int __init early_fill_mp_bus_info(void) { -#ifdef CONFIG_NUMA - int i, j; + int i; + int j; + unsigned bus; unsigned slot; - u32 ldtbus; - u32 id; + int found; int node; - u16 deviceid; - u16 vendorid; - int min_bus; - int max_bus; - - static int lbnr[NR_LDT_BUS_NUMBER_REGISTERS] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2, - LDT_BUS_NUMBER_REGISTER_3 - }; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) mp_bus_to_node[i] = -1; +#endif if (!early_pci_allowed()) return -1; - slot = 0x18; - id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; - vendorid = id & 0xffff; - if (vendorid != PCI_VENDOR_ID_AMD) - goto out; + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); - deviceid = (id>>16) & 0xffff; - if ((deviceid != PCI_DEVICE_ID_K8HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_10H_HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_11H_HTCONFIG)) - goto out; + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - ldtbus = read_pci_config(0, slot, 1, lbnr[i]); + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); /* Check if that register is enabled for bus range */ - if ((ldtbus & 7) != 3) + if ((reg & 7) != 3) continue; - min_bus = SECONDARY_LDT_BUS_NUMBER(ldtbus); - max_bus = SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - node = NODE_ID(ldtbus); + min_bus = (reg >> 16) & 0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA for (j = min_bus; j <= max_bus; j++) mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; } -out: + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_IO, 0); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + /* 0xfc00000000-0xfcffffffff for Family 10h mmconfig*/ + range[0].end = 0xfbffffffffULL; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_MEM, 0); + update_range(range, start, end); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? + * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) { node = mp_bus_to_node[i]; if (node >= 0) printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); } #endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } + } + return 0; } -postcore_initcall(early_fill_mp_bus_to_node); +postcore_initcall(early_fill_mp_bus_info); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a40043bd3257..4a55bf380957 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1088,6 +1088,10 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1147,6 +1151,8 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; + set_pci_bus_resources_arch_default(b); + return b; dev_create_file_err: diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 4793ae745a78..0e6d6b03affe 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -193,6 +193,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + #ifdef CONFIG_SMP #define mc_capable() (boot_cpu_data.x86_max_cores > 1) #define smt_capable() (smp_num_siblings > 1) diff --git a/include/linux/pci.h b/include/linux/pci.h index a71954a38932..abc998ffb66e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -254,7 +254,7 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 16 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ -- cgit v1.2.3 From 6e184f299d696203bc40545b9db216089d88bef7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 6 Mar 2008 01:15:31 -0800 Subject: x86: double check the multi root bus with fam10h mmconf some bioses give same range to mmconf for fam10h msr, and mmio for node/link. fam10h msr will overide mmio for node/link. so we can not assign range to devices under node/link for unassigned resources. this patch will take range out from the mmio for node/link Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/k8-bus_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 2 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 5e8a9d105edd..84b2030fc2f0 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -191,6 +191,34 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, }; +static u64 __initdata fam10h_mmconf_start; +static u64 __initdata fam10h_mmconf_end; +static void __init get_pci_mmcfg_amd_fam10h_range(void) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + /* assume all cpus from fam10h have mmconf */ + if (boot_cpu_data.x86 < 0x10) + return; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + fam10h_mmconf_start = base; + fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; +} + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus @@ -305,6 +333,8 @@ static int __init early_fill_mp_bus_info(void) continue; /* not found */ info = &pci_root_info[j]; + printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", + node, link, (u64)start, (u64)end); update_res(info, start, end, IORESOURCE_IO, 0); update_range(range, start, end); } @@ -328,8 +358,7 @@ static int __init early_fill_mp_bus_info(void) memset(range, 0, sizeof(range)); /* 0xfd00000000-0xffffffffff for HT */ - /* 0xfc00000000-0xfcffffffff for Family 10h mmconfig*/ - range[0].end = 0xfbffffffffULL; + range[0].end = (0xfdULL<<32) - 1; /* need to take out [0, TOM) for RAM*/ address = MSR_K8_TOP_MEM1; @@ -339,6 +368,14 @@ static int __init early_fill_mp_bus_info(void) if (end < (1ULL<<32)) update_range(range, 0, end - 1); + /* get mmconfig */ + get_pci_mmcfg_amd_fam10h_range(); + /* need to take out mmconf range */ + if (fam10h_mmconf_end) { + printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); + } + /* mmio resource */ for (i = 0; i < 8; i++) { reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); @@ -364,8 +401,51 @@ static int __init early_fill_mp_bus_info(void) continue; /* not found */ info = &pci_root_info[j]; + + printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", + node, link, (u64)start, (u64)end); + /* + * some sick allocation would have range overlap with fam10h + * mmconf range, so need to update start and end. + */ + if (fam10h_mmconf_end) { + int changed = 0; + u64 endx = 0; + if (start >= fam10h_mmconf_start && + start <= fam10h_mmconf_end) { + start = fam10h_mmconf_end + 1; + changed = 1; + } + + if (end >= fam10h_mmconf_start && + end <= fam10h_mmconf_end) { + end = fam10h_mmconf_start - 1; + changed = 1; + } + + if (start < fam10h_mmconf_start && + end > fam10h_mmconf_end) { + /* we got a hole */ + endx = fam10h_mmconf_start - 1; + update_res(info, start, endx, IORESOURCE_MEM, 0); + update_range(range, start, endx); + printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); + start = fam10h_mmconf_end + 1; + changed = 1; + } + if (changed) { + if (start <= end) { + printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); + } else { + printk(KERN_CONT "%s\n", endx?"":" ==> none"); + continue; + } + } + } + update_res(info, start, end, IORESOURCE_MEM, 0); update_range(range, start, end); + printk(KERN_CONT "\n"); } /* need to take out [4G, TOM2) for RAM*/ -- cgit v1.2.3 From 4cf19463745fad81ef2eed3e4e0038da5fd153f8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Apr 2008 15:14:52 -0700 Subject: x86_64: don't need set default res if only have one root bus if there's only one root bus there's no need to split resources. This patch fixes the issue described at: http://lkml.org/lkml/2008/4/10/304 Reported-and-bisected-by: Rafael J. Wysocki Tested-by: Rafael J. Wysocki Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/k8-bus_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 84b2030fc2f0..4e64c58e17a0 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -69,7 +69,8 @@ void set_pci_bus_resources_arch_default(struct pci_bus *b) int j; struct pci_root_info *info; - if (!pci_root_num) + /* if only one root bus, don't need to anything */ + if (pci_root_num < 2) return; for (i = 0; i < pci_root_num; i++) { -- cgit v1.2.3 From e8ee6f0ae5cd860e8e6c02807edfa3c1fa01bcb5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Apr 2008 01:41:58 -0700 Subject: x86: work around io allocation overlap of HT links normally BIOSes assign io/mmio range to different HT links without overlapping, even same node same link should get non overlapping entries. but Rafael L. Wysocki's buggy BIOS creates a link with overlapping entries for mmio and io: node 0 link 0: io port [1000, ffffff] node 0 link 0: mmio [e0000000, efffffff] node 0 link 0: mmio [a0000, bffff] node 0 link 0: mmio [80000000, ffffffff] try to merge them and we will get: bus: [00, ff] on node 0 link 0 bus: 00 index 0 io port: [0, ffff] bus: 00 index 1 mmio: [80000000, fcffffffff] bus: 00 index 2 mmio: [a0000, bffff] so later we will reduce the chance to assign used resource to unassigned device. Reported-by: "Rafael J. Wysocki" Signed-off-by: Yinghai Lu Tested-by: "Rafael J. Wysocki" Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/k8-bus_64.c | 56 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 18 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 4e64c58e17a0..ab6d4b18a88f 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -112,17 +112,25 @@ static void __init update_range(struct res_range *range, size_t start, for (j = 0; j < RANGE_NUM; j++) { if (!range[j].end) continue; - if (start == range[j].start && end < range[j].end) { - range[j].start = end + 1; - break; - } else if (start == range[j].start && end == range[j].end) { + + if (start <= range[j].start && end >= range[j].end) { range[j].start = 0; range[j].end = 0; - break; - } else if (start > range[j].start && end == range[j].end) { + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { range[j].end = start - 1; - break; - } else if (start > range[j].start && end < range[j].end) { + continue; + } + + if (start > range[j].start && end < range[j].end) { /* find the new spare */ for (i = 0; i < RANGE_NUM; i++) { if (range[i].end == 0) @@ -135,7 +143,7 @@ static void __init update_range(struct res_range *range, size_t start, printk(KERN_ERR "run of slot in ranges\n"); } range[j].end = start - 1; - break; + continue; } } } @@ -151,16 +159,24 @@ static void __init update_res(struct pci_root_info *info, size_t start, /* try to merge it with old one */ for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + res = &info->res[i]; if (res->flags != flags) continue; - if (res->end + 1 == start) { - res->end = end; - return; - } else if (end + 1 == res->start) { - res->start = start; - return; - } + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; } addit: @@ -336,7 +352,11 @@ static int __init early_fill_mp_bus_info(void) info = &pci_root_info[j]; printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", node, link, (u64)start, (u64)end); - update_res(info, start, end, IORESOURCE_IO, 0); + + /* kernel only handle 16 bit only */ + if (end > 0xffff) + end = 0xffff; + update_res(info, start, end, IORESOURCE_IO, 1); update_range(range, start, end); } /* add left over io port range to def node/link, [0, 0xffff] */ @@ -444,7 +464,7 @@ static int __init early_fill_mp_bus_info(void) } } - update_res(info, start, end, IORESOURCE_MEM, 0); + update_res(info, start, end, IORESOURCE_MEM, 1); update_range(range, start, end); printk(KERN_CONT "\n"); } -- cgit v1.2.3 From 5f0b2976cb2b62668a076f54419c24b8ab677167 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Apr 2008 16:08:25 -0700 Subject: x86: add pci=check_enable_amd_mmconf and dmi check so will disable that feature by default, and only enable that via pci=check_enable_amd_mmconf or for system match with dmi table. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mmconf-fam10h_64.c | 28 ++++++++++++++++++++++++++++ arch/x86/kernel/setup_64.c | 23 +++++++++++++++-------- arch/x86/pci/common.c | 4 ++++ arch/x86/pci/mmconfig-shared.c | 3 +++ arch/x86/pci/pci.h | 1 + 5 files changed, 51 insertions(+), 8 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 37897920ec65..edc5fbfe85c0 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -6,12 +6,15 @@ #include #include #include +#include #include #include #include #include #include +#include "../pci/pci.h" + struct pci_hostbridge_probe { u32 bus; u32 slot; @@ -176,6 +179,9 @@ void __cpuinit fam10h_check_enable_mmcfg(void) u64 val; u32 address; + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return; + address = MSR_FAM10H_MMIO_CONF_BASE; rdmsrl(address, val); @@ -213,3 +219,25 @@ void __cpuinit fam10h_check_enable_mmcfg(void) FAM10H_MMIO_CONF_ENABLE; wrmsrl(address, val); } + +static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return 0; +} + +static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { + { + .callback = set_check_enable_amd_mmconf, + .ident = "Sun Microsystems Machine", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sun Microsystems"), + }, + }, + {} +}; + +void __init check_enable_amd_mmconf_dmi(void) +{ + dmi_check_system(mmconf_dmi_table); +} diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index d8a9ee752fb3..2f5c488aad0b 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -289,6 +289,18 @@ static void __init parse_setup_data(void) } } +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +void __cpuinit fam10h_check_enable_mmcfg(void) +{ +} +void __init check_enable_amd_mmconf_dmi(void) +{ +} +#endif + /* * setup_arch - architecture-specific boot-time initializations * @@ -510,6 +522,9 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + + /* do this before identify_cpu for boot cpu */ + check_enable_amd_mmconf_dmi(); } static int __cpuinit get_model_name(struct cpuinfo_x86 *c) @@ -697,14 +712,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -#endif - static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 07d53184f7a4..2a4d751818b7 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -425,6 +425,10 @@ char * __devinit pcibios_setup(char *str) pci_probe &= ~PCI_PROBE_MMCONF; return NULL; } + else if (!strcmp(str, "check_enable_amd_mmconf")) { + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return NULL; + } #endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index bdf62243186a..0cfebecf2a8f 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -107,6 +107,9 @@ static const char __init *pci_mmcfg_amd_fam10h(void) int i; unsigned segnbits = 0, busnbits; + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return NULL; + address = MSR_FAM10H_MMIO_CONF_BASE; if (rdmsr_safe(address, &low, &high)) return NULL; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c8b89a832c66..8ef86b5c37c8 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -26,6 +26,7 @@ #define PCI_ASSIGN_ALL_BUSSES 0x4000 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; -- cgit v1.2.3