From 883b7af932b4435eb4798cfa4fec0848639c2a87 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 11 May 2008 19:36:57 +0800 Subject: x86: smpboot.c: removed duplicated include Removed duplicated include in arch/x86/kernel/smpboot.c. Signed-off-by: Huang Weiyi Cc: mingo@redhat.com Cc: hpa@zytor.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 38988491c622..0974fc0997b9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From a4c81cf684350797939416c99effb9d3ae46bca6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 18 May 2008 01:18:57 -0700 Subject: x86: extend e820 ealy_res support 32bit move early_res related from e820_64.c to e820.c make edba detection to be done in head32.c remove smp_alloc_memory, because we have fixed trampoline address now. Signed-off-by: Yinghai Lu arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 10 files changed, 320 insertions(+), 319 deletions(-) Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 --------------------------------- arch/x86/kernel/head32.c | 76 +++++++++++++ arch/x86/kernel/setup_32.c | 109 ++++-------------- arch/x86/kernel/smpboot.c | 17 --- arch/x86/kernel/trampoline.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 9 -- include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 -- include/asm-x86/smp.h | 1 - 10 files changed, 320 insertions(+), 319 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41c480ae47df..35da8cdbe5e6 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include struct e820map e820; @@ -493,3 +495,215 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } + +/* + * Early reserved memory areas. + */ +#define MAX_EARLY_RES 20 + +struct early_res { + u64 start, end; + char name[16]; +}; +static struct early_res early_res[MAX_EARLY_RES] __initdata = { + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +#endif +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +#endif + {} +}; + +void __init reserve_early(u64 start, u64 end, char *name) +{ + int i; + struct early_res *r; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name?name:"", r->start, + r->end - 1, r->name); + } + if (i >= MAX_EARLY_RES) + panic("Too many early reservations"); + r = &early_res[i]; + r->start = start; + r->end = end; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %llx-%llx!", + start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i; + u64 final_start, final_end; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, + final_start, final_end - 1, r->name); +#ifdef CONFIG_X86_64 + reserve_bootmem_generic(final_start, final_end - final_start); +#else + reserve_bootmem(final_start, final_end - final_start, + BOOTMEM_DEFAULT); +#endif + } +} + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp, last; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last >= r->start && addr < r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + continue; + if (last > end) + continue; + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 354fbb221709..07941b554519 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -46,202 +46,6 @@ unsigned long max_pfn_mapped; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -/* - * Early reserved memory areas. - */ -#define MAX_EARLY_RES 20 - -struct early_res { - unsigned long start, end; - char name[16]; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_X86_TRAMPOLINE - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, -#endif - {} -}; - -void __init reserve_early(unsigned long start, unsigned long end, char *name) -{ - int i; - struct early_res *r; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", - start, end - 1, name?name:"", r->start, r->end - 1, r->name); - } - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - r->start = start; - r->end = end; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -void __init free_early(unsigned long start, unsigned long end) -{ - struct early_res *r; - int i, j; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (start == r->start && end == r->end) - break; - } - if (i >= MAX_EARLY_RES || !early_res[i].end) - panic("free_early on not reserved area: %lx-%lx!", start, end); - - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -void __init early_res_to_bootmem(unsigned long start, unsigned long end) -{ - int i; - unsigned long final_start, final_end; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) - continue; - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, - final_start, final_end - 1, r->name); - reserve_bootmem_generic(final_start, final_end - final_start); - } -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last >= r->start && addr < r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - unsigned long size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1UL; -} - -/* - * Find next free range after *start - */ -unsigned long __init find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - continue; - return addr; - } - return -1UL; - -} /* * Find the highest page frame number we have available */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db059058927..c216d3c2a991 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,7 +8,83 @@ #include #include +#include +#include +#include +#include + +#define BIOS_LOWMEM_KILOBYTES 0x413 + +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. + */ +static void __init reserve_ebda_region(void) +{ + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ + if (paravirt_enabled()) + return; + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early(lowmem, 0x100000, "BIOS reserved"); +} + void __init i386_start_kernel(void) { + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + +#ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } +#endif + reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE"); + + reserve_ebda_region(); + + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + start_kernel(); } diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5faeab69edd9..fed482c62450 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -359,56 +359,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); -} - #ifndef CONFIG_NEED_MULTIPLE_NODES static void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) @@ -522,25 +472,32 @@ static void __init reserve_initrd(void) unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; unsigned long ramdisk_here; - initrd_start = 0; - if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ + initrd_start = 0; + if (ramdisk_end < ramdisk_image) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd wraps around end of memory, " "disabling initrd\n"); return; } if (ramdisk_size >= end_of_lowmem/2) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; } + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); + /* + * don't need to reserve again, already reserved early + * in i386_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; return; @@ -582,6 +539,8 @@ static void __init relocate_initrd(void) p = (char *)__va(ramdisk_image); memcpy(q, p, clen); q += clen; + /* need to free these low pages...*/ + free_bootmem(ramdisk_image, clen); ramdisk_image += clen; ramdisk_size -= clen; } @@ -600,47 +559,28 @@ static void __init relocate_initrd(void) ramdisk_image += clen; ramdisk_size -= clen; } + /* high pages is not converted by early_res_to_bootmem */ } #endif /* CONFIG_BLK_DEV_INITRD */ void __init setup_bootmem_allocator(void) { - unsigned long bootmap_size; + unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); - + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<> PAGE_SHIFT, max_low_pfn); register_bootmem_low_pages(max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<llc_shared_map; } -#ifdef CONFIG_X86_32 -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} -#endif - static void impress_friends(void) { int cpu; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adebb..1106fac6024d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,7 +2,7 @@ #include -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +/* ready for x86_64 and x86 */ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a5..7bbebbfe8c4e 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1137,15 +1137,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, 0, 1, 1); } -/* used to set up the trampoline for other CPUs when the memory manager - * is sorted out */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - if (__pa(trampoline_base) >= 0x93000) - BUG(); -} - /* send a reschedule CPI to one CPU by physical CPU number*/ static void voyager_smp_send_reschedule(int cpu) { diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 1eb13b881437..5a58e2bb1d78 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -70,6 +70,12 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, extern void update_e820(void); extern void e820_setup_gap(void); +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern void reserve_early(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 9fac77e01441..37f176a02bc6 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -14,11 +14,6 @@ #include #ifndef __ASSEMBLY__ -extern unsigned long find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align); -extern unsigned long find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align); extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); @@ -35,10 +30,6 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern void finish_e820_parsing(void); -extern void reserve_early(unsigned long start, unsigned long end, char *name); -extern void free_early(unsigned long start, unsigned long end); -extern void early_res_to_bootmem(unsigned long start, unsigned long end); - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5cd3112..514e52b95cef 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -201,7 +201,6 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif -extern void smp_alloc_memory(void); extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 73d08e636026bbcb413d4864ca5e917502f8a0f9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 21 May 2008 22:09:43 +0100 Subject: x86: APIC/SMP: correct the message for "nosmp" The local APIC is no longer forced off when "nosmp" has been specified. Correct the message printed. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3e1cecedde42..8c53d86e3e86 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1157,8 +1157,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If SMP should be disabled, then really disable it! */ if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated," - "forcing use of dummy APIC emulation.\n"); + printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); #ifdef CONFIG_X86_32 connect_bsp_APIC(); -- cgit v1.2.3 From a1133d8e4ffc2db751eb987a2f3cf8ead67927c3 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 21 May 2008 22:10:16 +0100 Subject: x86: APIC/SMP: downgrade the NMI watchdog for "nosmp" If configured to use the I/O APIC, the NMI watchdog is deemed to fail if the chip has been deactivated as a result of "nosmp". Downgrade to the local APIC watchdog similarly to what is done for the UP case. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c53d86e3e86..df60bc7c9cb7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1159,6 +1159,10 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); + + if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) + nmi_watchdog = NMI_LOCAL_APIC; + #ifdef CONFIG_X86_32 connect_bsp_APIC(); #endif -- cgit v1.2.3 From 6fe9fe875691e15eda61b992e03257e68aa5ba4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 8 Jun 2008 10:14:40 +0200 Subject: Revert "x86: APIC/SMP: downgrade the NMI watchdog for "nosmp"" This reverts commit 791b93d3dfaf16c23e978bec0cc0a3dd9d855d63. A better fix from Maciej will be merged. --- arch/x86/kernel/smpboot.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index df60bc7c9cb7..8c53d86e3e86 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1159,10 +1159,6 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); - - if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) - nmi_watchdog = NMI_LOCAL_APIC; - #ifdef CONFIG_X86_32 connect_bsp_APIC(); #endif -- cgit v1.2.3 From d54db1ac9ecde9bcb8a561595b02c1d970d3a4d6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 6 Jun 2008 03:28:02 +0100 Subject: x86: APIC/SMP: Downgrade the NMI watchdog for "nosmp" If configured to use the I/O APIC, the NMI watchdog is deemed to fail if the chip has been deactivated as a result of "nosmp". Downgrade to the local APIC watchdog similarly to what is done for the UP case. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c53d86e3e86..f3760349c84f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1159,6 +1159,9 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); + + localise_nmi_watchdog(); + #ifdef CONFIG_X86_32 connect_bsp_APIC(); #endif -- cgit v1.2.3 From 23ca4bba3e20c6c3cb11c1bb0ab4770b724d39ac Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:12 +0200 Subject: x86: cleanup early per cpu variables/accesses v4 * Introduce a new PER_CPU macro called "EARLY_PER_CPU". This is used by some per_cpu variables that are initialized and accessed before there are per_cpu areas allocated. ["Early" in respect to per_cpu variables is "earlier than the per_cpu areas have been setup".] This patchset adds these new macros: DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) EXPORT_EARLY_PER_CPU_SYMBOL(_name) DECLARE_EARLY_PER_CPU(_type, _name) early_per_cpu_ptr(_name) early_per_cpu_map(_name, _idx) early_per_cpu(_name, _cpu) The DEFINE macro defines the per_cpu variable as well as the early map and pointer. It also initializes the per_cpu variable and map elements to "_initvalue". The early_* macros provide access to the initial map (usually setup during system init) and the early pointer. This pointer is initialized to point to the early map but is then NULL'ed when the actual per_cpu areas are setup. After that the per_cpu variable is the correct access to the variable. The early_per_cpu() macro is not very efficient but does show how to access the variable if you have a function that can be called both "early" and "late". It tests the early ptr to be NULL, and if not then it's still valid. Otherwise, the per_cpu variable is used instead: #define early_per_cpu(_name, _cpu) \ (early_per_cpu_ptr(_name) ? \ early_per_cpu_ptr(_name)[_cpu] : \ per_cpu(_name, _cpu)) A better method is to actually check the pointer manually. In the case below, numa_set_node can be called both "early" and "late": void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); if (cpu_to_node_map) cpu_to_node_map[cpu] = node; else per_cpu(x86_cpu_to_node_map, cpu) = node; } * Add a flag "arch_provides_topology_pointers" that indicates pointers to topology cpumask_t maps are available. Otherwise, use the function returning the cpumask_t value. This is useful if cpumask_t set size is very large to avoid copying data on to/off of the stack. * The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while the non-debug case has been optimized a bit. * Remove an unreferenced compiler warning in drivers/base/topology.c * Clean up #ifdef in setup.c For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 2 +- arch/x86/Kconfig.debug | 2 +- arch/x86/kernel/apic_32.c | 9 +-- arch/x86/kernel/apic_64.c | 11 ++-- arch/x86/kernel/setup.c | 96 ++++++++++++++++++++++++++---- arch/x86/kernel/setup_32.c | 24 -------- arch/x86/kernel/setup_64.c | 9 --- arch/x86/kernel/smpboot.c | 20 +------ arch/x86/mm/numa_64.c | 43 ++++---------- arch/x86/mm/srat_64.c | 2 +- drivers/base/topology.c | 25 +++++++- include/asm-x86/numa_64.h | 19 +++--- include/asm-x86/percpu.h | 46 +++++++++++++++ include/asm-x86/smp.h | 15 +---- include/asm-x86/topology.h | 143 ++++++++++++++++++++++++++------------------- 15 files changed, 270 insertions(+), 196 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2e325521e5e9..4469a0db1ae1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64 || (X86_SMP && !X86_VOYAGER) + def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 18363374d51a..24ca95a0ba54 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -60,7 +60,7 @@ config DEBUG_PAGEALLOC config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL - depends on X86_64_SMP + depends on X86_SMP default n help Say Y to verify that the per_cpu map being accessed has diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1bdeb6c..f17c1c1bc384 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -52,9 +52,6 @@ unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - /* * Knob to control our willingness to enable the local APIC. * @@ -1534,9 +1531,9 @@ void __cpuinit generic_processor_info(int apicid, int version) } #ifdef CONFIG_SMP /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0633cfd0dc29..4fd21f7d698c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -87,9 +87,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version @@ -1091,9 +1088,9 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu = 0; } /* are we being called early in kernel startup? */ - if (x86_cpu_to_apicid_early_ptr) { - u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; - u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); cpu_to_apicid[cpu] = apicid; bios_cpu_apicid[cpu] = apicid; @@ -1269,7 +1266,7 @@ __cpuinit int apic_is_clustered_box(void) if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) return 0; - bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a196..03caa8e4351f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,13 +19,23 @@ unsigned disabled_cpus __cpuinitdata; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); - /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; #endif +/* map cpu index to physical APIC ID */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#define X86_64_NUMA 1 + +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); +#endif + #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) /* * Copy data used in early init routines from the initial arrays to the @@ -37,20 +47,21 @@ static void __init setup_per_cpu_maps(void) int cpu; for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = - x86_bios_cpu_apicid_init[cpu]; -#ifdef CONFIG_NUMA + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA per_cpu(x86_cpu_to_node_map, cpu) = - x86_cpu_to_node_map_init[cpu]; + early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif } /* indicate the early static arrays will soon be gone */ - x86_cpu_to_apicid_early_ptr = NULL; - x86_bios_cpu_apicid_early_ptr = NULL; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = NULL; + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif } @@ -109,7 +120,8 @@ void __init setup_per_cpu_areas(void) if (!node_online(node) || !NODE_DATA(node)) { ptr = alloc_bootmem_pages(size); printk(KERN_INFO - "cpu %d has no node or node-local memory\n", i); + "cpu %d has no node %d or node-local memory\n", + i, node); } else ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); @@ -137,3 +149,63 @@ void __init setup_per_cpu_areas(void) } #endif + +#ifdef X86_64_NUMA +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + if (cpu_to_node_map) + cpu_to_node_map[cpu] = node; + + else if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_node_map, cpu) = node; + + else + Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); +} +#endif /* CONFIG_NUMA */ + +#if defined(CONFIG_DEBUG_PER_CPU_MAPS) && defined(CONFIG_X86_64) + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!per_cpu_offset(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +#endif diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5a2f8e063887..ccd5f5cdbbe6 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -737,18 +737,6 @@ char * __init __attribute__((weak)) memory_setup(void) return machine_specific_memory_setup(); } -#ifdef CONFIG_NUMA -/* - * In the golden day, when everything among i386 and x86_64 will be - * integrated, this will not live here - */ -void *x86_cpu_to_node_map_early_ptr; -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -#endif - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -887,18 +875,6 @@ void __init setup_arch(char **cmdline_p) io_delay_init(); -#ifdef CONFIG_X86_SMP - /* - * setup to use the early static init tables during kernel startup - * X86_SMP will exclude sub-arches that don't deal well with it. - */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif - #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(); #endif diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8a..e8df64fad540 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -406,15 +406,6 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif -#ifdef CONFIG_SMP - /* setup to use the early static init tables during kernel startup */ - x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; - x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init; -#ifdef CONFIG_NUMA - x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init; -#endif -#endif - #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3e1cecedde42..036604d3daed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,22 +68,6 @@ #include #include -/* - * FIXME: For x86_64, those are defined in other files. But moving them here, - * would make the setup areas dependent on smp, which is a loss. When we - * integrate apic between arches, we can probably do a better job, but - * right now, they'll stay here -- glommer - */ - -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = - { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_cpu_to_apicid_early_ptr; - -u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata - = { [0 ... NR_CPUS-1] = BAD_APICID }; -void *x86_bios_cpu_apicid_early_ptr; - #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; static int low_mappings; @@ -992,7 +976,7 @@ do_rest: /* Try to put things back the way they were before ... */ unmap_cpu_to_logical_apicid(cpu); #ifdef CONFIG_X86_64 - clear_node_cpumask(cpu); /* was set by numa_add_cpu */ + numa_remove_cpu(cpu); /* was set by numa_add_cpu */ #endif cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ @@ -1373,7 +1357,7 @@ static void __ref remove_cpu_from_maps(int cpu) cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ clear_bit(cpu, (unsigned long *)&cpu_initialized); - clear_node_cpumask(cpu); + numa_remove_cpu(cpu); #endif } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..970f86775c41 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -31,16 +31,6 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct memnode memnode; -#ifdef CONFIG_SMP -int x86_cpu_to_node_map_init[NR_CPUS] = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE -}; -void *x86_cpu_to_node_map_early_ptr; -EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr); -#endif -DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map); - s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; @@ -577,24 +567,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); } -__cpuinit void numa_add_cpu(int cpu) -{ - set_bit(cpu, - (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if(cpu_to_node_map) - cpu_to_node_map[cpu] = node; - else if(per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; - else - Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); -} - unsigned long __init numa_free_all_bootmem(void) { unsigned long pages = 0; @@ -641,6 +613,7 @@ static __init int numa_setup(char *opt) } early_param("numa", numa_setup); +#ifdef CONFIG_NUMA /* * Setup early cpu_to_node. * @@ -652,14 +625,19 @@ early_param("numa", numa_setup); * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. + * + * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { - int i; + int cpu; + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - for (i = 0; i < NR_CPUS; i++) { + BUG_ON(cpu_to_apicid == NULL); + + for_each_possible_cpu(cpu) { int node; - u16 apicid = x86_cpu_to_apicid_init[i]; + u16 apicid = cpu_to_apicid[cpu]; if (apicid == BAD_APICID) continue; @@ -668,8 +646,9 @@ void __init init_cpu_to_node(void) continue; if (!node_online(node)) continue; - numa_set_node(i, node); + numa_set_node(cpu, node); } } +#endif diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 99649dccad28..012220e31c99 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (node == NUMA_NO_NODE) continue; if (!node_isset(node, node_possible_map)) - numa_set_node(i, NUMA_NO_NODE); + numa_clear_node(i); } numa_init_array(); return 0; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index fdf4044d2e74..1efe162e16d7 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ } +#if defined(topology_thread_siblings) || defined(topology_core_siblings) static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) { ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; @@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) } return n; } +#endif +#ifdef arch_provides_topology_pointers #define define_siblings_show_map(name) \ -static inline ssize_t show_##name(struct sys_device *dev, char *buf) \ +static ssize_t show_##name(struct sys_device *dev, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(0, &(topology_##name(cpu)), buf); \ } #define define_siblings_show_list(name) \ -static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(1, &(topology_##name(cpu)), buf); \ } +#else +#define define_siblings_show_map(name) \ +static ssize_t show_##name(struct sys_device *dev, char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + cpumask_t mask = topology_##name(cpu); \ + return show_cpumap(0, &mask, buf); \ +} + +#define define_siblings_show_list(name) \ +static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + cpumask_t mask = topology_##name(cpu); \ + return show_cpumap(1, &mask, buf); \ +} +#endif + #define define_siblings_show_func(name) \ define_siblings_show_map(name); define_siblings_show_list(name) diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 22e87c9f6a80..b510daf4f4d8 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; -extern void numa_set_node(int cpu, int node); extern void srat_reserve_add_area(int nodeid); extern int hotadd_percent; @@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, #ifdef CONFIG_NUMA extern void __init init_cpu_to_node(void); - -static inline void clear_node_cpumask(int cpu) -{ - clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); -} - +extern void __cpuinit numa_set_node(int cpu, int node); +extern void __cpuinit numa_clear_node(int cpu); +extern void __cpuinit numa_add_cpu(int cpu); +extern void __cpuinit numa_remove_cpu(int cpu); #else -#define init_cpu_to_node() do {} while (0) -#define clear_node_cpumask(cpu) do {} while (0) +static inline void init_cpu_to_node(void) { } +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +static inline void numa_add_cpu(int cpu, int node) { } +static inline void numa_remove_cpu(int cpu) { } #endif #endif diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index 736fc3bb8e1e..912a3a17b9db 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -143,4 +143,50 @@ do { \ #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) #endif /* !__ASSEMBLY__ */ #endif /* !CONFIG_X86_64 */ + +#ifdef CONFIG_SMP + +/* + * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu + * variables that are initialized and accessed before there are per_cpu + * areas allocated. + */ + +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue; \ + __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ + { [0 ... NR_CPUS-1] = _initvalue }; \ + __typeof__(_type) *_name##_early_ptr = _name##_early_map + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ + extern __typeof__(_type) _name##_early_map[] + +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) +#define early_per_cpu(_name, _cpu) \ + (early_per_cpu_ptr(_name) ? \ + early_per_cpu_ptr(_name)[_cpu] : \ + per_cpu(_name, _cpu)) + +#else /* !CONFIG_SMP */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ + DEFINE_PER_CPU(_type, _name) = _initvalue + +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ + EXPORT_PER_CPU_SYMBOL(_name) + +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name) + +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL +/* no early_per_cpu_map() */ + +#endif /* !CONFIG_SMP */ + #endif /* _ASM_X86_PERCPU_H_ */ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5cd3112..ec841639fb44 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -29,21 +29,12 @@ extern int smp_num_siblings; extern unsigned int num_processors; extern cpumask_t cpu_initialized; -#ifdef CONFIG_SMP -extern u16 x86_cpu_to_apicid_init[]; -extern u16 x86_bios_cpu_apicid_init[]; -extern void *x86_cpu_to_apicid_early_ptr; -extern void *x86_bios_cpu_apicid_early_ptr; -#else -#define x86_cpu_to_apicid_early_ptr NULL -#define x86_bios_cpu_apicid_early_ptr NULL -#endif - DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(u16, x86_cpu_to_apicid); -DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); + +DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); /* Static state in head.S used to set up a CPU */ extern struct { diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index dcf3f8131d6b..dac09cb66dca 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -35,87 +35,67 @@ # endif #endif +/* Node not present */ +#define NUMA_NO_NODE (-1) + #ifdef CONFIG_NUMA #include #include -/* Mappings between logical cpu number and node number */ #ifdef CONFIG_X86_32 -extern int cpu_to_node_map[]; -#else -/* Returns the number of the current Node. */ -#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id())) -#endif - -DECLARE_PER_CPU(int, x86_cpu_to_node_map); - -#ifdef CONFIG_SMP -extern int x86_cpu_to_node_map_init[]; -extern void *x86_cpu_to_node_map_early_ptr; -#else -#define x86_cpu_to_node_map_early_ptr NULL -#endif +/* Mappings between node number and cpus on that node. */ extern cpumask_t node_to_cpumask_map[]; -#define NUMA_NO_NODE (-1) +/* Mappings between logical cpu number and node number */ +extern int cpu_to_node_map[]; /* Returns the number of the node containing CPU 'cpu' */ -#ifdef CONFIG_X86_32 -#define early_cpu_to_node(cpu) cpu_to_node(cpu) static inline int cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } +#define early_cpu_to_node(cpu) cpu_to_node(cpu) #else /* CONFIG_X86_64 */ -#ifdef CONFIG_SMP -static inline int early_cpu_to_node(int cpu) -{ - int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; - - if (cpu_to_node_map) - return cpu_to_node_map[cpu]; - else if (per_cpu_offset(cpu)) - return per_cpu(x86_cpu_to_node_map, cpu); - else - return NUMA_NO_NODE; -} -#else -#define early_cpu_to_node(cpu) cpu_to_node(cpu) -#endif +/* Mappings between node number and cpus on that node. */ +extern cpumask_t node_to_cpumask_map[]; + +/* Mappings between logical cpu number and node number */ +DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + +/* Returns the number of the current Node. */ +#define numa_node_id() (per_cpu(x86_cpu_to_node_map, raw_smp_processor_id())) + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +extern int cpu_to_node(int cpu); +extern int early_cpu_to_node(int cpu); +extern cpumask_t *_node_to_cpumask_ptr(int node); +extern cpumask_t node_to_cpumask(int node); +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +/* Returns the number of the node containing CPU 'cpu' */ static inline int cpu_to_node(int cpu) { -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (x86_cpu_to_node_map_early_ptr) { - printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n", - (int)cpu); - dump_stack(); - return ((int *)x86_cpu_to_node_map_early_ptr)[cpu]; - } -#endif return per_cpu(x86_cpu_to_node_map, cpu); } -#ifdef CONFIG_NUMA - -/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -#define node_to_cpumask_ptr(v, node) \ - cpumask_t *v = &(node_to_cpumask_map[node]) - -#define node_to_cpumask_ptr_next(v, node) \ - v = &(node_to_cpumask_map[node]) -#endif +/* Same function but used if called before per_cpu areas are setup */ +static inline int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; -#endif /* CONFIG_X86_64 */ + return per_cpu(x86_cpu_to_node_map, cpu); +} -/* - * Returns the number of the node containing Node 'node'. This - * architecture is flat, so it is a pretty simple function! - */ -#define parent_node(node) (node) +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &node_to_cpumask_map[node]; +} /* Returns a bitmask of CPUs on Node 'node'. */ static inline cpumask_t node_to_cpumask(int node) @@ -123,14 +103,29 @@ static inline cpumask_t node_to_cpumask(int node) return node_to_cpumask_map[node]; } +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +#endif /* CONFIG_X86_64 */ + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + cpumask_t *v = _node_to_cpumask_ptr(node) + +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) + /* Returns the number of the first CPU on Node 'node'. */ static inline int node_to_first_cpu(int node) { - cpumask_t mask = node_to_cpumask(node); - - return first_cpu(mask); + node_to_cpumask_ptr(mask, node); + return first_cpu(*mask); } +/* + * Returns the number of the node containing Node 'node'. This + * architecture is flat, so it is a pretty simple function! + */ +#define parent_node(node) (node) + #define pcibus_to_node(bus) __pcibus_to_node(bus) #define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) @@ -180,8 +175,31 @@ extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) #endif -#else /* CONFIG_NUMA */ +#else /* !CONFIG_NUMA */ + +#define numa_node_id() 0 +#define cpu_to_node(cpu) 0 +#define early_cpu_to_node(cpu) 0 + +static inline cpumask_t *_node_to_cpumask_ptr(int node) +{ + return &cpu_online_map; +} +static inline cpumask_t node_to_cpumask(int node) +{ + return cpu_online_map; +} +static inline int node_to_first_cpu(int node) +{ + return first_cpu(cpu_online_map); +} + +/* Replace default node_to_cpumask_ptr with optimized version */ +#define node_to_cpumask_ptr(v, node) \ + cpumask_t *v = _node_to_cpumask_ptr(node) +#define node_to_cpumask_ptr_next(v, node) \ + v = _node_to_cpumask_ptr(node) #endif #include @@ -193,6 +211,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) + +/* indicates that pointers to the topology cpumask_t maps are valid */ +#define arch_provides_topology_pointers yes #endif static inline void arch_fix_phys_package_id(int num, u32 slot) @@ -220,4 +241,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) } #endif -#endif +#endif /* _ASM_X86_TOPOLOGY_H */ -- cgit v1.2.3 From 3461b0af025251bbc6b3d56c821c6ac2de6f7209 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: remove static boot_cpu_pda array v2 * Remove the boot_cpu_pda array and pointer table from the data section. Allocate the pointer table and array during init. do_boot_cpu() will reallocate the pda in node local memory and if the cpu is being brought up before the bootmem array is released (after_bootmem = 0), then it will free the initial pda. This will happen for all cpus present at system startup. This removes 512k + 32k bytes from the data section. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 26 +++++++++++++++-- arch/x86/kernel/setup.c | 73 ++++++++++++++++++++++++++++++++++++----------- arch/x86/kernel/setup64.c | 8 ++++-- arch/x86/kernel/smpboot.c | 59 +++++++++++++++++++++++++++++--------- include/asm-x86/pda.h | 6 ++-- include/linux/mm.h | 1 + 6 files changed, 135 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e25c57b8aa84..0ab59edd7067 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -25,6 +25,24 @@ #include #include +/* boot cpu pda */ +static struct x8664_pda _boot_cpu_pda __read_mostly; + +#ifdef CONFIG_SMP +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +/* + * We install an empty cpu_pda pointer table to trap references before + * the actual cpu_pda pointer table is created in setup_cpu_pda_map(). + */ +static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; +#else +static struct x8664_pda *__cpu_pda[1] __read_mostly; +#endif + +#else /* !CONFIG_SMP (NR_CPUS will be 1) */ +static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; +#endif + static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -156,10 +174,12 @@ void __init x86_64_start_kernel(char * real_mode_data) early_printk("Kernel alive\n"); - for (i = 0; i < NR_CPUS; i++) - cpu_pda(i) = &boot_cpu_pda[i]; - + _cpu_pda = __cpu_pda; + cpu_pda(0) = &_boot_cpu_pda; pda_init(0); + + early_printk("Kernel really alive\n"); + copy_bootdata(__va(real_mode_data)); reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 913af838c3c5..dd12c1c84a8f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -101,6 +101,50 @@ static inline void setup_cpumask_of_cpu(void) { } */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_cpu_pda_map(void) { } + +#elif !defined(CONFIG_SMP) +static inline void setup_cpu_pda_map(void) { } + +#else /* CONFIG_SMP && CONFIG_X86_64 */ + +/* + * Allocate cpu_pda pointer table and array via alloc_bootmem. + */ +static void __init setup_cpu_pda_map(void) +{ + char *pda; + struct x8664_pda **new_cpu_pda; + unsigned long size; + int cpu; + + size = roundup(sizeof(struct x8664_pda), cache_line_size()); + + /* allocate cpu_pda array and pointer table */ + { + unsigned long tsize = nr_cpu_ids * sizeof(void *); + unsigned long asize = size * (nr_cpu_ids - 1); + + tsize = roundup(tsize, cache_line_size()); + new_cpu_pda = alloc_bootmem(tsize + asize); + pda = (char *)new_cpu_pda + tsize; + } + + /* initialize pointer table to static pda's */ + for_each_possible_cpu(cpu) { + if (cpu == 0) { + /* leave boot cpu pda in place */ + new_cpu_pda[0] = cpu_pda(0); + continue; + } + new_cpu_pda[cpu] = (struct x8664_pda *)pda; + new_cpu_pda[cpu]->in_bootmem = 1; + pda += size; + } + + /* point to new pointer table */ + _cpu_pda = new_cpu_pda; +} #endif /* @@ -110,46 +154,43 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - int i, highest_cpu = 0; - unsigned long size; + ssize_t size = PERCPU_ENOUGH_ROOM; + char *ptr; + int cpu; #ifdef CONFIG_HOTPLUG_CPU prefill_possible_map(); +#else + nr_cpu_ids = num_processors; #endif + /* Setup cpu_pda map */ + setup_cpu_pda_map(); + /* Copy section for each CPU (we discard the original) */ size = PERCPU_ENOUGH_ROOM; printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); - for_each_possible_cpu(i) { - char *ptr; + for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES ptr = alloc_bootmem_pages(size); #else - int node = early_cpu_to_node(i); + int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { ptr = alloc_bootmem_pages(size); printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", - i, node); + cpu, node); } else ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); #endif - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); -#ifdef CONFIG_X86_64 - cpu_pda(i)->data_offset = ptr - __per_cpu_start; -#else - __per_cpu_offset[i] = ptr - __per_cpu_start; -#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - highest_cpu = i; } - nr_cpu_ids = highest_cpu + 1; printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", NR_CPUS, nr_cpu_ids, nr_node_ids); @@ -199,7 +240,7 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (node != NUMA_NO_NODE) + if (cpu_pda(cpu) && node != NUMA_NO_NODE) cpu_pda(cpu)->nodenumber = node; if (cpu_to_node_map) diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index aee0e8200777..631ea6cc01d8 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -34,9 +35,8 @@ struct boot_params boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; +struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; @@ -114,8 +114,10 @@ void pda_init(int cpu) __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); if (!pda->irqstackptr) panic("cannot allocate irqstack for cpu %d", cpu); - } + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } pda->irqstackptr += IRQSTACKSIZE-64; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 036604d3daed..bf0833487455 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -816,6 +816,43 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +/* + * Allocate node local memory for the AP pda. + * + * Must be called after the _cpu_pda pointer table is initialized. + */ +static int __cpuinit get_local_pda(int cpu) +{ + struct x8664_pda *oldpda, *newpda; + unsigned long size = sizeof(struct x8664_pda); + int node = cpu_to_node(cpu); + + if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) + return 0; + + oldpda = cpu_pda(cpu); + newpda = kmalloc_node(size, GFP_ATOMIC, node); + if (!newpda) { + printk(KERN_ERR "Could not allocate node local PDA " + "for CPU %d on node %d\n", cpu, node); + + if (oldpda) + return 0; /* have a usable pda */ + else + return -1; + } + + if (oldpda) { + memcpy(newpda, oldpda, size); + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, size); + } + + newpda->in_bootmem = 0; + cpu_pda(cpu) = newpda; + return 0; +} + static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -841,19 +878,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) } /* Allocate node local memory for AP pdas */ - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { - struct x8664_pda *newpda, *pda; - int node = cpu_to_node(cpu); - pda = cpu_pda(cpu); - newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, - node); - if (newpda) { - memcpy(newpda, pda, sizeof(struct x8664_pda)); - cpu_pda(cpu) = newpda; - } else - printk(KERN_ERR - "Could not allocate node local PDA for CPU %d on node %d\n", - cpu, node); + if (cpu > 0) { + boot_error = get_local_pda(cpu); + if (boot_error) + goto restore_state; + /* if can't get pda memory, can't start cpu */ } #endif @@ -972,6 +1001,8 @@ do_rest: } } +restore_state: + if (boot_error) { /* Try to put things back the way they were before ... */ unmap_cpu_to_logical_apicid(cpu); @@ -1347,6 +1378,8 @@ __init void prefill_possible_map(void) for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); + + nr_cpu_ids = possible; } static void __ref remove_cpu_from_maps(int cpu) diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index de2ad9ac35a9..b34e9a7cc80b 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -22,7 +22,8 @@ struct x8664_pda { offset 40!!! */ #endif char *irqstackptr; - int nodenumber; /* number of current node */ + short nodenumber; /* number of current node (32k max) */ + short in_bootmem; /* pda lives in bootmem */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; @@ -38,8 +39,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[]; -extern struct x8664_pda boot_cpu_pda[]; +extern struct x8664_pda **_cpu_pda; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab01..0ea48a5af823 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,6 +1024,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -- cgit v1.2.3 From f307d25e638d3408659a2ec98fb3fd1737f7cb31 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 21 May 2008 11:21:13 +0100 Subject: x86: compile error fix for smpboot.c Without this patch, my link fails with: arch/x86/kernel/built-in.o(.cpuinit.text+0x3c6e): In function `get_local_pda': : undefined reference to `_cpu_pda' arch/x86/kernel/built-in.o(.cpuinit.text+0x3cd1): In function `get_local_pda': : undefined reference to `after_bootmem' arch/x86/kernel/built-in.o(.cpuinit.text+0x3cec): In function `get_local_pda': : undefined reference to `_cpu_pda' make[2]: *** [.tmp_vmlinux1] Error 1 Caused by commit 766da892634694f795b18b9538407816896fc470 x86: remove static boot_cpu_pda array v2 Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bf0833487455..bc1e1257e515 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -816,6 +816,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +#ifdef CONFIG_X86_64 /* * Allocate node local memory for the AP pda. * @@ -852,6 +853,7 @@ static int __cpuinit get_local_pda(int cpu) cpu_pda(cpu) = newpda; return 0; } +#endif /* CONFIG_X86_64 */ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* -- cgit v1.2.3 From b6df1b8bc1250191cfee15627697111c1cbda53f Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 19 Jun 2008 21:51:05 -0500 Subject: x86: fix stack overflow for large values of MAX_APICS physid_mask_of_physid() causes a huge stack (12k) to be created if the number of APICS is large. Replace physid_mask_of_physid() with a new function that does not create large stacks. This is a problem only on large x86_64 systems. this paves the way to increase MAX_APICS. Signed-off-by: Jack Steiner Cc: linux-mm@kvack.org Cc: mingo@elte.hu Cc: tglx@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 2 +- arch/x86/kernel/smpboot.c | 5 ++--- include/asm-x86/mpspec.h | 7 +++++++ 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d5767cb19d56..3f13a1aa07cd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1269,7 +1269,7 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_CRASH_DUMP boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); #endif - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0633cfd0dc29..5e22f8d2d507 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -942,7 +942,7 @@ int __init APIC_init_uniprocessor(void) verify_local_APIC(); - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); setup_local_APIC(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3e1cecedde42..664a5db36d40 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1091,10 +1091,9 @@ static __init void disable_smp(void) smpboot_clear_io_apic_irqs(); #endif if (smp_found_config) - phys_cpu_present_map = - physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); else - phys_cpu_present_map = physid_mask_of_physid(0); + physid_set_mask_of_physid(0, &phys_cpu_present_map); map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 57a991b9c053..b69e7ba7bf1d 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -101,6 +101,7 @@ typedef struct physid_mask physid_mask_t; __physid_mask; \ }) +/* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ ({ \ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ @@ -108,6 +109,12 @@ typedef struct physid_mask physid_mask_t; __physid_mask; \ }) +static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +{ + physids_clear(*map); + physid_set(physid, *map); +} + #define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } #define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } -- cgit v1.2.3 From 9cf4f298e29abba25c16679fe7be70898223167e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 27 May 2008 18:22:54 -0700 Subject: x86: use stack_start in x86_64 call x86_64's init_rsp stack_start, just as i386 does. Put a zeroed stack segment for consistency. With this, we can eliminate one ugly ifdef in smpboot.c. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/head_64.S | 5 +++-- arch/x86/kernel/smpboot.c | 7 +------ 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 36af01f029ed..a81f468ab410 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -86,7 +86,7 @@ int acpi_save_state_mem(void) saved_magic = 0x12345678; #else /* CONFIG_64BIT */ header->trampoline_segment = setup_trampoline() >> 4; - init_rsp = (unsigned long)temp_stack + 4096; + stack_start.sp = temp_stack + 4096; initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; #endif /* CONFIG_64BIT */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 263b9d14753e..918a2711aff6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -191,7 +191,7 @@ ENTRY(secondary_startup_64) movq %rax, %cr0 /* Setup a boot time stack */ - movq init_rsp(%rip),%rsp + movq stack_start(%rip),%rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -252,8 +252,9 @@ ENTRY(secondary_startup_64) .quad x86_64_start_kernel __FINITDATA - ENTRY(init_rsp) + ENTRY(stack_start) .quad init_thread_union+THREAD_SIZE-8 + .word 0 bad_address: jmp bad_address diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d3ad4e09455b..a71e3cad5470 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -714,11 +714,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) * target processor state. */ startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, -#ifdef CONFIG_X86_64 - (unsigned long)init_rsp); -#else (unsigned long)stack_start.sp); -#endif /* * Run STARTUP IPI loop. @@ -905,15 +901,14 @@ do_rest: early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); c_idle.idle->thread.ip = (unsigned long) start_secondary; /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.sp = (void *) c_idle.idle->thread.sp; irq_ctx_init(cpu); #else cpu_pda(cpu)->pcurrent = c_idle.idle; - init_rsp = c_idle.idle->thread.sp; load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); initial_code = (unsigned long)start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif + stack_start.sp = (void *) c_idle.idle->thread.sp; /* start_ip had better be page-aligned! */ start_ip = setup_trampoline(); -- cgit v1.2.3 From a939098afcfa5f81d3474782ec15c6d114e57763 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 16:19:53 -0700 Subject: x86: move x86_64 gdt closer to i386 i386 and x86_64 used two different schemes for maintaining the gdt. With this patch, x86_64 initial gdt table is defined in a .c file, same way as i386 is now. Also, we call it "gdt_page", and the descriptor, "early_gdt_descr". This way we achieve common naming, which can allow for more code integration. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 48 +++++----------------------------------- arch/x86/kernel/setup64.c | 5 +---- arch/x86/kernel/setup_64.c | 19 ++++++++++++++-- arch/x86/kernel/smpboot.c | 12 +++------- arch/x86/kernel/x8664_ksyms_64.c | 5 ----- include/asm-x86/desc.h | 24 +++++++++----------- include/asm-x86/segment.h | 23 ++++++++++--------- 7 files changed, 48 insertions(+), 88 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 918a2711aff6..32f5a114d1a2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -203,7 +203,7 @@ ENTRY(secondary_startup_64) * addresses where we're currently running on. We have to do that here * because in 32bit we couldn't load a 64bit linear address. */ - lgdt cpu_gdt_descr(%rip) + lgdt early_gdt_descr(%rip) /* set up data segments. actually 0 would do too */ movl $__KERNEL_DS,%eax @@ -391,54 +391,16 @@ NEXT_PAGE(level2_spare_pgt) .data .align 16 - .globl cpu_gdt_descr -cpu_gdt_descr: - .word gdt_end-cpu_gdt_table-1 -gdt: - .quad cpu_gdt_table -#ifdef CONFIG_SMP - .rept NR_CPUS-1 - .word 0 - .quad 0 - .endr -#endif + .globl early_gdt_descr +early_gdt_descr: + .word GDT_ENTRIES*8-1 + .quad per_cpu__gdt_page ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ - - .section .data.page_aligned, "aw" - .align PAGE_SIZE - -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ - .quad 0x00af9b000000ffff /* __KERNEL_CS */ - .quad 0x00cf93000000ffff /* __KERNEL_DS */ - .quad 0x00cffb000000ffff /* __USER32_CS */ - .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ - .quad 0x00affb000000ffff /* __USER_CS */ - .quad 0x0 /* unused */ - .quad 0,0 /* TSS */ - .quad 0,0 /* LDT */ - .quad 0,0,0 /* three TLS descriptors */ - .quad 0x0000f40000000000 /* node/CPU stored in limit */ -gdt_end: - /* asm/segment.h:GDT_ENTRIES must match this */ - /* This should be a multiple of the cache line size */ - /* GDTs of other CPUs are now dynamically allocated */ - - /* zero the remaining page */ - .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 - .section .bss, "aw", @nobits .align L1_CACHE_BYTES ENTRY(idt_table) diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index fc1a56da8240..70ff07186772 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -202,11 +202,8 @@ void __cpuinit cpu_init (void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - if (cpu) - memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE; - load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); + switch_to_new_gdt(); load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 504caeaffd58..a93300de4da9 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -81,8 +81,6 @@ #define ARCH_SETUP #endif -#include "cpu/cpu.h" - /* * Machine setup.. */ @@ -228,6 +226,23 @@ static inline void copy_edd(void) } #endif +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +void __attribute__((weak)) __init memory_setup(void) +{ + machine_specific_memory_setup(); +} + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + /* * setup_arch - architecture-specific boot-time initializations * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a71e3cad5470..fe2bd515d6cc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -849,14 +849,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* allocate memory for gdts of secondary cpus. Hotplug is considered */ - if (!cpu_gdt_descr[cpu].address && - !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { - printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); - return -1; - } +#ifdef CONFIG_X86_64 /* Allocate node local memory for AP pdas */ if (cpu > 0) { boot_error = get_local_pda(cpu); @@ -898,7 +892,6 @@ do_rest: #ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; init_gdt(cpu); - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); c_idle.idle->thread.ip = (unsigned long) start_secondary; /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); @@ -908,6 +901,7 @@ do_rest: initial_code = (unsigned long)start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); stack_start.sp = (void *) c_idle.idle->thread.sp; /* start_ip had better be page-aligned! */ @@ -1252,8 +1246,8 @@ void __init native_smp_prepare_boot_cpu(void) int me = smp_processor_id(); #ifdef CONFIG_X86_32 init_gdt(me); - switch_to_new_gdt(); #endif + switch_to_new_gdt(); /* already set me in cpu_online_map in boot_cpu_init() */ cpu_set(me, cpu_callout_map); per_cpu(cpu_state, me) = CPU_ONLINE; diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index f6c05d0410fb..2f306a826897 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -53,8 +53,3 @@ EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); EXPORT_SYMBOL(_proxy_pda); - -#ifdef CONFIG_PARAVIRT -/* Virtualized guests may want to use it */ -EXPORT_SYMBOL_GPL(cpu_gdt_descr); -#endif diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index b3875d4b4fab..07f9f2b17be8 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -29,11 +29,17 @@ static inline void fill_ldt(struct desc_struct *desc, extern struct desc_ptr idt_descr; extern gate_desc idt_table[]; +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); +DECLARE_PER_CPU(struct gdt_page, gdt_page); + +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + #ifdef CONFIG_X86_64 -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -extern struct desc_ptr cpu_gdt_descr[]; -/* the cpu gdt accessor */ -#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address) static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) @@ -51,16 +57,6 @@ static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, } #else -struct gdt_page { - struct desc_struct gdt[GDT_ENTRIES]; -} __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); - -static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) -{ - return per_cpu(gdt_page, cpu).gdt; -} - static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h index ed5131dd7d92..dfc8601c0892 100644 --- a/include/asm-x86/segment.h +++ b/include/asm-x86/segment.h @@ -61,18 +61,14 @@ #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) #define GDT_ENTRY_DEFAULT_USER_CS 14 -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3) #define GDT_ENTRY_DEFAULT_USER_DS 15 -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3) #define GDT_ENTRY_KERNEL_BASE 12 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) @@ -139,10 +135,11 @@ #else #include -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x18 +#define GDT_ENTRY_KERNEL32_CS 1 +#define GDT_ENTRY_KERNEL_CS 2 +#define GDT_ENTRY_KERNEL_DS 3 -#define __KERNEL32_CS 0x08 +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) /* * we cannot use the same code segment descriptor for user and kernel @@ -150,10 +147,10 @@ * The segment offset needs to contain a RPL. Grr. -AK * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) */ - -#define __USER32_CS 0x23 /* 4*8+3 */ -#define __USER_DS 0x2b /* 5*8+3 */ -#define __USER_CS 0x33 /* 6*8+3 */ +#define GDT_ENTRY_DEFAULT_USER32_CS 4 +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define GDT_ENTRY_DEFAULT_USER_CS 6 +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) #define __USER32_DS __USER_DS #define GDT_ENTRY_TSS 8 /* needs two entries */ @@ -175,6 +172,10 @@ #endif +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) #ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif -- cgit v1.2.3 From 3e9704739daf46a8ba6593d749c67b5f7cd633d2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 13:01:54 -0300 Subject: x86: boot secondary cpus through initial_code remove "initialize_secondary". Boot both architectures via initial_code. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/smpboot.c | 25 +------------------------ 2 files changed, 2 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ffb73a5b609f..f67e93441caf 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -452,7 +452,7 @@ is386: movl $2,%ecx # set MP je 1f movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - jmp initialize_secondary # all other CPUs call initialize_secondary + movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ jmp *(initial_code) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe2bd515d6cc..2a0d39f3f2f1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -349,28 +349,6 @@ static void __cpuinit start_secondary(void *unused) cpu_idle(); } -#ifdef CONFIG_X86_32 -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __devinit initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the ip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"m" (current->thread.sp), "m" (current->thread.ip)); -} -#endif - static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -892,16 +870,15 @@ do_rest: #ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; init_gdt(cpu); - c_idle.idle->thread.ip = (unsigned long) start_secondary; /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else cpu_pda(cpu)->pcurrent = c_idle.idle; load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); - initial_code = (unsigned long)start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + initial_code = (unsigned long)start_secondary; stack_start.sp = (void *) c_idle.idle->thread.sp; /* start_ip had better be page-aligned! */ -- cgit v1.2.3 From 0f385d1ddd0952e01a968bfa5512378ad23de6df Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 17:09:53 -0700 Subject: x86: clearing io_apic harmless for x86_64 so remove ifdef. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a0d39f3f2f1..a74a261c5626 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1052,9 +1052,8 @@ static __init void disable_smp(void) { cpu_present_map = cpumask_of_cpu(0); cpu_possible_map = cpumask_of_cpu(0); -#ifdef CONFIG_X86_32 smpboot_clear_io_apic_irqs(); -#endif + if (smp_found_config) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); else -- cgit v1.2.3 From 86e430edf462e872ecfab28d6b8619be5ab9c300 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 20:05:46 -0700 Subject: x86: remove ifdef from stepping The stepping won't affect x86_64, since there are not x86_64 k7's or pentiums. So, although it adds to the binary size, remove the ifdef for smoother integration Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a74a261c5626..6f9a31a18811 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -351,7 +351,6 @@ static void __cpuinit start_secondary(void *unused) static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_32 /* * Mask B, Pentium, but not Pentium MMX */ @@ -401,7 +400,6 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) valid_k7: ; -#endif } static void __cpuinit smp_checks(void) -- cgit v1.2.3 From 3fde690011a84e19f98f77bfaa349b2119ddd2d2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 20:34:19 -0700 Subject: x86: change __setup_vector_irq with setup_vector_irq We create a version of it for i386, and then take the CONFIG_X86_64 ifdef out of the game. We could create a __setup_vector_irq for i386, but it would incur in an unnecessary lock taking. Moreover, it is better practice to only export setup_vector_irq anyway. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 +++++ arch/x86/kernel/io_apic_64.c | 9 ++++++++- arch/x86/kernel/smpboot.c | 11 ++--------- include/asm-x86/hw_irq.h | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index fedb3b113ace..d6af301c822b 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1207,6 +1207,11 @@ static int assign_irq_vector(int irq) return vector; } + +void setup_vector_irq(int cpu) +{ +} + static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 2eba4f4c14ba..08c48750888a 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -801,7 +801,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -void __setup_vector_irq(int cpu) +static void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -824,6 +824,13 @@ void __setup_vector_irq(int cpu) } } +void setup_vector_irq(int cpu) +{ + spin_lock(&vector_lock); + __setup_vector_irq(smp_processor_id()); + spin_unlock(&vector_lock); +} + static struct irq_chip ioapic_chip; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f9a31a18811..e09f3124738a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -329,15 +329,8 @@ static void __cpuinit start_secondary(void *unused) * smp_call_function(). */ lock_ipi_call_lock(); -#ifdef CONFIG_X86_64 - spin_lock(&vector_lock); - - /* Setup the per cpu irq handling data structures */ - __setup_vector_irq(smp_processor_id()); - /* - * Allow the master to continue. - */ - spin_unlock(&vector_lock); +#ifdef CONFIG_X86_IO_APIC + setup_vector_irq(smp_processor_id()); #endif cpu_set(smp_processor_id(), cpu_online_map); unlock_ipi_call_lock(); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 1428b41dcbb9..18f067c310f7 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -97,9 +97,9 @@ extern void (*const interrupt[NR_IRQS])(void); #else typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void __setup_vector_irq(int cpu); extern spinlock_t vector_lock; #endif +extern void setup_vector_irq(int cpu); #endif /* !ASSEMBLY_ */ -- cgit v1.2.3 From b5841765a2e735a38612c4e4a82170c33d701b3c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 May 2008 13:38:28 -0300 Subject: x86: provide connect_bsp_APIC for x86_64 Although it is not really needed, we provide it to get closer to i386. ifdefs around it are removed in smpboot.c Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++++ arch/x86/kernel/smpboot.c | 5 +---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d9d663ffa641..5279dc924381 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -918,6 +918,8 @@ int __init APIC_init_uniprocessor(void) verify_local_APIC(); + connect_bsp_APIC(); + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); @@ -999,6 +1001,14 @@ asmlinkage void smp_error_interrupt(void) irq_exit(); } +/** + * * connect_bsp_APIC - attach the APIC to the interrupt system + * */ +void __init connect_bsp_APIC(void) +{ + enable_apic_mode(); +} + void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e09f3124738a..a569f06d789e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1116,9 +1116,7 @@ static int __init smp_sanity_check(unsigned max_cpus) localise_nmi_watchdog(); -#ifdef CONFIG_X86_32 connect_bsp_APIC(); -#endif setup_local_APIC(); end_local_APIC_setup(); return -1; @@ -1173,9 +1171,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_enable(); -#ifdef CONFIG_X86_32 connect_bsp_APIC(); -#endif + /* * Switch from PIC to APIC mode. */ -- cgit v1.2.3 From 78e622705c69da9649ba87071d8de85054b62df8 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 4 Jun 2008 02:03:07 -0300 Subject: x86: change naming to match x86_64 Change unmap_cpu_to_logical_apicid to numa_remove_cpu. Besides being shorter, it is the same name x86_64 uses. We can save an ifdef in the code this way. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a569f06d789e..b99c386af77d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -181,13 +181,12 @@ static void map_cpu_to_logical_apicid(void) map_cpu_to_node(cpu, node); } -static void unmap_cpu_to_logical_apicid(int cpu) +static void numa_remove_cpu(int cpu) { cpu_2_logical_apicid[cpu] = BAD_APICID; unmap_cpu_to_node(cpu); } #else -#define unmap_cpu_to_logical_apicid(cpu) do {} while (0) #define map_cpu_to_logical_apicid() do {} while (0) #endif @@ -946,10 +945,7 @@ restore_state: if (boot_error) { /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); -#ifdef CONFIG_X86_64 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ -#endif cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpu_clear(cpu, cpu_present_map); @@ -1244,7 +1240,7 @@ void cpu_exit_clear(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - unmap_cpu_to_logical_apicid(cpu); + numa_remove_cpu(cpu); } # endif /* CONFIG_X86_32 */ -- cgit v1.2.3 From b553a1e0ff48bd66fd18f705370e47c0b4ecea61 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 4 Jun 2008 02:05:03 -0300 Subject: x86: remove cpu from maps during cpu disable, take cpus out of all maps in i386, instead of just the online map. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b99c386af77d..820c23dbe761 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1319,13 +1319,11 @@ __init void prefill_possible_map(void) static void __ref remove_cpu_from_maps(int cpu) { cpu_clear(cpu, cpu_online_map); -#ifdef CONFIG_X86_64 cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ clear_bit(cpu, (unsigned long *)&cpu_initialized); numa_remove_cpu(cpu); -#endif } int __cpu_disable(void) -- cgit v1.2.3 From 1481a3dd42c21ac4a8b9497cb9f5df816d6b064f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 4 Jun 2008 15:35:03 -0300 Subject: x86: move cpu_exit_clear to process_32.c Take it out of smpboot.c, and move it to process_32.c, closer to its only user. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 16 ++++++++++++++++ arch/x86/kernel/smpboot.c | 19 +------------------ include/asm-x86/numa_32.h | 1 + include/asm-x86/smp.h | 1 - 4 files changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c2a11d77b1b5..9a139f6c9df3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -74,6 +74,22 @@ unsigned long thread_saved_pc(struct task_struct *tsk) #ifdef CONFIG_HOTPLUG_CPU #include + +static void cpu_exit_clear(void) +{ + int cpu = raw_smp_processor_id(); + + idle_task_exit(); + + cpu_uninit(); + irq_ctx_exit(cpu); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + + numa_remove_cpu(cpu); +} + /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 820c23dbe761..67af727f733a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -181,7 +181,7 @@ static void map_cpu_to_logical_apicid(void) map_cpu_to_node(cpu, node); } -static void numa_remove_cpu(int cpu) +void numa_remove_cpu(int cpu) { cpu_2_logical_apicid[cpu] = BAD_APICID; unmap_cpu_to_node(cpu); @@ -1227,23 +1227,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_HOTPLUG_CPU -# ifdef CONFIG_X86_32 -void cpu_exit_clear(void) -{ - int cpu = raw_smp_processor_id(); - - idle_task_exit(); - - cpu_uninit(); - irq_ctx_exit(cpu); - - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - - numa_remove_cpu(cpu); -} -# endif /* CONFIG_X86_32 */ - static void remove_siblinginfo(int cpu) { int sibling; diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h index a02674f64869..ed6c88eac848 100644 --- a/include/asm-x86/numa_32.h +++ b/include/asm-x86/numa_32.h @@ -2,6 +2,7 @@ #define _ASM_X86_32_NUMA_H 1 extern int pxm_to_nid(int pxm); +extern void numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA extern void __init remap_numa_kva(void); diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index fc1007321ef6..fad45f6a193f 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -188,7 +188,6 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_HOTPLUG_CPU -extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif -- cgit v1.2.3 From 7f6cbc905ee22c457e0dcd0bba9d4affbc290a6f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 4 Jun 2008 23:05:39 -0700 Subject: x86: take load_sp0 out of smpboot.c there's no particular reason to do load_sp0 in different places for i386 and x86_64. They should all be in cpu_init. Right now, cpu_init itself is not integrated, but with this patch, the code becomes closer to each other, making in easier to integrate when the time comes. Furthermore, although doing it in do_boot_cpu for x86_64 is fine, since it's only a copy, load_sp0 should be executed in the cpu it refers to anyway. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup64.c | 1 + arch/x86/kernel/smpboot.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 70ff07186772..151d3155ddf6 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -247,6 +247,7 @@ void __cpuinit cpu_init (void) BUG(); enter_lazy_tlb(&init_mm, me); + load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); load_LDT(&init_mm.context); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 67af727f733a..3b48d1f4c7c3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -864,7 +864,6 @@ do_rest: irq_ctx_init(cpu); #else cpu_pda(cpu)->pcurrent = c_idle.idle; - load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); clear_tsk_thread_flag(c_idle.idle, TIF_FORK); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); -- cgit v1.2.3 From c376d45432d935e6f1e0ff2d6be3734bcd3ba455 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 24 Jun 2008 22:52:05 +0200 Subject: x86: nmi_watchdog - use NMI_NONE by default There is no need to keep NMI_DISABLED definition and use it for nmi_watchdog by default. Here is the point why: - IO-APIC and APIC chips are programmed for nmi_watchdog support at very early stage of kernel booting and not having nmi_watchdog specified as boot option lead only to nmi_watchdog becomes to NMI_NONE anyway - enable nmi_watchdog thru /proc/sys/kernel/nmi if it was not specified at boot is not possible too (even having this sysfs entry) Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 - arch/x86/kernel/io_apic_64.c | 2 -- arch/x86/kernel/nmi.c | 26 +++----------------------- arch/x86/kernel/smpboot.c | 1 - include/asm-x86/nmi.h | 3 --- 5 files changed, 3 insertions(+), 30 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 8c751f731bca..1e3d32e27c14 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -826,7 +826,6 @@ static void __cpuinit lapic_setup_esr(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); - nmi_watchdog_default(); setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 08c48750888a..2b4c40bc12c9 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1729,7 +1729,6 @@ static inline void __init check_timer(void) } unmask_IO_APIC_irq(0); if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); enable_8259A_irq(0); @@ -1758,7 +1757,6 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); timer_through_8259 = 1; - nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { disable_8259A_irq(0); setup_nmi(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 427c511e7adc..32acda25e3cb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -52,7 +52,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); -unsigned int nmi_watchdog = NMI_DEFAULT; +unsigned int nmi_watchdog = NMI_NONE; EXPORT_SYMBOL(nmi_watchdog); static int panic_on_timeout; @@ -92,14 +92,6 @@ static inline unsigned int get_timer_irqs(int cpu) #endif } -/* Run after command line and cpu_init init, but before all other checks */ -void nmi_watchdog_default(void) -{ - if (nmi_watchdog != NMI_DEFAULT) - return; - nmi_watchdog = NMI_NONE; -} - #ifdef CONFIG_SMP /* * The performance counters used by NMI_LOCAL_APIC don't trigger when @@ -127,7 +119,7 @@ int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; - if (nmi_watchdog == NMI_NONE || nmi_watchdog == NMI_DISABLED) + if (nmi_watchdog == NMI_NONE) return 0; if (!atomic_read(&nmi_active)) @@ -482,24 +474,12 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, if (!!old_state == !!nmi_watchdog_enabled) return 0; - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { + if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_NONE) { printk(KERN_WARNING "NMI watchdog is permanently disabled\n"); return -EIO; } - /* if nmi_watchdog is not set yet, then set it */ - nmi_watchdog_default(); - -#ifdef CONFIG_X86_32 - if (nmi_watchdog == NMI_NONE) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } -#endif - if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) enable_lapic_nmi_watchdog(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3b48d1f4c7c3..3b19441d78b8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1139,7 +1139,6 @@ static void __init smp_cpu_index_default(void) void __init native_smp_prepare_cpus(unsigned int max_cpus) { preempt_disable(); - nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; cpu_callin_map = cpumask_of_cpu(0); diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index f0e435dd38fb..1348e542360f 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -20,7 +20,6 @@ extern void default_do_nmi(struct pt_regs *); #endif extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -extern void nmi_watchdog_default(void); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); @@ -38,12 +37,10 @@ extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DISABLED -1 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 -#define NMI_DEFAULT NMI_DISABLED struct ctl_table; struct file; -- cgit v1.2.3 From 329513a35d1a2b6b28d54f5c2c0dde4face8200b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 2 Jul 2008 18:54:40 -0700 Subject: x86: move prefill_possible_map calling early call it right after we are done with MADT/mptable handling, instead of doing that in setup_per_cpu_areas() later on... this way for_possible_cpu() can be used early. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 1 + arch/x86/kernel/setup_percpu.c | 10 ---------- arch/x86/kernel/smpboot.c | 8 ++++++++ include/asm-x86/smp.h | 4 ++++ 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f52a6fb90238..cfcfbefee0b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -818,6 +818,7 @@ void __init setup_arch(char **cmdline_p) get_smp_config(); #endif + prefill_possible_map(); #ifdef CONFIG_X86_64 init_cpu_to_node(); #endif diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 43aca2de624a..5fc310f746fc 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -162,16 +162,6 @@ void __init setup_per_cpu_areas(void) char *ptr; int cpu; - /* no processor from mptable or madt */ - if (!num_processors) - num_processors = 1; - -#ifdef CONFIG_HOTPLUG_CPU - prefill_possible_map(); -#else - nr_cpu_ids = num_processors; -#endif - /* Setup cpu_pda map */ setup_cpu_pda_map(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3b19441d78b8..e1200b202ed7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1278,12 +1278,20 @@ __init void prefill_possible_map(void) int i; int possible; + /* no processor from mptable or madt */ + if (!num_processors) + num_processors = 1; + +#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } +#else + additional_cpus = 0; +#endif possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index fad45f6a193f..b324a0645a78 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -119,6 +119,10 @@ static inline int num_booting_cpus(void) { return cpus_weight(cpu_callout_map); } +#else +static inline void prefill_possible_map(void) +{ +} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; -- cgit v1.2.3 From 6f585e01614f38195599c1bc5379d3c9dae6be47 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 4 Jul 2008 12:36:21 +0200 Subject: arch/x86/kernel/smpboot.c: fix warning arch/x86/kernel/smpboot.c: In function 'do_boot_cpu': arch/x86/kernel/smpboot.c:943: warning: label 'restore_state' defined but not used Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e1200b202ed7..f35c2d8016ac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -939,9 +939,9 @@ do_rest: inquire_remote_apic(apicid); } } - +#ifdef CONFIG_X86_64 restore_state: - +#endif if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ -- cgit v1.2.3