From f6c2e3330d3fdd5474bc3756da46fca889a30e33 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Unmap NULL during early bootup We should zap the low mappings, as soon as possible, so that we can catch kernel bugs more effectively. Previously early boot had NULL mapped and didn't trap on NULL references. This patch introduces boot_level4_pgt, which will always have low identity addresses mapped. Druing boot, all the processors will use this as their level4 pgt. On BP, we will switch to init_level4_pgt as soon as we enter C code and zap the low mappings as soon as we are done with the usage of identity low mapped addresses. On AP's we will zap the low mappings as soon as we jump to C code. Signed-off-by: Suresh Siddha Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smpboot.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86_64/kernel/smpboot.c') diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 658a81b33f3b..3393fc08823b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1063,9 +1063,6 @@ int __cpuinit __cpu_up(unsigned int cpu) */ void __init smp_cpus_done(unsigned int max_cpus) { -#ifndef CONFIG_HOTPLUG_CPU - zap_low_mappings(); -#endif smp_cleanup_boot(); #ifdef CONFIG_X86_IO_APIC -- cgit v1.2.3 From 420f8f68c9c5148dddf946bebdbc7eacde2172cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: New heuristics to find out hotpluggable CPUs. With a NR_CPUS==128 kernel with CPU hotplug enabled we would waste 4MB on per CPU data of all possible CPUs. The reason was that HOTPLUG always set up possible map to NR_CPUS cpus and then we need to allocate that much (each per CPU data is roughly ~32k now) The underlying problem is that ACPI didn't tell us how many hotplug CPUs the platform supports. So the old code just assumed all, which would lead to this memory wastage. This implements some new heuristics: - If the BIOS specified disabled CPUs in the ACPI/mptables assume they can be enabled later (this is bending the ACPI specification a bit, but seems like a obvious extension) - The user can overwrite it with a new additionals_cpus=NUM option - Otherwise use half of the available CPUs or 2, whatever is more. Cc: ashok.raj@intel.com Cc: len.brown@intel.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- Documentation/x86_64/boot-options.txt | 3 +++ arch/x86_64/kernel/mpparse.c | 8 +++++-- arch/x86_64/kernel/smpboot.c | 39 ++++++++++++++++++++++++++++++++--- include/asm-x86_64/smp.h | 2 ++ 4 files changed, 47 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel/smpboot.c') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ffe1c062088b..a83139692cdf 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -122,6 +122,9 @@ SMP cpumask=MASK only use cpus with bits set in mask + additional_cpus=NUM Allow NUM more CPUs for hotplug + (defaults are specified by the BIOS or half the available CPUs) + NUMA numa=off Only set up a single NUMA node spanning all memory. diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 4a581d1cefbd..1d61f10a92c6 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; /* Internal processor count */ -static unsigned int num_processors = 0; +unsigned int num_processors __initdata = 0; + +unsigned disabled_cpus __initdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; @@ -109,8 +111,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m) int ver, cpu; static int found_bsp=0; - if (!(m->mpc_cpuflag & CPU_ENABLED)) + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; return; + } printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", m->mpc_apicid, diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 3393fc08823b..f74319a80659 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -880,6 +880,9 @@ static __init void disable_smp(void) } #ifdef CONFIG_HOTPLUG_CPU + +int additional_cpus __initdata = -1; + /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -888,14 +891,38 @@ static __init void disable_smp(void) * cpu_present_map on the other hand can change dynamically. * In case when cpu_hotplug is not compiled, then we resort to current * behaviour, which is cpu_possible == cpu_present. - * If cpu-hotplug is supported, then we need to preallocate for all - * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - otherwise use half of the available CPUs or 2, whatever is more. + * - The user can overwrite it with additional_cpus=NUM + * We do this because additional CPUs waste a lot of memory. + * -AK */ __init void prefill_possible_map(void) { int i; - for (i = 0; i < NR_CPUS; i++) + int possible; + + if (additional_cpus == -1) { + if (disabled_cpus > 0) { + additional_cpus = disabled_cpus; + } else { + additional_cpus = num_processors / 2; + if (additional_cpus == 0) + additional_cpus = 2; + } + } + possible = num_processors + additional_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, + max_t(int, possible - num_processors, 0)); + + for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); } #endif @@ -1151,6 +1178,12 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } +static __init int setup_additional_cpus(char *s) +{ + return get_option(&s, &additional_cpus); +} +__setup("additional_cpus=", setup_additional_cpus); + #else /* ... !CONFIG_HOTPLUG_CPU */ int __cpu_disable(void) diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 592161e979e5..cf8f969f9020 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -81,6 +81,8 @@ extern int safe_smp_processor_id(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void prefill_possible_map(void); +extern unsigned num_processors; +extern unsigned disabled_cpus; #endif /* !ASSEMBLY */ -- cgit v1.2.3 From 94605eff572b727aaad9b4b29bc358b919096503 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86-64/i386: Intel HT, Multi core detection fixes Fields obtained through cpuid vector 0x1(ebx[16:23]) and vector 0x4(eax[14:25], eax[26:31]) indicate the maximum values and might not always be the same as what is available and what OS sees. So make sure "siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen by OS instead of what cpuid instruction says. This will also fix the buggy BIOS cases (for example where cpuid on a single core cpu says there are "2" siblings, even when HT is disabled in the BIOS. http://bugzilla.kernel.org/show_bug.cgi?id=4359) Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 12 +++--- arch/i386/kernel/cpu/common.c | 36 +++++++---------- arch/i386/kernel/cpu/intel.c | 2 +- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- arch/i386/kernel/cpu/proc.c | 7 ++-- arch/i386/kernel/smpboot.c | 73 ++++++++++++++++++++++++---------- arch/x86_64/kernel/setup.c | 69 ++++++++++++++------------------ arch/x86_64/kernel/smpboot.c | 69 +++++++++++++++++++++++++------- include/asm-i386/processor.h | 4 +- include/asm-x86_64/processor.h | 4 +- include/linux/bitops.h | 10 +++++ 11 files changed, 178 insertions(+), 110 deletions(-) (limited to 'arch/x86_64/kernel/smpboot.c') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 53a1681cd964..e344ef88cfcd 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; } #ifdef CONFIG_X86_HT @@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. */ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 35a67dab4a94..4e9c2e99b0a5 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -335,7 +335,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -446,52 +446,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 43601de0f633..8d603ba28126 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -157,7 +157,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 4dc42a189ae5..e66d14099564 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -307,7 +307,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) #ifdef CONFIG_X86_HT else if (num_threads_sharing == smp_num_siblings) this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) + else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings)) this_leaf->shared_cpu_map = cpu_core_map[cpu]; else printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 41b871ecf4b3..e7921315ae9d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT - if (c->x86_num_cores * smp_num_siblings > 1) { + if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 01b618e73ecd..0a9c64655236 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -74,9 +74,11 @@ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -444,35 +446,60 @@ static void __devinit smp_callin(void) static int cpucount; +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -1096,11 +1123,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - cpus_clear(cpu_core_map[0]); - cpu_set(0, cpu_core_map[0]); + set_cpu_sibling_map(0); /* * If we couldn't find an SMP configuration at boot time, @@ -1279,15 +1303,24 @@ static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } int __cpu_disable(void) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 476ee034fca2..40c77f6fe4b0 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -795,7 +795,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) #endif bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; /* Low order bits define the core id (index of core in socket) */ @@ -828,7 +828,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) numa_set_node(cpu, node); printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); + cpu, c->x86_max_cores, node, cpu_core_id[cpu]); #endif #endif } @@ -877,9 +877,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (c->extended_cpuid_level >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; amd_detect_cmp(c); } @@ -891,54 +891,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); - + + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id(0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; - + if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. - */ + } else if (smp_num_siblings > 1 ) { + if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id(index_msb); - + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id(index_msb); + cpu_core_id[cpu] = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } @@ -1006,7 +996,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); - c->x86_num_cores = intel_num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } @@ -1044,7 +1034,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_clflush_size = 64; c->x86_cache_alignment = c->x86_clflush_size; - c->x86_num_cores = 1; + c->x86_max_cores = 1; c->extended_cpuid_level = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -1278,13 +1268,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_SMP - if (smp_num_siblings * c->x86_num_cores > 1) { + if (smp_num_siblings * c->x86_max_cores > 1) { int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index f74319a80659..2b9ddba61b37 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -64,6 +64,7 @@ int smp_num_siblings = 1; /* Package ID of each logical CPU */ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(phys_proc_id); EXPORT_SYMBOL(cpu_core_id); @@ -89,7 +90,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; + +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -436,30 +440,59 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for_each_cpu(i) { - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for_each_cpu(i) { - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -993,6 +1026,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ + set_cpu_sibling_map(0); if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); @@ -1036,8 +1070,6 @@ void __init smp_prepare_boot_cpu(void) int me = smp_processor_id(); cpu_set(me, cpu_online_map); cpu_set(me, cpu_callout_map); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); per_cpu(cpu_state, me) = CPU_ONLINE; } @@ -1106,15 +1138,24 @@ void __init smp_cpus_done(unsigned int max_cpus) static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } void remove_cpu_from_maps(void) diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0a4ec764377c..9cd4a05234a1 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -65,7 +65,9 @@ struct cpuinfo_x86 { int f00f_bug; int coma_bug; unsigned long loops_per_jiffy; - unsigned char x86_num_cores; + unsigned char x86_max_cores; /* cpuid returned max cores value */ + unsigned char booted_cores; /* number of cores as seen by OS */ + unsigned char apicid; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 03837d34fba0..4861246548f7 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -61,10 +61,12 @@ struct cpuinfo_x86 { int x86_cache_alignment; int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ __u8 x86_virt_bits, x86_phys_bits; - __u8 x86_num_cores; + __u8 x86_max_cores; /* cpuid returned max cores value */ __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; + __u8 apicid; + __u8 booted_cores; /* number of cores as seen by OS */ } ____cacheline_aligned; #define X86_VENDOR_INTEL 0 diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cb3c3ef50f50..38c2fb7ebe09 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count) return order; /* We could be slightly more clever with -1 here... */ } +static __inline__ int get_count_order(unsigned int count) +{ + int order; + + order = fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + /* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word -- cgit v1.2.3