From bbc9e2f452d9c4b166d1f9a78d941d80173312fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jan 2011 14:37:39 +0100 Subject: x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit The mapping between cpu/apicid and node is done via apicid_to_node[] on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit. This difference makes it difficult to further unify 32 and 64bit NUMA handling. This patch unifies it by replacing both apicid_to_node[] and apicid_2_node[] with __apicid_to_node[] array, which is accessed by two accessors - set_apicid_to_node() and numa_cpu_node(). On 64bit, numa_cpu_node() always consults __apicid_to_node[] directly while 32bit goes through apic->numa_cpu_node() method to allow apic implementations to override it. srat_detect_node() for amd cpus contains workaround for broken NUMA configuration which assumes relationship between APIC ID, HT node ID and NUMA topology. Leave it to access __apicid_to_node[] directly as mapping through CPU might result in undesirable behavior change. The comment is reformatted and updated to note the ugliness. Signed-off-by: Tejun Heo Reviewed-by: Pekka Enberg Cc: eric.dumazet@gmail.com Cc: yinghai@kernel.org Cc: brgerst@gmail.com Cc: gorcunov@gmail.com Cc: shaohui.zheng@intel.com Cc: rientjes@google.com LKML-Reference: <1295789862-25482-14-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar Cc: David Rientjes --- arch/x86/mm/numa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/numa.c') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ebf6d7887a38..480b3571c8b1 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -26,8 +26,12 @@ static __init int numa_setup(char *opt) early_param("numa", numa_setup); /* - * Which logical CPUs are on which nodes + * apicid, cpu, node mappings */ +s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; + cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); -- cgit v1.2.3 From 645a79195f66eb68ef3ab2b21d9829ac3aa085a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jan 2011 14:37:40 +0100 Subject: x86: Unify CPU -> NUMA node mapping between 32 and 64bit Unlike 64bit, 32bit has been using its own cpu_to_node_map[] for CPU -> NUMA node mapping. Replace it with early_percpu variable x86_cpu_to_node_map and share the mapping code with 64bit. * USE_PERCPU_NUMA_NODE_ID is now enabled for 32bit too. * x86_cpu_to_node_map and numa_set/clear_node() are moved from numa_64 to numa. For now, on 32bit, x86_cpu_to_node_map is initialized with 0 instead of NUMA_NO_NODE. This is to avoid introducing unexpected behavior change and will be updated once init path is unified. * srat_detect_node() is now enabled for x86_32 too. It calls numa_set_node() and initializes the mapping making explicit cpu_to_node_map[] updates from map/unmap_cpu_to_node() unnecessary. Signed-off-by: Tejun Heo Cc: eric.dumazet@gmail.com Cc: yinghai@kernel.org Cc: brgerst@gmail.com Cc: gorcunov@gmail.com Cc: penberg@kernel.org Cc: shaohui.zheng@intel.com Cc: rientjes@google.com LKML-Reference: <1295789862-25482-15-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar Cc: David Rientjes --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/numa.h | 8 +++++ arch/x86/include/asm/numa_64.h | 4 --- arch/x86/include/asm/topology.h | 17 ---------- arch/x86/kernel/acpi/boot.c | 5 --- arch/x86/kernel/cpu/amd.c | 4 +-- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/setup_percpu.c | 4 +-- arch/x86/kernel/smpboot.c | 6 ---- arch/x86/mm/numa.c | 72 ++++++++++++++++++++++++++++++++++++++++- arch/x86/mm/numa_64.c | 65 ------------------------------------- 11 files changed, 85 insertions(+), 104 deletions(-) (limited to 'arch/x86/mm/numa.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d5ed94d30aad..95c36c474766 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1705,7 +1705,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID depends on NUMA config USE_PERCPU_NUMA_NODE_ID - def_bool X86_64 + def_bool y depends on NUMA menu "Power management and ACPI options" diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 5e01c768a575..2b21fff9f655 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -30,4 +30,12 @@ static inline void set_apicid_to_node(int apicid, s16 node) # include "numa_64.h" #endif +#ifdef CONFIG_NUMA +extern void __cpuinit numa_set_node(int cpu, int node); +extern void __cpuinit numa_clear_node(int cpu); +#else /* CONFIG_NUMA */ +static inline void numa_set_node(int cpu, int node) { } +static inline void numa_clear_node(int cpu) { } +#endif /* CONFIG_NUMA */ + #endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 4982a9c08c2f..6ead9f361bda 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, extern void __init init_cpu_to_node(void); extern int __cpuinit numa_cpu_node(int cpu); -extern void __cpuinit numa_set_node(int cpu, int node); -extern void __cpuinit numa_clear_node(int cpu); extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); @@ -43,8 +41,6 @@ void numa_emu_cmdline(char *); #else static inline void init_cpu_to_node(void) { } static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } -static inline void numa_set_node(int cpu, int node) { } -static inline void numa_clear_node(int cpu) { } static inline void numa_add_cpu(int cpu, int node) { } static inline void numa_remove_cpu(int cpu) { } #endif diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 21899cc31e52..b101c17861f5 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -47,21 +47,6 @@ #include -#ifdef CONFIG_X86_32 - -/* Mappings between logical cpu number and node number */ -extern int cpu_to_node_map[]; - -/* Returns the number of the node containing CPU 'cpu' */ -static inline int __cpu_to_node(int cpu) -{ - return cpu_to_node_map[cpu]; -} -#define early_cpu_to_node __cpu_to_node -#define cpu_to_node __cpu_to_node - -#else /* CONFIG_X86_64 */ - /* Mappings between logical cpu number and node number */ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); @@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -#endif /* CONFIG_X86_64 */ - /* Mappings between node number and cpus on that node. */ extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a7bca59ec595..a2c512175395 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -590,12 +590,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) if (nid == -1 || !node_online(nid)) return; set_apicid_to_node(physid, nid); -#ifdef CONFIG_X86_64 numa_set_node(cpu, nid); -#else /* CONFIG_X86_32 */ - cpu_to_node_map[cpu] = nid; -#endif - #endif } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3cce8f2bb2e1..77858fd64620 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -233,7 +233,7 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) } #endif -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#ifdef CONFIG_NUMA /* * To workaround broken NUMA config. Read the comment in * srat_detect_node(). @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id); static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#ifdef CONFIG_NUMA int cpu = smp_processor_id(); int node; unsigned apicid = c->apicid; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6052004bf4f4..df86bc8c859d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -276,7 +276,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#ifdef CONFIG_NUMA unsigned node; int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b5147f00f0ef..71f4727da373 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -233,6 +233,7 @@ void __init setup_per_cpu_areas(void) per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; +#endif #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); @@ -245,7 +246,6 @@ void __init setup_per_cpu_areas(void) * So set them all (boot cpu and all APs). */ set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); -#endif #endif /* * Up to this point, the boot CPU has been using .init.data @@ -263,7 +263,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) +#ifdef CONFIG_NUMA early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b7cfce535cb0..2c203822424f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -133,16 +133,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) -/* which node each logical CPU is on */ -int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; -EXPORT_SYMBOL(cpu_to_node_map); - /* set up a mapping between cpu and node. */ static void map_cpu_to_node(int cpu, int node) { printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); cpumask_set_cpu(cpu, node_to_cpumask_map[node]); - cpu_to_node_map[cpu] = node; } /* undo a mapping between cpu and node. */ @@ -153,7 +148,6 @@ static void unmap_cpu_to_node(int cpu) printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); for (node = 0; node < MAX_NUMNODES; node++) cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); - cpu_to_node_map[cpu] = 0; } #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ #define map_cpu_to_node(cpu, node) ({}) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 480b3571c8b1..187810be3d6c 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -35,6 +35,44 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); +/* + * Map cpu index to node index + */ +#ifdef CONFIG_X86_32 +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0); +#else +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +#endif +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { + cpu_to_node_map[cpu] = node; + return; + } + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; + + if (node != NUMA_NO_NODE) + set_cpu_numa_node(cpu, node); +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + /* * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. @@ -62,6 +100,37 @@ void __init setup_node_to_cpumask_map(void) } #ifdef CONFIG_DEBUG_PER_CPU_MAPS + +int __cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(__cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!cpu_possible(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ @@ -84,4 +153,5 @@ const struct cpumask *cpumask_of_node(int node) return node_to_cpumask_map[node]; } EXPORT_SYMBOL(cpumask_of_node); -#endif + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 1e1026f61a5a..f5459400644d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -29,12 +29,6 @@ struct memnode memnode; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; -/* - * Map cpu index to node index - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -732,34 +726,6 @@ int __cpuinit numa_cpu_node(int cpu) return NUMA_NO_NODE; } -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - /* early setting, no percpu area yet */ - if (cpu_to_node_map) { - cpu_to_node_map[cpu] = node; - return; - } - -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { - printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); - dump_stack(); - return; - } -#endif - per_cpu(x86_cpu_to_node_map, cpu) = node; - - if (node != NUMA_NO_NODE) - set_cpu_numa_node(cpu, node); -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - #ifndef CONFIG_DEBUG_PER_CPU_MAPS #ifndef CONFIG_NUMA_EMU @@ -887,37 +853,6 @@ void __cpuinit numa_remove_cpu(int cpu) { numa_set_cpumask(cpu, 0); } - -int __cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(__cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. - */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!cpu_possible(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - /* * --------- end of debug versions of the numa functions --------- */ -- cgit v1.2.3 From de2d9445f1627830ed2ebd00ee9d851986c940b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jan 2011 14:37:41 +0100 Subject: x86: Unify node_to_cpumask_map handling between 32 and 64bit x86_32 has been managing node_to_cpumask_map explicitly from map_cpu_to_node() and friends in a rather ugly way. With previous changes, it's now possible to share the code with 64bit. * When CONFIG_NUMA_EMU is disabled, numa_add/remove_cpu() are implemented in numa.c and shared by 32 and 64bit. CONFIG_NUMA_EMU versions still live in numa_64.c. NUMA_EMU's dependency on 64bit is planned to be removed and the above should go away together. * identify_cpu() now calls numa_add_cpu() for 32bit too. This makes the explicit mask management from map_cpu_to_node() unnecessary. * The whole x86_32 specific map_cpu_to_node() chunk is no longer necessary. Dropped. Signed-off-by: Tejun Heo Reviewed-by: Pekka Enberg Cc: eric.dumazet@gmail.com Cc: yinghai@kernel.org Cc: brgerst@gmail.com Cc: gorcunov@gmail.com Cc: shaohui.zheng@intel.com Cc: rientjes@google.com LKML-Reference: <1295789862-25482-16-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar Cc: David Rientjes Cc: Shaohui Zheng --- arch/x86/include/asm/numa.h | 9 +++++ arch/x86/include/asm/numa_32.h | 1 - arch/x86/include/asm/numa_64.h | 4 --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/smpboot.c | 47 -------------------------- arch/x86/mm/numa.c | 64 +++++++++++++++++++++++++++++++++-- arch/x86/mm/numa_64.c | 75 +++++++++--------------------------------- 7 files changed, 87 insertions(+), 115 deletions(-) (limited to 'arch/x86/mm/numa.c') diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 2b21fff9f655..d3964b28b128 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_NUMA_H #define _ASM_X86_NUMA_H +#include #include #ifdef CONFIG_NUMA @@ -33,9 +34,17 @@ static inline void set_apicid_to_node(int apicid, s16 node) #ifdef CONFIG_NUMA extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); +extern void __cpuinit numa_add_cpu(int cpu); +extern void __cpuinit numa_remove_cpu(int cpu); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } +static inline void numa_add_cpu(int cpu) { } +static inline void numa_remove_cpu(int cpu) { } #endif /* CONFIG_NUMA */ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable); +#endif + #endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index cdf8043d7a1a..bc66031afa1f 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h @@ -4,7 +4,6 @@ extern int numa_off; extern int pxm_to_nid(int pxm); -extern void numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA extern int __cpuinit numa_cpu_node(int apicid); diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 6ead9f361bda..123f1856101c 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, extern void __init init_cpu_to_node(void); extern int __cpuinit numa_cpu_node(int cpu); -extern void __cpuinit numa_add_cpu(int cpu); -extern void __cpuinit numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) @@ -41,8 +39,6 @@ void numa_emu_cmdline(char *); #else static inline void init_cpu_to_node(void) { } static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } -static inline void numa_add_cpu(int cpu, int node) { } -static inline void numa_remove_cpu(int cpu) { } #endif #endif /* _ASM_X86_NUMA_64_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1d59834396bd..a2559c3ed500 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) select_idle_routine(c); -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +#ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2c203822424f..522b2173888a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -132,49 +132,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) -/* set up a mapping between cpu and node. */ -static void map_cpu_to_node(int cpu, int node) -{ - printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); - cpumask_set_cpu(cpu, node_to_cpumask_map[node]); -} - -/* undo a mapping between cpu and node. */ -static void unmap_cpu_to_node(int cpu) -{ - int node; - - printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); - for (node = 0; node < MAX_NUMNODES; node++) - cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); -} -#else /* !(CONFIG_NUMA && CONFIG_X86_32) */ -#define map_cpu_to_node(cpu, node) ({}) -#define unmap_cpu_to_node(cpu) ({}) -#endif - -#ifdef CONFIG_X86_32 -static void map_cpu_to_logical_apicid(void) -{ - int cpu = smp_processor_id(); - int node; - - node = numa_cpu_node(cpu); - if (!node_online(node)) - node = first_online_node; - - map_cpu_to_node(cpu, node); -} - -void numa_remove_cpu(int cpu) -{ - unmap_cpu_to_node(cpu); -} -#else -#define map_cpu_to_logical_apicid() do {} while (0) -#endif - /* * Report back to the Boot Processor. * Running on AP. @@ -242,7 +199,6 @@ static void __cpuinit smp_callin(void) apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); - map_cpu_to_logical_apicid(); /* * Need to setup vector mappings before we enable interrupts. @@ -943,7 +899,6 @@ static __init void disable_smp(void) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); else physid_set_mask_of_physid(0, &phys_cpu_present_map); - map_cpu_to_logical_apicid(); cpumask_set_cpu(0, cpu_sibling_mask(0)); cpumask_set_cpu(0, cpu_core_mask(0)); } @@ -1120,8 +1075,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) end_local_APIC_setup(); - map_cpu_to_logical_apicid(); - if (apic->setup_portio_remap) apic->setup_portio_remap(); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 187810be3d6c..75abecb614c9 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -99,7 +99,21 @@ void __init setup_node_to_cpumask_map(void) pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } -#ifdef CONFIG_DEBUG_PER_CPU_MAPS +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +# ifndef CONFIG_NUMA_EMU +void __cpuinit numa_add_cpu(int cpu) +{ + cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} +# endif /* !CONFIG_NUMA_EMU */ + +#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ int __cpu_to_node(int cpu) { @@ -131,6 +145,52 @@ int early_cpu_to_node(int cpu) return per_cpu(x86_cpu_to_node_map, cpu); } +struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) +{ + int node = early_cpu_to_node(cpu); + struct cpumask *mask; + char buf[64]; + + mask = node_to_cpumask_map[node]; + if (!mask) { + pr_err("node_to_cpumask_map[%i] NULL\n", node); + dump_stack(); + return NULL; + } + + cpulist_scnprintf(buf, sizeof(buf), mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", + cpu, node, buf); + return mask; +} + +# ifndef CONFIG_NUMA_EMU +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + struct cpumask *mask; + + mask = debug_cpumask_set_cpu(cpu, enable); + if (!mask) + return; + + if (enable) + cpumask_set_cpu(cpu, mask); + else + cpumask_clear_cpu(cpu, mask); +} + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} +# endif /* !CONFIG_NUMA_EMU */ + /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ @@ -154,4 +214,4 @@ const struct cpumask *cpumask_of_node(int node) } EXPORT_SYMBOL(cpumask_of_node); -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ +#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index f5459400644d..14664f58a759 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -726,19 +726,18 @@ int __cpuinit numa_cpu_node(int cpu) return NUMA_NO_NODE; } -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -#ifndef CONFIG_NUMA_EMU -void __cpuinit numa_add_cpu(int cpu) -{ - cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} -#else +/* + * UGLINESS AHEAD: Currently, CONFIG_NUMA_EMU is 64bit only and makes use + * of 64bit specific data structures. The distinction is artificial and + * should be removed. numa_{add|remove}_cpu() are implemented in numa.c + * for both 32 and 64bit when CONFIG_NUMA_EMU is disabled but here when + * enabled. + * + * NUMA emulation is planned to be made generic and the following and other + * related code should be moved to numa.c. + */ +#ifdef CONFIG_NUMA_EMU +# ifndef CONFIG_DEBUG_PER_CPU_MAPS void __cpuinit numa_add_cpu(int cpu) { unsigned long addr; @@ -778,47 +777,7 @@ void __cpuinit numa_remove_cpu(int cpu) for_each_online_node(i) cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); } -#endif /* !CONFIG_NUMA_EMU */ - -#else /* CONFIG_DEBUG_PER_CPU_MAPS */ -static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - struct cpumask *mask; - char buf[64]; - - mask = node_to_cpumask_map[node]; - if (!mask) { - pr_err("node_to_cpumask_map[%i] NULL\n", node); - dump_stack(); - return NULL; - } - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", - cpu, node, buf); - return mask; -} - -/* - * --------- debug versions of the numa functions --------- - */ -#ifndef CONFIG_NUMA_EMU -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - struct cpumask *mask; - - mask = debug_cpumask_set_cpu(cpu, enable); - if (!mask) - return; - - if (enable) - cpumask_set_cpu(cpu, mask); - else - cpumask_clear_cpu(cpu, mask); -} -#else +# else /* !CONFIG_DEBUG_PER_CPU_MAPS */ static void __cpuinit numa_set_cpumask(int cpu, int enable) { int node = early_cpu_to_node(cpu); @@ -842,7 +801,6 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) cpumask_clear_cpu(cpu, mask); } } -#endif /* CONFIG_NUMA_EMU */ void __cpuinit numa_add_cpu(int cpu) { @@ -853,8 +811,5 @@ void __cpuinit numa_remove_cpu(int cpu) { numa_set_cpumask(cpu, 0); } -/* - * --------- end of debug versions of the numa functions --------- - */ - -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ +# endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +#endif /* CONFIG_NUMA_EMU */ -- cgit v1.2.3 From 8db78cc4b4048e3add40bca1bc3e55057c319256 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 Jan 2011 14:37:42 +0100 Subject: x86: Unify NUMA initialization between 32 and 64bit Now that everything else is unified, NUMA initialization can be unified too. * numa_init_array() and init_cpu_to_node() are moved from numa_64 to numa. * numa_32::initmem_init() is updated to call numa_init_array() and setup_arch() to call init_cpu_to_node() on 32bit too. * x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit too. This is safe now as numa_init_array() will initialize it early during boot. This makes NUMA mapping fully initialized before setup_per_cpu_areas() on 32bit too and thus makes the first percpu chunk which contains all the static variables and some of dynamic area allocated with NUMA affinity correctly considered. Signed-off-by: Tejun Heo Cc: yinghai@kernel.org Cc: brgerst@gmail.com Cc: gorcunov@gmail.com Cc: shaohui.zheng@intel.com Cc: rientjes@google.com LKML-Reference: <1295789862-25482-17-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar Reported-by: Eric Dumazet Reviewed-by: Pekka Enberg --- arch/x86/include/asm/numa.h | 4 +++ arch/x86/include/asm/numa_64.h | 3 -- arch/x86/kernel/setup.c | 2 -- arch/x86/mm/numa.c | 76 +++++++++++++++++++++++++++++++++++++++--- arch/x86/mm/numa_32.c | 1 + arch/x86/mm/numa_64.c | 75 ----------------------------------------- 6 files changed, 77 insertions(+), 84 deletions(-) (limited to 'arch/x86/mm/numa.c') diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index d3964b28b128..26fc6e2dd0fb 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -34,11 +34,15 @@ static inline void set_apicid_to_node(int apicid, s16 node) #ifdef CONFIG_NUMA extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); +extern void __init numa_init_array(void); +extern void __init init_cpu_to_node(void); extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } +static inline void numa_init_array(void) { } +static inline void init_cpu_to_node(void) { } static inline void numa_add_cpu(int cpu) { } static inline void numa_remove_cpu(int cpu) { } #endif /* CONFIG_NUMA */ diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 123f1856101c..2819afa33632 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_init_array(void); extern int numa_off; extern unsigned long numa_free_all_bootmem(void); @@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, */ #define NODE_MIN_SIZE (4*1024*1024) -extern void __init init_cpu_to_node(void); extern int __cpuinit numa_cpu_node(int cpu); #ifdef CONFIG_NUMA_EMU @@ -37,7 +35,6 @@ extern int __cpuinit numa_cpu_node(int cpu); void numa_emu_cmdline(char *); #endif /* CONFIG_NUMA_EMU */ #else -static inline void init_cpu_to_node(void) { } static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3cfe26c0252..12023412fdf7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1040,9 +1040,7 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); -#ifdef CONFIG_X86_64 init_cpu_to_node(); -#endif init_apic_mappings(); ioapic_and_gsi_init(); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 75abecb614c9..bf60715bd1b7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -38,11 +38,7 @@ EXPORT_SYMBOL(node_to_cpumask_map); /* * Map cpu index to node index */ -#ifdef CONFIG_X86_32 -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0); -#else DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -#endif EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); void __cpuinit numa_set_node(int cpu, int node) @@ -99,6 +95,78 @@ void __init setup_node_to_cpumask_map(void) pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } +/* + * There are unfortunately some poorly designed mainboards around that + * only connect memory to a single CPU. This breaks the 1:1 cpu->node + * mapping. To avoid this fill in the mapping for all possible CPUs, + * as the number of CPUs is not known yet. We round robin the existing + * nodes. + */ +void __init numa_init_array(void) +{ + int rr, i; + + rr = first_node(node_online_map); + for (i = 0; i < nr_cpu_ids; i++) { + if (early_cpu_to_node(i) != NUMA_NO_NODE) + continue; + numa_set_node(i, rr); + rr = next_node(rr, node_online_map); + if (rr == MAX_NUMNODES) + rr = first_node(node_online_map); + } +} + +static __init int find_near_online_node(int node) +{ + int n, val; + int min_val = INT_MAX; + int best_node = -1; + + for_each_online_node(n) { + val = node_distance(node, n); + + if (val < min_val) { + min_val = val; + best_node = n; + } + } + + return best_node; +} + +/* + * Setup early cpu_to_node. + * + * Populate cpu_to_node[] only if x86_cpu_to_apicid[], + * and apicid_to_node[] tables have valid entries for a CPU. + * This means we skip cpu_to_node[] initialisation for NUMA + * emulation and faking node case (when running a kernel compiled + * for NUMA on a non NUMA box), which is OK as cpu_to_node[] + * is already initialized in a round robin manner at numa_init_array, + * prior to this call, and this initialization is good enough + * for the fake NUMA cases. + * + * Called before the per_cpu areas are setup. + */ +void __init init_cpu_to_node(void) +{ + int cpu; + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + + BUG_ON(cpu_to_apicid == NULL); + + for_each_possible_cpu(cpu) { + int node = numa_cpu_node(cpu); + + if (node == NUMA_NO_NODE) + continue; + if (!node_online(node)) + node = find_near_online_node(node); + numa_set_node(cpu, node); + } +} + #ifndef CONFIG_DEBUG_PER_CPU_MAPS # ifndef CONFIG_NUMA_EMU diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 8d91d227be09..505bb04654b5 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, */ get_memcfg_numa(); + numa_init_array(); kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 14664f58a759..f548fbf75f44 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -224,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) node_set_online(nodeid); } -/* - * There are unfortunately some poorly designed mainboards around that - * only connect memory to a single CPU. This breaks the 1:1 cpu->node - * mapping. To avoid this fill in the mapping for all possible CPUs, - * as the number of CPUs is not known yet. We round robin the existing - * nodes. - */ -void __init numa_init_array(void) -{ - int rr, i; - - rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { - if (early_cpu_to_node(i) != NUMA_NO_NODE) - continue; - numa_set_node(i, rr); - rr = next_node(rr, node_online_map); - if (rr == MAX_NUMNODES) - rr = first_node(node_online_map); - } -} - #ifdef CONFIG_NUMA_EMU /* Numa emulation */ static struct bootnode nodes[MAX_NUMNODES] __initdata; @@ -664,59 +642,6 @@ unsigned long __init numa_free_all_bootmem(void) return pages; } -#ifdef CONFIG_NUMA - -static __init int find_near_online_node(int node) -{ - int n, val; - int min_val = INT_MAX; - int best_node = -1; - - for_each_online_node(n) { - val = node_distance(node, n); - - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - return best_node; -} - -/* - * Setup early cpu_to_node. - * - * Populate cpu_to_node[] only if x86_cpu_to_apicid[], - * and apicid_to_node[] tables have valid entries for a CPU. - * This means we skip cpu_to_node[] initialisation for NUMA - * emulation and faking node case (when running a kernel compiled - * for NUMA on a non NUMA box), which is OK as cpu_to_node[] - * is already initialized in a round robin manner at numa_init_array, - * prior to this call, and this initialization is good enough - * for the fake NUMA cases. - * - * Called before the per_cpu areas are setup. - */ -void __init init_cpu_to_node(void) -{ - int cpu; - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - - BUG_ON(cpu_to_apicid == NULL); - - for_each_possible_cpu(cpu) { - int node = numa_cpu_node(cpu); - - if (node == NUMA_NO_NODE) - continue; - if (!node_online(node)) - node = find_near_online_node(node); - numa_set_node(cpu, node); - } -} -#endif - int __cpuinit numa_cpu_node(int cpu) { int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); -- cgit v1.2.3 From 14392fd329eca9b59d51c0aa5d0acfb4965424d1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Feb 2011 14:08:53 -0800 Subject: x86, numa: Add error handling for bad cpu-to-node mappings CONFIG_DEBUG_PER_CPU_MAPS may return NUMA_NO_NODE when an early_cpu_to_node() mapping hasn't been initialized. In such a case, it emits a warning and continues without an issue but callers may try to use the return value to index into an array. We can catch those errors and fail silently since a warning has already been emitted. No current user of numa_add_cpu() requires this error checking to avoid a crash, but it's better to be proactive in case a future user happens to have a bug and a user tries to diagnose it with CONFIG_DEBUG_PER_CPU_MAPS. Reported-by: Jesper Juhl Signed-off-by: David Rientjes Cc: Tejun Heo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/numa.c | 4 ++++ arch/x86/mm/numa_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/mm/numa.c') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index bf60715bd1b7..9559d360fde7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -219,6 +219,10 @@ struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) struct cpumask *mask; char buf[64]; + if (node == NUMA_NO_NODE) { + /* early_cpu_to_node() already emits a warning and trace */ + return NULL; + } mask = node_to_cpumask_map[node]; if (!mask) { pr_err("node_to_cpumask_map[%i] NULL\n", node); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index f548fbf75f44..3f9411ed3cdc 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -709,6 +709,10 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) struct cpumask *mask; int i; + if (node == NUMA_NO_NODE) { + /* early_cpu_to_node() already emits a warning and trace */ + return; + } for_each_online_node(i) { unsigned long addr; -- cgit v1.2.3