diff options
author | Srikar Dronamraju <srikar@linux.vnet.ibm.com> | 2021-04-15 14:09:34 +0200 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2021-04-17 02:40:51 +0200 |
commit | c1e53367dab15e41814cff4e37df8ec4ac8fb9d7 (patch) | |
tree | 490cad3a7a3e381380a82bed254bb9180bc92785 | |
parent | Revert "powerpc/topology: Update topology_core_cpumask" (diff) | |
download | linux-c1e53367dab15e41814cff4e37df8ec4ac8fb9d7.tar.xz linux-c1e53367dab15e41814cff4e37df8ec4ac8fb9d7.zip |
powerpc/smp: Cache CPU to chip lookup
On systems with large CPUs per node, even with the filtered matching of
related CPUs, there can be large number of calls to cpu_to_chip_id for
the same CPU. For example with 4096 vCPU, 1 node QEMU configuration,
with 4 threads per core, system could be see upto 1024 calls to
cpu_to_chip_id() for the same CPU. On a given system, cpu_to_chip_id()
for a given CPU would always return the same. Hence cache the result in
a lookup table for use in subsequent calls.
Since all CPUs sharing the same core will belong to the same chip, the
lookup_table has an entry for one CPU per core. chip_id_lookup_table is
not being freed and would be used on subsequent CPU online post CPU
offline.
Reported-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Tested-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210415120934.232271-4-srikar@linux.vnet.ibm.com
-rw-r--r-- | arch/powerpc/include/asm/smp.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/prom.c | 19 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 21 |
3 files changed, 35 insertions, 6 deletions
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 47081a9e13ca..03b3d010cbab 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); +extern int *chip_id_lookup_table; #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a8b2d6bfc1ca..fbe9deebc8e1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -65,6 +65,8 @@ #define DBG(fmt...) #endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id); int cpu_to_chip_id(int cpu) { struct device_node *np; + int ret = -1, idx; + + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); + + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; + } - of_node_put(np); - return of_get_ibm_chip_id(np); + return ret; } EXPORT_SYMBOL(cpu_to_chip_id); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f8576a2f34a0..ad3e97472679 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smallcore_mask(boot_cpuid)); } + if (cpu_to_chip_id(boot_cpuid) != -1) { + int idx = num_possible_cpus() / threads_per_core; + + /* + * All threads of a core will all belong to the same core, + * chip_id_lookup_table will have one entry per core. + * Assumption: if boot_cpuid doesn't have a chip-id, then no + * other CPUs, will also not have chip-id. + */ + chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL); + if (chip_id_lookup_table) + memset(chip_id_lookup_table, -1, sizeof(int) * idx); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu) { struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); - int chip_id = cpu_to_chip_id(cpu); cpumask_var_t mask; + int chip_id = -1; bool ret; int i; @@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu) if (has_coregroup_support()) update_coregroup_mask(cpu, &mask); - if (chip_id == -1 || !ret) { + if (chip_id_lookup_table && ret) + chip_id = cpu_to_chip_id(cpu); + + if (chip_id == -1) { cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); goto out; } |