diff options
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 5 | ||||
-rw-r--r-- | include/linux/topology.h | 14 | ||||
-rw-r--r-- | kernel/sched/topology.c | 3 | ||||
-rw-r--r-- | mm/khugepaged.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 |
5 files changed, 23 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8d4e50428b68..ceeb8afc7cf3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/random.h> +#include <linux/topology.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -824,6 +825,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); +#ifdef CONFIG_NUMA + node_reclaim_distance = 32; +#endif + /* * Fix erratum 1076: CPB feature bit not being set in CPUID. * Always set it, except when running under a hypervisor. diff --git a/include/linux/topology.h b/include/linux/topology.h index 47a3e3c08036..579522ec446c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -59,6 +59,20 @@ int arch_update_cpu_topology(void); */ #define RECLAIM_DISTANCE 30 #endif + +/* + * The following tunable allows platforms to override the default node + * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are + * sufficiently fast that the default value actually hurts + * performance. + * + * AMD EPYC machines use this because even though the 2-hop distance + * is 32 (3.2x slower than a local memory access) performance actually + * *improves* if allowed to reclaim memory and load balance tasks + * between NUMA nodes 2-hops apart. + */ +extern int __read_mostly node_reclaim_distance; + #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8f83e8e3ea9a..b5667a273bf6 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1284,6 +1284,7 @@ static int sched_domains_curr_level; int sched_max_numa_distance; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; #endif /* @@ -1402,7 +1403,7 @@ sd_init(struct sched_domain_topology_level *tl, sd->flags &= ~SD_PREFER_SIBLING; sd->flags |= SD_SERIALIZE; - if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) { + if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { sd->flags &= ~(SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index eaaa21b23215..ccede2425c3f 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -710,7 +710,7 @@ static bool khugepaged_scan_abort(int nid) for (i = 0; i < MAX_NUMNODES; i++) { if (!khugepaged_node_load[i]) continue; - if (node_distance(nid, i) > RECLAIM_DISTANCE) + if (node_distance(nid, i) > node_reclaim_distance) return true; } return false; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 272c6de1bf4e..0d54cd2c43a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3522,7 +3522,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= - RECLAIM_DISTANCE; + node_reclaim_distance; } #else /* CONFIG_NUMA */ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) |