From 970e178985cadbca660feb02f4d2ee3a09f7fdda Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 12 Jun 2012 05:18:32 +0200 Subject: sched: Improve scalability via 'CPU buddies', which withstand random perturbations Traversing an entire package is not only expensive, it also leads to tasks bouncing all over a partially idle and possible quite large package. Fix that up by assigning a 'buddy' CPU to try to motivate. Each buddy may try to motivate that one other CPU, if it's busy, tough, it may then try its SMT sibling, but that's all this optimization is allowed to cost. Sibling cache buddies are cross-wired to prevent bouncing. 4 socket 40 core + SMT Westmere box, single 30 sec tbench runs, higher is better: clients 1 2 4 8 16 32 64 128 .......................................................................... pre 30 41 118 645 3769 6214 12233 14312 post 299 603 1211 2418 4697 6847 11606 14557 A nice increase in performance. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339471112.7352.32.camel@marge.simpson.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) (limited to 'kernel/sched/fair.c') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c099cc6eebe3..dd00aaf44fda 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2637,8 +2637,6 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - struct sched_group *sg; - int i; /* * If the task is going to be woken-up on this cpu and if it is @@ -2655,29 +2653,17 @@ static int select_idle_sibling(struct task_struct *p, int target) return prev_cpu; /* - * Otherwise, iterate the domains and find an elegible idle cpu. + * Otherwise, check assigned siblings to find an elegible idle cpu. */ sd = rcu_dereference(per_cpu(sd_llc, target)); - for_each_lower_domain(sd) { - sg = sd->groups; - do { - if (!cpumask_intersects(sched_group_cpus(sg), - tsk_cpus_allowed(p))) - goto next; - - for_each_cpu(i, sched_group_cpus(sg)) { - if (!idle_cpu(i)) - goto next; - } - target = cpumask_first_and(sched_group_cpus(sg), - tsk_cpus_allowed(p)); - goto done; -next: - sg = sg->next; - } while (sg != sd->groups); + for_each_lower_domain(sd) { + if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p))) + continue; + if (idle_cpu(sd->idle_buddy)) + return sd->idle_buddy; } -done: + return target; } -- cgit v1.2.3