summaryrefslogtreecommitdiffstats
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2013-11-12 01:29:25 +0100
committerIngo Molnar <mingo@kernel.org>2013-11-13 13:33:51 +0100
commit46a73e8a1c1720f7713b5e2df68e9dd272015b5d (patch)
treea119f949799e3cd7382548eac2b41fa060c29ac5 /kernel/sched/fair.c
parentsched: Fix endless sync_sched/rcu() loop inside _cpu_down() (diff)
downloadlinux-46a73e8a1c1720f7713b5e2df68e9dd272015b5d.tar.xz
linux-46a73e8a1c1720f7713b5e2df68e9dd272015b5d.zip
sched/numa: Fix NULL pointer dereference in task_numa_migrate()
The cpusets code can split up the scheduler's domain tree into smaller domains. Some of those smaller domains may not cross NUMA nodes at all, leading to a NULL pointer dereference on the per-cpu sd_numa pointer. Tasks cannot be migrated out of their domain, so the patch also sets p->numa_preferred_nid to wherever they are, to prevent the migration from being retried over and over again. Reported-by: Prarit Bhargava <prarit@redhat.com> Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@suse.de> Link: http://lkml.kernel.org/n/tip-oosqomw0Jput0Jkvoowhrqtu@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to '')
-rw-r--r--kernel/sched/fair.c14
1 files changed, 13 insertions, 1 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df77c605c7a6..c11e36ff5ea0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1201,9 +1201,21 @@ static int task_numa_migrate(struct task_struct *p)
*/
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
- env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+ if (sd)
+ env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
rcu_read_unlock();
+ /*
+ * Cpusets can break the scheduler domain tree into smaller
+ * balance domains, some of which do not cross NUMA boundaries.
+ * Tasks that are "trapped" in such domains cannot be migrated
+ * elsewhere, so there is no point in (re)trying.
+ */
+ if (unlikely(!sd)) {
+ p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+ return -EINVAL;
+ }
+
taskweight = task_weight(p, env.src_nid);
groupweight = group_weight(p, env.src_nid);
update_numa_stats(&env.src_stats, env.src_nid);