diff options
author | Peter Zijlstra <peterz@infradead.org> | 2015-05-15 17:43:34 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-08-12 12:06:09 +0200 |
commit | 25834c73f93af7f0712c98ca4593691592e6b360 (patch) | |
tree | 5da4f7c4da2a85ba6458a4de234da5e3d0a0c27a /kernel/sched/core.c | |
parent | sched: Ensure a task has a non-normalized vruntime when returning back to CFS (diff) | |
download | linux-25834c73f93af7f0712c98ca4593691592e6b360.tar.xz linux-25834c73f93af7f0712c98ca4593691592e6b360.zip |
sched: Fix a race between __kthread_bind() and sched_setaffinity()
Because sched_setscheduler() checks p->flags & PF_NO_SETAFFINITY
without locks, a caller might observe an old value and race with the
set_cpus_allowed_ptr() call from __kthread_bind() and effectively undo
it:
__kthread_bind()
do_set_cpus_allowed()
<SYSCALL>
sched_setaffinity()
if (p->flags & PF_NO_SETAFFINITIY)
set_cpus_allowed_ptr()
p->flags |= PF_NO_SETAFFINITY
Fix the bug by putting everything under the regular scheduler locks.
This also closes a hole in the serialization of task_struct::{nr_,}cpus_allowed.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dedekind1@gmail.com
Cc: juri.lelli@arm.com
Cc: mgorman@suse.de
Cc: riel@redhat.com
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20150515154833.545640346@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 36 |
1 files changed, 32 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ea6d74345e60..2e3b983da836 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1153,6 +1153,8 @@ static int migration_cpu_stop(void *data) void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { + lockdep_assert_held(&p->pi_lock); + if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); @@ -1169,7 +1171,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. */ -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +static int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, bool check) { unsigned long flags; struct rq *rq; @@ -1178,6 +1181,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) rq = task_rq_lock(p, &flags); + /* + * Must re-check here, to close a race against __kthread_bind(), + * sched_setaffinity() is not guaranteed to observe the flag. + */ + if (check && (p->flags & PF_NO_SETAFFINITY)) { + ret = -EINVAL; + goto out; + } + if (cpumask_equal(&p->cpus_allowed, new_mask)) goto out; @@ -1214,6 +1226,11 @@ out: return ret; } + +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ + return __set_cpus_allowed_ptr(p, new_mask, false); +} EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); void set_task_cpu(struct task_struct *p, unsigned int new_cpu) @@ -1595,6 +1612,15 @@ static void update_avg(u64 *avg, u64 sample) s64 diff = sample - *avg; *avg += diff >> 3; } + +#else + +static inline int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, bool check) +{ + return set_cpus_allowed_ptr(p, new_mask); +} + #endif /* CONFIG_SMP */ static void @@ -4340,7 +4366,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) } #endif again: - retval = set_cpus_allowed_ptr(p, new_mask); + retval = __set_cpus_allowed_ptr(p, new_mask, true); if (!retval) { cpuset_cpus_allowed(p, cpus_allowed); @@ -4865,7 +4891,8 @@ void init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_lock_irqsave(&idle->pi_lock, flags); + raw_spin_lock(&rq->lock); __sched_fork(0, idle); idle->state = TASK_RUNNING; @@ -4891,7 +4918,8 @@ void init_idle(struct task_struct *idle, int cpu) #if defined(CONFIG_SMP) idle->on_cpu = 1; #endif - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&idle->pi_lock, flags); /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); |