diff options
author | Oleg Nesterov <oleg@redhat.com> | 2019-01-28 17:00:13 +0100 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2019-01-31 15:55:57 +0100 |
commit | 51bee5abeab2058ea5813c5615d6197a23dbf041 (patch) | |
tree | 5747f7ee7faf9589d1cf49ca690e598b222cef92 /kernel/cgroup | |
parent | fs/dcache: Track & report number of negative dentries (diff) | |
download | linux-51bee5abeab2058ea5813c5615d6197a23dbf041.tar.xz linux-51bee5abeab2058ea5813c5615d6197a23dbf041.zip |
cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release() to fix the accounting
The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which
needs pids_free() to uncharge the pid.
However, ->free() is called from __put_task_struct()->cgroup_free() and this
is too late. Even the trivial program which does
for (;;) {
int pid = fork();
assert(pid >= 0);
if (pid)
wait(NULL);
else
exit(0);
}
can run out of limits because release_task()->call_rcu(delayed_put_task_struct)
implies an RCU gp after the task/pid goes away and before the final put().
Test-case:
mkdir -p /tmp/CG
mount -t cgroup2 none /tmp/CG
echo '+pids' > /tmp/CG/cgroup.subtree_control
mkdir /tmp/CG/PID
echo 2 > /tmp/CG/PID/pids.max
perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' &
echo $! > /tmp/CG/PID/cgroup.procs
Without this patch the forking process fails soon after migration.
Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite
into the new helper, cgroup_release(), called by release_task() which actually
frees the pid(s).
Reported-by: Herton R. Krzesinski <hkrzesin@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup')
-rw-r--r-- | kernel/cgroup/cgroup.c | 15 | ||||
-rw-r--r-- | kernel/cgroup/pids.c | 4 |
2 files changed, 11 insertions, 8 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..f4418371c83b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1; */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; -static u16 have_free_callback __read_mostly; +static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ @@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; - have_free_callback |= (bool)ss->free << ss->id; + have_release_callback |= (bool)ss->release << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been @@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk) } while_each_subsys_mask(); } -void cgroup_free(struct task_struct *task) +void cgroup_release(struct task_struct *task) { - struct css_set *cset = task_css_set(task); struct cgroup_subsys *ss; int ssid; - do_each_subsys_mask(ss, ssid, have_free_callback) { - ss->free(task); + do_each_subsys_mask(ss, ssid, have_release_callback) { + ss->release(task); } while_each_subsys_mask(); +} +void cgroup_free(struct task_struct *task) +{ + struct css_set *cset = task_css_set(task); put_css_set(cset); } diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 9829c67ebc0a..c9960baaa14f 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task) pids_uncharge(pids, 1); } -static void pids_free(struct task_struct *task) +static void pids_release(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); @@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = { .cancel_attach = pids_cancel_attach, .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, - .free = pids_free, + .release = pids_release, .legacy_cftypes = pids_files, .dfl_cftypes = pids_files, .threaded = true, |