summaryrefslogtreecommitdiffstats
path: root/kernel/cgroup/cgroup-v1.c
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2019-01-12 06:20:54 +0100
committerAl Viro <viro@zeniv.linux.org.uk>2019-01-17 17:53:02 +0100
commit35ac1184244f1329783e1d897f74926d8bb1103a (patch)
tree948885626366907d39c1d4540a89283ed2fb1a01 /kernel/cgroup/cgroup-v1.c
parentfix cgroup_do_mount() handling of failure exits (diff)
downloadlinux-35ac1184244f1329783e1d897f74926d8bb1103a.tar.xz
linux-35ac1184244f1329783e1d897f74926d8bb1103a.zip
cgroup: saner refcounting for cgroup_root
* make the reference from superblock to cgroup_root counting - do cgroup_put() in cgroup_kill_sb() whether we'd done percpu_ref_kill() or not; matching grab is done when we allocate a new root. That gives the same refcounting rules for all callers of cgroup_do_mount() - a reference to cgroup_root has been grabbed by caller and it either is transferred to new superblock or dropped. * have cgroup_kill_sb() treat an already killed refcount as "just don't bother killing it, then". * after successful cgroup_do_mount() have cgroup1_mount() recheck if we'd raced with mount/umount from somebody else and cgroup_root got killed. In that case we drop the superblock and bugger off with -ERESTARTSYS, same as if we'd found it in the list already dying. * don't bother with delayed initialization of refcount - it's unreliable and not needed. No need to prevent attempts to bump the refcount if we find cgroup_root of another mount in progress - sget will reuse an existing superblock just fine and if the other sb manages to die before we get there, we'll catch that immediately after cgroup_do_mount(). * don't bother with kernfs_pin_sb() - no need for doing that either. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'kernel/cgroup/cgroup-v1.c')
-rw-r--r--kernel/cgroup/cgroup-v1.c58
1 files changed, 13 insertions, 45 deletions
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 583b969b0c0e..f94a7229974e 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
void *data, unsigned long magic,
struct cgroup_namespace *ns)
{
- struct super_block *pinned_sb = NULL;
struct cgroup_sb_opts opts;
struct cgroup_root *root;
struct cgroup_subsys *ss;
struct dentry *dentry;
int i, ret;
- bool new_root = false;
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
@@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
if (root->flags ^ opts.flags)
pr_warn("new mount options do not match the existing superblock, will be ignored\n");
- /*
- * We want to reuse @root whose lifetime is governed by its
- * ->cgrp. Let's check whether @root is alive and keep it
- * that way. As cgroup_kill_sb() can happen anytime, we
- * want to block it by pinning the sb so that @root doesn't
- * get killed before mount is complete.
- *
- * With the sb pinned, tryget_live can reliably indicate
- * whether @root can be reused. If it's being killed,
- * drain it. We can use wait_queue for the wait but this
- * path is super cold. Let's just sleep a bit and retry.
- */
- pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
- if (IS_ERR(pinned_sb) ||
- !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
- mutex_unlock(&cgroup_mutex);
- if (!IS_ERR_OR_NULL(pinned_sb))
- deactivate_super(pinned_sb);
- msleep(10);
- ret = restart_syscall();
- goto out_free;
- }
-
ret = 0;
goto out_unlock;
}
@@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
ret = -ENOMEM;
goto out_unlock;
}
- new_root = true;
init_cgroup_root(root, &opts);
- ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
+ ret = cgroup_setup_root(root, opts.subsys_mask);
if (ret)
cgroup_free_root(root);
out_unlock:
+ if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+ mutex_unlock(&cgroup_mutex);
+ msleep(10);
+ ret = restart_syscall();
+ goto out_free;
+ }
mutex_unlock(&cgroup_mutex);
out_free:
kfree(opts.release_agent);
@@ -1252,25 +1232,13 @@ out_free:
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
CGROUP_SUPER_MAGIC, ns);
- /*
- * There's a race window after we release cgroup_mutex and before
- * allocating a superblock. Make sure a concurrent process won't
- * be able to re-use the root during this window by delaying the
- * initialization of root refcnt.
- */
- if (new_root) {
- mutex_lock(&cgroup_mutex);
- percpu_ref_reinit(&root->cgrp.self.refcnt);
- mutex_unlock(&cgroup_mutex);
+ if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+ struct super_block *sb = dentry->d_sb;
+ dput(dentry);
+ deactivate_locked_super(sb);
+ msleep(10);
+ dentry = ERR_PTR(restart_syscall());
}
-
- /*
- * If @pinned_sb, we're reusing an existing root and holding an
- * extra ref on its sb. Mount is complete. Put the extra ref.
- */
- if (pinned_sb)
- deactivate_super(pinned_sb);
-
return dentry;
}