summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/cgroup.h3
-rw-r--r--kernel/cgroup.c53
2 files changed, 39 insertions, 17 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 71e77e7cdb6f..c24bd0b9f93a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -82,7 +82,8 @@ struct cgroup_subsys_state {
/* ID for this css, if possible */
struct css_id __rcu *id;
- /* percpu_ref killing and putting dentry on the last css_put() */
+ /* percpu_ref killing and RCU release */
+ struct rcu_head rcu_head;
struct work_struct destroy_work;
};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3137e38995b0..66d01078eebe 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -869,18 +869,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
static void cgroup_free_fn(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
- struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
- /*
- * Release the subsystem state objects.
- */
- for_each_root_subsys(cgrp->root, ss) {
- struct cgroup_subsys_state *css = cgroup_css(cgrp, ss->subsys_id);
-
- ss->css_free(css);
- }
-
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
@@ -4281,32 +4271,62 @@ err:
return ret;
}
+/*
+ * css destruction is four-stage process.
+ *
+ * 1. Destruction starts. Killing of the percpu_ref is initiated.
+ * Implemented in kill_css().
+ *
+ * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
+ * and thus css_tryget() is guaranteed to fail, the css can be offlined
+ * by invoking offline_css(). After offlining, the base ref is put.
+ * Implemented in css_killed_work_fn().
+ *
+ * 3. When the percpu_ref reaches zero, the only possible remaining
+ * accessors are inside RCU read sections. css_release() schedules the
+ * RCU callback.
+ *
+ * 4. After the grace period, the css can be freed. Implemented in
+ * css_free_work_fn().
+ *
+ * It is actually hairier because both step 2 and 4 require process context
+ * and thus involve punting to css->destroy_work adding two additional
+ * steps to the already complex sequence.
+ */
static void css_free_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
+ struct cgroup *cgrp = css->cgroup;
if (css->parent)
css_put(css->parent);
- cgroup_dput(css->cgroup);
+ css->ss->css_free(css);
+ cgroup_dput(cgrp);
}
-static void css_release(struct percpu_ref *ref)
+static void css_free_rcu_fn(struct rcu_head *rcu_head)
{
struct cgroup_subsys_state *css =
- container_of(ref, struct cgroup_subsys_state, refcnt);
+ container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
/*
* css holds an extra ref to @cgrp->dentry which is put on the last
- * css_put(). dput() requires process context, which css_put() may
- * be called without. @css->destroy_work will be used to invoke
- * dput() asynchronously from css_put().
+ * css_put(). dput() requires process context which we don't have.
*/
INIT_WORK(&css->destroy_work, css_free_work_fn);
schedule_work(&css->destroy_work);
}
+static void css_release(struct percpu_ref *ref)
+{
+ struct cgroup_subsys_state *css =
+ container_of(ref, struct cgroup_subsys_state, refcnt);
+
+ call_rcu(&css->rcu_head, css_free_rcu_fn);
+}
+
static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
struct cgroup *cgrp)
{
@@ -4356,6 +4376,7 @@ static void offline_css(struct cgroup_subsys_state *css)
css->flags &= ~CSS_ONLINE;
css->cgroup->nr_css--;
+ RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
}
/*