summary | refs | log | tree | commit | diff | stats
path: root/block/blk-cgroup.c
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2012-03-05 22:15:19 +0100
committer: Jens Axboe <axboe@kernel.dk> 2012-03-06 21:27:23 +0100
commit: 03aa264ac15637b6f98374270bcdf31400965505 (patch)
tree: 6fa9ca54d3f775fba19123790f6655158034a1d8 /block/blk-cgroup.c
parent: blkcg: move per-queue blkg list heads and counters to queue and blkg (diff)
download: linux-03aa264ac15637b6f98374270bcdf31400965505.tar.xz
linux-03aa264ac15637b6f98374270bcdf31400965505.zip
blkcg: let blkcg core manage per-queue blkg list and counter
With the previous patch to move blkg list heads and counters to request_queue and blkg, the logic to manage them in both policies is almost identical and can be moved to blkcg core. This patch moves blkg link logic into blkg_lookup_create(), implements common blkg unlink code in blkg_destroy(), and updates blkg_destroy_all() so that it's policy specific and can skip the root group. The updated blkg_destroy_all() is now used both to clear the queue for bypassing and elv switching, and to release all blkgs on q exit. This patch introduces a race window where policy [de]registration may race against queue blkg clearing. This can only be a problem on cfq unload and shouldn't be a real problem in practice (and we have many other places where this race already exists). Future patches will remove these unlikely races. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- block/blk-cgroup.c 72
1 files changed, 56 insertions, 16 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e940972ccd66..2ca9a15db0f7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
/* insert */
spin_lock(&blkcg->lock);
swap(blkg, new_blkg);
+
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
- pol->ops.blkio_link_group_fn(q, blkg);
+ list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
+ q->nr_blkgs[plid]++;
+
spin_unlock(&blkcg->lock);
out:
blkg_free(new_blkg);
@@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
}
EXPORT_SYMBOL_GPL(blkg_lookup);
-void blkg_destroy_all(struct request_queue *q)
+static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
+{
+ struct request_queue *q = blkg->q;
+
+ lockdep_assert_held(q->queue_lock);
+
+ /* Something wrong if we are trying to remove same group twice */
+ WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
+ list_del_init(&blkg->q_node[plid]);
+
+ WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
+ q->nr_blkgs[plid]--;
+
+ /*
+ * Put the reference taken at the time of creation so that when all
+ * queues are gone, group can be destroyed.
+ */
+ blkg_put(blkg);
+}
+
+void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
+ bool destroy_root)
{
- struct blkio_policy_type *pol;
+ struct blkio_group *blkg, *n;
while (true) {
bool done = true;
- spin_lock(&blkio_list_lock);
spin_lock_irq(q->queue_lock);
- /*
- * clear_queue_fn() might return with non-empty group list
- * if it raced cgroup removal and lost. cgroup removal is
- * guaranteed to make forward progress and retrying after a
- * while is enough. This ugliness is scheduled to be
- * removed after locking update.
- */
- list_for_each_entry(pol, &blkio_list, list)
- if (!pol->ops.blkio_clear_queue_fn(q))
+ list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
+ q_node[plid]) {
+ /* skip root? */
+ if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
+ continue;
+
+ /*
+ * If cgroup removal path got to blk_group first
+ * and removed it from cgroup list, then it will
+ * take care of destroying cfqg also.
+ */
+ if (!blkiocg_del_blkio_group(blkg))
+ blkg_destroy(blkg, plid);
+ else
done = false;
+ }
spin_unlock_irq(q->queue_lock);
- spin_unlock(&blkio_list_lock);
+ /*
+ * Group list may not be empty if we raced cgroup removal
+ * and lost. cgroup removal is guaranteed to make forward
+ * progress and retrying after a while is enough. This
+ * ugliness is scheduled to be removed after locking
+ * update.
+ */
if (done)
break;
msleep(10); /* just some random duration I like */
}
}
+EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
@@ -1549,11 +1585,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
* this event.
*/
spin_lock(&blkio_list_lock);
+ spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry(blkiop, &blkio_list, list) {
if (blkiop->plid != blkg->plid)
continue;
- blkiop->ops.blkio_unlink_group_fn(q, blkg);
+ blkg_destroy(blkg, blkiop->plid);
}
+ spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock(&blkio_list_lock);
} while (1);
@@ -1695,12 +1733,14 @@ static void blkcg_bypass_start(void)
__acquires(&all_q_mutex)
{
struct request_queue *q;
+ int i;
mutex_lock(&all_q_mutex);
list_for_each_entry(q, &all_q_list, all_q_node) {
blk_queue_bypass_start(q);
- blkg_destroy_all(q);
+ for (i = 0; i < BLKIO_NR_POLICIES; i++)
+ blkg_destroy_all(q, i, false);
}
}