diff options
author | Dennis Zhou (Facebook) <dennisszhou@gmail.com> | 2018-09-11 20:41:26 +0200 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2018-09-22 04:29:02 +0200 |
commit | 27e6fa996c534c32702aa4d32db0ffa383acd050 (patch) | |
tree | fbeed4501b887851cc4b48af3ddb9d24d8e2dd9f /include | |
parent | Blk-throttle: update to use rbtree with leftmost node cached (diff) | |
download | linux-27e6fa996c534c32702aa4d32db0ffa383acd050.tar.xz linux-27e6fa996c534c32702aa4d32db0ffa383acd050.zip |
blkcg: fix ref count issue with bio_blkcg using task_css
The accessor function bio_blkcg either returns the blkcg associated with
the bio or finds one in the current context. This can cause an issue
when trying to associate a bio with a blkcg. Particularly, it's the
third case that is problematic:
return css_to_blkcg(task_css(current, io_cgrp_id));
As the above may race against task migration and the cgroup exiting, it
is not always ok to take a reference on the blkcg returned from
bio_blkcg.
This patch adds association ahead of calling bio_blkcg rather than
after. This makes association a required and explicit step along the
code paths for calling bio_blkcg. blk_get_rl is modified as well to get
a reference to the blkcg it may use and blk_put_rl will always put the
reference back. Association is also moved above the bio_blkcg call to
ensure it will not return NULL in blk-iolatency.
BFQ and CFQ utilize this flaw, but due to the complexity, I do not want
to address this in this series. I've created a private version of the
function with notes not to use it describing the flaw. Hopefully soon,
that code can be cleaned up.
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blk-cgroup.h | 101 |
1 files changed, 93 insertions, 8 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..24067a1f8b36 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -230,22 +230,100 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + +/** + * blkcg_get_css - find and get a reference to the css + * + * Find the css associated with either the kthread or the current task. + * This takes a reference on the blkcg which will need to be managed by the + * caller. + */ +static inline struct cgroup_subsys_state *blkcg_get_css(void) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + css = kthread_blkcg(); + if (css) { + css_get(css); + } else { + /* + * This is a bit complicated. It is possible task_css is seeing + * an old css pointer here. This is caused by the current + * thread migrating away from this cgroup and this cgroup dying. + * css_tryget() will fail when trying to take a ref on a cgroup + * that's ref count has hit 0. + * + * Therefore, if it does fail, this means current must have + * been swapped away already and this is waiting for it to + * propagate on the polling cpu. Hence the use of cpu_relax(). + */ + while (true) { + css = task_css(current, io_cgrp_id); + if (likely(css_tryget(css))) + break; + cpu_relax(); + } + } + + rcu_read_unlock(); + + return css; +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal version of bio_blkcg for bfq and cfq + * + * DO NOT USE. + * There is a flaw using this version of the function. In particular, this was + * used in a broken paradigm where association was called on the given css. It + * is possible though that the returned css from task_css() is in the process + * of dying due to migration of the current task. So it is improper to assume + * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will + * take additional work to handle more gracefully. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return css_to_blkcg(blkcg_css()); +} +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, NULL if not associated. + * Callers are expected to either handle NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ if (bio && bio->bi_css) return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); + return NULL; } static inline bool blk_cgroup_congested(void) @@ -534,6 +612,10 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); blkcg = bio_blkcg(bio); + if (blkcg) + css_get(&blkcg->css); + else + blkcg = css_to_blkcg(blkcg_get_css()); /* bypass blkg lookup and use @q->root_rl directly for root */ if (blkcg == &blkcg_root) @@ -565,6 +647,8 @@ root_rl: */ static inline void blk_put_rl(struct request_list *rl) { + /* an additional ref is always taken for rl */ + css_put(&rl->blkg->blkcg->css); if (rl->blkg->blkcg != &blkcg_root) blkg_put(rl->blkg); } @@ -805,10 +889,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { @@ -930,6 +1014,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |