summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2019-06-27 22:39:52 +0200
committerJens Axboe <axboe@kernel.dk>2019-07-10 17:00:57 +0200
commitd3f77dfdc71835f8db71ca57d272b1fbec9dfc18 (patch)
treeced59cee416b39c3c2f80c647c736a2d378772e3 /include
parentblkcg, writeback: Implement wbc_blkcg_css() (diff)
downloadlinux-d3f77dfdc71835f8db71ca57d272b1fbec9dfc18.tar.xz
linux-d3f77dfdc71835f8db71ca57d272b1fbec9dfc18.zip
blkcg: implement REQ_CGROUP_PUNT
When a shared kthread needs to issue a bio for a cgroup, doing so synchronously can lead to priority inversions as the kthread can be trapped waiting for that cgroup. This patch implements REQ_CGROUP_PUNT flag which makes submit_bio() punt the actual issuing to a dedicated per-blkcg work item to avoid such priority inversions. This will be used to fix priority inversions in btrfs compression and should be generally useful as we grow filesystem support for comprehensive IO control. Cc: Chris Mason <clm@fb.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev.h1
-rw-r--r--include/linux/blk-cgroup.h16
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/writeback.h13
4 files changed, 36 insertions, 4 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f9b029180241..35b31d176f74 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -48,6 +48,7 @@ extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
extern struct workqueue_struct *bdi_wq;
+extern struct workqueue_struct *bdi_async_bio_wq;
static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 33f23a858438..689a58231288 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -132,13 +132,17 @@ struct blkcg_gq {
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
- struct rcu_head rcu_head;
+ spinlock_t async_bio_lock;
+ struct bio_list async_bios;
+ struct work_struct async_bio_work;
atomic_t use_delay;
atomic64_t delay_nsec;
atomic64_t delay_start;
u64 last_delay;
int last_use;
+
+ struct rcu_head rcu_head;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
+{
+ if (bio->bi_opf & REQ_CGROUP_PUNT)
+ return __blkcg_punt_bio_submit(bio);
+ else
+ return false;
+}
static inline void blkcg_bio_issue_init(struct bio *bio)
{
@@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6a53799c3fe2..feff3fe4467e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,14 @@ enum req_flag_bits {
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
__REQ_NOWAIT, /* Don't wait if request will block */
+ /*
+ * When a shared kthread needs to issue a bio for a cgroup, doing
+ * so synchronously can lead to priority inversions as the kthread
+ * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
+ * submit_bio() punt the actual issuing to a dedicated per-blkcg
+ * work item to avoid such priority inversions.
+ */
+ __REQ_CGROUP_PUNT,
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
@@ -337,6 +345,8 @@ enum req_flag_bits {
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
+#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
+
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI (1ULL << __REQ_HIPRI)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e056a22075cf..8945aac31392 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -78,6 +78,8 @@ struct writeback_control {
*/
unsigned no_cgroup_owner:1;
+ unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */
+
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *wb; /* wb this writeback is issued under */
struct inode *inode; /* inode being written out */
@@ -94,12 +96,17 @@ struct writeback_control {
static inline int wbc_to_write_flags(struct writeback_control *wbc)
{
+ int flags = 0;
+
+ if (wbc->punt_to_cgroup)
+ flags = REQ_CGROUP_PUNT;
+
if (wbc->sync_mode == WB_SYNC_ALL)
- return REQ_SYNC;
+ flags |= REQ_SYNC;
else if (wbc->for_kupdate || wbc->for_background)
- return REQ_BACKGROUND;
+ flags |= REQ_BACKGROUND;
- return 0;
+ return flags;
}
static inline struct cgroup_subsys_state *