summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/io-wq.c23
-rw-r--r--fs/io-wq.h3
-rw-r--r--fs/io_uring.c92
3 files changed, 118 insertions, 0 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index c1a7ef85844b..1be0d70f8673 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -17,6 +17,7 @@
#include <linux/rculist_nulls.h>
#include <linux/fs_struct.h>
#include <linux/task_work.h>
+#include <linux/blk-cgroup.h>
#include "io-wq.h"
@@ -57,6 +58,9 @@ struct io_worker {
struct rcu_head rcu;
struct mm_struct *mm;
+#ifdef CONFIG_BLK_CGROUP
+ struct cgroup_subsys_state *blkcg_css;
+#endif
const struct cred *cur_creds;
const struct cred *saved_creds;
struct files_struct *restore_files;
@@ -177,6 +181,13 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
worker->mm = NULL;
}
+#ifdef CONFIG_BLK_CGROUP
+ if (worker->blkcg_css) {
+ kthread_associate_blkcg(NULL);
+ worker->blkcg_css = NULL;
+ }
+#endif
+
return dropped_lock;
}
@@ -439,6 +450,17 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
work->flags |= IO_WQ_WORK_CANCEL;
}
+static inline void io_wq_switch_blkcg(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+#ifdef CONFIG_BLK_CGROUP
+ if (work->blkcg_css != worker->blkcg_css) {
+ kthread_associate_blkcg(work->blkcg_css);
+ worker->blkcg_css = work->blkcg_css;
+ }
+#endif
+}
+
static void io_wq_switch_creds(struct io_worker *worker,
struct io_wq_work *work)
{
@@ -467,6 +489,7 @@ static void io_impersonate_work(struct io_worker *worker,
if (worker->cur_creds != work->creds)
io_wq_switch_creds(worker, work);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
+ io_wq_switch_blkcg(worker, work);
}
static void io_assign_current_work(struct io_worker *worker,
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 2519830c8c55..84bcf6a85523 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -87,6 +87,9 @@ struct io_wq_work {
struct io_wq_work_node list;
struct files_struct *files;
struct mm_struct *mm;
+#ifdef CONFIG_BLK_CGROUP
+ struct cgroup_subsys_state *blkcg_css;
+#endif
const struct cred *creds;
struct nsproxy *nsproxy;
struct fs_struct *fs;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0deaf8b5068d..d7f41e3021e6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -80,6 +80,7 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
+#include <linux/blk-cgroup.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -300,6 +301,10 @@ struct io_ring_ctx {
/* Only used for accounting purposes */
struct mm_struct *mm_account;
+#ifdef CONFIG_BLK_CGROUP
+ struct cgroup_subsys_state *sqo_blkcg_css;
+#endif
+
struct io_sq_data *sq_data; /* if using sq thread polling */
struct wait_queue_head sqo_sq_wait;
@@ -748,6 +753,8 @@ struct io_op_def {
unsigned needs_fsize : 1;
/* must always have async data allocated */
unsigned needs_async_data : 1;
+ /* needs blkcg context, issues async io potentially */
+ unsigned needs_blkcg : 1;
/* size of async data needed, if any */
unsigned short async_size;
};
@@ -761,6 +768,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.pollin = 1,
.buffer_select = 1,
.needs_async_data = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITEV] = {
@@ -771,15 +779,18 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.pollout = 1,
.needs_fsize = 1,
.needs_async_data = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE_FIXED] = {
@@ -788,6 +799,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_fsize = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_POLL_ADD] = {
@@ -797,6 +809,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
[IORING_OP_POLL_REMOVE] = {},
[IORING_OP_SYNC_FILE_RANGE] = {
.needs_file = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_SENDMSG] = {
.needs_mm = 1,
@@ -805,6 +818,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.needs_fs = 1,
.pollout = 1,
.needs_async_data = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_RECVMSG] = {
@@ -815,6 +829,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.pollin = 1,
.buffer_select = 1,
.needs_async_data = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_TIMEOUT] = {
@@ -847,15 +862,18 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
.needs_fsize = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_OPENAT] = {
.file_table = 1,
.needs_fs = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_CLOSE] = {
.needs_file = 1,
.needs_file_no_error = 1,
.file_table = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_FILES_UPDATE] = {
.needs_mm = 1,
@@ -865,6 +883,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.needs_mm = 1,
.needs_fs = 1,
.file_table = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_READ] = {
.needs_mm = 1,
@@ -872,6 +891,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE] = {
@@ -880,19 +900,23 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_fsize = 1,
+ .needs_blkcg = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_MADVISE] = {
.needs_mm = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_SEND] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_RECV] = {
.needs_mm = 1,
@@ -900,10 +924,12 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_OPENAT2] = {
.file_table = 1,
.needs_fs = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
@@ -913,6 +939,7 @@ static const struct io_op_def io_op_defs[] __read_mostly = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
+ .needs_blkcg = 1,
},
[IORING_OP_PROVIDE_BUFFERS] = {},
[IORING_OP_REMOVE_BUFFERS] = {},
@@ -1011,6 +1038,26 @@ static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
return __io_sq_thread_acquire_mm(ctx);
}
+static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
+ struct cgroup_subsys_state **cur_css)
+
+{
+#ifdef CONFIG_BLK_CGROUP
+ /* puts the old one when swapping */
+ if (*cur_css != ctx->sqo_blkcg_css) {
+ kthread_associate_blkcg(ctx->sqo_blkcg_css);
+ *cur_css = ctx->sqo_blkcg_css;
+ }
+#endif
+}
+
+static void io_sq_thread_unassociate_blkcg(void)
+{
+#ifdef CONFIG_BLK_CGROUP
+ kthread_associate_blkcg(NULL);
+#endif
+}
+
static inline void req_set_fail_links(struct io_kiocb *req)
{
if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
@@ -1148,6 +1195,10 @@ static bool io_req_clean_work(struct io_kiocb *req)
mmdrop(req->work.mm);
req->work.mm = NULL;
}
+#ifdef CONFIG_BLK_CGROUP
+ if (req->work.blkcg_css)
+ css_put(req->work.blkcg_css);
+#endif
if (req->work.creds) {
put_cred(req->work.creds);
req->work.creds = NULL;
@@ -1187,6 +1238,19 @@ static void io_prep_async_work(struct io_kiocb *req)
mmgrab(current->mm);
req->work.mm = current->mm;
}
+#ifdef CONFIG_BLK_CGROUP
+ if (!req->work.blkcg_css && def->needs_blkcg) {
+ rcu_read_lock();
+ req->work.blkcg_css = blkcg_css();
+ /*
+ * This should be rare, either the cgroup is dying or the task
+ * is moving cgroups. Just punt to root for the handful of ios.
+ */
+ if (!css_tryget_online(req->work.blkcg_css))
+ req->work.blkcg_css = NULL;
+ rcu_read_unlock();
+ }
+#endif
if (!req->work.creds)
req->work.creds = get_current_cred();
if (!req->work.fs && def->needs_fs) {
@@ -6789,6 +6853,7 @@ static void io_sqd_init_new(struct io_sq_data *sqd)
static int io_sq_thread(void *data)
{
+ struct cgroup_subsys_state *cur_css = NULL;
const struct cred *old_cred = NULL;
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
@@ -6818,6 +6883,7 @@ static int io_sq_thread(void *data)
revert_creds(old_cred);
old_cred = override_creds(ctx->creds);
}
+ io_sq_thread_associate_blkcg(ctx, &cur_css);
ret |= __io_sq_thread(ctx, start_jiffies, cap_entries);
@@ -6841,6 +6907,8 @@ static int io_sq_thread(void *data)
io_run_task_work();
+ if (cur_css)
+ io_sq_thread_unassociate_blkcg();
if (old_cred)
revert_creds(old_cred);
@@ -8304,6 +8372,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
ctx->mm_account = NULL;
}
+#ifdef CONFIG_BLK_CGROUP
+ if (ctx->sqo_blkcg_css)
+ css_put(ctx->sqo_blkcg_css);
+#endif
+
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
@@ -9288,6 +9361,25 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
mmgrab(current->mm);
ctx->mm_account = current->mm;
+#ifdef CONFIG_BLK_CGROUP
+ /*
+ * The sq thread will belong to the original cgroup it was inited in.
+ * If the cgroup goes offline (e.g. disabling the io controller), then
+ * issued bios will be associated with the closest cgroup later in the
+ * block layer.
+ */
+ rcu_read_lock();
+ ctx->sqo_blkcg_css = blkcg_css();
+ ret = css_tryget_online(ctx->sqo_blkcg_css);
+ rcu_read_unlock();
+ if (!ret) {
+ /* don't init against a dying cgroup, have the user try again */
+ ctx->sqo_blkcg_css = NULL;
+ ret = -ENODEV;
+ goto err;
+ }
+#endif
+
/*
* Account memory _before_ installing the file descriptor. Once
* the descriptor is installed, it can get closed at any time. Also