Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c	| 94
1 file changed, 82 insertions(+), 12 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f35b1285865..16fb7436043c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_WRITE] = {
.needs_file = 1,
+ .hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.plug = 1,
@@ -1481,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
struct io_timeout_data *io = req->async_data;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
+ if (status)
+ req_set_fail(req);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
@@ -1618,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
+ /* see waitqueue_active() comment */
+ smp_mb();
+
if (ctx->flags & IORING_SETUP_SQPOLL) {
- if (wq_has_sleeper(&ctx->cq_wait))
+ if (waitqueue_active(&ctx->cq_wait))
wake_up_all(&ctx->cq_wait);
}
if (io_should_trigger_evfd(ctx))
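Note on the hunk above: wq_has_sleeper() already embeds a full smp_mb() ahead of its
waitqueue check, so switching to the bare waitqueue_active() stays correct only because
an explicit smp_mb() is hoisted to the top of the helper, where it orders the CQ-ring
stores against both the cq_wait check and the eventfd check. The race being guarded is
the lost wakeup documented in the waitqueue_active() comment in include/linux/wait.h;
roughly, as a sketch with a generic condition rather than the actual CQ-ring fields:

    /* waker (CPU0) */                     /* waiter (CPU1) */
    cond = true;                           for (;;) {
    smp_mb();                                  prepare_to_wait(&wq_head, &wait, state);
    if (waitqueue_active(&wq_head))            /* full barrier via set_current_state() */
        wake_up(&wq_head);                     if (cond)
                                                   break;
                                               schedule();
                                           }
                                           finish_wait(&wq_head, &wait);

Without the waker-side barrier, the store to cond and the waitqueue_active() load can be
reordered, so the waiter may test cond before the store is visible and then sleep through
the wakeup.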
@@ -1851,6 +1857,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
io_req_complete_post(req, res, 0);
}
+static void io_req_complete_fail_submit(struct io_kiocb *req)
+{
+ /*
+ * We don't submit, fail them all, for that replace hardlinks with
+ * normal links. Extra REQ_F_LINK is tolerated.
+ */
+ req->flags &= ~REQ_F_HARDLINK;
+ req->flags |= REQ_F_LINK;
+ io_req_complete_failed(req, req->result);
+}
+
/*
* Don't initialise the fields below on every allocation, but do that in
* advance and keep them valid across allocations.
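For context on the REQ_F_HARDLINK/REQ_F_LINK swap above: a chain built with IOSQE_IO_LINK
is severed on failure, so the remaining requests complete with -ECANCELED, while
IOSQE_IO_HARDLINK keeps executing the chain regardless of an individual result.
io_req_complete_fail_submit() is only reached for requests already marked REQ_F_FAIL at
submission time, i.e. nothing in the chain is going to run, so hardlinks are downgraded to
plain links and the failure tears down the rest of the chain instead of executing it. A
hedged userspace sketch of the two link flags, using liburing helpers (ring, fd, buf and
buf2 are assumed locals, not part of this patch):

    struct io_uring_sqe *sqe;

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
    sqe->flags |= IOSQE_IO_LINK;        /* next SQE is canceled if this one fails */

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_read(sqe, fd, buf2, sizeof(buf2), 4096);
    sqe->flags |= IOSQE_IO_HARDLINK;    /* next SQE runs even if this one fails   */

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_fsync(sqe, fd, 0);    /* last request in the chain: no link flag */

    io_uring_submit(&ring);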
@@ -2119,6 +2136,9 @@ static void tctx_task_work(struct callback_head *cb)
while (1) {
struct io_wq_work_node *node;
+ if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
@@ -2673,7 +2693,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+ __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -3410,6 +3430,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
return -EINVAL;
}
+static bool need_read_all(struct io_kiocb *req)
+{
+ return req->flags & REQ_F_ISREG ||
+ S_ISBLK(file_inode(req->file)->i_mode);
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -3459,12 +3485,13 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (req->flags & REQ_F_NOWAIT)
goto done;
/* some cases will consume bytes even on error returns */
+ iov_iter_reexpand(iter, iter->count + iter->truncated);
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
- (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
+ (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
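need_read_all() narrows the retry-for-a-full-read behaviour to regular files and block
devices: for those, a short read that is not EOF is re-driven by io_uring, whereas
sockets, pipes and character devices legitimately complete short and are returned as-is.
The iov_iter_reexpand() call restores any bytes recorded in iter->truncated before
iov_iter_revert() rewinds the iterator, so the retry starts from a pristine iter even when
the lower layer consumed bytes on an error return. A hedged sketch of what this means from
userspace, using liburing names (ring and fd assumed):

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
    struct io_uring_cqe *cqe;
    char buf[4096];

    io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
    io_uring_submit(&ring);
    io_uring_wait_cqe(&ring, &cqe);

    /*
     * For a regular file or block device (need_read_all() == true), cqe->res is
     * expected to be sizeof(buf) unless EOF or an error intervenes.  For sockets
     * or pipes a short cqe->res is normal and no kernel-side retry happens.
     */
    io_uring_cqe_seen(&ring, cqe);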
@@ -3598,6 +3625,7 @@ done:
} else {
copy_iov:
/* some cases will consume bytes even on error returns */
+ iov_iter_reexpand(iter, iter->count + iter->truncated);
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
return ret ?: -EAGAIN;
@@ -5249,7 +5277,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
}
}
-static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
__must_hold(&req->ctx->completion_lock)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5271,10 +5299,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (flags & IORING_CQE_F_MORE)
ctx->cq_extra++;
- io_commit_cqring(ctx);
return !(flags & IORING_CQE_F_MORE);
}
+static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+ __must_hold(&req->ctx->completion_lock)
+{
+ bool done;
+
+ done = __io_poll_complete(req, mask);
+ io_commit_cqring(req->ctx);
+ return done;
+}
+
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5285,7 +5322,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else {
bool done;
- done = io_poll_complete(req, req->result);
+ done = __io_poll_complete(req, req->result);
if (done) {
io_poll_remove_double(req);
hash_del(&req->hash_node);
@@ -5293,6 +5330,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
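The io_poll_complete() split above separates filling the CQE from publishing it:
__io_poll_complete() only writes the entry, and the caller issues a single
io_commit_cqring() after it has also decided whether the poll is re-armed or torn down, so
the tail update happens once per event, still under completion_lock, before waiters are
signalled. The resulting shape, as a sketch rather than the exact kernel code:

    spin_lock(&ctx->completion_lock);
    done = __io_poll_complete(req, mask);   /* fill the CQE, no tail update yet */
    /* ... remove or re-arm the poll entry depending on 'done' ... */
    io_commit_cqring(ctx);                  /* publish the CQ tail once         */
    spin_unlock(&ctx->completion_lock);
    io_cqring_ev_posted(ctx);               /* wake waiters after unlocking     */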
@@ -6398,6 +6436,11 @@ static bool io_drain_req(struct io_kiocb *req)
int ret;
u32 seq;
+ if (req->flags & REQ_F_FAIL) {
+ io_req_complete_fail_submit(req);
+ return true;
+ }
+
/*
* If we need to drain a request in the middle of a link, drain the
* head request and the next request/link after the current link.
@@ -6914,7 +6957,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
} else if (req->flags & REQ_F_FAIL) {
- io_req_complete_failed(req, req->result);
+ io_req_complete_fail_submit(req);
} else {
int ret = io_req_prep_async(req);
@@ -10498,26 +10541,53 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
void __user *arg)
{
- struct io_uring_task *tctx = current->io_uring;
+ struct io_uring_task *tctx = NULL;
+ struct io_sq_data *sqd = NULL;
__u32 new_count[2];
int i, ret;
- if (!tctx || !tctx->io_wq)
- return -EINVAL;
if (copy_from_user(new_count, arg, sizeof(new_count)))
return -EFAULT;
for (i = 0; i < ARRAY_SIZE(new_count); i++)
if (new_count[i] > INT_MAX)
return -EINVAL;
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ sqd = ctx->sq_data;
+ if (sqd) {
+ /*
+ * Observe the correct sqd->lock -> ctx->uring_lock
+ * ordering. Fine to drop uring_lock here, we hold
+ * a ref to the ctx.
+ */
+ mutex_unlock(&ctx->uring_lock);
+ mutex_lock(&sqd->lock);
+ mutex_lock(&ctx->uring_lock);
+ tctx = sqd->thread->io_uring;
+ }
+ } else {
+ tctx = current->io_uring;
+ }
+
+ ret = -EINVAL;
+ if (!tctx || !tctx->io_wq)
+ goto err;
+
ret = io_wq_max_workers(tctx->io_wq, new_count);
if (ret)
- return ret;
+ goto err;
+
+ if (sqd)
+ mutex_unlock(&sqd->lock);
if (copy_to_user(arg, new_count, sizeof(new_count)))
return -EFAULT;
return 0;
+err:
+ if (sqd)
+ mutex_unlock(&sqd->lock);
+ return ret;
}
static bool io_register_op_must_quiesce(int op)
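With IORING_SETUP_SQPOLL the io-wq pool hangs off the SQPOLL thread's io_uring context
rather than the registering task's, which is why the hunk above resolves tctx from
sqd->thread->io_uring and takes sqd->lock before re-acquiring uring_lock (the comment
spells out the required sqd->lock -> ctx->uring_lock order). From userspace this is the
IORING_REGISTER_IOWQ_MAX_WORKERS opcode; a hedged sketch using the liburing wrapper, with
ring an already-initialised struct io_uring:

    unsigned int counts[2] = { 0, 0 };  /* [0] bounded, [1] unbounded; 0 = leave as-is */
    int ret;

    /* passing zeros just reads back the current limits into counts[] */
    ret = io_uring_register_iowq_max_workers(&ring, counts);

    counts[0] = 8;                      /* cap workers for bounded (e.g. file) work  */
    counts[1] = 4;                      /* cap workers for unbounded (blocking) work */
    ret = io_uring_register_iowq_max_workers(&ring, counts);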
@@ -10795,7 +10865,7 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
- BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
+ BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT);
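The relaxed BUILD_BUG_ON reflects that req->flags is a 32-bit field and __REQ_F_LAST_BIT
sits one past the highest flag bit in use: the old ">=" form already fired when the 32nd
flag was added even though it still fit, while ">" only trips once a 33rd bit would be
needed. Illustratively, with 32 flag bits defined:

    BUILD_BUG_ON(32 >= 8 * sizeof(int));   /* old check: build error, yet bit 31 fits */
    BUILD_BUG_ON(32 >  8 * sizeof(int));   /* new check: passes until flag bit 32     */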