diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 94 |
1 files changed, 82 insertions, 12 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f35b1285865..16fb7436043c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_WRITE] = { .needs_file = 1, + .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, @@ -1481,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) struct io_timeout_data *io = req->async_data; if (hrtimer_try_to_cancel(&io->timer) != -1) { + if (status) + req_set_fail(req); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); @@ -1618,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { - if (wq_has_sleeper(&ctx->cq_wait)) + if (waitqueue_active(&ctx->cq_wait)) wake_up_all(&ctx->cq_wait); } if (io_should_trigger_evfd(ctx)) @@ -1851,6 +1857,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res) io_req_complete_post(req, res, 0); } +static void io_req_complete_fail_submit(struct io_kiocb *req) +{ + /* + * We don't submit, fail them all, for that replace hardlinks with + * normal links. Extra REQ_F_LINK is tolerated. + */ + req->flags &= ~REQ_F_HARDLINK; + req->flags |= REQ_F_LINK; + io_req_complete_failed(req, req->result); +} + /* * Don't initialise the fields below on every allocation, but do that in * advance and keep them valid across allocations. @@ -2119,6 +2136,9 @@ static void tctx_task_work(struct callback_head *cb) while (1) { struct io_wq_work_node *node; + if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) + io_submit_flush_completions(ctx); + spin_lock_irq(&tctx->task_lock); node = tctx->task_list.first; INIT_WQ_LIST(&tctx->task_list); @@ -2673,7 +2693,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, { if (__io_complete_rw_common(req, res)) return; - __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req)); + __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); } static void io_complete_rw(struct kiocb *kiocb, long res, long res2) @@ -3410,6 +3430,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) return -EINVAL; } +static bool need_read_all(struct io_kiocb *req) +{ + return req->flags & REQ_F_ISREG || + S_ISBLK(file_inode(req->file)->i_mode); +} + static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; @@ -3459,12 +3485,13 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) if (req->flags & REQ_F_NOWAIT) goto done; /* some cases will consume bytes even on error returns */ + iov_iter_reexpand(iter, iter->count + iter->truncated); iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; } else if (ret == -EIOCBQUEUED) { goto out_free; } else if (ret <= 0 || ret == io_size || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) { + (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } @@ -3598,6 +3625,7 @@ done: } else { copy_iov: /* some cases will consume bytes even on error returns */ + iov_iter_reexpand(iter, iter->count + iter->truncated); iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); return ret ?: -EAGAIN; @@ -5249,7 +5277,7 @@ static void io_poll_remove_double(struct io_kiocb *req) } } -static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) +static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) __must_hold(&req->ctx->completion_lock) { struct io_ring_ctx *ctx = req->ctx; @@ -5271,10 +5299,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (flags & IORING_CQE_F_MORE) ctx->cq_extra++; - io_commit_cqring(ctx); return !(flags & IORING_CQE_F_MORE); } +static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask) + __must_hold(&req->ctx->completion_lock) +{ + bool done; + + done = __io_poll_complete(req, mask); + io_commit_cqring(req->ctx); + return done; +} + static void io_poll_task_func(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; @@ -5285,7 +5322,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) } else { bool done; - done = io_poll_complete(req, req->result); + done = __io_poll_complete(req, req->result); if (done) { io_poll_remove_double(req); hash_del(&req->hash_node); @@ -5293,6 +5330,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) req->result = 0; add_wait_queue(req->poll.head, &req->poll.wait); } + io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); io_cqring_ev_posted(ctx); @@ -6398,6 +6436,11 @@ static bool io_drain_req(struct io_kiocb *req) int ret; u32 seq; + if (req->flags & REQ_F_FAIL) { + io_req_complete_fail_submit(req); + return true; + } + /* * If we need to drain a request in the middle of a link, drain the * head request and the next request/link after the current link. @@ -6914,7 +6957,7 @@ static inline void io_queue_sqe(struct io_kiocb *req) if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) { __io_queue_sqe(req); } else if (req->flags & REQ_F_FAIL) { - io_req_complete_failed(req, req->result); + io_req_complete_fail_submit(req); } else { int ret = io_req_prep_async(req); @@ -10498,26 +10541,53 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) { - struct io_uring_task *tctx = current->io_uring; + struct io_uring_task *tctx = NULL; + struct io_sq_data *sqd = NULL; __u32 new_count[2]; int i, ret; - if (!tctx || !tctx->io_wq) - return -EINVAL; if (copy_from_user(new_count, arg, sizeof(new_count))) return -EFAULT; for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i] > INT_MAX) return -EINVAL; + if (ctx->flags & IORING_SETUP_SQPOLL) { + sqd = ctx->sq_data; + if (sqd) { + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold + * a ref to the ctx. + */ + mutex_unlock(&ctx->uring_lock); + mutex_lock(&sqd->lock); + mutex_lock(&ctx->uring_lock); + tctx = sqd->thread->io_uring; + } + } else { + tctx = current->io_uring; + } + + ret = -EINVAL; + if (!tctx || !tctx->io_wq) + goto err; + ret = io_wq_max_workers(tctx->io_wq, new_count); if (ret) - return ret; + goto err; + + if (sqd) + mutex_unlock(&sqd->lock); if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; return 0; +err: + if (sqd) + mutex_unlock(&sqd->lock); + return ret; } static bool io_register_op_must_quiesce(int op) @@ -10795,7 +10865,7 @@ static int __init io_uring_init(void) BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); - BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int)); + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); |