io_uring: allow defer completion for aux posted cqes

Multishot ops cannot use the compl_reqs list as the request must stay in the poll list, but that means they need to run each completion without benefiting from batching. Here introduce batching infrastructure for only small (ie 16 byte) CQEs. This restriction is ok because there are no use cases posting 32 byte CQEs. In the ring keep a batch of up to 16 posted results, and flush in the same way as compl_reqs. 16 was chosen through experimentation on a microbenchmark ([1]), as well as trying not to increase the size of the ring too much. This increases the size to 1472 bytes from 1216. [1]: https://github.com/DylanZA/liburing/commit/9ac66b36bcf4477bfafeff1c5f107896b7ae31cf Run with $ make -j && ./benchmark/reg.b -s 1 -t 2000 -r 10 Gives results: baseline 8309 k/s 8 18807 k/s 16 19338 k/s 32 20134 k/s Suggested-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Dylan Yudaken <dylany@meta.com> Link: https://lore.kernel.org/r/20221124093559.3780686-5-dylany@meta.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Dylan Yudaken <dylany@meta.com> 2022-11-24 10:35:54 +0100
committer: Jens Axboe <axboe@kernel.dk> 2022-11-25 14:10:04 +0100
commit: 931147ddfa6e9ffd814272f1c0370c4740acbe17 (patch)
tree: b4eb11cbb2cbe60eb47828100a64b62da31ea25d /io_uring
parent: io_uring: defer all io_req_complete_failed (diff)
download: linux-931147ddfa6e9ffd814272f1c0370c4740acbe17.tar.xz
linux-931147ddfa6e9ffd814272f1c0370c4740acbe17.zip
1 files changed, 24 insertions, 3 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4888fe834920..28635e3e578a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -167,7 +167,8 @@ EXPORT_SYMBOL(io_uring_get_socket);
 
 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
 {
-	if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+	if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
+	    ctx->submit_state.cqes_count)
 		__io_submit_flush_completions(ctx);
 }
 
@@ -802,6 +803,21 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
 	return false;
 }
 
+static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+	unsigned int i;
+
+	lockdep_assert_held(&ctx->uring_lock);
+	for (i = 0; i < state->cqes_count; i++) {
+		struct io_uring_cqe *cqe = &state->cqes[i];
+
+		io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags, true);
+	}
+	state->cqes_count = 0;
+}
+
 bool io_post_aux_cqe(struct io_ring_ctx *ctx,
 		     u64 user_data, s32 res, u32 cflags,
 		     bool allow_overflow)
@@ -1323,6 +1339,9 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	io_cq_lock(ctx);
+	/* must come first to preserve CQE ordering in failure cases */
+	if (state->cqes_count)
+		__io_flush_post_cqes(ctx);
 	wq_list_for_each(node, prev, &state->compl_reqs) {
 		struct io_kiocb *req = container_of(node, struct io_kiocb,
 					    comp_list);
@@ -1332,8 +1351,10 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	}
 	io_cq_unlock_post(ctx);
 
-	io_free_batch_list(ctx, state->compl_reqs.first);
-	INIT_WQ_LIST(&state->compl_reqs);
+	if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
+		io_free_batch_list(ctx, state->compl_reqs.first);
+		INIT_WQ_LIST(&state->compl_reqs);
+	}
 }
 
 /*
author	Dylan Yudaken <dylany@meta.com>	2022-11-24 10:35:54 +0100
committer	Jens Axboe <axboe@kernel.dk>	2022-11-25 14:10:04 +0100
commit	931147ddfa6e9ffd814272f1c0370c4740acbe17 (patch)
tree	b4eb11cbb2cbe60eb47828100a64b62da31ea25d /io_uring
parent	io_uring: defer all io_req_complete_failed (diff)
download	linux-931147ddfa6e9ffd814272f1c0370c4740acbe17.tar.xz linux-931147ddfa6e9ffd814272f1c0370c4740acbe17.zip