From 47c122e35d7e43b14129ceb9ed3a7e67599978fa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 6 Oct 2021 06:34:11 -0600 Subject: block: pre-allocate requests if plug is started and is a batch The caller typically has a good (or even exact) idea of how many requests it needs to submit. We can make the request/tag allocation a lot more efficient if we just allocate N requests/tags upfront when we queue the first bio from the batch. Provide a new plug start helper that allows the caller to specify how many IOs are expected. This sets plug->nr_ios, and we can use that for smarter request allocation. The plug provides a holding spot for requests, and request allocation will check it before calling into the normal request allocation path. When blk_finish_plug() is called, check if there are unused requests and free them. This should not happen in normal operations. The exception is if we get merging, then we may be left with requests that need freeing when done. This raises the per-core performance on my setup from ~5.8M to ~6.1M IOPS. Signed-off-by: Jens Axboe --- block/blk-mq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block/blk-mq.h') diff --git a/block/blk-mq.h b/block/blk-mq.h index 171e8cdcff54..5da970bb8865 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -125,6 +125,7 @@ extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); +void blk_mq_free_plug_rqs(struct blk_plug *plug); void blk_mq_release(struct request_queue *q); @@ -152,6 +153,10 @@ struct blk_mq_alloc_data { unsigned int shallow_depth; unsigned int cmd_flags; + /* allocate multiple requests/tags in one go */ + unsigned int nr_tags; + struct request **cached_rq; + /* input & output parameter */ struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; -- cgit v1.2.3