blk-mq-sched: improve dispatching from sw queue

SCSI devices use host-wide tagset, and the shared driver tag space is often quite big. However, there is also a queue depth for each lun( .cmd_per_lun), which is often small, for example, on both lpfc and qla2xxx, .cmd_per_lun is just 3. So lots of requests may stay in sw queue, and we always flush all belonging to same hw queue and dispatch them all to driver. Unfortunately it is easy to cause queue busy because of the small .cmd_per_lun. Once these requests are flushed out, they have to stay in hctx->dispatch, and no bio merge can happen on these requests, and sequential IO performance is harmed. This patch introduces blk_mq_dequeue_from_ctx for dequeuing a request from a sw queue, so that we can dispatch them in scheduler's way. We can then avoid dequeueing too many requests from sw queue, since we don't flush ->dispatch completely. This patch improves dispatching from sw queue by using the .get_budget and .put_budget callbacks. Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Ming Lei <ming.lei@redhat.com> 2017-10-14 11:22:30 +0200
committer: Jens Axboe <axboe@kernel.dk> 2017-11-01 15:20:02 +0100
commit: b347689ffbca745ac457ee27400ce1affd571c6f (patch)
tree: 44edb581ccc8c51915efa957300f9814b7044dc2 /block/blk-mq.c
parent: blk-mq: introduce .get_budget and .put_budget in blk_mq_ops (diff)
download: linux-b347689ffbca745ac457ee27400ce1affd571c6f.tar.xz
linux-b347689ffbca745ac457ee27400ce1affd571c6f.zip
1 files changed, 39 insertions, 0 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dcb467369999..097ca3ece716 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -914,6 +914,45 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 }
 EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
+struct dispatch_rq_data {
+	struct blk_mq_hw_ctx *hctx;
+	struct request *rq;
+};
+
+static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
+		void *data)
+{
+	struct dispatch_rq_data *dispatch_data = data;
+	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
+	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+
+	spin_lock(&ctx->lock);
+	if (unlikely(!list_empty(&ctx->rq_list))) {
+		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+		list_del_init(&dispatch_data->rq->queuelist);
+		if (list_empty(&ctx->rq_list))
+			sbitmap_clear_bit(sb, bitnr);
+	}
+	spin_unlock(&ctx->lock);
+
+	return !dispatch_data->rq;
+}
+
+struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
+					struct blk_mq_ctx *start)
+{
+	unsigned off = start ? start->index_hw : 0;
+	struct dispatch_rq_data data = {
+		.hctx = hctx,
+		.rq   = NULL,
+	};
+
+	__sbitmap_for_each_set(&hctx->ctx_map, off,
+			       dispatch_rq_from_ctx, &data);
+
+	return data.rq;
+}
+
 static inline unsigned int queued_to_index(unsigned int queued)
 {
 	if (!queued)
author	Ming Lei <ming.lei@redhat.com>	2017-10-14 11:22:30 +0200
committer	Jens Axboe <axboe@kernel.dk>	2017-11-01 15:20:02 +0100
commit	b347689ffbca745ac457ee27400ce1affd571c6f (patch)
tree	44edb581ccc8c51915efa957300f9814b7044dc2 /block/blk-mq.c
parent	blk-mq: introduce .get_budget and .put_budget in blk_mq_ops (diff)
download	linux-b347689ffbca745ac457ee27400ce1affd571c6f.tar.xz linux-b347689ffbca745ac457ee27400ce1affd571c6f.zip