summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2018-05-24 19:00:39 +0200
committerJens Axboe <axboe@kernel.dk>2018-05-24 19:00:39 +0200
commite6fc46498784e799d3eb95d83079180e413c4e7d (patch)
tree63876dd6517d7d170d90d71441392ebdcdff3ea6 /block
parentbdi: Move cgroup bdi_writeback to a dedicated low concurrency workqueue (diff)
downloadlinux-e6fc46498784e799d3eb95d83079180e413c4e7d.tar.xz
linux-e6fc46498784e799d3eb95d83079180e413c4e7d.zip
blk-mq: avoid starving tag allocation after allocating process migrates
When the allocation process is scheduled back and the mapped hw queue is changed, fake one extra wake up on previous queue for compensating wake up miss, so other allocations on the previous queue won't be starved. This patch fixes one request allocation hang issue, which can be triggered easily in case of very low nr_request. The race is as follows: 1) 2 hw queues, nr_requests are 2, and wake_batch is one 2) there are 3 waiters on hw queue 0 3) two in-flight requests in hw queue 0 are completed, and only two waiters of 3 are waken up because of wake_batch, but both the two waiters can be scheduled to another CPU and cause to switch to hw queue 1 4) then the 3rd waiter will wait for ever, since no in-flight request is in hw queue 0 any more. 5) this patch fixes it by the fake wakeup when waiter is scheduled to another hw queue Cc: <stable@vger.kernel.org> Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Modified commit message to make it clearer, and make it apply on top of the 4.18 branch. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-mq-tag.c12
1 files changed, 12 insertions, 0 deletions
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 336dde07b230..a4e58fc28a06 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do {
+ struct sbitmap_queue *bt_prev;
+
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (data->ctx)
blk_mq_put_ctx(data->ctx);
+ bt_prev = bt;
io_schedule();
data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
bt = &tags->bitmap_tags;
finish_wait(&ws->wait, &wait);
+
+ /*
+ * If destination hw queue is changed, fake wake up on
+ * previous queue for compensating the wake up miss, so
+ * other allocations on previous queue won't be starved.
+ */
+ if (bt != bt_prev)
+ sbitmap_queue_wake_up(bt_prev);
+
ws = bt_wait_ptr(bt, data->hctx);
} while (1);