diff options
author | Keith Busch <kbusch@kernel.org> | 2021-09-09 17:54:52 +0200 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2021-09-14 10:32:05 +0200 |
commit | 70f437fb4395ad4d1d16fab9a1ad9fbc9fc0579b (patch) | |
tree | 08033cf92886114b9b9cbffc632c2cc0a2d6fddd /drivers | |
parent | nvme-rdma: destroy cm id before destroy qp to avoid use after free (diff) | |
download | linux-70f437fb4395ad4d1d16fab9a1ad9fbc9fc0579b.tar.xz linux-70f437fb4395ad4d1d16fab9a1ad9fbc9fc0579b.zip |
nvme-tcp: fix io_work priority inversion
Dispatching requests inline with the .queue_rq() call may block while
holding the send_mutex. If the tcp io_work also happens to schedule, it
may see the req_list is non-empty, leaving "pending" true and remaining
in TASK_RUNNING. Since io_work is of higher scheduling priority, the
.queue_rq task may not get a chance to run, blocking forward progress
and leading to io timeouts.
Instead of checking for pending requests within io_work, let the queueing
restart io_work outside the send_mutex lock if there is more work to be
done.
Fixes: a0fdd1418007f ("nvme-tcp: rerun io_work if req_list is not empty")
Reported-by: Samuel Jones <sjones@kalrayinc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/nvme/host/tcp.c | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e2ab12f3f51c..e4249b7dc056 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
 	} while (ret > 0);
 }
 
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+	return !list_empty(&queue->send_list) ||
+		!llist_empty(&queue->req_list) || queue->more_requests;
+}
+
 static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 		bool sync, bool last)
 {
@@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 		nvme_tcp_send_all(queue);
 		queue->more_requests = false;
 		mutex_unlock(&queue->send_mutex);
-	} else if (last) {
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 	}
+
+	if (last && nvme_tcp_queue_more(queue))
+		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 }
 
 static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -906,12 +913,6 @@ done:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
-{
-	return !list_empty(&queue->send_list) ||
-		!llist_empty(&queue->req_list) || queue->more_requests;
-}
-
 static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 {
 	queue->request = NULL;
@@ -1145,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 				pending = true;
 			else if (unlikely(result < 0))
 				break;
-		} else
-			pending = !llist_empty(&queue->req_list);
+		}
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)