From d6a2b9535d1e52bea269c138614c4801469d10e1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Nov 2018 16:39:47 -0700 Subject: nvme: Free ctrl device name on init failure Free the kobject name that was allocated for the controller device on failure rather than its parent. Fixes: d22524a4782a9 ("nvme: switch controller refcounting to use struct device") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 559d567693b8..5afda6fe5ae9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3585,7 +3585,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_free_name: - kfree_const(dev->kobj.name); + kfree_const(ctrl->device->kobj.name); out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: -- cgit v1.2.3 From dfa74422d604abc2e16763db12646583219806e4 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 26 Nov 2018 12:01:30 -0500 Subject: nvme-fc: initialize nvme_req(rq)->ctrl after calling __nvme_fc_init_request() __nvme_fc_init_request() invokes memset() on the nvme_fcp_op_w_sgl structure, which NULLed-out the nvme_req(req)->ctrl field previously set by nvme_fc_init_request(). This apparently was not referenced until commit faf4a44fff ("nvme: support traffic based keep-alive") which now results in a crash in nvme_complete_rq(): [ 8386.897130] RIP: 0010:panic+0x220/0x26c [ 8386.901406] Code: 83 3d 6f ee 72 01 00 74 05 e8 e8 54 02 00 48 c7 c6 40 fd 5b b4 48 c7 c7 d8 8d c6 b3 31e [ 8386.922359] RSP: 0018:ffff99650019fc40 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 8386.930804] RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000006 [ 8386.938764] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff8e325f8168b0 [ 8386.946725] RBP: ffff99650019fcb0 R08: 0000000000000000 R09: 00000000000004f8 [ 8386.954687] R10: 0000000000000000 R11: ffff99650019f9b8 R12: ffffffffb3c55f3c [ 8386.962648] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 [ 8386.970613] oops_end+0xd1/0xe0 [ 8386.974116] no_context+0x1b2/0x3c0 [ 8386.978006] do_page_fault+0x32/0x140 [ 8386.982090] page_fault+0x1e/0x30 [ 8386.985786] RIP: 0010:nvme_complete_rq+0x65/0x1d0 [nvme_core] [ 8386.992195] Code: 41 bc 03 00 00 00 74 16 0f 86 c3 00 00 00 66 3d 83 00 41 bc 06 00 00 00 0f 85 e7 00 000 [ 8387.013147] RSP: 0018:ffff99650019fe18 EFLAGS: 00010246 [ 8387.018973] RAX: 0000000000000000 RBX: ffff8e322ae51280 RCX: 0000000000000001 [ 8387.026935] RDX: 0000000000000400 RSI: 0000000000000001 RDI: ffff8e322ae51280 [ 8387.034897] RBP: ffff8e322ae51280 R08: 0000000000000000 R09: ffffffffb2f0b890 [ 8387.042859] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 [ 8387.050821] R13: 0000000000000100 R14: 0000000000000004 R15: ffff8e2b0446d990 [ 8387.058782] ? swiotlb_unmap_page+0x40/0x40 [ 8387.063448] nvme_fc_complete_rq+0x2d/0x70 [nvme_fc] [ 8387.068986] blk_done_softirq+0xa1/0xd0 [ 8387.073264] __do_softirq+0xd6/0x2a9 [ 8387.077251] run_ksoftirqd+0x26/0x40 [ 8387.081238] smpboot_thread_fn+0x10e/0x160 [ 8387.085807] kthread+0xf8/0x130 [ 8387.089309] ? sort_range+0x20/0x20 [ 8387.093198] ? kthread_stop+0x110/0x110 [ 8387.097475] ret_from_fork+0x35/0x40 [ 8387.101462] ---[ end trace 7106b0adf5e422f8 ]--- Fixes: faf4a44fff ("nvme: support traffic based keep-alive") Signed-off-by: Ewan D. Milne Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 54032c466636..feb86b59170e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1752,12 +1752,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; int res; - nvme_req(rq)->ctrl = &ctrl->ctrl; res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); if (res) return res; op->op.fcp_req.first_sgl = &op->sgl[0]; op->op.fcp_req.private = &op->priv[0]; + nvme_req(rq)->ctrl = &ctrl->ctrl; return res; } -- cgit v1.2.3 From 751a0cc0cd3a0d51e6aaf6fd3b8bd31f4ecfaf3e Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Fri, 23 Nov 2018 16:58:10 +0100 Subject: nvme-pci: fix surprise removal When a PCIe NVMe device is not present, nvme_dev_remove_admin() calls blk_cleanup_queue() on the admin queue, which frees the hctx for that queue. Moments later, on the same path nvme_kill_queues() calls blk_mq_unquiesce_queue() on admin queue and tries to access hctx of it, which leads to following OOPS: Oops: 0000 [#1] SMP PTI RIP: 0010:sbitmap_any_bit_set+0xb/0x40 Call Trace: blk_mq_run_hw_queue+0xd5/0x150 blk_mq_run_hw_queues+0x3a/0x50 nvme_kill_queues+0x26/0x50 nvme_remove_namespaces+0xb2/0xc0 nvme_remove+0x60/0x140 pci_device_remove+0x3b/0xb0 Fixes: cb4bfda62afa2 ("nvme-pci: fix hot removal during error handling") Signed-off-by: Igor Konopko Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5afda6fe5ae9..bb39b91253c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3607,7 +3607,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); /* Forcibly unquiesce queues to avoid blocking dispatch */ - if (ctrl->admin_q) + if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q)) blk_mq_unquiesce_queue(ctrl->admin_q); list_for_each_entry(ns, &ctrl->namespaces, list) -- cgit v1.2.3 From 14a1336e6fff47dd1028b484d6c802105c58e2ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Nov 2018 16:57:54 +0100 Subject: nvme: warn when finding multi-port subsystems without multipathing enabled Without CONFIG_NVME_MULTIPATH enabled a multi-port subsystem might show up as invididual devices and cause problems, warn about it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/nvme.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cee79cb388af..081cbdcce880 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -531,6 +531,9 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { + if (ctrl->subsys->cmic & (1 << 3)) + dev_warn(ctrl->device, +"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) -- cgit v1.2.3 From f6c8e432cb0479255322c5d0335b9f1699a0270c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 21 Nov 2018 15:17:37 -0800 Subject: nvme: flush namespace scanning work just before removing namespaces nvme_stop_ctrl can be called also for reset flow and there is no need to flush the scan_work as namespaces are not being removed. This can cause deadlock in rdma, fc and loop drivers since nvme_stop_ctrl barriers before controller teardown (and specifically I/O cancellation of the scan_work itself) takes place, but the scan_work will be blocked anyways so there is no need to flush it. Instead, move scan_work flush to nvme_remove_namespaces() where it really needs to flush. Reported-by: Ming Lei Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb39b91253c2..3cf1b773158e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3314,6 +3314,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) struct nvme_ns *ns, *next; LIST_HEAD(ns_list); + /* prevent racing with ns scanning */ + flush_work(&ctrl->scan_work); + /* * The dead states indicates the controller was not gracefully * disconnected. In that case, we won't be able to flush any data while @@ -3476,7 +3479,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_mpath_stop(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); - flush_work(&ctrl->scan_work); cancel_work_sync(&ctrl->fw_act_work); if (ctrl->ops->stop_ctrl) ctrl->ops->stop_ctrl(ctrl); -- cgit v1.2.3 From 6344d02dc8f886b6bbcd922ae1a17e4a41500f2d Mon Sep 17 00:00:00 2001 From: Prabhath Sajeepa Date: Wed, 28 Nov 2018 11:11:29 -0700 Subject: nvme-rdma: fix double freeing of async event data Some error paths in configuration of admin queue free data buffer associated with async request SQE without resetting the data buffer pointer to NULL, This buffer is also freed up again if the controller is shutdown or reset. Signed-off-by: Prabhath Sajeepa Reviewed-by: Roland Dreier Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d181cafedc58..ab6ec7295bf9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -184,6 +184,7 @@ static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe, qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir); if (ib_dma_mapping_error(ibdev, qe->dma)) { kfree(qe->data); + qe->data = NULL; return -ENOMEM; } @@ -823,6 +824,7 @@ out_free_tagset: out_free_async_qe: nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); + ctrl->async_event_sqe.data = NULL; out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; -- cgit v1.2.3 From 86880d646122240596d6719b642fee3213239994 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 27 Nov 2018 17:04:44 -0800 Subject: nvme: validate controller state before rescheduling keep alive Delete operations are seeing NULL pointer references in call_timer_fn. Tracking these back, the timer appears to be the keep alive timer. nvme_keep_alive_work() which is tied to the timer that is cancelled by nvme_stop_keep_alive(), simply starts the keep alive io but doesn't wait for it's completion. So nvme_stop_keep_alive() only stops a timer when it's pending. When a keep alive is in flight, there is no timer running and the nvme_stop_keep_alive() will have no affect on the keep alive io. Thus, if the io completes successfully, the keep alive timer will be rescheduled. In the failure case, delete is called, the controller state is changed, the nvme_stop_keep_alive() is called while the io is outstanding, and the delete path continues on. The keep alive happens to successfully complete before the delete paths mark it as aborted as part of the queue termination, so the timer is restarted. The delete paths then tear down the controller, and later on the timer code fires and the timer entry is now corrupt. Fix by validating the controller state before rescheduling the keep alive. Testing with the fix has confirmed the condition above was hit. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3cf1b773158e..962012135b62 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -831,6 +831,8 @@ static int nvme_submit_user_cmd(struct request_queue *q, static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; + unsigned long flags; + bool startka = false; blk_mq_free_request(rq); @@ -841,7 +843,13 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) return; } - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->state == NVME_CTRL_LIVE || + ctrl->state == NVME_CTRL_CONNECTING) + startka = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (startka) + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } static int nvme_keep_alive(struct nvme_ctrl *ctrl) -- cgit v1.2.3 From d7dcdf9d4e15189ecfda24cc87339a3425448d5c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 5 Dec 2018 16:54:57 +0000 Subject: nvmet-rdma: fix response use after free nvmet_rdma_release_rsp() may free the response before using it at error flow. Fixes: 8407879 ("nvmet-rdma: fix possible bogus dereference under heavy load") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 3f7971d3706d..583086dd9cb9 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -529,6 +529,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; nvmet_rdma_release_rsp(rsp); @@ -536,7 +537,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) wc->status != IB_WC_WR_FLUSH_ERR)) { pr_err("SEND for CQE 0x%p failed with status %s (%d).\n", wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status); - nvmet_rdma_error_comp(rsp->queue); + nvmet_rdma_error_comp(queue); } } -- cgit v1.2.3