From d6a2b9535d1e52bea269c138614c4801469d10e1 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 26 Nov 2018 16:39:47 -0700
Subject: nvme: Free ctrl device name on init failure

Free the kobject name that was allocated for the controller device on
failure rather than its parent.

Fixes: d22524a4782a9 ("nvme: switch controller refcounting to use struct device")
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 559d567693b8..5afda6fe5ae9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3585,7 +3585,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
 	return 0;
 out_free_name:
-	kfree_const(dev->kobj.name);
+	kfree_const(ctrl->device->kobj.name);
 out_release_instance:
 	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 out:
-- 
cgit v1.2.3


From dfa74422d604abc2e16763db12646583219806e4 Mon Sep 17 00:00:00 2001
From: "Ewan D. Milne" <emilne@redhat.com>
Date: Mon, 26 Nov 2018 12:01:30 -0500
Subject: nvme-fc: initialize nvme_req(rq)->ctrl after calling
 __nvme_fc_init_request()

__nvme_fc_init_request() invokes memset() on the nvme_fcp_op_w_sgl structure, which
NULLed-out the nvme_req(req)->ctrl field previously set by nvme_fc_init_request().
This apparently was not referenced until commit faf4a44fff ("nvme: support traffic
based keep-alive") which now results in a crash in nvme_complete_rq():

[ 8386.897130] RIP: 0010:panic+0x220/0x26c
[ 8386.901406] Code: 83 3d 6f ee 72 01 00 74 05 e8 e8 54 02 00 48 c7 c6 40 fd 5b b4 48 c7 c7 d8 8d c6 b3 31e
[ 8386.922359] RSP: 0018:ffff99650019fc40 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
[ 8386.930804] RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000006
[ 8386.938764] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff8e325f8168b0
[ 8386.946725] RBP: ffff99650019fcb0 R08: 0000000000000000 R09: 00000000000004f8
[ 8386.954687] R10: 0000000000000000 R11: ffff99650019f9b8 R12: ffffffffb3c55f3c
[ 8386.962648] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001
[ 8386.970613]  oops_end+0xd1/0xe0
[ 8386.974116]  no_context+0x1b2/0x3c0
[ 8386.978006]  do_page_fault+0x32/0x140
[ 8386.982090]  page_fault+0x1e/0x30
[ 8386.985786] RIP: 0010:nvme_complete_rq+0x65/0x1d0 [nvme_core]
[ 8386.992195] Code: 41 bc 03 00 00 00 74 16 0f 86 c3 00 00 00 66 3d 83 00 41 bc 06 00 00 00 0f 85 e7 00 000
[ 8387.013147] RSP: 0018:ffff99650019fe18 EFLAGS: 00010246
[ 8387.018973] RAX: 0000000000000000 RBX: ffff8e322ae51280 RCX: 0000000000000001
[ 8387.026935] RDX: 0000000000000400 RSI: 0000000000000001 RDI: ffff8e322ae51280
[ 8387.034897] RBP: ffff8e322ae51280 R08: 0000000000000000 R09: ffffffffb2f0b890
[ 8387.042859] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
[ 8387.050821] R13: 0000000000000100 R14: 0000000000000004 R15: ffff8e2b0446d990
[ 8387.058782]  ? swiotlb_unmap_page+0x40/0x40
[ 8387.063448]  nvme_fc_complete_rq+0x2d/0x70 [nvme_fc]
[ 8387.068986]  blk_done_softirq+0xa1/0xd0
[ 8387.073264]  __do_softirq+0xd6/0x2a9
[ 8387.077251]  run_ksoftirqd+0x26/0x40
[ 8387.081238]  smpboot_thread_fn+0x10e/0x160
[ 8387.085807]  kthread+0xf8/0x130
[ 8387.089309]  ? sort_range+0x20/0x20
[ 8387.093198]  ? kthread_stop+0x110/0x110
[ 8387.097475]  ret_from_fork+0x35/0x40
[ 8387.101462] ---[ end trace 7106b0adf5e422f8 ]---

Fixes: faf4a44fff ("nvme: support traffic based keep-alive")
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 54032c466636..feb86b59170e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1752,12 +1752,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 	int res;
 
-	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
 	if (res)
 		return res;
 	op->op.fcp_req.first_sgl = &op->sgl[0];
 	op->op.fcp_req.private = &op->priv[0];
+	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	return res;
 }
 
-- 
cgit v1.2.3


From 751a0cc0cd3a0d51e6aaf6fd3b8bd31f4ecfaf3e Mon Sep 17 00:00:00 2001
From: Igor Konopko <igor.j.konopko@intel.com>
Date: Fri, 23 Nov 2018 16:58:10 +0100
Subject: nvme-pci: fix surprise removal

When a PCIe NVMe device is not present, nvme_dev_remove_admin() calls
blk_cleanup_queue() on the admin queue, which frees the hctx for that
queue.  Moments later, on the same path nvme_kill_queues() calls
blk_mq_unquiesce_queue() on admin queue and tries to access hctx of it,
which leads to following OOPS:

Oops: 0000 [#1] SMP PTI
RIP: 0010:sbitmap_any_bit_set+0xb/0x40
Call Trace:
 blk_mq_run_hw_queue+0xd5/0x150
 blk_mq_run_hw_queues+0x3a/0x50
 nvme_kill_queues+0x26/0x50
 nvme_remove_namespaces+0xb2/0xc0
 nvme_remove+0x60/0x140
 pci_device_remove+0x3b/0xb0

Fixes: cb4bfda62afa2 ("nvme-pci: fix hot removal during error handling")
Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5afda6fe5ae9..bb39b91253c2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3607,7 +3607,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	down_read(&ctrl->namespaces_rwsem);
 
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
-	if (ctrl->admin_q)
+	if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
 		blk_mq_unquiesce_queue(ctrl->admin_q);
 
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-- 
cgit v1.2.3


From 14a1336e6fff47dd1028b484d6c802105c58e2ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 20 Nov 2018 16:57:54 +0100
Subject: nvme: warn when finding multi-port subsystems without multipathing
 enabled

Without CONFIG_NVME_MULTIPATH enabled a multi-port subsystem might
show up as invididual devices and cause problems, warn about it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/nvme.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cee79cb388af..081cbdcce880 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -531,6 +531,9 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
 		struct nvme_id_ctrl *id)
 {
+	if (ctrl->subsys->cmic & (1 << 3))
+		dev_warn(ctrl->device,
+"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
 	return 0;
 }
 static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
-- 
cgit v1.2.3


From f6c8e432cb0479255322c5d0335b9f1699a0270c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 21 Nov 2018 15:17:37 -0800
Subject: nvme: flush namespace scanning work just before removing namespaces

nvme_stop_ctrl can be called also for reset flow and there is no need to
flush the scan_work as namespaces are not being removed. This can cause
deadlock in rdma, fc and loop drivers since nvme_stop_ctrl barriers
before controller teardown (and specifically I/O cancellation of the
scan_work itself) takes place, but the scan_work will be blocked anyways
so there is no need to flush it.

Instead, move scan_work flush to nvme_remove_namespaces() where it really
needs to flush.

Reported-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed by: James Smart <jsmart2021@gmail.com>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bb39b91253c2..3cf1b773158e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3314,6 +3314,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(ns_list);
 
+	/* prevent racing with ns scanning */
+	flush_work(&ctrl->scan_work);
+
 	/*
 	 * The dead states indicates the controller was not gracefully
 	 * disconnected. In that case, we won't be able to flush any data while
@@ -3476,7 +3479,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
-	flush_work(&ctrl->scan_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 	if (ctrl->ops->stop_ctrl)
 		ctrl->ops->stop_ctrl(ctrl);
-- 
cgit v1.2.3


From 6344d02dc8f886b6bbcd922ae1a17e4a41500f2d Mon Sep 17 00:00:00 2001
From: Prabhath Sajeepa <psajeepa@purestorage.com>
Date: Wed, 28 Nov 2018 11:11:29 -0700
Subject: nvme-rdma: fix double freeing of async event data

Some error paths in configuration of admin queue free data buffer
associated with async request SQE without resetting the data buffer
pointer to NULL, This buffer is also freed up again if the controller
is shutdown or reset.

Signed-off-by: Prabhath Sajeepa <psajeepa@purestorage.com>
Reviewed-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d181cafedc58..ab6ec7295bf9 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -184,6 +184,7 @@ static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
 	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
 	if (ib_dma_mapping_error(ibdev, qe->dma)) {
 		kfree(qe->data);
+		qe->data = NULL;
 		return -ENOMEM;
 	}
 
@@ -823,6 +824,7 @@ out_free_tagset:
 out_free_async_qe:
 	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
 		sizeof(struct nvme_command), DMA_TO_DEVICE);
+	ctrl->async_event_sqe.data = NULL;
 out_free_queue:
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 	return error;
-- 
cgit v1.2.3


From 86880d646122240596d6719b642fee3213239994 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 27 Nov 2018 17:04:44 -0800
Subject: nvme: validate controller state before rescheduling keep alive

Delete operations are seeing NULL pointer references in call_timer_fn.
Tracking these back, the timer appears to be the keep alive timer.

nvme_keep_alive_work() which is tied to the timer that is cancelled
by nvme_stop_keep_alive(), simply starts the keep alive io but doesn't
wait for it's completion. So nvme_stop_keep_alive() only stops a timer
when it's pending. When a keep alive is in flight, there is no timer
running and the nvme_stop_keep_alive() will have no affect on the keep
alive io. Thus, if the io completes successfully, the keep alive timer
will be rescheduled.   In the failure case, delete is called, the
controller state is changed, the nvme_stop_keep_alive() is called while
the io is outstanding, and the delete path continues on. The keep
alive happens to successfully complete before the delete paths mark it
as aborted as part of the queue termination, so the timer is restarted.
The delete paths then tear down the controller, and later on the timer
code fires and the timer entry is now corrupt.

Fix by validating the controller state before rescheduling the keep
alive. Testing with the fix has confirmed the condition above was hit.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cf1b773158e..962012135b62 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -831,6 +831,8 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvme_ctrl *ctrl = rq->end_io_data;
+	unsigned long flags;
+	bool startka = false;
 
 	blk_mq_free_request(rq);
 
@@ -841,7 +843,13 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 		return;
 	}
 
-	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (ctrl->state == NVME_CTRL_LIVE ||
+	    ctrl->state == NVME_CTRL_CONNECTING)
+		startka = true;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+	if (startka)
+		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static int nvme_keep_alive(struct nvme_ctrl *ctrl)
-- 
cgit v1.2.3


From d7dcdf9d4e15189ecfda24cc87339a3425448d5c Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@mellanox.com>
Date: Wed, 5 Dec 2018 16:54:57 +0000
Subject: nvmet-rdma: fix response use after free

nvmet_rdma_release_rsp() may free the response before using it at error
flow.

Fixes: 8407879 ("nvmet-rdma: fix possible bogus dereference under heavy load")
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3f7971d3706d..583086dd9cb9 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -529,6 +529,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
+	struct nvmet_rdma_queue *queue = cq->cq_context;
 
 	nvmet_rdma_release_rsp(rsp);
 
@@ -536,7 +537,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		     wc->status != IB_WC_WR_FLUSH_ERR)) {
 		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
 			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
-		nvmet_rdma_error_comp(rsp->queue);
+		nvmet_rdma_error_comp(queue);
 	}
 }
 
-- 
cgit v1.2.3