Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block

Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the whole on blk/scsi-mq queue quescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fix the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tupple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enable us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). - blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-11-15 00:32:19 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-11-15 00:32:19 +0100
commit: e2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
tree: b97a90170c45211bcc437761653aa8016c34afcd /drivers/nvme/host/fc.c
parent: Merge tag 'configfs-for-4.15' of git://git.infradead.org/users/hch/configfs (diff)
parent: nvme: fix visibility of "uuid" ns attribute (diff)
download: linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.xz
linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip
1 files changed, 592 insertions, 201 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index be49d0f79381..7ab0be55c7d0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -30,27 +30,19 @@
 /* *************************** Data Structures/Defines ****************** */
 
 
-/*
- * We handle AEN commands ourselves and don't even let the
- * block layer know about them.
- */
-#define NVME_FC_NR_AEN_COMMANDS	1
-#define NVME_FC_AQ_BLKMQ_DEPTH	\
-	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
-#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
-
 enum nvme_fc_queue_flags {
 	NVME_FC_Q_CONNECTED = (1 << 0),
 };
 
 #define NVMEFC_QUEUE_DELAY	3		/* ms units */
 
+#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
+
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
 	struct device		*dev;
 	struct blk_mq_hw_ctx	*hctx;
 	void			*lldd_handle;
-	int			queue_size;
 	size_t			cmnd_capsule_len;
 	u32			qnum;
 	u32			rqcnt;
@@ -124,6 +116,7 @@ struct nvme_fc_lport {
 	struct device			*dev;	/* physical device for dma */
 	struct nvme_fc_port_template	*ops;
 	struct kref			ref;
+	atomic_t                        act_rport_cnt;
 } __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
 
 struct nvme_fc_rport {
@@ -136,6 +129,8 @@ struct nvme_fc_rport {
 	struct nvme_fc_lport		*lport;
 	spinlock_t			lock;
 	struct kref			ref;
+	atomic_t                        act_ctrl_cnt;
+	unsigned long			dev_loss_end;
 } __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
 
 enum nvme_fcctrl_flags {
@@ -150,6 +145,7 @@ struct nvme_fc_ctrl {
 	struct nvme_fc_rport	*rport;
 	u32			cnum;
 
+	bool			assoc_active;
 	u64			association_id;
 
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
@@ -157,7 +153,6 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct blk_mq_tag_set	tag_set;
 
-	struct work_struct	delete_work;
 	struct delayed_work	connect_work;
 
 	struct kref		ref;
@@ -165,7 +160,7 @@ struct nvme_fc_ctrl {
 	u32			iocnt;
 	wait_queue_head_t	ioabort_wait;
 
-	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
+	struct nvme_fc_fcp_op	aen_ops[NVME_NR_AEN_COMMANDS];
 
 	struct nvme_ctrl	ctrl;
 };
@@ -213,10 +208,16 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
 
 
+/*
+ * These items are short-term. They will eventually be moved into
+ * a generic FC class. See comments in module init.
+ */
+static struct class *fc_class;
+static struct device *fc_udev_device;
+
 
 /* *********************** FC-NVME Port Management ************************ */
 
-static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
 			struct nvme_fc_queue *, unsigned int);
 
@@ -235,9 +236,6 @@ nvme_fc_free_lport(struct kref *ref)
 	list_del(&lport->port_list);
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
-	/* let the LLDD know we've finished tearing it down */
-	lport->ops->localport_delete(&lport->localport);
-
 	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
 	ida_destroy(&lport->endp_cnt);
 
@@ -260,7 +258,9 @@ nvme_fc_lport_get(struct nvme_fc_lport *lport)
 
 
 static struct nvme_fc_lport *
-nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
+nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
+			struct nvme_fc_port_template *ops,
+			struct device *dev)
 {
 	struct nvme_fc_lport *lport;
 	unsigned long flags;
@@ -272,6 +272,11 @@ nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
 		    lport->localport.port_name != pinfo->port_name)
 			continue;
 
+		if (lport->dev != dev) {
+			lport = ERR_PTR(-EXDEV);
+			goto out_done;
+		}
+
 		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
 			lport = ERR_PTR(-EEXIST);
 			goto out_done;
@@ -288,6 +293,7 @@ nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
 
 		/* resume the lport */
 
+		lport->ops = ops;
 		lport->localport.port_role = pinfo->port_role;
 		lport->localport.port_id = pinfo->port_id;
 		lport->localport.port_state = FC_OBJSTATE_ONLINE;
@@ -348,7 +354,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 	 * expired, we can simply re-enable the localport. Remoteports
 	 * and controller reconnections should resume naturally.
 	 */
-	newrec = nvme_fc_attach_to_unreg_lport(pinfo);
+	newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);
 
 	/* found an lport, but something about its state is bad */
 	if (IS_ERR(newrec)) {
@@ -384,6 +390,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 	INIT_LIST_HEAD(&newrec->port_list);
 	INIT_LIST_HEAD(&newrec->endp_list);
 	kref_init(&newrec->ref);
+	atomic_set(&newrec->act_rport_cnt, 0);
 	newrec->ops = template;
 	newrec->dev = dev;
 	ida_init(&newrec->endp_cnt);
@@ -446,12 +453,177 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
 
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
+	if (atomic_read(&lport->act_rport_cnt) == 0)
+		lport->ops->localport_delete(&lport->localport);
+
 	nvme_fc_lport_put(lport);
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
 
+/*
+ * TRADDR strings, per FC-NVME are fixed format:
+ *   "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
+ * udev event will only differ by prefix of what field is
+ * being specified:
+ *    "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
+ *  19 + 43 + null_fudge = 64 characters
+ */
+#define FCNVME_TRADDR_LENGTH		64
+
+static void
+nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
+		struct nvme_fc_rport *rport)
+{
+	char hostaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_HOST_TRADDR=...*/
+	char tgtaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_TRADDR=...*/
+	char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
+
+	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
+		return;
+
+	snprintf(hostaddr, sizeof(hostaddr),
+		"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
+		lport->localport.node_name, lport->localport.port_name);
+	snprintf(tgtaddr, sizeof(tgtaddr),
+		"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
+		rport->remoteport.node_name, rport->remoteport.port_name);
+	kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
+}
+
+static void
+nvme_fc_free_rport(struct kref *ref)
+{
+	struct nvme_fc_rport *rport =
+		container_of(ref, struct nvme_fc_rport, ref);
+	struct nvme_fc_lport *lport =
+			localport_to_lport(rport->remoteport.localport);
+	unsigned long flags;
+
+	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
+	WARN_ON(!list_empty(&rport->ctrl_list));
+
+	/* remove from lport list */
+	spin_lock_irqsave(&nvme_fc_lock, flags);
+	list_del(&rport->endp_list);
+	spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
+
+	kfree(rport);
+
+	nvme_fc_lport_put(lport);
+}
+
+static void
+nvme_fc_rport_put(struct nvme_fc_rport *rport)
+{
+	kref_put(&rport->ref, nvme_fc_free_rport);
+}
+
+static int
+nvme_fc_rport_get(struct nvme_fc_rport *rport)
+{
+	return kref_get_unless_zero(&rport->ref);
+}
+
+static void
+nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
+{
+	switch (ctrl->ctrl.state) {
+	case NVME_CTRL_NEW:
+	case NVME_CTRL_RECONNECTING:
+		/*
+		 * As all reconnects were suppressed, schedule a
+		 * connect.
+		 */
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: connectivity re-established. "
+			"Attempting reconnect\n", ctrl->cnum);
+
+		queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
+		break;
+
+	case NVME_CTRL_RESETTING:
+		/*
+		 * Controller is already in the process of terminating the
+		 * association. No need to do anything further. The reconnect
+		 * step will naturally occur after the reset completes.
+		 */
+		break;
+
+	default:
+		/* no action to take - let it delete */
+		break;
+	}
+}
+
+static struct nvme_fc_rport *
+nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
+				struct nvme_fc_port_info *pinfo)
+{
+	struct nvme_fc_rport *rport;
+	struct nvme_fc_ctrl *ctrl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&nvme_fc_lock, flags);
+
+	list_for_each_entry(rport, &lport->endp_list, endp_list) {
+		if (rport->remoteport.node_name != pinfo->node_name ||
+		    rport->remoteport.port_name != pinfo->port_name)
+			continue;
+
+		if (!nvme_fc_rport_get(rport)) {
+			rport = ERR_PTR(-ENOLCK);
+			goto out_done;
+		}
+
+		spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+		spin_lock_irqsave(&rport->lock, flags);
+
+		/* has it been unregistered */
+		if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
+			/* means lldd called us twice */
+			spin_unlock_irqrestore(&rport->lock, flags);
+			nvme_fc_rport_put(rport);
+			return ERR_PTR(-ESTALE);
+		}
+
+		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
+		rport->dev_loss_end = 0;
+
+		/*
+		 * kick off a reconnect attempt on all associations to the
+		 * remote port. A successful reconnects will resume i/o.
+		 */
+		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+			nvme_fc_resume_controller(ctrl);
+
+		spin_unlock_irqrestore(&rport->lock, flags);
+
+		return rport;
+	}
+
+	rport = NULL;
+
+out_done:
+	spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+	return rport;
+}
+
+static inline void
+__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
+			struct nvme_fc_port_info *pinfo)
+{
+	if (pinfo->dev_loss_tmo)
+		rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
+	else
+		rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
+}
+
 /**
  * nvme_fc_register_remoteport - transport entry point called by an
  *                              LLDD to register the existence of a NVME
@@ -478,28 +650,52 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 	unsigned long flags;
 	int ret, idx;
 
+	if (!nvme_fc_lport_get(lport)) {
+		ret = -ESHUTDOWN;
+		goto out_reghost_failed;
+	}
+
+	/*
+	 * look to see if there is already a remoteport that is waiting
+	 * for a reconnect (within dev_loss_tmo) with the same WWN's.
+	 * If so, transition to it and reconnect.
+	 */
+	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
+
+	/* found an rport, but something about its state is bad */
+	if (IS_ERR(newrec)) {
+		ret = PTR_ERR(newrec);
+		goto out_lport_put;
+
+	/* found existing rport, which was resumed */
+	} else if (newrec) {
+		nvme_fc_lport_put(lport);
+		__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
+		nvme_fc_signal_discovery_scan(lport, newrec);
+		*portptr = &newrec->remoteport;
+		return 0;
+	}
+
+	/* nothing found - allocate a new remoteport struct */
+
 	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
 			 GFP_KERNEL);
 	if (!newrec) {
 		ret = -ENOMEM;
-		goto out_reghost_failed;
-	}
-
-	if (!nvme_fc_lport_get(lport)) {
-		ret = -ESHUTDOWN;
-		goto out_kfree_rport;
+		goto out_lport_put;
 	}
 
 	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
-		goto out_lport_put;
+		goto out_kfree_rport;
 	}
 
 	INIT_LIST_HEAD(&newrec->endp_list);
 	INIT_LIST_HEAD(&newrec->ctrl_list);
 	INIT_LIST_HEAD(&newrec->ls_req_list);
 	kref_init(&newrec->ref);
+	atomic_set(&newrec->act_ctrl_cnt, 0);
 	spin_lock_init(&newrec->lock);
 	newrec->remoteport.localport = &lport->localport;
 	newrec->dev = lport->dev;
@@ -511,63 +707,27 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 	newrec->remoteport.port_id = pinfo->port_id;
 	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
 	newrec->remoteport.port_num = idx;
+	__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
 
 	spin_lock_irqsave(&nvme_fc_lock, flags);
 	list_add_tail(&newrec->endp_list, &lport->endp_list);
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
+	nvme_fc_signal_discovery_scan(lport, newrec);
+
 	*portptr = &newrec->remoteport;
 	return 0;
 
-out_lport_put:
-	nvme_fc_lport_put(lport);
 out_kfree_rport:
 	kfree(newrec);
+out_lport_put:
+	nvme_fc_lport_put(lport);
 out_reghost_failed:
 	*portptr = NULL;
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
 
-static void
-nvme_fc_free_rport(struct kref *ref)
-{
-	struct nvme_fc_rport *rport =
-		container_of(ref, struct nvme_fc_rport, ref);
-	struct nvme_fc_lport *lport =
-			localport_to_lport(rport->remoteport.localport);
-	unsigned long flags;
-
-	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
-	WARN_ON(!list_empty(&rport->ctrl_list));
-
-	/* remove from lport list */
-	spin_lock_irqsave(&nvme_fc_lock, flags);
-	list_del(&rport->endp_list);
-	spin_unlock_irqrestore(&nvme_fc_lock, flags);
-
-	/* let the LLDD know we've finished tearing it down */
-	lport->ops->remoteport_delete(&rport->remoteport);
-
-	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
-
-	kfree(rport);
-
-	nvme_fc_lport_put(lport);
-}
-
-static void
-nvme_fc_rport_put(struct nvme_fc_rport *rport)
-{
-	kref_put(&rport->ref, nvme_fc_free_rport);
-}
-
-static int
-nvme_fc_rport_get(struct nvme_fc_rport *rport)
-{
-	return kref_get_unless_zero(&rport->ref);
-}
-
 static int
 nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
 {
@@ -592,6 +752,58 @@ restart:
 	return 0;
 }
 
+static void
+nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
+{
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: controller connectivity lost. Awaiting "
+		"Reconnect", ctrl->cnum);
+
+	switch (ctrl->ctrl.state) {
+	case NVME_CTRL_NEW:
+	case NVME_CTRL_LIVE:
+		/*
+		 * Schedule a controller reset. The reset will terminate the
+		 * association and schedule the reconnect timer.  Reconnects
+		 * will be attempted until either the ctlr_loss_tmo
+		 * (max_retries * connect_delay) expires or the remoteport's
+		 * dev_loss_tmo expires.
+		 */
+		if (nvme_reset_ctrl(&ctrl->ctrl)) {
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: Couldn't schedule reset. "
+				"Deleting controller.\n",
+				ctrl->cnum);
+			nvme_delete_ctrl(&ctrl->ctrl);
+		}
+		break;
+
+	case NVME_CTRL_RECONNECTING:
+		/*
+		 * The association has already been terminated and the
+		 * controller is attempting reconnects.  No need to do anything
+		 * futher.  Reconnects will be attempted until either the
+		 * ctlr_loss_tmo (max_retries * connect_delay) expires or the
+		 * remoteport's dev_loss_tmo expires.
+		 */
+		break;
+
+	case NVME_CTRL_RESETTING:
+		/*
+		 * Controller is already in the process of terminating the
+		 * association.  No need to do anything further. The reconnect
+		 * step will kick in naturally after the association is
+		 * terminated.
+		 */
+		break;
+
+	case NVME_CTRL_DELETING:
+	default:
+		/* no action to take - let it delete */
+		break;
+	}
+}
+
 /**
  * nvme_fc_unregister_remoteport - transport entry point called by an
  *                              LLDD to deregister/remove a previously
@@ -621,19 +833,78 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
 	}
 	portptr->port_state = FC_OBJSTATE_DELETED;
 
-	/* tear down all associations to the remote port */
-	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
-		__nvme_fc_del_ctrl(ctrl);
+	rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
+
+	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+		/* if dev_loss_tmo==0, dev loss is immediate */
+		if (!portptr->dev_loss_tmo) {
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: controller connectivity lost. "
+				"Deleting controller.\n",
+				ctrl->cnum);
+			nvme_delete_ctrl(&ctrl->ctrl);
+		} else
+			nvme_fc_ctrl_connectivity_loss(ctrl);
+	}
 
 	spin_unlock_irqrestore(&rport->lock, flags);
 
 	nvme_fc_abort_lsops(rport);
 
+	if (atomic_read(&rport->act_ctrl_cnt) == 0)
+		rport->lport->ops->remoteport_delete(portptr);
+
+	/*
+	 * release the reference, which will allow, if all controllers
+	 * go away, which should only occur after dev_loss_tmo occurs,
+	 * for the rport to be torn down.
+	 */
 	nvme_fc_rport_put(rport);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
 
+/**
+ * nvme_fc_rescan_remoteport - transport entry point called by an
+ *                              LLDD to request a nvme device rescan.
+ * @remoteport: pointer to the (registered) remote port that is to be
+ *              rescanned.
+ *
+ * Returns: N/A
+ */
+void
+nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
+{
+	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
+
+	nvme_fc_signal_discovery_scan(rport->lport, rport);
+}
+EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
+
+int
+nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
+			u32 dev_loss_tmo)
+{
+	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rport->lock, flags);
+
+	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
+		spin_unlock_irqrestore(&rport->lock, flags);
+		return -EINVAL;
+	}
+
+	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
+	rport->remoteport.dev_loss_tmo = dev_loss_tmo;
+
+	spin_unlock_irqrestore(&rport->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
+
 
 /* *********************** FC-NVME DMA Handling **************************** */
 
@@ -723,7 +994,6 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 		dma_unmap_sg(dev, sg, nents, dir);
 }
 
-
 /* *********************** FC-NVME LS Handling **************************** */
 
 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
@@ -1266,7 +1536,7 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 	unsigned long flags;
 	int i, ret;
 
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
 		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
 			continue;
 
@@ -1331,7 +1601,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_command *sqe = &op->cmd_iu.sqe;
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
-	bool complete_rq, terminate_assoc = true;
+	bool terminate_assoc = true;
 
 	/*
 	 * WARNING:
@@ -1373,8 +1643,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
 				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
-	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
-		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
+			op->flags & FCOP_FLAGS_TERMIO)
+		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 	else if (freq->status)
 		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 
@@ -1438,23 +1709,27 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-		complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+		__nvme_fc_fcpop_chk_teardowns(ctrl, op);
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
 		goto check_error;
 	}
 
-	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
-	if (!complete_rq) {
-		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
-			if (blk_queue_dying(rq->q))
-				status |= cpu_to_le16(NVME_SC_DNR << 1);
-		}
-		nvme_end_request(rq, status, result);
-	} else
+	/*
+	 * Force failures of commands if we're killing the controller
+	 * or have an error on a command used to create an new association
+	 */
+	if (status &&
+	    (blk_queue_dying(rq->q) ||
+	     ctrl->ctrl.state == NVME_CTRL_NEW ||
+	     ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+		status |= cpu_to_le16(NVME_SC_DNR << 1);
+
+	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
 		__nvme_fc_final_op_cleanup(rq);
+	else
+		nvme_end_request(rq, status, result);
 
 check_error:
 	if (terminate_assoc)
@@ -1531,7 +1806,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
 	int i, ret;
 
 	aen_op = ctrl->aen_ops;
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
 		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
 						GFP_KERNEL);
 		if (!private)
@@ -1541,7 +1816,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
 		sqe = &cmdiu->sqe;
 		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
 				aen_op, (struct request *)NULL,
-				(AEN_CMDID_BASE + i));
+				(NVME_AQ_BLK_MQ_DEPTH + i));
 		if (ret) {
 			kfree(private);
 			return ret;
@@ -1554,7 +1829,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
 		memset(sqe, 0, sizeof(*sqe));
 		sqe->common.opcode = nvme_admin_async_event;
 		/* Note: core layer may overwrite the sqe.command_id value */
-		sqe->common.command_id = AEN_CMDID_BASE + i;
+		sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
 	}
 	return 0;
 }
@@ -1566,7 +1841,7 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
 	int i;
 
 	aen_op = ctrl->aen_ops;
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
 		if (!aen_op->fcp_req.private)
 			continue;
 
@@ -1610,7 +1885,7 @@ nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 }
 
 static void
-nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
+nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
 {
 	struct nvme_fc_queue *queue;
 
@@ -1626,8 +1901,6 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
 	else
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
-	queue->queue_size = queue_size;
-
 	/*
 	 * Considered whether we should allocate buffers for all SQEs
 	 * and CQEs and dma map them - mapping their respective entries
@@ -1751,7 +2024,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 	int i;
 
 	for (i = 1; i < ctrl->ctrl.queue_count; i++)
-		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
+		nvme_fc_init_queue(ctrl, i);
 }
 
 static void
@@ -1825,13 +2098,6 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
-		dev_err(ctrl->ctrl.device,
-			"NVME-FC{%d}: error_recovery: Couldn't change state "
-			"to RECONNECTING\n", ctrl->cnum);
-		return;
-	}
-
 	nvme_reset_ctrl(&ctrl->ctrl);
 }
 
@@ -1842,13 +2108,14 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 	int ret;
 
-	if (reserved)
+	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
+			atomic_read(&op->state) == FCPOP_STATE_ABORTED)
 		return BLK_EH_RESET_TIMER;
 
 	ret = __nvme_fc_abort_op(ctrl, op);
 	if (ret)
-		/* io wasn't active to abort consider it done */
-		return BLK_EH_HANDLED;
+		/* io wasn't active to abort */
+		return BLK_EH_NOT_HANDLED;
 
 	/*
 	 * we can't individually ABTS an io without affecting the queue,
@@ -1859,7 +2126,12 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 	 */
 	nvme_fc_error_recovery(ctrl, "io timeout error");
 
-	return BLK_EH_HANDLED;
+	/*
+	 * the io abort has been initiated. Have the reset timer
+	 * restarted and the abort completion will complete the io
+	 * shortly. Avoids a synchronous wait while the abort finishes.
+	 */
+	return BLK_EH_RESET_TIMER;
 }
 
 static int
@@ -2110,7 +2382,7 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 }
 
 static void
-nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+nvme_fc_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
 	struct nvme_fc_fcp_op *aen_op;
@@ -2118,9 +2390,6 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	bool terminating = false;
 	blk_status_t ret;
 
-	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
-		return;
-
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (ctrl->flags & FCCTRL_TERMIO)
 		terminating = true;
@@ -2129,13 +2398,13 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	if (terminating)
 		return;
 
-	aen_op = &ctrl->aen_ops[aer_idx];
+	aen_op = &ctrl->aen_ops[0];
 
 	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
 					NVMEFC_FCP_NODATA);
 	if (ret)
 		dev_err(ctrl->ctrl.device,
-			"failed async event work [%d]\n", aer_idx);
+			"failed async event work\n");
 }
 
 static void
@@ -2337,7 +2606,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 
 	nvme_fc_init_io_queues(ctrl);
 
-	ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
+	ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
 	if (ret)
 		goto out_free_io_queues;
 
@@ -2360,6 +2629,61 @@ out_free_io_queues:
 	return ret;
 }
 
+static void
+nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport)
+{
+	struct nvme_fc_lport *lport = rport->lport;
+
+	atomic_inc(&lport->act_rport_cnt);
+}
+
+static void
+nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport)
+{
+	struct nvme_fc_lport *lport = rport->lport;
+	u32 cnt;
+
+	cnt = atomic_dec_return(&lport->act_rport_cnt);
+	if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED)
+		lport->ops->localport_delete(&lport->localport);
+}
+
+static int
+nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvme_fc_rport *rport = ctrl->rport;
+	u32 cnt;
+
+	if (ctrl->assoc_active)
+		return 1;
+
+	ctrl->assoc_active = true;
+	cnt = atomic_inc_return(&rport->act_ctrl_cnt);
+	if (cnt == 1)
+		nvme_fc_rport_active_on_lport(rport);
+
+	return 0;
+}
+
+static int
+nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvme_fc_rport *rport = ctrl->rport;
+	struct nvme_fc_lport *lport = rport->lport;
+	u32 cnt;
+
+	/* ctrl->assoc_active=false will be set independently */
+
+	cnt = atomic_dec_return(&rport->act_ctrl_cnt);
+	if (cnt == 0) {
+		if (rport->remoteport.port_state == FC_OBJSTATE_DELETED)
+			lport->ops->remoteport_delete(&rport->remoteport);
+		nvme_fc_rport_inactive_on_lport(rport);
+	}
+
+	return 0;
+}
+
 /*
  * This routine restarts the controller on the host side, and
  * on the link side, recreates the controller association.
@@ -2368,26 +2692,31 @@ static int
 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
-	u32 segs;
 	int ret;
 	bool changed;
 
 	++ctrl->ctrl.nr_reconnects;
 
+	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+		return -ENODEV;
+
+	if (nvme_fc_ctlr_active_on_rport(ctrl))
+		return -ENOTUNIQ;
+
 	/*
 	 * Create the admin queue
 	 */
 
-	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+	nvme_fc_init_queue(ctrl, 0);
 
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-				NVME_FC_AQ_BLKMQ_DEPTH);
+				NVME_AQ_BLK_MQ_DEPTH);
 	if (ret)
 		goto out_free_queue;
 
 	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-				NVME_FC_AQ_BLKMQ_DEPTH,
-				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
+				NVME_AQ_BLK_MQ_DEPTH,
+				(NVME_AQ_BLK_MQ_DEPTH / 4));
 	if (ret)
 		goto out_delete_hw_queue;
 
@@ -2419,9 +2748,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_disconnect_admin_queue;
 
-	segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
-			ctrl->lport->ops->max_sgl_segments);
-	ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
+	ctrl->ctrl.max_hw_sectors =
+		(ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
 
 	ret = nvme_init_identify(&ctrl->ctrl);
 	if (ret)
@@ -2465,11 +2793,11 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	nvme_start_ctrl(&ctrl->ctrl);
+	if (changed)
+		nvme_start_ctrl(&ctrl->ctrl);
 
 	return 0;	/* Success */
 
@@ -2482,6 +2810,8 @@ out_delete_hw_queue:
 	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
 out_free_queue:
 	nvme_fc_free_queue(&ctrl->queues[0]);
+	ctrl->assoc_active = false;
+	nvme_fc_ctlr_inactive_on_rport(ctrl);
 
 	return ret;
 }
@@ -2497,6 +2827,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
 	unsigned long flags;
 
+	if (!ctrl->assoc_active)
+		return;
+	ctrl->assoc_active = false;
+
 	spin_lock_irqsave(&ctrl->lock, flags);
 	ctrl->flags |= FCCTRL_TERMIO;
 	ctrl->iocnt = 0;
@@ -2537,7 +2871,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	if (ctrl->ctrl.state != NVME_CTRL_NEW)
+		blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
@@ -2568,102 +2903,64 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
 	nvme_fc_free_queue(&ctrl->queues[0]);
+
+	nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
 
 static void
-nvme_fc_delete_ctrl_work(struct work_struct *work)
+nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 {
-	struct nvme_fc_ctrl *ctrl =
-		container_of(work, struct nvme_fc_ctrl, delete_work);
+	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
-	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
-	nvme_stop_ctrl(&ctrl->ctrl);
-	nvme_remove_namespaces(&ctrl->ctrl);
 	/*
 	 * kill the association on the link side.  this will block
 	 * waiting for io to terminate
 	 */
 	nvme_fc_delete_association(ctrl);
-
-	/*
-	 * tear down the controller
-	 * After the last reference on the nvme ctrl is removed,
-	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
-	 * invoked. From there, the transport will tear down it's
-	 * logical queues and association.
-	 */
-	nvme_uninit_ctrl(&ctrl->ctrl);
-
-	nvme_put_ctrl(&ctrl->ctrl);
-}
-
-static bool
-__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
-{
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return true;
-
-	if (!queue_work(nvme_wq, &ctrl->delete_work))
-		return true;
-
-	return false;
-}
-
-static int
-__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
-{
-	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
-}
-
-/*
- * Request from nvme core layer to delete the controller
- */
-static int
-nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-	int ret;
-
-	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
-		return -EBUSY;
-
-	ret = __nvme_fc_del_ctrl(ctrl);
-
-	if (!ret)
-		flush_workqueue(nvme_wq);
-
-	nvme_put_ctrl(&ctrl->ctrl);
-
-	return ret;
 }
 
 static void
 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 {
-	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
-		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
-			ctrl->ctrl.state == NVME_CTRL_LIVE);
-		return;
-	}
+	struct nvme_fc_rport *rport = ctrl->rport;
+	struct nvme_fc_remote_port *portptr = &rport->remoteport;
+	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
+	bool recon = true;
 
-	dev_info(ctrl->ctrl.device,
-		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
-		ctrl->cnum, status);
+	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+		return;
 
-	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+	if (portptr->port_state == FC_OBJSTATE_ONLINE)
 		dev_info(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
-		queue_delayed_work(nvme_wq, &ctrl->connect_work,
-				ctrl->ctrl.opts->reconnect_delay * HZ);
+			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+			ctrl->cnum, status);
+	else if (time_after_eq(jiffies, rport->dev_loss_end))
+		recon = false;
+
+	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
+		if (portptr->port_state == FC_OBJSTATE_ONLINE)
+			dev_info(ctrl->ctrl.device,
+				"NVME-FC{%d}: Reconnect attempt in %ld "
+				"seconds\n",
+				ctrl->cnum, recon_delay / HZ);
+		else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
+			recon_delay = rport->dev_loss_end - jiffies;
+
+		queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
 	} else {
-		dev_warn(ctrl->ctrl.device,
+		if (portptr->port_state == FC_OBJSTATE_ONLINE)
+			dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: Max reconnect attempts (%d) "
 				"reached. Removing controller\n",
 				ctrl->cnum, ctrl->ctrl.nr_reconnects);
-		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
+		else
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: dev_loss_tmo (%d) expired "
+				"while waiting for remoteport connectivity. "
+				"Removing controller\n", ctrl->cnum,
+				portptr->dev_loss_tmo);
+		WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
 	}
 }
 
@@ -2675,15 +2972,28 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
+
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
-	ret = nvme_fc_create_association(ctrl);
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: error_recovery: Couldn't change state "
+			"to RECONNECTING\n", ctrl->cnum);
+		return;
+	}
+
+	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
+		ret = nvme_fc_create_association(ctrl);
+	else
+		ret = -ENOTCONN;
+
 	if (ret)
 		nvme_fc_reconnect_or_delete(ctrl, ret);
 	else
 		dev_info(ctrl->ctrl.device,
-			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
+			"NVME-FC{%d}: controller reset complete\n",
+			ctrl->cnum);
 }
 
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
@@ -2695,8 +3005,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.reg_write32		= nvmf_reg_write32,
 	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
 	.submit_async_event	= nvme_fc_submit_async_event,
-	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
+	.delete_ctrl		= nvme_fc_delete_ctrl,
 	.get_address		= nvmf_get_address,
+	.reinit_request		= nvme_fc_reinit_request,
 };
 
 static void
@@ -2728,6 +3039,33 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 };
 
 
+/*
+ * Fails a controller request if it matches an existing controller
+ * (association) with the same tuple:
+ * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
+ *
+ * The ports don't need to be compared as they are intrinsically
+ * already matched by the port pointers supplied.
+ */
+static bool
+nvme_fc_existing_controller(struct nvme_fc_rport *rport,
+		struct nvmf_ctrl_options *opts)
+{
+	struct nvme_fc_ctrl *ctrl;
+	unsigned long flags;
+	bool found = false;
+
+	spin_lock_irqsave(&rport->lock, flags);
+	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+		found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
+		if (found)
+			break;
+	}
+	spin_unlock_irqrestore(&rport->lock, flags);
+
+	return found;
+}
+
 static struct nvme_ctrl *
 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
@@ -2742,6 +3080,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_fail;
 	}
 
+	if (!opts->duplicate_connect &&
+	    nvme_fc_existing_controller(rport, opts)) {
+		ret = -EALREADY;
+		goto out_fail;
+	}
+
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
 		ret = -ENOMEM;
@@ -2760,12 +3104,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->rport = rport;
 	ctrl->dev = lport->dev;
 	ctrl->cnum = idx;
+	ctrl->assoc_active = false;
 	init_waitqueue_head(&ctrl->ioabort_wait);
 
 	get_device(ctrl->dev);
 	kref_init(&ctrl->ref);
 
-	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
 	spin_lock_init(&ctrl->lock);
@@ -2787,7 +3131,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
+	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
@@ -2797,6 +3141,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->admin_tag_set.driver_data = ctrl;
 	ctrl->admin_tag_set.nr_hw_queues = 1;
 	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+	ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
 	if (ret)
@@ -2878,7 +3223,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		return ERR_PTR(ret);
 	}
 
-	kref_get(&ctrl->ctrl.kref);
+	nvme_get_ctrl(&ctrl->ctrl);
 
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@ -3026,7 +3371,50 @@ static struct nvmf_transport_ops nvme_fc_transport = {
 
 static int __init nvme_fc_init_module(void)
 {
-	return nvmf_register_transport(&nvme_fc_transport);
+	int ret;
+
+	/*
+	 * NOTE:
+	 * It is expected that in the future the kernel will combine
+	 * the FC-isms that are currently under scsi and now being
+	 * added to by NVME into a new standalone FC class. The SCSI
+	 * and NVME protocols and their devices would be under this
+	 * new FC class.
+	 *
+	 * As we need something to post FC-specific udev events to,
+	 * specifically for nvme probe events, start by creating the
+	 * new device class.  When the new standalone FC class is
+	 * put in place, this code will move to a more generic
+	 * location for the class.
+	 */
+	fc_class = class_create(THIS_MODULE, "fc");
+	if (IS_ERR(fc_class)) {
+		pr_err("couldn't register class fc\n");
+		return PTR_ERR(fc_class);
+	}
+
+	/*
+	 * Create a device for the FC-centric udev events
+	 */
+	fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
+				"fc_udev_device");
+	if (IS_ERR(fc_udev_device)) {
+		pr_err("couldn't create fc_udev device!\n");
+		ret = PTR_ERR(fc_udev_device);
+		goto out_destroy_class;
+	}
+
+	ret = nvmf_register_transport(&nvme_fc_transport);
+	if (ret)
+		goto out_destroy_device;
+
+	return 0;
+
+out_destroy_device:
+	device_destroy(fc_class, MKDEV(0, 0));
+out_destroy_class:
+	class_destroy(fc_class);
+	return ret;
 }
 
 static void __exit nvme_fc_exit_module(void)
@@ -3039,6 +3427,9 @@ static void __exit nvme_fc_exit_module(void)
 
 	ida_destroy(&nvme_fc_local_port_cnt);
 	ida_destroy(&nvme_fc_ctrl_cnt);
+
+	device_destroy(fc_class, MKDEV(0, 0));
+	class_destroy(fc_class);
 }
 
 module_init(nvme_fc_init_module);
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-15 00:32:19 +0100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-15 00:32:19 +0100
commit	e2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
tree	b97a90170c45211bcc437761653aa8016c34afcd /drivers/nvme/host/fc.c
parent	Merge tag 'configfs-for-4.15' of git://git.infradead.org/users/hch/configfs (diff)
parent	nvme: fix visibility of "uuid" ns attribute (diff)
download	linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.xz linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip