From 91921e016a2199e7afe5933c94bd9f723d946598 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 25 Jun 2014 16:39:59 +0200
Subject: scsi: use dev_printk variants where possible

Using dev_printk variants prefixes the logging message with
the originating device, which makes debugging easier.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/scsi_lib.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3f50dfcb3227..093e63207dc6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -95,8 +95,8 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	struct request_queue *q = device->request_queue;
 	unsigned long flags;
 
-	SCSI_LOG_MLQUEUE(1,
-		 printk("Inserting command %p into mlqueue\n", cmd));
+	SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
+		"Inserting command %p into mlqueue\n", cmd));
 
 	/*
 	 * Set the appropriate busy bit for the device/host.
@@ -750,9 +750,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	SCSI_LOG_HLCOMPLETE(1, printk("%u sectors total, "
-				      "%d bytes done.\n",
-				      blk_rq_sectors(req), good_bytes));
+	SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd,
+		"%u sectors total, %d bytes done.\n",
+		blk_rq_sectors(req), good_bytes));
 
 	/*
 	 * Recovered errors need reporting, but they're always treated
@@ -1351,8 +1351,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 		 */
 		if (--shost->host_blocked == 0) {
 			SCSI_LOG_MLQUEUE(3,
-				printk("scsi%d unblocking host at zero depth\n",
-					shost->host_no));
+				shost_printk(KERN_INFO, shost,
+					     "unblocking host at zero depth\n"));
 		} else {
 			return 0;
 		}
@@ -1461,7 +1461,7 @@ static void scsi_softirq_done(struct request *rq)
 			    wait_for/HZ);
 		disposition = SUCCESS;
 	}
-			
+
 	scsi_log_completion(cmd, disposition);
 
 	switch (disposition) {
@@ -1509,7 +1509,7 @@ static void scsi_request_fn(struct request_queue *q)
 		int rtn;
 		/*
 		 * get next queueable request.  We do this early to make sure
-		 * that the request is fully prepared even if we cannot 
+		 * that the request is fully prepared even if we cannot
 		 * accept it.
 		 */
 		req = blk_peek_request(q);
@@ -2147,9 +2147,9 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
 	return 0;
 
  illegal:
-	SCSI_LOG_ERROR_RECOVERY(1, 
+	SCSI_LOG_ERROR_RECOVERY(1,
 				sdev_printk(KERN_ERR, sdev,
-					    "Illegal state transition %s->%s\n",
+					    "Illegal state transition %s->%s",
 					    scsi_device_state_name(oldstate),
 					    scsi_device_state_name(state))
 				);
-- 
cgit v1.2.3


From f1bea55d5afa371c311b61946c58b2cd4e78fb2d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Apr 2014 12:26:54 +0200
Subject: scsi: remove various exports that were only used by scsi_tgt

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi.c      | 10 +++-------
 drivers/scsi/scsi_lib.c  |  6 ++----
 include/scsi/scsi_cmnd.h |  3 ---
 include/scsi/scsi_host.h |  2 --
 4 files changed, 5 insertions(+), 16 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index ce5b4e5e7a5a..a76d76df4209 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -235,7 +235,8 @@ fail:
  * Description: allocate a struct scsi_cmd from host's slab, recycling from the
  *              host's free_list if necessary.
  */
-struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
+static struct scsi_cmnd *
+__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
 {
 	struct scsi_cmnd *cmd = scsi_host_alloc_command(shost, gfp_mask);
 
@@ -265,7 +266,6 @@ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
 
 	return cmd;
 }
-EXPORT_SYMBOL_GPL(__scsi_get_command);
 
 /**
  * scsi_get_command - Allocate and setup a scsi command block
@@ -291,14 +291,13 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
 	cmd->jiffies_at_alloc = jiffies;
 	return cmd;
 }
-EXPORT_SYMBOL(scsi_get_command);
 
 /**
  * __scsi_put_command - Free a struct scsi_cmnd
  * @shost: dev->host
  * @cmd: Command to free
  */
-void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
+static void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 {
 	unsigned long flags;
 
@@ -314,7 +313,6 @@ void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 	if (likely(cmd != NULL))
 		scsi_host_free_command(shost, cmd);
 }
-EXPORT_SYMBOL(__scsi_put_command);
 
 /**
  * scsi_put_command - Free a scsi command block
@@ -338,7 +336,6 @@ void scsi_put_command(struct scsi_cmnd *cmd)
 
 	__scsi_put_command(cmd->device->host, cmd);
 }
-EXPORT_SYMBOL(scsi_put_command);
 
 static struct scsi_host_cmd_pool *
 scsi_find_host_cmd_pool(struct Scsi_Host *shost)
@@ -801,7 +798,6 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 	}
 	scsi_io_completion(cmd, good_bytes);
 }
-EXPORT_SYMBOL(scsi_finish_command);
 
 /**
  * scsi_adjust_queue_depth - Let low level drivers change a device's queue depth
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 093e63207dc6..db7b7f2c14b8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -579,7 +579,7 @@ static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
  *		the __init_io() function.  Primarily this would involve
  *		the scatter-gather table.
  */
-void scsi_release_buffers(struct scsi_cmnd *cmd)
+static void scsi_release_buffers(struct scsi_cmnd *cmd)
 {
 	if (cmd->sdb.table.nents)
 		scsi_free_sgtable(&cmd->sdb);
@@ -589,7 +589,6 @@ void scsi_release_buffers(struct scsi_cmnd *cmd)
 	if (scsi_prot_sg_count(cmd))
 		scsi_free_sgtable(cmd->prot_sdb);
 }
-EXPORT_SYMBOL(scsi_release_buffers);
 
 static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 {
@@ -1609,7 +1608,7 @@ out_delay:
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
+static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 {
 	struct device *host_dev;
 	u64 bounce_limit = 0xffffffff;
@@ -1629,7 +1628,6 @@ u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 
 	return bounce_limit;
 }
-EXPORT_SYMBOL(scsi_calculate_bounce_limit);
 
 struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 					 request_fn_proc *request_fn)
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index e0ae71098144..73f349044941 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -150,9 +150,7 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
 }
 
 extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
-extern struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *, gfp_t);
 extern void scsi_put_command(struct scsi_cmnd *);
-extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
 
 extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
@@ -160,7 +158,6 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 extern void scsi_kunmap_atomic_sg(void *virt);
 
 extern int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask);
-extern void scsi_release_buffers(struct scsi_cmnd *cmd);
 
 extern int scsi_dma_map(struct scsi_cmnd *cmd);
 extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index f7adfe0a6dc8..b2bc5198b7fb 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -795,8 +795,6 @@ extern struct Scsi_Host *scsi_host_lookup(unsigned short);
 extern const char *scsi_host_state_name(enum scsi_host_state);
 extern void scsi_cmd_get_serial(struct Scsi_Host *, struct scsi_cmnd *);
 
-extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
-
 static inline int __must_check scsi_add_host(struct Scsi_Host *host,
 					     struct device *dev)
 {
-- 
cgit v1.2.3


From e6c11dbb8da81c599ca09ef2f6311220e068acd8 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Thu, 10 Jul 2014 09:41:53 +0200
Subject: scsi_lib: remove the description string in scsi_io_completion()

During IO with fabric faults, one generally sees several "Unhandled error
code" messages in the syslog as shown below:

sd 4:0:6:2: [sdbw] Unhandled error code
sd 4:0:6:2: [sdbw] Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK
sd 4:0:6:2: [sdbw] CDB: Read(10): 28 00 00 00 00 00 00 00 08 00
end_request: I/O error, dev sdbw, sector 0

This comes from scsi_io_completion (in scsi_lib.c) while handling error
codes other than DID_RESET or not deferred sense keys i.e. this is
actually handled by the SCSI mid layer. But what gets displayed here is
"Unhandled error code" which is quite misleading as it indicates
something that is not addressed by the mid layer.

The description string is based on the sense key and sometimes on the
additional sense code;
since the ACTION_FAIL case always prints the sense key and the
additional sense code, this patch removes the description string
completely because it does not add useful information.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/scsi_lib.c | 40 ++++------------------------------------
 1 file changed, 4 insertions(+), 36 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index db7b7f2c14b8..03076b2b5deb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -685,7 +685,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	int sense_deferred = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
-	char *description = NULL;
 	unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
 
 	if (result) {
@@ -810,7 +809,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				 * and quietly refuse further access.
 				 */
 				cmd->device->changed = 1;
-				description = "Media Changed";
 				action = ACTION_FAIL;
 			} else {
 				/* Must have been a power glitch, or a
@@ -838,27 +836,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				cmd->device->use_10_for_rw = 0;
 				action = ACTION_REPREP;
 			} else if (sshdr.asc == 0x10) /* DIX */ {
-				description = "Host Data Integrity Failure";
 				action = ACTION_FAIL;
 				error = -EILSEQ;
 			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
 			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
-				switch (cmd->cmnd[0]) {
-				case UNMAP:
-					description = "Discard failure";
-					break;
-				case WRITE_SAME:
-				case WRITE_SAME_16:
-					if (cmd->cmnd[1] & 0x8)
-						description = "Discard failure";
-					else
-						description =
-							"Write same failure";
-					break;
-				default:
-					description = "Invalid command failure";
-					break;
-				}
 				action = ACTION_FAIL;
 				error = -EREMOTEIO;
 			} else
@@ -866,10 +847,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			break;
 		case ABORTED_COMMAND:
 			action = ACTION_FAIL;
-			if (sshdr.asc == 0x10) { /* DIF */
-				description = "Target Data Integrity Failure";
+			if (sshdr.asc == 0x10) /* DIF */
 				error = -EILSEQ;
-			}
 			break;
 		case NOT_READY:
 			/* If the device is in the process of becoming
@@ -888,42 +867,31 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 					action = ACTION_DELAYED_RETRY;
 					break;
 				default:
-					description = "Device not ready";
 					action = ACTION_FAIL;
 					break;
 				}
-			} else {
-				description = "Device not ready";
+			} else
 				action = ACTION_FAIL;
-			}
 			break;
 		case VOLUME_OVERFLOW:
 			/* See SSC3rXX or current. */
 			action = ACTION_FAIL;
 			break;
 		default:
-			description = "Unhandled sense code";
 			action = ACTION_FAIL;
 			break;
 		}
-	} else {
-		description = "Unhandled error code";
+	} else
 		action = ACTION_FAIL;
-	}
 
 	if (action != ACTION_FAIL &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
 		action = ACTION_FAIL;
-		description = "Command timed out";
-	}
 
 	switch (action) {
 	case ACTION_FAIL:
 		/* Give up and fail the remainder of the request */
 		if (!(req->cmd_flags & REQ_QUIET)) {
-			if (description)
-				scmd_printk(KERN_INFO, cmd, "%s\n",
-					    description);
 			scsi_print_result(cmd);
 			if (driver_byte(result) & DRIVER_SENSE)
 				scsi_print_sense("", cmd);
-- 
cgit v1.2.3


From 635d98b1d0cfc2ba3426a701725d31a6102c059a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 28 Jun 2014 11:51:01 +0200
Subject: scsi: move the nr_phys_segments assert into scsi_init_io

scsi_init_io should only be called for requests that transfer data,
so move the assert that a request has segments from the callers into
scsi_init_io.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 03076b2b5deb..150d7bb675a1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -963,8 +963,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 {
 	struct scsi_device *sdev = cmd->device;
 	struct request *rq = cmd->request;
+	int error;
 
-	int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask);
+	BUG_ON(!rq->nr_phys_segments);
+
+	error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask);
 	if (error)
 		goto err_exit;
 
@@ -1055,11 +1058,7 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 	 * submit a request without an attached bio.
 	 */
 	if (req->bio) {
-		int ret;
-
-		BUG_ON(!req->nr_phys_segments);
-
-		ret = scsi_init_io(cmd, GFP_ATOMIC);
+		int ret = scsi_init_io(cmd, GFP_ATOMIC);
 		if (unlikely(ret))
 			return ret;
 	} else {
@@ -1098,11 +1097,6 @@ int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 			return ret;
 	}
 
-	/*
-	 * Filesystem requests must transfer data.
-	 */
-	BUG_ON(!req->nr_phys_segments);
-
 	memset(cmd->cmnd, 0, BLK_MAX_CDB);
 	return scsi_init_io(cmd, GFP_ATOMIC);
 }
-- 
cgit v1.2.3


From 3868cf8ea70a57fc3f927872d8296f287ce4b96a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 28 Jun 2014 11:58:42 +0200
Subject: scsi: restructure command initialization for TYPE_FS requests

We should call the device handler prep_fn for all TYPE_FS requests,
not just simple read/write calls that are handled by the disk driver.

Restructure the common I/O code to call the prep_fn handler and zero
out the CDB, and just leave the call to scsi_init_io to the ULDs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c    | 22 ++++++++++++----------
 drivers/scsi/sd.c          |  2 +-
 drivers/scsi/sr.c          |  3 +--
 include/scsi/scsi_driver.h |  1 -
 4 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 150d7bb675a1..2afb96b801cb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1082,11 +1082,10 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
 
 /*
- * Setup a REQ_TYPE_FS command.  These are simple read/write request
- * from filesystems that still need to be translated to SCSI CDBs from
- * the ULD.
+ * Setup a REQ_TYPE_FS command.  These are simple request from filesystems
+ * that still need to be translated to SCSI CDBs from the ULD.
  */
-int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
+static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 {
 	struct scsi_cmnd *cmd = req->special;
 
@@ -1098,9 +1097,8 @@ int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 	}
 
 	memset(cmd->cmnd, 0, BLK_MAX_CDB);
-	return scsi_init_io(cmd, GFP_ATOMIC);
+	return scsi_cmd_to_driver(cmd)->init_command(cmd);
 }
-EXPORT_SYMBOL(scsi_setup_fs_cmnd);
 
 static int
 scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
@@ -1205,12 +1203,16 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		goto out;
 	}
 
-	if (req->cmd_type == REQ_TYPE_FS)
-		ret = scsi_cmd_to_driver(cmd)->init_command(cmd);
-	else if (req->cmd_type == REQ_TYPE_BLOCK_PC)
+	switch (req->cmd_type) {
+	case REQ_TYPE_FS:
+		ret = scsi_setup_fs_cmnd(sdev, req);
+		break;
+	case REQ_TYPE_BLOCK_PC:
 		ret = scsi_setup_blk_pc_cmnd(sdev, req);
-	else
+		break;
+	default:
 		ret = BLKPREP_KILL;
+	}
 
 out:
 	return scsi_prep_return(q, req, ret);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 87566b51fcf7..1b166c7ea891 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -894,7 +894,7 @@ static int sd_init_command(struct scsi_cmnd *SCpnt)
 		ret = scsi_setup_flush_cmnd(sdp, rq);
 		goto out;
 	}
-	ret = scsi_setup_fs_cmnd(sdp, rq);
+	ret = scsi_init_io(SCpnt, GFP_ATOMIC);
 	if (ret != BLKPREP_OK)
 		goto out;
 	SCpnt = rq->special;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index a7ea27c01286..9feeb3766d7d 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -385,10 +385,9 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
 	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
 	struct request *rq = SCpnt->request;
-	struct scsi_device *sdp = SCpnt->device;
 	int ret;
 
-	ret = scsi_setup_fs_cmnd(sdp, rq);
+	ret = scsi_init_io(SCpnt, GFP_ATOMIC);
 	if (ret != BLKPREP_OK)
 		goto out;
 	SCpnt = rq->special;
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 36c4114ed9bc..009d2ae91a52 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -30,6 +30,5 @@ extern int scsi_register_interface(struct class_interface *);
 	class_interface_unregister(intf)
 
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req);
-int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req);
 
 #endif /* _SCSI_SCSI_DRIVER_H */
-- 
cgit v1.2.3


From 5158a899d8f24f74cad29b6aaad2b0f86499e5d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 28 Jun 2014 16:41:43 +0200
Subject: scsi: set sc_data_direction in common code

The data direction fiel in the SCSI command is derived only from the block
request structure.  Move setting it up into common code instead of
duplicating it in the ULDs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c | 14 +++++++-------
 drivers/scsi/sd.c       |  2 --
 drivers/scsi/sr.c       |  2 --
 3 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2afb96b801cb..4e15a3a5ad29 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1068,13 +1068,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 	}
 
 	cmd->cmd_len = req->cmd_len;
-	if (!blk_rq_bytes(req))
-		cmd->sc_data_direction = DMA_NONE;
-	else if (rq_data_dir(req) == WRITE)
-		cmd->sc_data_direction = DMA_TO_DEVICE;
-	else
-		cmd->sc_data_direction = DMA_FROM_DEVICE;
-	
 	cmd->transfersize = blk_rq_bytes(req);
 	cmd->allowed = req->retries;
 	return BLKPREP_OK;
@@ -1203,6 +1196,13 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		goto out;
 	}
 
+	if (!blk_rq_bytes(req))
+		cmd->sc_data_direction = DMA_NONE;
+	else if (rq_data_dir(req) == WRITE)
+		cmd->sc_data_direction = DMA_TO_DEVICE;
+	else
+		cmd->sc_data_direction = DMA_FROM_DEVICE;
+
 	switch (req->cmd_type) {
 	case REQ_TYPE_FS:
 		ret = scsi_setup_fs_cmnd(sdev, req);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1b166c7ea891..e2932ea429e2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -994,14 +994,12 @@ static int sd_init_command(struct scsi_cmnd *SCpnt)
 			goto out;
 		}
 		SCpnt->cmnd[0] = WRITE_6;
-		SCpnt->sc_data_direction = DMA_TO_DEVICE;
 
 		if (blk_integrity_rq(rq))
 			sd_dif_prepare(rq, block, sdp->sector_size);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
-		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
 		scmd_printk(KERN_ERR, SCpnt, "Unknown command %llx\n", (unsigned long long) rq->cmd_flags);
 		goto out;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 9feeb3766d7d..cce4771281d9 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -438,11 +438,9 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
 		if (!cd->device->writeable)
 			goto out;
 		SCpnt->cmnd[0] = WRITE_10;
-		SCpnt->sc_data_direction = DMA_TO_DEVICE;
 		cd->cdi.media_written = 1;
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_10;
-		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
 		blk_dump_rq_flags(rq, "Unknown sr command");
 		goto out;
-- 
cgit v1.2.3


From 4f1e57657548d7afb4a6b62097765282f3b03c6e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 28 Jun 2014 12:36:28 +0200
Subject: scsi: mark scsi_setup_blk_pc_cmnd static

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c    | 3 +--
 include/scsi/scsi_driver.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4e15a3a5ad29..85cf0ef843f6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1047,7 +1047,7 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
 	return cmd;
 }
 
-int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
+static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 {
 	struct scsi_cmnd *cmd = req->special;
 
@@ -1072,7 +1072,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 	cmd->allowed = req->retries;
 	return BLKPREP_OK;
 }
-EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
 
 /*
  * Setup a REQ_TYPE_FS command.  These are simple request from filesystems
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 009d2ae91a52..c2b759809d8a 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -29,6 +29,4 @@ extern int scsi_register_interface(struct class_interface *);
 #define scsi_unregister_interface(intf) \
 	class_interface_unregister(intf)
 
-int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req);
-
 #endif /* _SCSI_SCSI_DRIVER_H */
-- 
cgit v1.2.3


From 6af7a4ffa2775c452c77eccc6bb6a4e2b71c2371 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Jul 2014 13:16:17 +0200
Subject: scsi: add scsi_setup_cmnd helper

Factor out command setup code that will be shared with the blk-mq code path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Webb Scales <webbnh@hp.com>
---
 drivers/scsi/scsi_lib.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 85cf0ef843f6..04c368470c8a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1092,6 +1092,27 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 	return scsi_cmd_to_driver(cmd)->init_command(cmd);
 }
 
+static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
+{
+	struct scsi_cmnd *cmd = req->special;
+
+	if (!blk_rq_bytes(req))
+		cmd->sc_data_direction = DMA_NONE;
+	else if (rq_data_dir(req) == WRITE)
+		cmd->sc_data_direction = DMA_TO_DEVICE;
+	else
+		cmd->sc_data_direction = DMA_FROM_DEVICE;
+
+	switch (req->cmd_type) {
+	case REQ_TYPE_FS:
+		return scsi_setup_fs_cmnd(sdev, req);
+	case REQ_TYPE_BLOCK_PC:
+		return scsi_setup_blk_pc_cmnd(sdev, req);
+	default:
+		return BLKPREP_KILL;
+	}
+}
+
 static int
 scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 {
@@ -1195,24 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		goto out;
 	}
 
-	if (!blk_rq_bytes(req))
-		cmd->sc_data_direction = DMA_NONE;
-	else if (rq_data_dir(req) == WRITE)
-		cmd->sc_data_direction = DMA_TO_DEVICE;
-	else
-		cmd->sc_data_direction = DMA_FROM_DEVICE;
-
-	switch (req->cmd_type) {
-	case REQ_TYPE_FS:
-		ret = scsi_setup_fs_cmnd(sdev, req);
-		break;
-	case REQ_TYPE_BLOCK_PC:
-		ret = scsi_setup_blk_pc_cmnd(sdev, req);
-		break;
-	default:
-		ret = BLKPREP_KILL;
-	}
-
+	ret = scsi_setup_cmnd(sdev, req);
 out:
 	return scsi_prep_return(q, req, ret);
 }
-- 
cgit v1.2.3


From de3e8bf3315c9c7c7ffd2f9b2f299b4205feefb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jan 2014 13:55:38 +0100
Subject: scsi: split __scsi_queue_insert

Factor out a helper to set the _blocked values, which we'll reuse for the
blk-mq code path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 04c368470c8a..3ac677c48168 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -75,28 +75,12 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY	3
 
-/**
- * __scsi_queue_insert - private queue insertion
- * @cmd: The SCSI command being requeued
- * @reason:  The reason for the requeue
- * @unbusy: Whether the queue should be unbusied
- *
- * This is a private queue insertion.  The public interface
- * scsi_queue_insert() always assumes the queue should be unbusied
- * because it's always called before the completion.  This function is
- * for a requeue after completion, which should only occur in this
- * file.
- */
-static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
+static void
+scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct scsi_device *device = cmd->device;
 	struct scsi_target *starget = scsi_target(device);
-	struct request_queue *q = device->request_queue;
-	unsigned long flags;
-
-	SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
-		"Inserting command %p into mlqueue\n", cmd));
 
 	/*
 	 * Set the appropriate busy bit for the device/host.
@@ -123,6 +107,30 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 		starget->target_blocked = starget->max_target_blocked;
 		break;
 	}
+}
+
+/**
+ * __scsi_queue_insert - private queue insertion
+ * @cmd: The SCSI command being requeued
+ * @reason:  The reason for the requeue
+ * @unbusy: Whether the queue should be unbusied
+ *
+ * This is a private queue insertion.  The public interface
+ * scsi_queue_insert() always assumes the queue should be unbusied
+ * because it's always called before the completion.  This function is
+ * for a requeue after completion, which should only occur in this
+ * file.
+ */
+static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
+{
+	struct scsi_device *device = cmd->device;
+	struct request_queue *q = device->request_queue;
+	unsigned long flags;
+
+	SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
+		"Inserting command %p into mlqueue\n", cmd));
+
+	scsi_set_blocked(cmd, reason);
 
 	/*
 	 * Decrement the counters, since these commands are no longer
-- 
cgit v1.2.3


From d0d3bbf96ec21167e55a48ebb31912918a674e0d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Jan 2014 18:39:04 +0100
Subject: scsi: centralize command re-queueing in scsi_dispatch_fn

Make sure we only have the logic for requeing commands in one place.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi.c     | 35 ++++++++++++-----------------------
 drivers/scsi/scsi_lib.c |  9 ++++++---
 2 files changed, 18 insertions(+), 26 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 321f854947b4..2396e89dead0 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -645,9 +645,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 		 * returns an immediate error upwards, and signals
 		 * that the device is no longer present */
 		cmd->result = DID_NO_CONNECT << 16;
-		scsi_done(cmd);
-		/* return 0 (because the command has been processed) */
-		goto out;
+		goto done;
 	}
 
 	/* Check to see if the scsi lld made this device blocked. */
@@ -659,17 +657,9 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 		 * occur until the device transitions out of the
 		 * suspend state.
 		 */
-
-		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
-
 		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
 			"queuecommand : device blocked\n"));
-
-		/*
-		 * NOTE: rtn is still zero here because we don't need the
-		 * queue to be plugged on return (it's already stopped)
-		 */
-		goto out;
+		return SCSI_MLQUEUE_DEVICE_BUSY;
 	}
 
 	/*
@@ -693,20 +683,19 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 			       "cdb_size=%d host->max_cmd_len=%d\n",
 			       cmd->cmd_len, cmd->device->host->max_cmd_len));
 		cmd->result = (DID_ABORT << 16);
-
-		scsi_done(cmd);
-		goto out;
+		goto done;
 	}
 
 	if (unlikely(host->shost_state == SHOST_DEL)) {
 		cmd->result = (DID_NO_CONNECT << 16);
-		scsi_done(cmd);
-	} else {
-		trace_scsi_dispatch_cmd_start(cmd);
-		cmd->scsi_done = scsi_done;
-		rtn = host->hostt->queuecommand(host, cmd);
+		goto done;
+
 	}
 
+	trace_scsi_dispatch_cmd_start(cmd);
+
+	cmd->scsi_done = scsi_done;
+	rtn = host->hostt->queuecommand(host, cmd);
 	if (rtn) {
 		trace_scsi_dispatch_cmd_error(cmd, rtn);
 		if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
@@ -715,12 +704,12 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 
 		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
 			"queuecommand : request rejected\n"));
-
-		scsi_queue_insert(cmd, rtn);
 	}
 
- out:
 	return rtn;
+ done:
+	scsi_done(cmd);
+	return 0;
 }
 
 /**
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3ac677c48168..bf7342748f32 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1557,9 +1557,12 @@ static void scsi_request_fn(struct request_queue *q)
 		 * Dispatch the command to the low-level driver.
 		 */
 		rtn = scsi_dispatch_cmd(cmd);
-		spin_lock_irq(q->queue_lock);
-		if (rtn)
+		if (rtn) {
+			scsi_queue_insert(cmd, rtn);
+			spin_lock_irq(q->queue_lock);
 			goto out_delay;
+		}
+		spin_lock_irq(q->queue_lock);
 	}
 
 	return;
@@ -1579,7 +1582,7 @@ static void scsi_request_fn(struct request_queue *q)
 	blk_requeue_request(q, req);
 	sdev->device_busy--;
 out_delay:
-	if (sdev->device_busy == 0)
+	if (sdev->device_busy == 0 && !scsi_device_blocked(sdev))
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-- 
cgit v1.2.3


From 3b5382c459b709845f43361225a2e3284e50752e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 12:25:40 +0200
Subject: scsi: set ->scsi_done before calling scsi_dispatch_cmd

The blk-mq code path will set this to a different function, so make the
code simpler by setting it up in a legacy-request specific place.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi.c     | 23 +----------------------
 drivers/scsi/scsi_lib.c | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2396e89dead0..6200a2615436 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -72,8 +72,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/scsi.h>
 
-static void scsi_done(struct scsi_cmnd *cmd);
-
 /*
  * Definitions and constants.
  */
@@ -693,8 +691,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 	}
 
 	trace_scsi_dispatch_cmd_start(cmd);
-
-	cmd->scsi_done = scsi_done;
 	rtn = host->hostt->queuecommand(host, cmd);
 	if (rtn) {
 		trace_scsi_dispatch_cmd_error(cmd, rtn);
@@ -708,27 +704,10 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 
 	return rtn;
  done:
-	scsi_done(cmd);
+	cmd->scsi_done(cmd);
 	return 0;
 }
 
-/**
- * scsi_done - Invoke completion on finished SCSI command.
- * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
- * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
- *
- * Description: This function is the mid-level's (SCSI Core) interrupt routine,
- * which regains ownership of the SCSI command (de facto) from a LLDD, and
- * calls blk_complete_request() for further processing.
- *
- * This function is interrupt context safe.
- */
-static void scsi_done(struct scsi_cmnd *cmd)
-{
-	trace_scsi_dispatch_cmd_done(cmd);
-	blk_complete_request(cmd->request);
-}
-
 /**
  * scsi_finish_command - cleanup and pass command back to upper layer
  * @cmd: the command
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bf7342748f32..b83269689542 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -29,6 +29,8 @@
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
 
+#include <trace/events/scsi.h>
+
 #include "scsi_priv.h"
 #include "scsi_logging.h"
 
@@ -1454,6 +1456,23 @@ static void scsi_softirq_done(struct request *rq)
 	}
 }
 
+/**
+ * scsi_done - Invoke completion on finished SCSI command.
+ * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
+ * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
+ *
+ * Description: This function is the mid-level's (SCSI Core) interrupt routine,
+ * which regains ownership of the SCSI command (de facto) from a LLDD, and
+ * calls blk_complete_request() for further processing.
+ *
+ * This function is interrupt context safe.
+ */
+static void scsi_done(struct scsi_cmnd *cmd)
+{
+	trace_scsi_dispatch_cmd_done(cmd);
+	blk_complete_request(cmd->request);
+}
+
 /*
  * Function:    scsi_request_fn()
  *
@@ -1556,6 +1575,7 @@ static void scsi_request_fn(struct request_queue *q)
 		/*
 		 * Dispatch the command to the low-level driver.
 		 */
+		cmd->scsi_done = scsi_done;
 		rtn = scsi_dispatch_cmd(cmd);
 		if (rtn) {
 			scsi_queue_insert(cmd, rtn);
-- 
cgit v1.2.3


From cf68d334dd3a323624f6399dc807d34d8b816391 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Jan 2014 14:36:32 +0100
Subject: scsi: push host_lock down into scsi_{host,target}_queue_ready

Prepare for not taking a host-wide lock in the dispatch path by pushing
the lock down into the places that actually need it.  Note that this
patch is just a preparation step, as it will actually increase lock
roundtrips and thus decrease performance on its own.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c | 75 +++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 36 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b83269689542..112c737884c3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1274,18 +1274,18 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 /*
  * scsi_target_queue_ready: checks if there we can send commands to target
  * @sdev: scsi device on starget to check.
- *
- * Called with the host lock held.
  */
 static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 					   struct scsi_device *sdev)
 {
 	struct scsi_target *starget = scsi_target(sdev);
+	int ret = 0;
 
+	spin_lock_irq(shost->host_lock);
 	if (starget->single_lun) {
 		if (starget->starget_sdev_user &&
 		    starget->starget_sdev_user != sdev)
-			return 0;
+			goto out;
 		starget->starget_sdev_user = sdev;
 	}
 
@@ -1293,57 +1293,66 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 		/*
 		 * unblock after target_blocked iterates to zero
 		 */
-		if (--starget->target_blocked == 0) {
-			SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
-					 "unblocking target at zero depth\n"));
-		} else
-			return 0;
+		if (--starget->target_blocked != 0)
+			goto out;
+
+		SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
+				 "unblocking target at zero depth\n"));
 	}
 
 	if (scsi_target_is_busy(starget)) {
 		list_move_tail(&sdev->starved_entry, &shost->starved_list);
-		return 0;
+		goto out;
 	}
 
-	return 1;
+	scsi_target(sdev)->target_busy++;
+	ret = 1;
+out:
+	spin_unlock_irq(shost->host_lock);
+	return ret;
 }
 
 /*
  * scsi_host_queue_ready: if we can send requests to shost, return 1 else
  * return 0. We must end up running the queue again whenever 0 is
  * returned, else IO can hang.
- *
- * Called with host_lock held.
  */
 static inline int scsi_host_queue_ready(struct request_queue *q,
 				   struct Scsi_Host *shost,
 				   struct scsi_device *sdev)
 {
+	int ret = 0;
+
+	spin_lock_irq(shost->host_lock);
+
 	if (scsi_host_in_recovery(shost))
-		return 0;
+		goto out;
 	if (shost->host_busy == 0 && shost->host_blocked) {
 		/*
 		 * unblock after host_blocked iterates to zero
 		 */
-		if (--shost->host_blocked == 0) {
-			SCSI_LOG_MLQUEUE(3,
-				shost_printk(KERN_INFO, shost,
-					     "unblocking host at zero depth\n"));
-		} else {
-			return 0;
-		}
+		if (--shost->host_blocked != 0)
+			goto out;
+
+		SCSI_LOG_MLQUEUE(3,
+			shost_printk(KERN_INFO, shost,
+				     "unblocking host at zero depth\n"));
 	}
 	if (scsi_host_is_busy(shost)) {
 		if (list_empty(&sdev->starved_entry))
 			list_add_tail(&sdev->starved_entry, &shost->starved_list);
-		return 0;
+		goto out;
 	}
 
 	/* We're OK to process the command, so we can't be starved */
 	if (!list_empty(&sdev->starved_entry))
 		list_del_init(&sdev->starved_entry);
 
-	return 1;
+	shost->host_busy++;
+	ret = 1;
+out:
+	spin_unlock_irq(shost->host_lock);
+	return ret;
 }
 
 /*
@@ -1524,7 +1533,7 @@ static void scsi_request_fn(struct request_queue *q)
 			blk_start_request(req);
 		sdev->device_busy++;
 
-		spin_unlock(q->queue_lock);
+		spin_unlock_irq(q->queue_lock);
 		cmd = req->special;
 		if (unlikely(cmd == NULL)) {
 			printk(KERN_CRIT "impossible request in %s.\n"
@@ -1534,7 +1543,6 @@ static void scsi_request_fn(struct request_queue *q)
 			blk_dump_rq_flags(req, "foo");
 			BUG();
 		}
-		spin_lock(shost->host_lock);
 
 		/*
 		 * We hit this when the driver is using a host wide
@@ -1545,9 +1553,11 @@ static void scsi_request_fn(struct request_queue *q)
 		 * a run when a tag is freed.
 		 */
 		if (blk_queue_tagged(q) && !blk_rq_tagged(req)) {
+			spin_lock_irq(shost->host_lock);
 			if (list_empty(&sdev->starved_entry))
 				list_add_tail(&sdev->starved_entry,
 					      &shost->starved_list);
+			spin_unlock_irq(shost->host_lock);
 			goto not_ready;
 		}
 
@@ -1555,16 +1565,7 @@ static void scsi_request_fn(struct request_queue *q)
 			goto not_ready;
 
 		if (!scsi_host_queue_ready(q, shost, sdev))
-			goto not_ready;
-
-		scsi_target(sdev)->target_busy++;
-		shost->host_busy++;
-
-		/*
-		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
-		 *		take the lock again.
-		 */
-		spin_unlock_irq(shost->host_lock);
+			goto host_not_ready;
 
 		/*
 		 * Finally, initialize any error handling parameters, and set up
@@ -1587,9 +1588,11 @@ static void scsi_request_fn(struct request_queue *q)
 
 	return;
 
- not_ready:
+ host_not_ready:
+	spin_lock_irq(shost->host_lock);
+	scsi_target(sdev)->target_busy--;
 	spin_unlock_irq(shost->host_lock);
-
+ not_ready:
 	/*
 	 * lock q, handle tag, requeue req, and decrement device_busy. We
 	 * must return with queue_lock held.
-- 
cgit v1.2.3


From 7ae65c0f9646c29432b69580b80e08632e6cd813 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Jan 2014 14:49:41 +0100
Subject: scsi: convert target_busy to an atomic_t

Avoid taking the host-wide host_lock to check the per-target queue limit.
Instead we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c    | 53 ++++++++++++++++++++++++++++------------------
 include/scsi/scsi_device.h |  4 ++--
 2 files changed, 34 insertions(+), 23 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 112c737884c3..0580711c2c57 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -294,7 +294,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->host_busy--;
-	starget->target_busy--;
+	atomic_dec(&starget->target_busy);
 	if (unlikely(scsi_host_in_recovery(shost) &&
 		     (shost->host_failed || shost->host_eh_scheduled)))
 		scsi_eh_wakeup(shost);
@@ -361,7 +361,7 @@ static inline int scsi_device_is_busy(struct scsi_device *sdev)
 static inline int scsi_target_is_busy(struct scsi_target *starget)
 {
 	return ((starget->can_queue > 0 &&
-		 starget->target_busy >= starget->can_queue) ||
+		 atomic_read(&starget->target_busy) >= starget->can_queue) ||
 		 starget->target_blocked);
 }
 
@@ -1279,37 +1279,50 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 					   struct scsi_device *sdev)
 {
 	struct scsi_target *starget = scsi_target(sdev);
-	int ret = 0;
+	unsigned int busy;
 
-	spin_lock_irq(shost->host_lock);
 	if (starget->single_lun) {
+		spin_lock_irq(shost->host_lock);
 		if (starget->starget_sdev_user &&
-		    starget->starget_sdev_user != sdev)
-			goto out;
+		    starget->starget_sdev_user != sdev) {
+			spin_unlock_irq(shost->host_lock);
+			return 0;
+		}
 		starget->starget_sdev_user = sdev;
+		spin_unlock_irq(shost->host_lock);
 	}
 
-	if (starget->target_busy == 0 && starget->target_blocked) {
+	busy = atomic_inc_return(&starget->target_busy) - 1;
+	if (starget->target_blocked) {
+		if (busy)
+			goto starved;
+
 		/*
 		 * unblock after target_blocked iterates to zero
 		 */
-		if (--starget->target_blocked != 0)
-			goto out;
+		spin_lock_irq(shost->host_lock);
+		if (--starget->target_blocked != 0) {
+			spin_unlock_irq(shost->host_lock);
+			goto out_dec;
+		}
+		spin_unlock_irq(shost->host_lock);
 
 		SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
 				 "unblocking target at zero depth\n"));
 	}
 
-	if (scsi_target_is_busy(starget)) {
-		list_move_tail(&sdev->starved_entry, &shost->starved_list);
-		goto out;
-	}
+	if (starget->can_queue > 0 && busy >= starget->can_queue)
+		goto starved;
 
-	scsi_target(sdev)->target_busy++;
-	ret = 1;
-out:
+	return 1;
+
+starved:
+	spin_lock_irq(shost->host_lock);
+	list_move_tail(&sdev->starved_entry, &shost->starved_list);
 	spin_unlock_irq(shost->host_lock);
-	return ret;
+out_dec:
+	atomic_dec(&starget->target_busy);
+	return 0;
 }
 
 /*
@@ -1419,7 +1432,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	spin_unlock(sdev->request_queue->queue_lock);
 	spin_lock(shost->host_lock);
 	shost->host_busy++;
-	starget->target_busy++;
+	atomic_inc(&starget->target_busy);
 	spin_unlock(shost->host_lock);
 	spin_lock(sdev->request_queue->queue_lock);
 
@@ -1589,9 +1602,7 @@ static void scsi_request_fn(struct request_queue *q)
 	return;
 
  host_not_ready:
-	spin_lock_irq(shost->host_lock);
-	scsi_target(sdev)->target_busy--;
-	spin_unlock_irq(shost->host_lock);
+	atomic_dec(&scsi_target(sdev)->target_busy);
  not_ready:
 	/*
 	 * lock q, handle tag, requeue req, and decrement device_busy. We
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 9aa38f7b303b..4e078b63a9e5 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -291,8 +291,8 @@ struct scsi_target {
 	unsigned int		expecting_lun_change:1;	/* A device has reported
 						 * a 3F/0E UA, other devices on
 						 * the same target will also. */
-	/* commands actually active on LLD. protected by host lock. */
-	unsigned int		target_busy;
+	/* commands actually active on LLD. */
+	atomic_t		target_busy;
 	/*
 	 * LLDs should set this in the slave_alloc host template callout.
 	 * If set to zero then there is not limit.
-- 
cgit v1.2.3


From 74665016086615bbaa3fa6f83af410a0a4e029ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Jan 2014 15:29:29 +0100
Subject: scsi: convert host_busy to atomic_t

Avoid taking the host-wide host_lock to check the per-host queue limit.
Instead we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/advansys.c             |  4 +-
 drivers/scsi/libiscsi.c             |  4 +-
 drivers/scsi/libsas/sas_scsi_host.c |  5 ++-
 drivers/scsi/qlogicpti.c            |  2 +-
 drivers/scsi/scsi.c                 |  2 +-
 drivers/scsi/scsi_error.c           |  7 ++--
 drivers/scsi/scsi_lib.c             | 74 ++++++++++++++++++++++---------------
 drivers/scsi/scsi_sysfs.c           |  9 ++++-
 include/scsi/scsi_host.h            | 10 ++---
 9 files changed, 69 insertions(+), 48 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index e716d0aef194..43761c1c46f0 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -2512,7 +2512,7 @@ static void asc_prt_scsi_host(struct Scsi_Host *s)
 
 	printk("Scsi_Host at addr 0x%p, device %s\n", s, dev_name(boardp->dev));
 	printk(" host_busy %u, host_no %d,\n",
-	       s->host_busy, s->host_no);
+	       atomic_read(&s->host_busy), s->host_no);
 
 	printk(" base 0x%lx, io_port 0x%lx, irq %d,\n",
 	       (ulong)s->base, (ulong)s->io_port, boardp->irq);
@@ -3346,7 +3346,7 @@ static void asc_prt_driver_conf(struct seq_file *m, struct Scsi_Host *shost)
 
 	seq_printf(m,
 		   " host_busy %u, max_id %u, max_lun %llu, max_channel %u\n",
-		   shost->host_busy, shost->max_id,
+		   atomic_read(&shost->host_busy), shost->max_id,
 		   shost->max_lun, shost->max_channel);
 
 	seq_printf(m,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index f2db82beb646..f9f3a1224dfa 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2971,7 +2971,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	 */
 	for (;;) {
 		spin_lock_irqsave(session->host->host_lock, flags);
-		if (!session->host->host_busy) { /* OK for ERL == 0 */
+		if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
 			spin_unlock_irqrestore(session->host->host_lock, flags);
 			break;
 		}
@@ -2979,7 +2979,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 		msleep_interruptible(500);
 		iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
 				  "host_busy %d host_failed %d\n",
-				  session->host->host_busy,
+				  atomic_read(&session->host->host_busy),
 				  session->host->host_failed);
 		/*
 		 * force eh_abort() to unblock
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 7d02a19419a7..24e477d2ea70 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -813,7 +813,7 @@ retry:
 	spin_unlock_irq(shost->host_lock);
 
 	SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
-		    __func__, shost->host_busy, shost->host_failed);
+		    __func__, atomic_read(&shost->host_busy), shost->host_failed);
 	/*
 	 * Deal with commands that still have SAS tasks (i.e. they didn't
 	 * complete via the normal sas_task completion mechanism),
@@ -858,7 +858,8 @@ out:
 		goto retry;
 
 	SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
-		    __func__, shost->host_busy, shost->host_failed, tries);
+		    __func__, atomic_read(&shost->host_busy),
+		    shost->host_failed, tries);
 }
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 6d48d30bed05..740ae495aa77 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -959,7 +959,7 @@ static inline void update_can_queue(struct Scsi_Host *host, u_int in_ptr, u_int
 	/* Temporary workaround until bug is found and fixed (one bug has been found
 	   already, but fixing it makes things even worse) -jj */
 	int num_free = QLOGICPTI_REQ_QUEUE_LEN - REQ_QUEUE_DEPTH(in_ptr, out_ptr) - 64;
-	host->can_queue = host->host_busy + num_free;
+	host->can_queue = atomic_read(&host->host_busy) + num_free;
 	host->sg_tablesize = QLOGICPTI_MAX_SG(num_free);
 }
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 6200a2615436..21fb97b01dd6 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -600,7 +600,7 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 			if (level > 3)
 				scmd_printk(KERN_INFO, cmd,
 					    "scsi host busy %d failed %d\n",
-					    cmd->device->host->host_busy,
+					    atomic_read(&cmd->device->host->host_busy),
 					    cmd->device->host->host_failed);
 		}
 	}
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index e4a532463f9a..5db8454474ee 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -59,7 +59,7 @@ static int scsi_try_to_abort_cmd(struct scsi_host_template *,
 /* called with shost->host_lock held */
 void scsi_eh_wakeup(struct Scsi_Host *shost)
 {
-	if (shost->host_busy == shost->host_failed) {
+	if (atomic_read(&shost->host_busy) == shost->host_failed) {
 		trace_scsi_eh_wakeup(shost);
 		wake_up_process(shost->ehandler);
 		SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
@@ -2164,7 +2164,7 @@ int scsi_error_handler(void *data)
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
-		    shost->host_failed != shost->host_busy) {
+		    shost->host_failed != atomic_read(&shost->host_busy)) {
 			SCSI_LOG_ERROR_RECOVERY(1,
 				shost_printk(KERN_INFO, shost,
 					     "scsi_eh_%d: sleeping\n",
@@ -2178,7 +2178,8 @@ int scsi_error_handler(void *data)
 			shost_printk(KERN_INFO, shost,
 				     "scsi_eh_%d: waking up %d/%d/%d\n",
 				     shost->host_no, shost->host_eh_scheduled,
-				     shost->host_failed, shost->host_busy));
+				     shost->host_failed,
+				     atomic_read(&shost->host_busy)));
 
 		/*
 		 * We have a host that is failing for some reason.  Figure out
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0580711c2c57..d0bd7e0ab7a8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -292,14 +292,17 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 	struct scsi_target *starget = scsi_target(sdev);
 	unsigned long flags;
 
-	spin_lock_irqsave(shost->host_lock, flags);
-	shost->host_busy--;
+	atomic_dec(&shost->host_busy);
 	atomic_dec(&starget->target_busy);
+
 	if (unlikely(scsi_host_in_recovery(shost) &&
-		     (shost->host_failed || shost->host_eh_scheduled)))
+		     (shost->host_failed || shost->host_eh_scheduled))) {
+		spin_lock_irqsave(shost->host_lock, flags);
 		scsi_eh_wakeup(shost);
-	spin_unlock(shost->host_lock);
-	spin_lock(sdev->request_queue->queue_lock);
+		spin_unlock_irqrestore(shost->host_lock, flags);
+	}
+
+	spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
 	sdev->device_busy--;
 	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
 }
@@ -367,7 +370,8 @@ static inline int scsi_target_is_busy(struct scsi_target *starget)
 
 static inline int scsi_host_is_busy(struct Scsi_Host *shost)
 {
-	if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
+	if ((shost->can_queue > 0 &&
+	     atomic_read(&shost->host_busy) >= shost->can_queue) ||
 	    shost->host_blocked || shost->host_self_blocked)
 		return 1;
 
@@ -1334,38 +1338,54 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 				   struct Scsi_Host *shost,
 				   struct scsi_device *sdev)
 {
-	int ret = 0;
-
-	spin_lock_irq(shost->host_lock);
+	unsigned int busy;
 
 	if (scsi_host_in_recovery(shost))
-		goto out;
-	if (shost->host_busy == 0 && shost->host_blocked) {
+		return 0;
+
+	busy = atomic_inc_return(&shost->host_busy) - 1;
+	if (shost->host_blocked) {
+		if (busy)
+			goto starved;
+
 		/*
 		 * unblock after host_blocked iterates to zero
 		 */
-		if (--shost->host_blocked != 0)
-			goto out;
+		spin_lock_irq(shost->host_lock);
+		if (--shost->host_blocked != 0) {
+			spin_unlock_irq(shost->host_lock);
+			goto out_dec;
+		}
+		spin_unlock_irq(shost->host_lock);
 
 		SCSI_LOG_MLQUEUE(3,
 			shost_printk(KERN_INFO, shost,
 				     "unblocking host at zero depth\n"));
 	}
-	if (scsi_host_is_busy(shost)) {
-		if (list_empty(&sdev->starved_entry))
-			list_add_tail(&sdev->starved_entry, &shost->starved_list);
-		goto out;
-	}
+
+	if (shost->can_queue > 0 && busy >= shost->can_queue)
+		goto starved;
+	if (shost->host_self_blocked)
+		goto starved;
 
 	/* We're OK to process the command, so we can't be starved */
-	if (!list_empty(&sdev->starved_entry))
-		list_del_init(&sdev->starved_entry);
+	if (!list_empty(&sdev->starved_entry)) {
+		spin_lock_irq(shost->host_lock);
+		if (!list_empty(&sdev->starved_entry))
+			list_del_init(&sdev->starved_entry);
+		spin_unlock_irq(shost->host_lock);
+	}
 
-	shost->host_busy++;
-	ret = 1;
-out:
+	return 1;
+
+starved:
+	spin_lock_irq(shost->host_lock);
+	if (list_empty(&sdev->starved_entry))
+		list_add_tail(&sdev->starved_entry, &shost->starved_list);
 	spin_unlock_irq(shost->host_lock);
-	return ret;
+out_dec:
+	atomic_dec(&shost->host_busy);
+	return 0;
 }
 
 /*
@@ -1429,12 +1449,8 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	 * with the locks as normal issue path does.
 	 */
 	sdev->device_busy++;
-	spin_unlock(sdev->request_queue->queue_lock);
-	spin_lock(shost->host_lock);
-	shost->host_busy++;
+	atomic_inc(&shost->host_busy);
 	atomic_inc(&starget->target_busy);
-	spin_unlock(shost->host_lock);
-	spin_lock(sdev->request_queue->queue_lock);
 
 	blk_complete_request(req);
 }
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 5f36788705ba..de57b8bca7be 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -334,7 +334,6 @@ store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
 
 shost_rd_attr(unique_id, "%u\n");
-shost_rd_attr(host_busy, "%hu\n");
 shost_rd_attr(cmd_per_lun, "%hd\n");
 shost_rd_attr(can_queue, "%hd\n");
 shost_rd_attr(sg_tablesize, "%hu\n");
@@ -344,6 +343,14 @@ shost_rd_attr(prot_capabilities, "%u\n");
 shost_rd_attr(prot_guard_type, "%hd\n");
 shost_rd_attr2(proc_name, hostt->proc_name, "%s\n");
 
+static ssize_t
+show_host_busy(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	return snprintf(buf, 20, "%d\n", atomic_read(&shost->host_busy));
+}
+static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL);
+
 static struct attribute *scsi_sysfs_shost_attrs[] = {
 	&dev_attr_unique_id.attr,
 	&dev_attr_host_busy.attr,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b2bc5198b7fb..51f7911b1cbb 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -582,13 +582,9 @@ struct Scsi_Host {
 	 */
 	struct blk_queue_tag	*bqt;
 
-	/*
-	 * The following two fields are protected with host_lock;
-	 * however, eh routines can safely access during eh processing
-	 * without acquiring the lock.
-	 */
-	unsigned int host_busy;		   /* commands actually active on low-level */
-	unsigned int host_failed;	   /* commands that failed. */
+	atomic_t host_busy;		   /* commands actually active on low-level */
+	unsigned int host_failed;	   /* commands that failed.
+					      protected by host_lock */
 	unsigned int host_eh_scheduled;    /* EH scheduled without command */
     
 	unsigned int host_no;  /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
-- 
cgit v1.2.3


From 71e75c97f97a9645d25fbf3d8e4165a558f18747 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Apr 2014 19:07:01 +0200
Subject: scsi: convert device_busy to atomic_t

Avoid taking the queue_lock to check the per-device queue limit.  Instead
we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Unlike the host and target busy counters this doesn't allow us to avoid the
queue_lock in the request_fn due to the way the interface works, but it'll
allow us to prepare for using the blk-mq code, which doesn't use the
queue_lock at all, and it at least avoids a queue_lock round trip in
scsi_device_unbusy, which is still important given how busy the queue_lock
is.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/message/fusion/mptsas.c |  2 +-
 drivers/scsi/scsi_lib.c         | 50 +++++++++++++++++++++++------------------
 drivers/scsi/scsi_sysfs.c       | 10 ++++++++-
 drivers/scsi/sg.c               |  2 +-
 include/scsi/scsi_device.h      |  4 +---
 5 files changed, 40 insertions(+), 28 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 711fcb5cec87..d636dbe172a3 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -3763,7 +3763,7 @@ mptsas_send_link_status_event(struct fw_event_work *fw_event)
 						printk(MYIOC_s_DEBUG_FMT
 						"SDEV OUTSTANDING CMDS"
 						"%d\n", ioc->name,
-						sdev->device_busy));
+						atomic_read(&sdev->device_busy)));
 				}
 
 			}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d0bd7e0ab7a8..1ddf0fb43b59 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -302,9 +302,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 		spin_unlock_irqrestore(shost->host_lock, flags);
 	}
 
-	spin_lock_irqsave(sdev->request_queue->queue_lock, flags);
-	sdev->device_busy--;
-	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
+	atomic_dec(&sdev->device_busy);
 }
 
 /*
@@ -355,9 +353,9 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 
 static inline int scsi_device_is_busy(struct scsi_device *sdev)
 {
-	if (sdev->device_busy >= sdev->queue_depth || sdev->device_blocked)
+	if (atomic_read(&sdev->device_busy) >= sdev->queue_depth ||
+	    sdev->device_blocked)
 		return 1;
-
 	return 0;
 }
 
@@ -1204,7 +1202,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 		 * queue must be restarted, so we schedule a callback to happen
 		 * shortly.
 		 */
-		if (sdev->device_busy == 0)
+		if (atomic_read(&sdev->device_busy) == 0)
 			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 		break;
 	default:
@@ -1255,26 +1253,33 @@ static void scsi_unprep_fn(struct request_queue *q, struct request *req)
 static inline int scsi_dev_queue_ready(struct request_queue *q,
 				  struct scsi_device *sdev)
 {
-	if (sdev->device_busy == 0 && sdev->device_blocked) {
+	unsigned int busy;
+
+	busy = atomic_inc_return(&sdev->device_busy) - 1;
+	if (sdev->device_blocked) {
+		if (busy)
+			goto out_dec;
+
 		/*
 		 * unblock after device_blocked iterates to zero
 		 */
-		if (--sdev->device_blocked == 0) {
-			SCSI_LOG_MLQUEUE(3,
-				   sdev_printk(KERN_INFO, sdev,
-				   "unblocking device at zero depth\n"));
-		} else {
+		if (--sdev->device_blocked != 0) {
 			blk_delay_queue(q, SCSI_QUEUE_DELAY);
-			return 0;
+			goto out_dec;
 		}
+		SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
+				   "unblocking device at zero depth\n"));
 	}
-	if (scsi_device_is_busy(sdev))
-		return 0;
+
+	if (busy >= sdev->queue_depth)
+		goto out_dec;
 
 	return 1;
+out_dec:
+	atomic_dec(&sdev->device_busy);
+	return 0;
 }
 
-
 /*
  * scsi_target_queue_ready: checks if there we can send commands to target
  * @sdev: scsi device on starget to check.
@@ -1448,7 +1453,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	 * bump busy counts.  To bump the counters, we need to dance
 	 * with the locks as normal issue path does.
 	 */
-	sdev->device_busy++;
+	atomic_inc(&sdev->device_busy);
 	atomic_inc(&shost->host_busy);
 	atomic_inc(&starget->target_busy);
 
@@ -1544,7 +1549,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * accept it.
 		 */
 		req = blk_peek_request(q);
-		if (!req || !scsi_dev_queue_ready(q, sdev))
+		if (!req)
 			break;
 
 		if (unlikely(!scsi_device_online(sdev))) {
@@ -1554,13 +1559,14 @@ static void scsi_request_fn(struct request_queue *q)
 			continue;
 		}
 
+		if (!scsi_dev_queue_ready(q, sdev))
+			break;
 
 		/*
 		 * Remove the request from the request list.
 		 */
 		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
 			blk_start_request(req);
-		sdev->device_busy++;
 
 		spin_unlock_irq(q->queue_lock);
 		cmd = req->special;
@@ -1630,9 +1636,9 @@ static void scsi_request_fn(struct request_queue *q)
 	 */
 	spin_lock_irq(q->queue_lock);
 	blk_requeue_request(q, req);
-	sdev->device_busy--;
+	atomic_dec(&sdev->device_busy);
 out_delay:
-	if (sdev->device_busy == 0 && !scsi_device_blocked(sdev))
+	if (atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
@@ -2371,7 +2377,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
 		return err;
 
 	scsi_run_queue(sdev->request_queue);
-	while (sdev->device_busy) {
+	while (atomic_read(&sdev->device_busy)) {
 		msleep_interruptible(200);
 		scsi_run_queue(sdev->request_queue);
 	}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index de57b8bca7be..79df9847edef 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -585,13 +585,21 @@ static int scsi_sdev_check_buf_bit(const char *buf)
  * Create the actual show/store functions and data structures.
  */
 sdev_rd_attr (device_blocked, "%d\n");
-sdev_rd_attr (device_busy, "%d\n");
 sdev_rd_attr (type, "%d\n");
 sdev_rd_attr (scsi_level, "%d\n");
 sdev_rd_attr (vendor, "%.8s\n");
 sdev_rd_attr (model, "%.16s\n");
 sdev_rd_attr (rev, "%.4s\n");
 
+static ssize_t
+sdev_show_device_busy(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_busy));
+}
+static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL);
+
 /*
  * TODO: can we make these symlinks to the block layer ones?
  */
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7a291f5c7227..01cf88888797 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2574,7 +2574,7 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v)
 			      scsidp->id, scsidp->lun, (int) scsidp->type,
 			      1,
 			      (int) scsidp->queue_depth,
-			      (int) scsidp->device_busy,
+			      (int) atomic_read(&scsidp->device_busy),
 			      (int) scsi_device_online(scsidp));
 	}
 	read_unlock_irqrestore(&sg_index_lock, iflags);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 4e078b63a9e5..3329901c7243 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -81,9 +81,7 @@ struct scsi_device {
 	struct list_head    siblings;   /* list of all devices on this host */
 	struct list_head    same_target_siblings; /* just the devices sharing same target id */
 
-	/* this is now protected by the request_queue->queue_lock */
-	unsigned int device_busy;	/* commands actually active on
-					 * low-level. protected by queue_lock. */
+	atomic_t device_busy;		/* commands actually active on LLDD */
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
 	struct list_head starved_entry;
-- 
cgit v1.2.3


From cd9070c9c512ff7995f9019392e0ae548df3a088 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jan 2014 12:07:41 +0100
Subject: scsi: fix the {host,target,device}_blocked counter mess

Seems like these counters are missing any sort of synchronization for
updates, as a over 10 year old comment from me noted.  Fix this by
using atomic counters, and while we're at it also make sure they are
in the same cacheline as the _busy counters and not needlessly stored
to in every I/O completion.

With the new model the _busy counters can temporarily go negative,
so all the readers are updated to check for > 0 values.  Longer
term every successful I/O completion will reset the counters to zero,
so the temporarily negative values will not cause any harm.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi.c        | 21 +++++++--------
 drivers/scsi/scsi_lib.c    | 66 +++++++++++++++++++++++-----------------------
 drivers/scsi/scsi_sysfs.c  | 10 ++++++-
 include/scsi/scsi_device.h |  7 ++---
 include/scsi/scsi_host.h   |  7 ++---
 5 files changed, 58 insertions(+), 53 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 21fb97b01dd6..3dde8a35493f 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -726,17 +726,16 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 
 	scsi_device_unbusy(sdev);
 
-        /*
-         * Clear the flags which say that the device/host is no longer
-         * capable of accepting new commands.  These are set in scsi_queue.c
-         * for both the queue full condition on a device, and for a
-         * host full condition on the host.
-	 *
-	 * XXX(hch): What about locking?
-         */
-        shost->host_blocked = 0;
-	starget->target_blocked = 0;
-        sdev->device_blocked = 0;
+	/*
+	 * Clear the flags that say that the device/target/host is no longer
+	 * capable of accepting new commands.
+	 */
+	if (atomic_read(&shost->host_blocked))
+		atomic_set(&shost->host_blocked, 0);
+	if (atomic_read(&starget->target_blocked))
+		atomic_set(&starget->target_blocked, 0);
+	if (atomic_read(&sdev->device_blocked))
+		atomic_set(&sdev->device_blocked, 0);
 
 	/*
 	 * If we have valid sense information, then some kind of recovery
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1ddf0fb43b59..69da4cb5cb13 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -99,14 +99,16 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 	 */
 	switch (reason) {
 	case SCSI_MLQUEUE_HOST_BUSY:
-		host->host_blocked = host->max_host_blocked;
+		atomic_set(&host->host_blocked, host->max_host_blocked);
 		break;
 	case SCSI_MLQUEUE_DEVICE_BUSY:
 	case SCSI_MLQUEUE_EH_RETRY:
-		device->device_blocked = device->max_device_blocked;
+		atomic_set(&device->device_blocked,
+			   device->max_device_blocked);
 		break;
 	case SCSI_MLQUEUE_TARGET_BUSY:
-		starget->target_blocked = starget->max_target_blocked;
+		atomic_set(&starget->target_blocked,
+			   starget->max_target_blocked);
 		break;
 	}
 }
@@ -351,29 +353,35 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
-static inline int scsi_device_is_busy(struct scsi_device *sdev)
+static inline bool scsi_device_is_busy(struct scsi_device *sdev)
 {
-	if (atomic_read(&sdev->device_busy) >= sdev->queue_depth ||
-	    sdev->device_blocked)
-		return 1;
-	return 0;
+	if (atomic_read(&sdev->device_busy) >= sdev->queue_depth)
+		return true;
+	if (atomic_read(&sdev->device_blocked) > 0)
+		return true;
+	return false;
 }
 
-static inline int scsi_target_is_busy(struct scsi_target *starget)
+static inline bool scsi_target_is_busy(struct scsi_target *starget)
 {
-	return ((starget->can_queue > 0 &&
-		 atomic_read(&starget->target_busy) >= starget->can_queue) ||
-		 starget->target_blocked);
+	if (starget->can_queue > 0 &&
+	    atomic_read(&starget->target_busy) >= starget->can_queue)
+		return true;
+	if (atomic_read(&starget->target_blocked) > 0)
+		return true;
+	return false;
 }
 
-static inline int scsi_host_is_busy(struct Scsi_Host *shost)
+static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-	if ((shost->can_queue > 0 &&
-	     atomic_read(&shost->host_busy) >= shost->can_queue) ||
-	    shost->host_blocked || shost->host_self_blocked)
-		return 1;
-
-	return 0;
+	if (shost->can_queue > 0 &&
+	    atomic_read(&shost->host_busy) >= shost->can_queue)
+		return true;
+	if (atomic_read(&shost->host_blocked) > 0)
+		return true;
+	if (shost->host_self_blocked)
+		return true;
+	return false;
 }
 
 static void scsi_starved_list_run(struct Scsi_Host *shost)
@@ -1256,14 +1264,14 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 	unsigned int busy;
 
 	busy = atomic_inc_return(&sdev->device_busy) - 1;
-	if (sdev->device_blocked) {
+	if (atomic_read(&sdev->device_blocked)) {
 		if (busy)
 			goto out_dec;
 
 		/*
 		 * unblock after device_blocked iterates to zero
 		 */
-		if (--sdev->device_blocked != 0) {
+		if (atomic_dec_return(&sdev->device_blocked) > 0) {
 			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 			goto out_dec;
 		}
@@ -1302,19 +1310,15 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 	}
 
 	busy = atomic_inc_return(&starget->target_busy) - 1;
-	if (starget->target_blocked) {
+	if (atomic_read(&starget->target_blocked) > 0) {
 		if (busy)
 			goto starved;
 
 		/*
 		 * unblock after target_blocked iterates to zero
 		 */
-		spin_lock_irq(shost->host_lock);
-		if (--starget->target_blocked != 0) {
-			spin_unlock_irq(shost->host_lock);
+		if (atomic_dec_return(&starget->target_blocked) > 0)
 			goto out_dec;
-		}
-		spin_unlock_irq(shost->host_lock);
 
 		SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
 				 "unblocking target at zero depth\n"));
@@ -1349,19 +1353,15 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 		return 0;
 
 	busy = atomic_inc_return(&shost->host_busy) - 1;
-	if (shost->host_blocked) {
+	if (atomic_read(&shost->host_blocked) > 0) {
 		if (busy)
 			goto starved;
 
 		/*
 		 * unblock after host_blocked iterates to zero
 		 */
-		spin_lock_irq(shost->host_lock);
-		if (--shost->host_blocked != 0) {
-			spin_unlock_irq(shost->host_lock);
+		if (atomic_dec_return(&shost->host_blocked) > 0)
 			goto out_dec;
-		}
-		spin_unlock_irq(shost->host_lock);
 
 		SCSI_LOG_MLQUEUE(3,
 			shost_printk(KERN_INFO, shost,
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 79df9847edef..209cae3097ea 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -584,7 +584,6 @@ static int scsi_sdev_check_buf_bit(const char *buf)
 /*
  * Create the actual show/store functions and data structures.
  */
-sdev_rd_attr (device_blocked, "%d\n");
 sdev_rd_attr (type, "%d\n");
 sdev_rd_attr (scsi_level, "%d\n");
 sdev_rd_attr (vendor, "%.8s\n");
@@ -600,6 +599,15 @@ sdev_show_device_busy(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL);
 
+static ssize_t
+sdev_show_device_blocked(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_blocked));
+}
+static DEVICE_ATTR(device_blocked, S_IRUGO, sdev_show_device_blocked, NULL);
+
 /*
  * TODO: can we make these symlinks to the block layer ones?
  */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 3329901c7243..0f853f2c9dc7 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -82,6 +82,8 @@ struct scsi_device {
 	struct list_head    same_target_siblings; /* just the devices sharing same target id */
 
 	atomic_t device_busy;		/* commands actually active on LLDD */
+	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */
+
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
 	struct list_head starved_entry;
@@ -180,8 +182,6 @@ struct scsi_device {
 	struct list_head event_list;	/* asserted events */
 	struct work_struct event_work;
 
-	unsigned int device_blocked;	/* Device returned QUEUE_FULL. */
-
 	unsigned int max_device_blocked; /* what device_blocked counts down from  */
 #define SCSI_DEFAULT_DEVICE_BLOCKED	3
 
@@ -291,12 +291,13 @@ struct scsi_target {
 						 * the same target will also. */
 	/* commands actually active on LLD. */
 	atomic_t		target_busy;
+	atomic_t		target_blocked;
+
 	/*
 	 * LLDs should set this in the slave_alloc host template callout.
 	 * If set to zero then there is not limit.
 	 */
 	unsigned int		can_queue;
-	unsigned int		target_blocked;
 	unsigned int		max_target_blocked;
 #define SCSI_DEFAULT_TARGET_BLOCKED	3
 
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 51f7911b1cbb..5e8ebc1ac12b 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -583,6 +583,8 @@ struct Scsi_Host {
 	struct blk_queue_tag	*bqt;
 
 	atomic_t host_busy;		   /* commands actually active on low-level */
+	atomic_t host_blocked;
+
 	unsigned int host_failed;	   /* commands that failed.
 					      protected by host_lock */
 	unsigned int host_eh_scheduled;    /* EH scheduled without command */
@@ -681,11 +683,6 @@ struct Scsi_Host {
 	 */
 	struct workqueue_struct *tmf_work_q;
 
-	/*
-	 * Host has rejected a command because it was busy.
-	 */
-	unsigned int host_blocked;
-
 	/*
 	 * Value host_blocked counts down from
 	 */
-- 
cgit v1.2.3


From 2ccbb00808465338b57c39f38c0b1e7ce69e2bb1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Mar 2014 10:35:00 +0100
Subject: scsi: only maintain target_blocked if the driver has a target queue
 limit

This saves us an atomic operation for each I/O submission and completion
for the usual case where the driver doesn't set a per-target can_queue
value.  Only a few iscsi hardware offload drivers set the per-target
can_queue value at the moment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 69da4cb5cb13..a643353584b5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -295,7 +295,8 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 	unsigned long flags;
 
 	atomic_dec(&shost->host_busy);
-	atomic_dec(&starget->target_busy);
+	if (starget->can_queue > 0)
+		atomic_dec(&starget->target_busy);
 
 	if (unlikely(scsi_host_in_recovery(shost) &&
 		     (shost->host_failed || shost->host_eh_scheduled))) {
@@ -364,11 +365,12 @@ static inline bool scsi_device_is_busy(struct scsi_device *sdev)
 
 static inline bool scsi_target_is_busy(struct scsi_target *starget)
 {
-	if (starget->can_queue > 0 &&
-	    atomic_read(&starget->target_busy) >= starget->can_queue)
-		return true;
-	if (atomic_read(&starget->target_blocked) > 0)
-		return true;
+	if (starget->can_queue > 0) {
+		if (atomic_read(&starget->target_busy) >= starget->can_queue)
+			return true;
+		if (atomic_read(&starget->target_blocked) > 0)
+			return true;
+	}
 	return false;
 }
 
@@ -1309,6 +1311,9 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 		spin_unlock_irq(shost->host_lock);
 	}
 
+	if (starget->can_queue <= 0)
+		return 1;
+
 	busy = atomic_inc_return(&starget->target_busy) - 1;
 	if (atomic_read(&starget->target_blocked) > 0) {
 		if (busy)
@@ -1324,7 +1329,7 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
 				 "unblocking target at zero depth\n"));
 	}
 
-	if (starget->can_queue > 0 && busy >= starget->can_queue)
+	if (busy >= starget->can_queue)
 		goto starved;
 
 	return 1;
@@ -1334,7 +1339,8 @@ starved:
 	list_move_tail(&sdev->starved_entry, &shost->starved_list);
 	spin_unlock_irq(shost->host_lock);
 out_dec:
-	atomic_dec(&starget->target_busy);
+	if (starget->can_queue > 0)
+		atomic_dec(&starget->target_busy);
 	return 0;
 }
 
@@ -1455,7 +1461,8 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	 */
 	atomic_inc(&sdev->device_busy);
 	atomic_inc(&shost->host_busy);
-	atomic_inc(&starget->target_busy);
+	if (starget->can_queue > 0)
+		atomic_inc(&starget->target_busy);
 
 	blk_complete_request(req);
 }
@@ -1624,7 +1631,8 @@ static void scsi_request_fn(struct request_queue *q)
 	return;
 
  host_not_ready:
-	atomic_dec(&scsi_target(sdev)->target_busy);
+	if (scsi_target(sdev)->can_queue > 0)
+		atomic_dec(&scsi_target(sdev)->target_busy);
  not_ready:
 	/*
 	 * lock q, handle tag, requeue req, and decrement device_busy. We
-- 
cgit v1.2.3


From f6d47e74fcb2814225e429c94355ad1c551daffb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 16 Feb 2014 06:16:13 -0800
Subject: scsi: unwind blk_end_request_all and blk_end_request_err calls

Replace the calls to the various blk_end_request variants with opencode
equivalents.  Blk-mq is using a model that gives the driver control
between the bio updates and the actual completion, and making the old
code follow that same model allows us to keep the code more similar for
both paths.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c | 61 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 19 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a643353584b5..8723abeb018e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -621,6 +621,37 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 	cmd->request->next_rq->special = NULL;
 }
 
+static bool scsi_end_request(struct request *req, int error,
+		unsigned int bytes, unsigned int bidi_bytes)
+{
+	struct scsi_cmnd *cmd = req->special;
+	struct scsi_device *sdev = cmd->device;
+	struct request_queue *q = sdev->request_queue;
+	unsigned long flags;
+
+
+	if (blk_update_request(req, error, bytes))
+		return true;
+
+	/* Bidi request must be completed as a whole */
+	if (unlikely(bidi_bytes) &&
+	    blk_update_request(req->next_rq, error, bidi_bytes))
+		return true;
+
+	if (blk_queue_add_random(q))
+		add_disk_randomness(req->rq_disk);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_finish_request(req, error);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	if (bidi_bytes)
+		scsi_release_bidi_buffers(cmd);
+	scsi_release_buffers(cmd);
+	scsi_next_command(cmd);
+	return false;
+}
+
 /**
  * __scsi_error_from_host_byte - translate SCSI error code into errno
  * @cmd:	SCSI command (unused)
@@ -693,7 +724,7 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
  *		   be put back on the queue and retried using the same
  *		   command as before, possibly after a delay.
  *
- *		c) We can call blk_end_request() with -EIO to fail
+ *		c) We can call scsi_end_request() with -EIO to fail
  *		   the remainder of the request.
  */
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
@@ -744,13 +775,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			 * both sides at once.
 			 */
 			req->next_rq->resid_len = scsi_in(cmd)->resid;
-
-			scsi_release_buffers(cmd);
-			scsi_release_bidi_buffers(cmd);
-
-			blk_end_request_all(req, 0);
-
-			scsi_next_command(cmd);
+			if (scsi_end_request(req, 0, blk_rq_bytes(req),
+					blk_rq_bytes(req->next_rq)))
+				BUG();
 			return;
 		}
 	} else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) {
@@ -797,15 +824,16 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	/*
 	 * If we finished all bytes in the request we are done now.
 	 */
-	if (!blk_end_request(req, error, good_bytes))
-		goto next_command;
+	if (!scsi_end_request(req, error, good_bytes, 0))
+		return;
 
 	/*
 	 * Kill remainder if no retrys.
 	 */
 	if (error && scsi_noretry_cmd(cmd)) {
-		blk_end_request_all(req, error);
-		goto next_command;
+		if (scsi_end_request(req, error, blk_rq_bytes(req), 0))
+			BUG();
+		return;
 	}
 
 	/*
@@ -919,8 +947,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				scsi_print_sense("", cmd);
 			scsi_print_command(cmd);
 		}
-		if (!blk_end_request_err(req, error))
-			goto next_command;
+		if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0))
+			return;
 		/*FALLTHRU*/
 	case ACTION_REPREP:
 	requeue:
@@ -939,11 +967,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		__scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0);
 		break;
 	}
-	return;
-
-next_command:
-	scsi_release_buffers(cmd);
-	scsi_next_command(cmd);
 }
 
 static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
-- 
cgit v1.2.3


From c53c6d6a68b13b1dff2892551b56cfdc07887d9e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Apr 2014 14:38:31 +0200
Subject: scatterlist: allow chaining to preallocated chunks

Blk-mq drivers usually preallocate their S/G list as part of the request,
but if we want to support the very large S/G lists currently supported by
the SCSI code that would tie up a lot of memory in the preallocated request
pool.  Add support to the scatterlist code so that it can initialize a
S/G list that uses a preallocated first chunks and dynamically allocated
additional chunks.  That way the scsi-mq code can preallocate a first
page worth of S/G entries as part of the request, and dynamically extend
the S/G list when needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/scsi_lib.c     | 16 +++++++---------
 include/linux/scatterlist.h |  6 +++---
 lib/scatterlist.c           | 25 +++++++++++++++++--------
 3 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8723abeb018e..bbd7a0a08692 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -564,6 +564,11 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask)
 	return mempool_alloc(sgp->pool, gfp_mask);
 }
 
+static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
+{
+	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, false, scsi_sg_free);
+}
+
 static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
 			      gfp_t gfp_mask)
 {
@@ -572,19 +577,12 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
 	BUG_ON(!nents);
 
 	ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS,
-			       gfp_mask, scsi_sg_alloc);
+			       NULL, gfp_mask, scsi_sg_alloc);
 	if (unlikely(ret))
-		__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS,
-				scsi_sg_free);
-
+		scsi_free_sgtable(sdb);
 	return ret;
 }
 
-static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
-{
-	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free);
-}
-
 /*
  * Function:    scsi_release_buffers()
  *
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index a964f7285600..f4ec8bbcb372 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -229,10 +229,10 @@ void sg_init_one(struct scatterlist *, const void *, unsigned int);
 typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
 
-void __sg_free_table(struct sg_table *, unsigned int, sg_free_fn *);
+void __sg_free_table(struct sg_table *, unsigned int, bool, sg_free_fn *);
 void sg_free_table(struct sg_table *);
-int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, gfp_t,
-		     sg_alloc_fn *);
+int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
+		     struct scatterlist *, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
 int sg_alloc_table_from_pages(struct sg_table *sgt,
 	struct page **pages, unsigned int n_pages,
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3a8e8e8fb2a5..b4415fceb7e7 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -165,6 +165,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  * __sg_free_table - Free a previously mapped sg table
  * @table:	The sg table header to use
  * @max_ents:	The maximum number of entries per single scatterlist
+ * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk
  * @free_fn:	Free function
  *
  *  Description:
@@ -174,7 +175,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  *
  **/
 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
-		     sg_free_fn *free_fn)
+		     bool skip_first_chunk, sg_free_fn *free_fn)
 {
 	struct scatterlist *sgl, *next;
 
@@ -202,7 +203,10 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 		}
 
 		table->orig_nents -= sg_size;
-		free_fn(sgl, alloc_size);
+		if (!skip_first_chunk) {
+			free_fn(sgl, alloc_size);
+			skip_first_chunk = false;
+		}
 		sgl = next;
 	}
 
@@ -217,7 +221,7 @@ EXPORT_SYMBOL(__sg_free_table);
  **/
 void sg_free_table(struct sg_table *table)
 {
-	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
 }
 EXPORT_SYMBOL(sg_free_table);
 
@@ -241,8 +245,8 @@ EXPORT_SYMBOL(sg_free_table);
  *
  **/
 int __sg_alloc_table(struct sg_table *table, unsigned int nents,
-		     unsigned int max_ents, gfp_t gfp_mask,
-		     sg_alloc_fn *alloc_fn)
+		     unsigned int max_ents, struct scatterlist *first_chunk,
+		     gfp_t gfp_mask, sg_alloc_fn *alloc_fn)
 {
 	struct scatterlist *sg, *prv;
 	unsigned int left;
@@ -269,7 +273,12 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 
 		left -= sg_size;
 
-		sg = alloc_fn(alloc_size, gfp_mask);
+		if (first_chunk) {
+			sg = first_chunk;
+			first_chunk = NULL;
+		} else {
+			sg = alloc_fn(alloc_size, gfp_mask);
+		}
 		if (unlikely(!sg)) {
 			/*
 			 * Adjust entry count to reflect that the last
@@ -324,9 +333,9 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 	int ret;
 
 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
-			       gfp_mask, sg_kmalloc);
+			       NULL, gfp_mask, sg_kmalloc);
 	if (unlikely(ret))
-		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
 
 	return ret;
 }
-- 
cgit v1.2.3


From d285203cf647d7c97db3a1c33794315c9008593f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Jan 2014 12:06:53 +0100
Subject: scsi: add support for a blk-mq based I/O path.

This patch adds support for an alternate I/O path in the scsi midlayer
which uses the blk-mq infrastructure instead of the legacy request code.

Use of blk-mq is fully transparent to drivers, although for now a host
template field is provided to opt out of blk-mq usage in case any unforseen
incompatibilities arise.

In general replacing the legacy request code with blk-mq is a simple and
mostly mechanical transformation.  The biggest exception is the new code
that deals with the fact the I/O submissions in blk-mq must happen from
process context, which slightly complicates the I/O completion handler.
The second biggest differences is that blk-mq is build around the concept
of preallocated requests that also include driver specific data, which
in SCSI context means the scsi_cmnd structure.  This completely avoids
dynamic memory allocations for the fast path through I/O submission.

Due the preallocated requests the MQ code path exclusively uses the
host-wide shared tag allocator instead of a per-LUN one.  This only
affects drivers actually using the block layer provided tag allocator
instead of their own.  Unlike the old path blk-mq always provides a tag,
although drivers don't have to use it.

For now the blk-mq path is disable by defauly and must be enabled using
the "use_blk_mq" module parameter.  Once the remaining work in the block
layer to make blk-mq more suitable for slow devices is complete I hope
to make it the default and eventually even remove the old code path.

Based on the earlier scsi-mq prototype by Nicholas Bellinger.

Thanks to Bart Van Assche and Robert Elliot for testing, benchmarking and
various sugestions and code contributions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
---
 drivers/scsi/hosts.c      |  35 +++-
 drivers/scsi/scsi.c       |   5 +-
 drivers/scsi/scsi_lib.c   | 464 ++++++++++++++++++++++++++++++++++++++++------
 drivers/scsi/scsi_priv.h  |   3 +
 drivers/scsi/scsi_scan.c  |   5 +-
 drivers/scsi/scsi_sysfs.c |   2 +
 include/scsi/scsi_host.h  |  18 +-
 include/scsi/scsi_tcq.h   |  28 ++-
 8 files changed, 488 insertions(+), 72 deletions(-)

(limited to 'drivers/scsi/scsi_lib.c')

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 0632eee82620..6de80e352871 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -213,9 +213,24 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 		goto fail;
 	}
 
+	if (shost_use_blk_mq(shost)) {
+		error = scsi_mq_setup_tags(shost);
+		if (error)
+			goto fail;
+	}
+
+	/*
+	 * Note that we allocate the freelist even for the MQ case for now,
+	 * as we need a command set aside for scsi_reset_provider.  Having
+	 * the full host freelist and one command available for that is a
+	 * little heavy-handed, but avoids introducing a special allocator
+	 * just for this.  Eventually the structure of scsi_reset_provider
+	 * will need a major overhaul.
+	 */
 	error = scsi_setup_command_freelist(shost);
 	if (error)
-		goto fail;
+		goto out_destroy_tags;
+
 
 	if (!shost->shost_gendev.parent)
 		shost->shost_gendev.parent = dev ? dev : &platform_bus;
@@ -226,7 +241,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 
 	error = device_add(&shost->shost_gendev);
 	if (error)
-		goto out;
+		goto out_destroy_freelist;
 
 	pm_runtime_set_active(&shost->shost_gendev);
 	pm_runtime_enable(&shost->shost_gendev);
@@ -279,8 +294,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 	device_del(&shost->shost_dev);
  out_del_gendev:
 	device_del(&shost->shost_gendev);
- out:
+ out_destroy_freelist:
 	scsi_destroy_command_freelist(shost);
+ out_destroy_tags:
+	if (shost_use_blk_mq(shost))
+		scsi_mq_destroy_tags(shost);
  fail:
 	return error;
 }
@@ -309,8 +327,13 @@ static void scsi_host_dev_release(struct device *dev)
 	}
 
 	scsi_destroy_command_freelist(shost);
-	if (shost->bqt)
-		blk_free_tags(shost->bqt);
+	if (shost_use_blk_mq(shost)) {
+		if (shost->tag_set.tags)
+			scsi_mq_destroy_tags(shost);
+	} else {
+		if (shost->bqt)
+			blk_free_tags(shost->bqt);
+	}
 
 	kfree(shost->shost_data);
 
@@ -436,6 +459,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	else
 		shost->dma_boundary = 0xffffffff;
 
+	shost->use_blk_mq = scsi_use_blk_mq && !shost->hostt->disable_blk_mq;
+
 	device_initialize(&shost->shost_gendev);
 	dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
 	shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 3dde8a35493f..013709f11bf1 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -805,7 +805,7 @@ void scsi_adjust_queue_depth(struct scsi_device *sdev, int tagged, int tags)
 	 * is more IO than the LLD's can_queue (so there are not enuogh
 	 * tags) request_fn's host queue ready check will handle it.
 	 */
-	if (!sdev->host->bqt) {
+	if (!shost_use_blk_mq(sdev->host) && !sdev->host->bqt) {
 		if (blk_queue_tagged(sdev->request_queue) &&
 		    blk_queue_resize_tags(sdev->request_queue, tags) != 0)
 			goto out;
@@ -1361,6 +1361,9 @@ MODULE_LICENSE("GPL");
 module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
 
+bool scsi_use_blk_mq = false;
+module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);
+
 static int __init init_scsi(void)
 {
 	int error;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bbd7a0a08692..9c44392b748f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1,5 +1,6 @@
 /*
- *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ * Copyright (C) 1999 Eric Youngdale
+ * Copyright (C) 2014 Christoph Hellwig
  *
  *  SCSI queueing library.
  *      Initial versions: Eric Youngdale (eric@andante.org).
@@ -20,6 +21,7 @@
 #include <linux/delay.h>
 #include <linux/hardirq.h>
 #include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -113,6 +115,16 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 	}
 }
 
+static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdev = cmd->device;
+	struct request_queue *q = cmd->request->q;
+
+	blk_mq_requeue_request(cmd->request);
+	blk_mq_kick_requeue_list(q);
+	put_device(&sdev->sdev_gendev);
+}
+
 /**
  * __scsi_queue_insert - private queue insertion
  * @cmd: The SCSI command being requeued
@@ -150,6 +162,10 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	 * before blk_cleanup_queue() finishes.
 	 */
 	cmd->result = 0;
+	if (q->mq_ops) {
+		scsi_mq_requeue_cmd(cmd);
+		return;
+	}
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, cmd->request);
 	kblockd_schedule_work(&device->requeue_work);
@@ -308,6 +324,14 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 	atomic_dec(&sdev->device_busy);
 }
 
+static void scsi_kick_queue(struct request_queue *q)
+{
+	if (q->mq_ops)
+		blk_mq_start_hw_queues(q);
+	else
+		blk_run_queue(q);
+}
+
 /*
  * Called for single_lun devices on IO completion. Clear starget_sdev_user,
  * and call blk_run_queue for all the scsi_devices on the target -
@@ -332,7 +356,7 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 	 * but in most cases, we will be first. Ideally, each LU on the
 	 * target would get some limited time or requests on the target.
 	 */
-	blk_run_queue(current_sdev->request_queue);
+	scsi_kick_queue(current_sdev->request_queue);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (starget->starget_sdev_user)
@@ -345,7 +369,7 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 			continue;
 
 		spin_unlock_irqrestore(shost->host_lock, flags);
-		blk_run_queue(sdev->request_queue);
+		scsi_kick_queue(sdev->request_queue);
 		spin_lock_irqsave(shost->host_lock, flags);
 	
 		scsi_device_put(sdev);
@@ -435,7 +459,7 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
 			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
-		blk_run_queue(slq);
+		scsi_kick_queue(slq);
 		blk_put_queue(slq);
 
 		spin_lock_irqsave(shost->host_lock, flags);
@@ -466,7 +490,10 @@ static void scsi_run_queue(struct request_queue *q)
 	if (!list_empty(&sdev->host->starved_list))
 		scsi_starved_list_run(sdev->host);
 
-	blk_run_queue(q);
+	if (q->mq_ops)
+		blk_mq_start_stopped_hw_queues(q, false);
+	else
+		blk_run_queue(q);
 }
 
 void scsi_requeue_run_queue(struct work_struct *work)
@@ -564,25 +591,72 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask)
 	return mempool_alloc(sgp->pool, gfp_mask);
 }
 
-static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
+static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq)
 {
-	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, false, scsi_sg_free);
+	if (mq && sdb->table.nents <= SCSI_MAX_SG_SEGMENTS)
+		return;
+	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free);
 }
 
 static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
-			      gfp_t gfp_mask)
+			      gfp_t gfp_mask, bool mq)
 {
+	struct scatterlist *first_chunk = NULL;
 	int ret;
 
 	BUG_ON(!nents);
 
+	if (mq) {
+		if (nents <= SCSI_MAX_SG_SEGMENTS) {
+			sdb->table.nents = nents;
+			sg_init_table(sdb->table.sgl, sdb->table.nents);
+			return 0;
+		}
+		first_chunk = sdb->table.sgl;
+	}
+
 	ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS,
-			       NULL, gfp_mask, scsi_sg_alloc);
+			       first_chunk, gfp_mask, scsi_sg_alloc);
 	if (unlikely(ret))
-		scsi_free_sgtable(sdb);
+		scsi_free_sgtable(sdb, mq);
 	return ret;
 }
 
+static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
+{
+	if (cmd->request->cmd_type == REQ_TYPE_FS) {
+		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
+
+		if (drv->uninit_command)
+			drv->uninit_command(cmd);
+	}
+}
+
+static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd)
+{
+	if (cmd->sdb.table.nents)
+		scsi_free_sgtable(&cmd->sdb, true);
+	if (cmd->request->next_rq && cmd->request->next_rq->special)
+		scsi_free_sgtable(cmd->request->next_rq->special, true);
+	if (scsi_prot_sg_count(cmd))
+		scsi_free_sgtable(cmd->prot_sdb, true);
+}
+
+static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdev = cmd->device;
+	unsigned long flags;
+
+	BUG_ON(list_empty(&cmd->list));
+
+	scsi_mq_free_sgtables(cmd);
+	scsi_uninit_cmd(cmd);
+
+	spin_lock_irqsave(&sdev->list_lock, flags);
+	list_del_init(&cmd->list);
+	spin_unlock_irqrestore(&sdev->list_lock, flags);
+}
+
 /*
  * Function:    scsi_release_buffers()
  *
@@ -602,19 +676,19 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
 static void scsi_release_buffers(struct scsi_cmnd *cmd)
 {
 	if (cmd->sdb.table.nents)
-		scsi_free_sgtable(&cmd->sdb);
+		scsi_free_sgtable(&cmd->sdb, false);
 
 	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 
 	if (scsi_prot_sg_count(cmd))
-		scsi_free_sgtable(cmd->prot_sdb);
+		scsi_free_sgtable(cmd->prot_sdb, false);
 }
 
 static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 {
 	struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special;
 
-	scsi_free_sgtable(bidi_sdb);
+	scsi_free_sgtable(bidi_sdb, false);
 	kmem_cache_free(scsi_sdb_cache, bidi_sdb);
 	cmd->request->next_rq->special = NULL;
 }
@@ -625,8 +699,6 @@ static bool scsi_end_request(struct request *req, int error,
 	struct scsi_cmnd *cmd = req->special;
 	struct scsi_device *sdev = cmd->device;
 	struct request_queue *q = sdev->request_queue;
-	unsigned long flags;
-
 
 	if (blk_update_request(req, error, bytes))
 		return true;
@@ -639,14 +711,38 @@ static bool scsi_end_request(struct request *req, int error,
 	if (blk_queue_add_random(q))
 		add_disk_randomness(req->rq_disk);
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_finish_request(req, error);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (req->mq_ctx) {
+		/*
+		 * In the MQ case the command gets freed by __blk_mq_end_io,
+		 * so we have to do all cleanup that depends on it earlier.
+		 *
+		 * We also can't kick the queues from irq context, so we
+		 * will have to defer it to a workqueue.
+		 */
+		scsi_mq_uninit_cmd(cmd);
+
+		__blk_mq_end_io(req, error);
+
+		if (scsi_target(sdev)->single_lun ||
+		    !list_empty(&sdev->host->starved_list))
+			kblockd_schedule_work(&sdev->requeue_work);
+		else
+			blk_mq_start_stopped_hw_queues(q, true);
+
+		put_device(&sdev->sdev_gendev);
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_finish_request(req, error);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+
+		if (bidi_bytes)
+			scsi_release_bidi_buffers(cmd);
+		scsi_release_buffers(cmd);
+		scsi_next_command(cmd);
+	}
 
-	if (bidi_bytes)
-		scsi_release_bidi_buffers(cmd);
-	scsi_release_buffers(cmd);
-	scsi_next_command(cmd);
 	return false;
 }
 
@@ -953,8 +1049,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		/* Unprep the request and put it back at the head of the queue.
 		 * A new command will be prepared and issued.
 		 */
-		scsi_release_buffers(cmd);
-		scsi_requeue_command(q, cmd);
+		if (q->mq_ops) {
+			cmd->request->cmd_flags &= ~REQ_DONTPREP;
+			scsi_mq_uninit_cmd(cmd);
+			scsi_mq_requeue_cmd(cmd);
+		} else {
+			scsi_release_buffers(cmd);
+			scsi_requeue_command(q, cmd);
+		}
 		break;
 	case ACTION_RETRY:
 		/* Retry the same command immediately */
@@ -976,9 +1078,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	 * If sg table allocation fails, requeue request later.
 	 */
 	if (unlikely(scsi_alloc_sgtable(sdb, req->nr_phys_segments,
-					gfp_mask))) {
+					gfp_mask, req->mq_ctx != NULL)))
 		return BLKPREP_DEFER;
-	}
 
 	/* 
 	 * Next, walk the list, and fill in the addresses and sizes of
@@ -1006,6 +1107,7 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 {
 	struct scsi_device *sdev = cmd->device;
 	struct request *rq = cmd->request;
+	bool is_mq = (rq->mq_ctx != NULL);
 	int error;
 
 	BUG_ON(!rq->nr_phys_segments);
@@ -1015,15 +1117,19 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		goto err_exit;
 
 	if (blk_bidi_rq(rq)) {
-		struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc(
-			scsi_sdb_cache, GFP_ATOMIC);
-		if (!bidi_sdb) {
-			error = BLKPREP_DEFER;
-			goto err_exit;
+		if (!rq->q->mq_ops) {
+			struct scsi_data_buffer *bidi_sdb =
+				kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC);
+			if (!bidi_sdb) {
+				error = BLKPREP_DEFER;
+				goto err_exit;
+			}
+
+			rq->next_rq->special = bidi_sdb;
 		}
 
-		rq->next_rq->special = bidi_sdb;
-		error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC);
+		error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special,
+					  GFP_ATOMIC);
 		if (error)
 			goto err_exit;
 	}
@@ -1035,7 +1141,7 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		BUG_ON(prot_sdb == NULL);
 		ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
 
-		if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
+		if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask, is_mq)) {
 			error = BLKPREP_DEFER;
 			goto err_exit;
 		}
@@ -1049,13 +1155,16 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		cmd->prot_sdb->table.nents = count;
 	}
 
-	return BLKPREP_OK ;
-
+	return BLKPREP_OK;
 err_exit:
-	scsi_release_buffers(cmd);
-	cmd->request->special = NULL;
-	scsi_put_command(cmd);
-	put_device(&sdev->sdev_gendev);
+	if (is_mq) {
+		scsi_mq_free_sgtables(cmd);
+	} else {
+		scsi_release_buffers(cmd);
+		cmd->request->special = NULL;
+		scsi_put_command(cmd);
+		put_device(&sdev->sdev_gendev);
+	}
 	return error;
 }
 EXPORT_SYMBOL(scsi_init_io);
@@ -1266,13 +1375,7 @@ out:
 
 static void scsi_unprep_fn(struct request_queue *q, struct request *req)
 {
-	if (req->cmd_type == REQ_TYPE_FS) {
-		struct scsi_cmnd *cmd = req->special;
-		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
-
-		if (drv->uninit_command)
-			drv->uninit_command(cmd);
-	}
+	scsi_uninit_cmd(req->special);
 }
 
 /*
@@ -1295,7 +1398,11 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 		 * unblock after device_blocked iterates to zero
 		 */
 		if (atomic_dec_return(&sdev->device_blocked) > 0) {
-			blk_delay_queue(q, SCSI_QUEUE_DELAY);
+			/*
+			 * For the MQ case we take care of this in the caller.
+			 */
+			if (!q->mq_ops)
+				blk_delay_queue(q, SCSI_QUEUE_DELAY);
 			goto out_dec;
 		}
 		SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
@@ -1671,6 +1778,180 @@ out_delay:
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
+static inline int prep_to_mq(int ret)
+{
+	switch (ret) {
+	case BLKPREP_OK:
+		return 0;
+	case BLKPREP_DEFER:
+		return BLK_MQ_RQ_QUEUE_BUSY;
+	default:
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
+}
+
+static int scsi_mq_prep_fn(struct request *req)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+	struct scsi_device *sdev = req->q->queuedata;
+	struct Scsi_Host *shost = sdev->host;
+	unsigned char *sense_buf = cmd->sense_buffer;
+	struct scatterlist *sg;
+
+	memset(cmd, 0, sizeof(struct scsi_cmnd));
+
+	req->special = cmd;
+
+	cmd->request = req;
+	cmd->device = sdev;
+	cmd->sense_buffer = sense_buf;
+
+	cmd->tag = req->tag;
+
+	req->cmd = req->__cmd;
+	cmd->cmnd = req->cmd;
+	cmd->prot_op = SCSI_PROT_NORMAL;
+
+	INIT_LIST_HEAD(&cmd->list);
+	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
+	cmd->jiffies_at_alloc = jiffies;
+
+	/*
+	 * XXX: cmd_list lookups are only used by two drivers, try to get
+	 * rid of this list in common code.
+	 */
+	spin_lock_irq(&sdev->list_lock);
+	list_add_tail(&cmd->list, &sdev->cmd_list);
+	spin_unlock_irq(&sdev->list_lock);
+
+	sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
+	cmd->sdb.table.sgl = sg;
+
+	if (scsi_host_get_prot(shost)) {
+		cmd->prot_sdb = (void *)sg +
+			shost->sg_tablesize * sizeof(struct scatterlist);
+		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));
+
+		cmd->prot_sdb->table.sgl =
+			(struct scatterlist *)(cmd->prot_sdb + 1);
+	}
+
+	if (blk_bidi_rq(req)) {
+		struct request *next_rq = req->next_rq;
+		struct scsi_data_buffer *bidi_sdb = blk_mq_rq_to_pdu(next_rq);
+
+		memset(bidi_sdb, 0, sizeof(struct scsi_data_buffer));
+		bidi_sdb->table.sgl =
+			(struct scatterlist *)(bidi_sdb + 1);
+
+		next_rq->special = bidi_sdb;
+	}
+
+	return scsi_setup_cmnd(sdev, req);
+}
+
+static void scsi_mq_done(struct scsi_cmnd *cmd)
+{
+	trace_scsi_dispatch_cmd_done(cmd);
+	blk_mq_complete_request(cmd->request);
+}
+
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+{
+	struct request_queue *q = req->q;
+	struct scsi_device *sdev = q->queuedata;
+	struct Scsi_Host *shost = sdev->host;
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+	int ret;
+	int reason;
+
+	ret = prep_to_mq(scsi_prep_state_check(sdev, req));
+	if (ret)
+		goto out;
+
+	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	if (!get_device(&sdev->sdev_gendev))
+		goto out;
+
+	if (!scsi_dev_queue_ready(q, sdev))
+		goto out_put_device;
+	if (!scsi_target_queue_ready(shost, sdev))
+		goto out_dec_device_busy;
+	if (!scsi_host_queue_ready(q, shost, sdev))
+		goto out_dec_target_busy;
+
+	if (!(req->cmd_flags & REQ_DONTPREP)) {
+		ret = prep_to_mq(scsi_mq_prep_fn(req));
+		if (ret)
+			goto out_dec_host_busy;
+		req->cmd_flags |= REQ_DONTPREP;
+	}
+
+	scsi_init_cmd_errh(cmd);
+	cmd->scsi_done = scsi_mq_done;
+
+	reason = scsi_dispatch_cmd(cmd);
+	if (reason) {
+		scsi_set_blocked(cmd, reason);
+		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		goto out_dec_host_busy;
+	}
+
+	return BLK_MQ_RQ_QUEUE_OK;
+
+out_dec_host_busy:
+	atomic_dec(&shost->host_busy);
+out_dec_target_busy:
+	if (scsi_target(sdev)->can_queue > 0)
+		atomic_dec(&scsi_target(sdev)->target_busy);
+out_dec_device_busy:
+	atomic_dec(&sdev->device_busy);
+out_put_device:
+	put_device(&sdev->sdev_gendev);
+out:
+	switch (ret) {
+	case BLK_MQ_RQ_QUEUE_BUSY:
+		blk_mq_stop_hw_queue(hctx);
+		if (atomic_read(&sdev->device_busy) == 0 &&
+		    !scsi_device_blocked(sdev))
+			blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY);
+		break;
+	case BLK_MQ_RQ_QUEUE_ERROR:
+		/*
+		 * Make sure to release all allocated ressources when
+		 * we hit an error, as we will never see this command
+		 * again.
+		 */
+		if (req->cmd_flags & REQ_DONTPREP)
+			scsi_mq_uninit_cmd(cmd);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static int scsi_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL,
+			numa_node);
+	if (!cmd->sense_buffer)
+		return -ENOMEM;
+	return 0;
+}
+
+static void scsi_exit_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	kfree(cmd->sense_buffer);
+}
+
 static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 {
 	struct device *host_dev;
@@ -1692,16 +1973,10 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 	return bounce_limit;
 }
 
-struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
-					 request_fn_proc *request_fn)
+static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
-	struct request_queue *q;
 	struct device *dev = shost->dma_dev;
 
-	q = blk_init_queue(request_fn, NULL);
-	if (!q)
-		return NULL;
-
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
@@ -1732,7 +2007,17 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	 * blk_queue_update_dma_alignment() later.
 	 */
 	blk_queue_dma_alignment(q, 0x03);
+}
 
+struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
+					 request_fn_proc *request_fn)
+{
+	struct request_queue *q;
+
+	q = blk_init_queue(request_fn, NULL);
+	if (!q)
+		return NULL;
+	__scsi_init_queue(shost, q);
 	return q;
 }
 EXPORT_SYMBOL(__scsi_alloc_queue);
@@ -1753,6 +2038,55 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	return q;
 }
 
+static struct blk_mq_ops scsi_mq_ops = {
+	.map_queue	= blk_mq_map_queue,
+	.queue_rq	= scsi_queue_rq,
+	.complete	= scsi_softirq_done,
+	.timeout	= scsi_times_out,
+	.init_request	= scsi_init_request,
+	.exit_request	= scsi_exit_request,
+};
+
+struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
+{
+	sdev->request_queue = blk_mq_init_queue(&sdev->host->tag_set);
+	if (IS_ERR(sdev->request_queue))
+		return NULL;
+
+	sdev->request_queue->queuedata = sdev;
+	__scsi_init_queue(sdev->host, sdev->request_queue);
+	return sdev->request_queue;
+}
+
+int scsi_mq_setup_tags(struct Scsi_Host *shost)
+{
+	unsigned int cmd_size, sgl_size, tbl_size;
+
+	tbl_size = shost->sg_tablesize;
+	if (tbl_size > SCSI_MAX_SG_SEGMENTS)
+		tbl_size = SCSI_MAX_SG_SEGMENTS;
+	sgl_size = tbl_size * sizeof(struct scatterlist);
+	cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
+	if (scsi_host_get_prot(shost))
+		cmd_size += sizeof(struct scsi_data_buffer) + sgl_size;
+
+	memset(&shost->tag_set, 0, sizeof(shost->tag_set));
+	shost->tag_set.ops = &scsi_mq_ops;
+	shost->tag_set.nr_hw_queues = 1;
+	shost->tag_set.queue_depth = shost->can_queue;
+	shost->tag_set.cmd_size = cmd_size;
+	shost->tag_set.numa_node = NUMA_NO_NODE;
+	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.driver_data = shost;
+
+	return blk_mq_alloc_tag_set(&shost->tag_set);
+}
+
+void scsi_mq_destroy_tags(struct Scsi_Host *shost)
+{
+	blk_mq_free_tag_set(&shost->tag_set);
+}
+
 /*
  * Function:    scsi_block_requests()
  *
@@ -2498,9 +2832,13 @@ scsi_internal_device_block(struct scsi_device *sdev)
 	 * block layer from calling the midlayer with this device's
 	 * request queue. 
 	 */
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_stop_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (q->mq_ops) {
+		blk_mq_stop_hw_queues(q);
+	} else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_stop_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 
 	return 0;
 }
@@ -2546,9 +2884,13 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
 		 sdev->sdev_state != SDEV_OFFLINE)
 		return -EINVAL;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (q->mq_ops) {
+		blk_mq_start_stopped_hw_queues(q, false);
+	} else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_start_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 
 	return 0;
 }
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index a45d1c2eb414..12b8e1bee7f0 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -88,6 +88,9 @@ extern void scsi_next_command(struct scsi_cmnd *cmd);
 extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_run_host_queues(struct Scsi_Host *shost);
 extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
+extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev);
+extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
+extern void scsi_mq_destroy_tags(struct Scsi_Host *shost);
 extern int scsi_init_queue(void);
 extern void scsi_exit_queue(void);
 struct request_queue;
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 4a6e4ba5a400..b91cfaf033aa 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -273,7 +273,10 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	 */
 	sdev->borken = 1;
 
-	sdev->request_queue = scsi_alloc_queue(sdev);
+	if (shost_use_blk_mq(shost))
+		sdev->request_queue = scsi_mq_alloc_queue(sdev);
+	else
+		sdev->request_queue = scsi_alloc_queue(sdev);
 	if (!sdev->request_queue) {
 		/* release fn is set up in scsi_sysfs_device_initialise, so
 		 * have to free and put manually here */
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 209cae3097ea..406b3038bbad 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -333,6 +333,7 @@ store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
 
+shost_rd_attr(use_blk_mq, "%d\n");
 shost_rd_attr(unique_id, "%u\n");
 shost_rd_attr(cmd_per_lun, "%hd\n");
 shost_rd_attr(can_queue, "%hd\n");
@@ -352,6 +353,7 @@ show_host_busy(struct device *dev, struct device_attribute *attr, char *buf)
 static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL);
 
 static struct attribute *scsi_sysfs_shost_attrs[] = {
+	&dev_attr_use_blk_mq.attr,
 	&dev_attr_unique_id.attr,
 	&dev_attr_host_busy.attr,
 	&dev_attr_cmd_per_lun.attr,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 5e8ebc1ac12b..ba2034779961 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -7,6 +7,7 @@
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
+#include <linux/blk-mq.h>
 #include <scsi/scsi.h>
 
 struct request_queue;
@@ -510,6 +511,9 @@ struct scsi_host_template {
 	 */
 	unsigned int cmd_size;
 	struct scsi_host_cmd_pool *cmd_pool;
+
+	/* temporary flag to disable blk-mq I/O path */
+	bool disable_blk_mq;
 };
 
 /*
@@ -580,7 +584,10 @@ struct Scsi_Host {
 	 * Area to keep a shared tag map (if needed, will be
 	 * NULL if not).
 	 */
-	struct blk_queue_tag	*bqt;
+	union {
+		struct blk_queue_tag	*bqt;
+		struct blk_mq_tag_set	tag_set;
+	};
 
 	atomic_t host_busy;		   /* commands actually active on low-level */
 	atomic_t host_blocked;
@@ -672,6 +679,8 @@ struct Scsi_Host {
 	/* The controller does not support WRITE SAME */
 	unsigned no_write_same:1;
 
+	unsigned use_blk_mq:1;
+
 	/*
 	 * Optional work queue to be utilized by the transport
 	 */
@@ -772,6 +781,13 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
 		shost->tmf_in_progress;
 }
 
+extern bool scsi_use_blk_mq;
+
+static inline bool shost_use_blk_mq(struct Scsi_Host *shost)
+{
+	return shost->use_blk_mq;
+}
+
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
 extern void scsi_flush_work(struct Scsi_Host *);
 
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index 81dd12edc38c..cdcc90b07ecb 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -67,7 +67,8 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth)
 	if (!sdev->tagged_supported)
 		return;
 
-	if (!blk_queue_tagged(sdev->request_queue))
+	if (!shost_use_blk_mq(sdev->host) &&
+	    blk_queue_tagged(sdev->request_queue))
 		blk_queue_init_tags(sdev->request_queue, depth,
 				    sdev->host->bqt);
 
@@ -80,7 +81,8 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth)
  **/
 static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth)
 {
-	if (blk_queue_tagged(sdev->request_queue))
+	if (!shost_use_blk_mq(sdev->host) &&
+	    blk_queue_tagged(sdev->request_queue))
 		blk_queue_free_tags(sdev->request_queue);
 	scsi_adjust_queue_depth(sdev, 0, depth);
 }
@@ -108,6 +110,15 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
 	return 0;
 }
 
+static inline struct scsi_cmnd *scsi_mq_find_tag(struct Scsi_Host *shost,
+		unsigned int hw_ctx, int tag)
+{
+	struct request *req;
+
+	req = blk_mq_tag_to_rq(shost->tag_set.tags[hw_ctx], tag);
+	return req ? (struct scsi_cmnd *)req->special : NULL;
+}
+
 /**
  * scsi_find_tag - find a tagged command by device
  * @SDpnt:	pointer to the ScSI device
@@ -118,10 +129,12 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
  **/
 static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag)
 {
-
         struct request *req;
 
         if (tag != SCSI_NO_TAG) {
+		if (shost_use_blk_mq(sdev->host))
+			return scsi_mq_find_tag(sdev->host, 0, tag);
+
         	req = blk_queue_find_tag(sdev->request_queue, tag);
 	        return req ? (struct scsi_cmnd *)req->special : NULL;
 	}
@@ -130,6 +143,7 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag)
 	return sdev->current_cmnd;
 }
 
+
 /**
  * scsi_init_shared_tag_map - create a shared tag map
  * @shost:	the host to share the tag map among all devices
@@ -137,6 +151,12 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag)
  */
 static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 {
+	/*
+	 * We always have a shared tag map around when using blk-mq.
+	 */
+	if (shost_use_blk_mq(shost))
+		return 0;
+
 	/*
 	 * If the shared tag map isn't already initialized, do it now.
 	 * This saves callers from having to check ->bqt when setting up
@@ -165,6 +185,8 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost,
 	struct request *req;
 
 	if (tag != SCSI_NO_TAG) {
+		if (shost_use_blk_mq(shost))
+			return scsi_mq_find_tag(shost, 0, tag);
 		req = blk_map_queue_find_tag(shost->bqt, tag);
 		return req ? (struct scsi_cmnd *)req->special : NULL;
 	}
-- 
cgit v1.2.3