scsi: core: Avoid leaving shost->last_reset with stale value if EH does not run

The changes to issue the abort from the scmd->abort_work instead of the EH thread introduced a problem if eh_deadline is used. If aborting the command(s) is successful, and there are never any scmds added to the shost->eh_cmd_q, there is no code path which will reset the ->last_reset value back to zero. The effect of this is that after a successful abort with no EH thread activity, a subsequent timeout, perhaps a long time later, might immediately be considered past a user-set eh_deadline time, and the host will be reset with no attempt at recovery. Fix this by resetting ->last_reset back to zero in scmd_eh_abort_handler() if it is determined that the EH thread will not run to do this. Thanks to Gopinath Marappan for investigating this problem. Link: https://lore.kernel.org/r/20211029194311.17504-2-emilne@redhat.com Fixes: e494f6a72839 ("[SCSI] improved eh timeout handler") Cc: stable@vger.kernel.org Signed-off-by: Ewan D. Milne <emilne@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
author: Ewan D. Milne <emilne@redhat.com> 2021-10-29 21:43:10 +0200
committer: Martin K. Petersen <martin.petersen@oracle.com> 2021-11-04 02:33:50 +0100
commit: 5ae17501bc62a49b0b193dcce003f16375f16654 (patch)
tree: 62c17bc18a2c76cae1b691dae9c9b9a41bb0783c /drivers/scsi/scsi_error.c
parent: scsi: bsg: Fix errno when scsi_bsg_register_queue() fails (diff)
download: linux-5ae17501bc62a49b0b193dcce003f16375f16654.tar.xz
linux-5ae17501bc62a49b0b193dcce003f16375f16654.zip
1 files changed, 25 insertions, 0 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 3de03925550e..bdf782d9cb86 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -133,6 +133,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd)
 	return true;
 }
 
+static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	list_del_init(&scmd->eh_entry);
+	/*
+	 * If the abort succeeds, and there is no further
+	 * EH action, clear the ->last_reset time.
+	 */
+	if (list_empty(&shost->eh_abort_list) &&
+	    list_empty(&shost->eh_cmd_q))
+		if (shost->eh_deadline != -1)
+			shost->last_reset = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -150,6 +167,7 @@ scmd_eh_abort_handler(struct work_struct *work)
 		container_of(work, struct scsi_cmnd, abort_work.work);
 	struct scsi_device *sdev = scmd->device;
 	enum scsi_disposition rtn;
+	unsigned long flags;
 
 	if (scsi_host_eh_past_deadline(sdev->host)) {
 		SCSI_LOG_ERROR_RECOVERY(3,
@@ -173,12 +191,14 @@ scmd_eh_abort_handler(struct work_struct *work)
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
+				scsi_eh_complete_abort(scmd, sdev->host);
 				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
 				return;
 			} else {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "finish aborted command\n"));
+				scsi_eh_complete_abort(scmd, sdev->host);
 				scsi_finish_command(scmd);
 				return;
 			}
@@ -191,6 +211,9 @@ scmd_eh_abort_handler(struct work_struct *work)
 		}
 	}
 
+	spin_lock_irqsave(sdev->host->host_lock, flags);
+	list_del_init(&scmd->eh_entry);
+	spin_unlock_irqrestore(sdev->host->host_lock, flags);
 	scsi_eh_scmd_add(scmd);
 }
 
@@ -221,6 +244,8 @@ scsi_abort_command(struct scsi_cmnd *scmd)
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->eh_deadline != -1 && !shost->last_reset)
 		shost->last_reset = jiffies;
+	BUG_ON(!list_empty(&scmd->eh_entry));
+	list_add_tail(&scmd->eh_entry, &shost->eh_abort_list);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
author	Ewan D. Milne <emilne@redhat.com>	2021-10-29 21:43:10 +0200
committer	Martin K. Petersen <martin.petersen@oracle.com>	2021-11-04 02:33:50 +0100
commit	5ae17501bc62a49b0b193dcce003f16375f16654 (patch)
tree	62c17bc18a2c76cae1b691dae9c9b9a41bb0783c /drivers/scsi/scsi_error.c
parent	scsi: bsg: Fix errno when scsi_bsg_register_queue() fails (diff)
download	linux-5ae17501bc62a49b0b193dcce003f16375f16654.tar.xz linux-5ae17501bc62a49b0b193dcce003f16375f16654.zip