From 03d4433721880bf1972c924b168e4e1dd3c59d53 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Thu, 15 Mar 2007 10:27:45 -0700 Subject: [SCSI] aacraid: Improved error handling Received from Mark Salyzyn, This set of fixes improve error handling stability of the driver. A popular manifestation of the problems is an NULL pointer reference in the interrupt handler when referencing portions of the scsi command context, or in the scsi_done handling when an offlined device is referenced. The aacraid driver currently does not get notification of orphaned command completions due to devices going offline. The driver also fails to handle the commands that are finished by the error handler, and thus can complete again later at the hands of the adapter causing situations of completion of an invalid scsi command context. Test Unit Ready calls abort assuming that the abort was successful, but are not, and thus when the interrupt from the adapter occurs, they reference invalid command contexts. We add in a TIMED_OUT flag to inform the aacraid FIB context that the interrupt service should merely release the driver resources and not complete the command up. We take advantage of this with the abort handler as well for select abortable commands. And we detect and react if a command that can not be aborted is currently still outstanding to the controller when reissued by the retry mechanism. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'drivers/scsi/aacraid/aachba.c') diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 3d21d7dd2e5f..0c4e27eb6520 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -5,7 +5,7 @@ * based on the old aacraid driver that is.. * Adaptec aacraid device driver for Linux. * - * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) + * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -172,6 +172,30 @@ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. int expose_physicals = -1; module_param(expose_physicals, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); + + +static inline int aac_valid_context(struct scsi_cmnd *scsicmd, + struct fib *fibptr) { + struct scsi_device *device; + + if (unlikely(!scsicmd || !scsicmd->scsi_done )) { + dprintk((KERN_WARNING "aac_valid_context: scsi command corrupt\n")) +; + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + return 0; + } + scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + device = scsicmd->device; + if (unlikely(!device || !scsi_device_online(device))) { + dprintk((KERN_WARNING "aac_valid_context: scsi device corrupt\n")); + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + return 0; + } + return 1; +} + /** * aac_get_config_status - check the adapter configuration * @common: adapter to query @@ -342,6 +366,9 @@ static void get_container_name_callback(void *context, struct fib * fibptr) scsicmd = (struct scsi_cmnd *) context; scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + if (!aac_valid_context(scsicmd, fibptr)) + return; + dprintk((KERN_DEBUG "get_container_name_callback[cpu %d]: t = %ld.\n", smp_processor_id(), jiffies)); BUG_ON(fibptr == NULL); @@ -431,9 +458,14 @@ static int aac_probe_container_callback2(struct scsi_cmnd * scsicmd) static int _aac_probe_container2(void * context, struct fib * fibptr) { - struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context; - struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; + struct fsa_dev_info *fsa_dev_ptr; int (*callback)(struct scsi_cmnd *); + struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context; + + if (!aac_valid_context(scsicmd, fibptr)) + return 0; + + fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; scsicmd->SCp.Status = 0; if (fsa_dev_ptr) { @@ -477,6 +509,9 @@ static int _aac_probe_container1(void * context, struct fib * fibptr) scsicmd = (struct scsi_cmnd *) context; scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + if (!aac_valid_context(scsicmd, fibptr)) + return 0; + aac_fib_init(fibptr); dinfo = (struct aac_query_mount *)fib_data(fibptr); @@ -1287,6 +1322,9 @@ static void io_callback(void *context, struct fib * fibptr) scsicmd = (struct scsi_cmnd *) context; scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + if (!aac_valid_context(scsicmd, fibptr)) + return; + dev = (struct aac_dev *)scsicmd->device->host->hostdata; cid = scmd_id(scsicmd); @@ -1534,6 +1572,9 @@ static void synchronize_callback(void *context, struct fib *fibptr) cmd = context; cmd->SCp.phase = AAC_OWNER_MIDLEVEL; + if (!aac_valid_context(cmd, fibptr)) + return; + dprintk((KERN_DEBUG "synchronize_callback[cpu %d]: t = %ld.\n", smp_processor_id(), jiffies)); BUG_ON(fibptr == NULL); @@ -2086,6 +2127,10 @@ static void aac_srb_callback(void *context, struct fib * fibptr) scsicmd = (struct scsi_cmnd *) context; scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + + if (!aac_valid_context(scsicmd, fibptr)) + return; + dev = (struct aac_dev *)scsicmd->device->host->hostdata; BUG_ON(fibptr == NULL); -- cgit v1.2.3