From: Salyzyn, Mark Date: Tue, 8 Jan 2008 21:26:43 +0000 (-0800) Subject: [SCSI] aacraid: OS panic after Adapter panic (hardening). X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=b6ef70f33ca2a3084b4fea12414550724a9114dc;p=linux-beck.git [SCSI] aacraid: OS panic after Adapter panic (hardening). In experiments in the lab we managed to trigger an Adapter firmware panic (BlinkLED) coincidentally while several pass-through ioctl commands from the management software were outstanding, exposing a bug that is only present on a class of RAID Adapters that require a hardware reset rather than a commanded reset. The net result was an attempt to time out the management software command as if it came from the SCSI layer, resulting in an OS panic. On Adapters that use a commanded reset, management commands are correctly returned as failed by the Adapter. The adapter firmware panic that resulted in this condition was also resolved, and there were no adapters in the field with this specific firmware bug, so we do not expect any field reports. This is a rare or unlikely corner condition, and no reports have ever been forwarded from the field. 
Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 07def7261004..85b85ae198f4 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1073,6 +1073,7 @@ struct aac_dev (dev)->a_ops.adapter_comm(dev, comm) #define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001) +#define FIB_CONTEXT_FLAG (0x00000002) /* * Define the command values diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e82d89ccaf15..3a3017d8dc65 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -171,6 +171,7 @@ struct fib *aac_fib_alloc(struct aac_dev *dev) * each I/O */ fibptr->hw_fib_va->header.XferState = 0; + fibptr->flags = 0; fibptr->callback = NULL; fibptr->callback_data = NULL; @@ -402,6 +403,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, * will have a debug mode where the adapter can notify the host * it had a problem and the host can log that fact. */ + fibptr->flags = 0; if (wait && !reply) { return -EINVAL; } else if (!wait && reply) { @@ -450,10 +452,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, if (!wait) { fibptr->callback = callback; fibptr->callback_data = callback_data; + fibptr->flags = FIB_CONTEXT_FLAG; } fibptr->done = 0; - fibptr->flags = 0; FIB_COUNTER_INCREMENT(aac_config.FibsSent); diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index 4726ab666c52..d1163ded132b 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -120,6 +120,7 @@ unsigned int aac_response_normal(struct aac_queue * q) * NOTE: we cannot touch the fib after this * call, because it may have been deallocated. 
*/ + fib->flags = 0; fib->callback(fib->callback_data, fib); } else { unsigned long flagv; @@ -229,7 +230,7 @@ unsigned int aac_command_normal(struct aac_queue *q) * all QE there are and wake up all the waiters before exiting. */ -unsigned int aac_intr_normal(struct aac_dev *dev, u32 index) +unsigned int aac_intr_normal(struct aac_dev * dev, u32 index) { dprintk((KERN_INFO "aac_intr_normal(%p,%x)\n", dev, index)); if ((index & 0x00000002L)) { @@ -313,6 +314,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index) * NOTE: we cannot touch the fib after this * call, because it may have been deallocated. */ + fib->flags = 0; fib->callback(fib->callback_data, fib); } else { unsigned long flagv; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 742e1a8edc60..6a553ea730db 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -536,17 +536,33 @@ static int aac_eh_abort(struct scsi_cmnd* cmd) break; case INQUIRY: case READ_CAPACITY: - case TEST_UNIT_READY: /* Mark associated FIB to not complete, eh handler does this */ for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) { struct fib * fib = &aac->fibs[count]; if (fib->hw_fib_va->header.XferState && + (fib->flags & FIB_CONTEXT_FLAG) && (fib->callback_data == cmd)) { fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; ret = SUCCESS; } } + break; + case TEST_UNIT_READY: + /* Mark associated FIB to not complete, eh handler does this */ + for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) { + struct scsi_cmnd * command; + struct fib * fib = &aac->fibs[count]; + if ((fib->hw_fib_va->header.XferState & cpu_to_le32(Async | NoResponseExpected)) && + (fib->flags & FIB_CONTEXT_FLAG) && + ((command = fib->callback_data)) && + (command->device == cmd->device)) { + fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; + command->SCp.phase = AAC_OWNER_ERROR_HANDLER; + if (command == cmd) + ret = SUCCESS; + } + } } return 
ret; } @@ -569,6 +585,7 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) { struct fib * fib = &aac->fibs[count]; if (fib->hw_fib_va->header.XferState && + (fib->flags & FIB_CONTEXT_FLAG) && (fib->callback_data == cmd)) { fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;