if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
atomic_set(&ddb_entry->state, DDB_STATE_DEAD);
- DEBUG2(printk("scsi%ld: %s: ddb [%d] port down retry count "
+ DEBUG2(printk("scsi%ld: %s: ddb [%d] session recovery timeout "
"of (%d) secs exhausted, marking device DEAD.\n",
ha->host_no, __func__, ddb_entry->fw_ddb_index,
- ha->port_down_retry_count));
-
- qla4xxx_wake_dpc(ha);
+ QL4_SESS_RECOVERY_TMO));
}
}
{
int err;
- ddb_entry->sess->recovery_tmo = ddb_entry->ha->port_down_retry_count;
+ ddb_entry->sess->recovery_tmo = QL4_SESS_RECOVERY_TMO;
+
err = iscsi_add_session(ddb_entry->sess, ddb_entry->fw_ddb_index);
if (err) {
DEBUG2(printk(KERN_ERR "Could not add session.\n"));
struct srb *srb;
int rval;
+ if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+ if (test_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags))
+ cmd->result = DID_NO_CONNECT << 16;
+ else
+ cmd->result = DID_REQUEUE << 16;
+ goto qc_fail_command;
+ }
+
if (!sess) {
cmd->result = DID_IMM_RETRY << 16;
goto qc_fail_command;
if (ha->nx_pcibase)
iounmap(
(struct device_reg_82xx __iomem *)ha->nx_pcibase);
-
- if (ha->nx_db_wr_ptr)
- iounmap(
- (struct device_reg_82xx __iomem *)ha->nx_db_wr_ptr);
} else if (ha->reg)
iounmap(ha->reg);
pci_release_regions(ha->pdev);
uint32_t fw_heartbeat_counter, halt_status;
fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
+ /* If PEG_ALIVE_COUNTER is 0xffffffff, AER/EEH is in progress, ignore */
+ if (fw_heartbeat_counter == 0xffffffff) {
+ DEBUG2(printk(KERN_WARNING "scsi%ld: %s: Device in frozen "
+ "state, QLA82XX_PEG_ALIVE_COUNTER is 0xffffffff\n",
+ ha->host_no, __func__));
+ return;
+ }
if (ha->fw_heartbeat_counter == fw_heartbeat_counter) {
ha->seconds_since_last_heartbeat++;
ha->seconds_since_last_heartbeat = 0;
halt_status = qla4_8xxx_rd_32(ha,
QLA82XX_PEG_HALT_STATUS1);
+
/* Since we cannot change dev_state in interrupt
* context, set appropriate DPC flag then wakeup
* DPC */
set_bit(DPC_RESET_HA, &ha->dpc_flags);
}
qla4xxx_wake_dpc(ha);
+ qla4xxx_mailbox_premature_completion(ha);
}
- }
+ } else
+ ha->seconds_since_last_heartbeat = 0;
+
ha->fw_heartbeat_counter = fw_heartbeat_counter;
}
ha->host_no, __func__);
set_bit(DPC_RESET_HA, &ha->dpc_flags);
qla4xxx_wake_dpc(ha);
+ qla4xxx_mailbox_premature_completion(ha);
} else if (dev_state == QLA82XX_DEV_NEED_QUIESCENT &&
!test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags)) {
printk("scsi%ld: %s: HW State: NEED QUIES!\n",
{
struct ddb_entry *ddb_entry, *dtemp;
int start_dpc = 0;
+ uint16_t w;
+
+ /* If we are in the middle of AER/EEH processing
+ * skip any processing and reschedule the timer
+ */
+ if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+ mod_timer(&ha->timer, jiffies + HZ);
+ return;
+ }
+
+ /* Hardware read to trigger an EEH error during mailbox waits. */
+ if (!pci_channel_offline(ha->pdev))
+ pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) {
DEBUG2(ql4_printk(KERN_INFO, ha, "%s exited. HBA GOING AWAY\n",
/* Find a command that hasn't completed. */
for (index = 0; index < ha->host->can_queue; index++) {
cmd = scsi_host_find_tag(ha->host, index);
- if (cmd != NULL)
+ /*
+ * We cannot just check if the index is valid,
+ * becase if we are run from the scsi eh, then
+ * the scsi/block layer is going to prevent
+ * the tag from being released.
+ */
+ if (cmd != NULL && CMD_SP(cmd))
break;
}
spin_unlock_irqrestore(&ha->hardware_lock, flags);
{
uint32_t max_wait_time;
unsigned long flags = 0;
- int status = QLA_ERROR;
+ int status;
uint32_t ctrl_status;
- qla4xxx_hw_reset(ha);
+ status = qla4xxx_hw_reset(ha);
+ if (status != QLA_SUCCESS)
+ return status;
+ status = QLA_ERROR;
/* Wait until the Network Reset Intr bit is cleared */
max_wait_time = RESET_INTR_TOV;
do {
ha->host_no, __func__));
status = ha->isp_ops->reset_firmware(ha);
if (status == QLA_SUCCESS) {
- qla4xxx_cmd_wait(ha);
+ if (!test_bit(AF_FW_RECOVERY, &ha->flags))
+ qla4xxx_cmd_wait(ha);
ha->isp_ops->disable_intrs(ha);
qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
* or if stop_firmware fails for ISP-82xx.
* This is the default case for ISP-4xxx */
if (!is_qla8022(ha) || reset_chip) {
- qla4xxx_cmd_wait(ha);
+ if (!test_bit(AF_FW_RECOVERY, &ha->flags))
+ qla4xxx_cmd_wait(ha);
qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
DEBUG2(ql4_printk(KERN_INFO, ha,
/* Initialization not yet finished. Don't do anything yet. */
if (!test_bit(AF_INIT_DONE, &ha->flags))
- return;
+ goto do_dpc_exit;
+
+ if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+ DEBUG2(printk(KERN_INFO "scsi%ld: %s: flags = %lx\n",
+ ha->host_no, __func__, ha->flags));
+ goto do_dpc_exit;
+ }
/* HBA is in the process of being permanently disabled.
* Don't process anything */
}
}
}
+
+do_dpc_exit:
clear_bit(AF_DPC_SCHEDULED, &ha->flags);
}
db_base = pci_resource_start(pdev, 4); /* doorbell is on bar 4 */
db_len = pci_resource_len(pdev, 4);
- /* mapping of doorbell write pointer */
- ha->nx_db_wr_ptr = (unsigned long)ioremap(db_base +
- (ha->pdev->devfn << 12), 4);
- if (!ha->nx_db_wr_ptr) {
- printk(KERN_ERR
- "cannot remap MMIO doorbell-write (%s), aborting\n",
- pci_name(pdev));
- goto iospace_error_exit;
- }
- /* mapping of doorbell read pointer */
- ha->nx_db_rd_ptr = (uint8_t *) ha->nx_pcibase + (512 * 1024) +
- (ha->pdev->devfn * 8);
- if (!ha->nx_db_rd_ptr)
- printk(KERN_ERR
- "cannot remap MMIO doorbell-read (%s), aborting\n",
- pci_name(pdev));
- return 0;
+ ha->nx_db_wr_ptr = (ha->pdev->devfn == 4 ? QLA82XX_CAM_RAM_DB1 :
+ QLA82XX_CAM_RAM_DB2);
+ return 0;
iospace_error_exit:
return -ENOMEM;
}
ha->host = host;
ha->host_no = host->host_no;
+ pci_enable_pcie_error_reporting(pdev);
+
/* Setup Runtime configurable options */
if (is_qla8022(ha)) {
ha->isp_ops = &qla4_8xxx_isp_ops;
ha->isp_ops = &qla4xxx_isp_ops;
}
+ /* Set EEH reset type to fundamental if required by hba */
+ if (is_qla8022(ha))
+ pdev->needs_freset = 1;
+
/* Configure PCI I/O space. */
ret = ha->isp_ops->iospace_config(ha);
if (ret)
}
}
+ pci_save_state(ha->pdev);
ha->isp_ops->enable_intrs(ha);
/* Start timer thread. */
qla4xxx_free_adapter(ha);
probe_failed_ioconfig:
+ pci_disable_pcie_error_reporting(pdev);
scsi_host_put(ha->host);
probe_disable_device:
scsi_host_put(ha->host);
+ pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
int done = 0;
struct srb *rp;
uint32_t max_wait_time = EH_WAIT_CMD_TOV;
+ int ret = SUCCESS;
+
+ /* Dont wait on command if PCI error is being handled
+ * by PCI AER driver
+ */
+ if (unlikely(pci_channel_offline(ha->pdev)) ||
+ (test_bit(AF_EEH_BUSY, &ha->flags))) {
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: Return from %s\n",
+ ha->host_no, __func__);
+ return ret;
+ }
do {
/* Checking to see if its returned to OS */
{
unsigned long wait_online;
- wait_online = jiffies + (30 * HZ);
+ wait_online = jiffies + (HBA_ONLINE_TOV * HZ);
while (time_before(jiffies, wait_online)) {
if (adapter_up(ha))
return QLA_SUCCESS;
- else if (ha->retry_reset_ha_cnt == 0)
- return QLA_ERROR;
msleep(2000);
}
unsigned int id = cmd->device->id;
unsigned int lun = cmd->device->lun;
unsigned long serial = cmd->serial_number;
+ unsigned long flags;
struct srb *srb = NULL;
int ret = SUCCESS;
int wait = 0;
"scsi%ld:%d:%d: Abort command issued cmd=%p, pid=%ld\n",
ha->host_no, id, lun, cmd, serial);
+ spin_lock_irqsave(&ha->hardware_lock, flags);
srb = (struct srb *) CMD_SP(cmd);
-
- if (!srb)
+ if (!srb) {
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
return SUCCESS;
-
+ }
kref_get(&srb->srb_ref);
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
if (qla4xxx_abort_task(ha, srb) != QLA_SUCCESS) {
DEBUG3(printk("scsi%ld:%d:%d: Abort_task mbx failed.\n",
return return_status;
}
+/* PCI AER driver recovers from all correctable errors w/o
+ * driver intervention. For uncorrectable errors PCI AER
+ * driver calls the following device driver's callbacks
+ *
+ * - Fatal Errors - link_reset
+ * - Non-Fatal Errors - driver's pci_error_detected() which
+ * returns CAN_RECOVER, NEED_RESET or DISCONNECT.
+ *
+ * PCI AER driver calls
+ * CAN_RECOVER - driver's pci_mmio_enabled(), mmio_enabled
+ * returns RECOVERED or NEED_RESET if fw_hung
+ * NEED_RESET - driver's slot_reset()
+ * DISCONNECT - device is dead & cannot recover
+ * RECOVERED - driver's pci_resume()
+ */
+static pci_ers_result_t
+qla4xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: error detected:state %x\n",
+ ha->host_no, __func__, state);
+
+ if (!is_aer_supported(ha))
+ return PCI_ERS_RESULT_NONE;
+
+ switch (state) {
+ case pci_channel_io_normal:
+ clear_bit(AF_EEH_BUSY, &ha->flags);
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ set_bit(AF_EEH_BUSY, &ha->flags);
+ qla4xxx_mailbox_premature_completion(ha);
+ qla4xxx_free_irqs(ha);
+ pci_disable_device(pdev);
+ /* Return back all IOs */
+ qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ set_bit(AF_EEH_BUSY, &ha->flags);
+ set_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags);
+ qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * qla4xxx_pci_mmio_enabled() gets called if
+ * qla4xxx_pci_error_detected() returns PCI_ERS_RESULT_CAN_RECOVER
+ * and read/write to the device still works.
+ **/
+static pci_ers_result_t
+qla4xxx_pci_mmio_enabled(struct pci_dev *pdev)
+{
+ struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+
+ if (!is_aer_supported(ha))
+ return PCI_ERS_RESULT_NONE;
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
+{
+ uint32_t rval = QLA_ERROR;
+ uint32_t ret = 0;
+ int fn;
+ struct pci_dev *other_pdev = NULL;
+
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: In %s\n", ha->host_no, __func__);
+
+ set_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
+
+ if (test_bit(AF_ONLINE, &ha->flags)) {
+ clear_bit(AF_ONLINE, &ha->flags);
+ qla4xxx_mark_all_devices_missing(ha);
+ qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
+ }
+
+ fn = PCI_FUNC(ha->pdev->devfn);
+ while (fn > 0) {
+ fn--;
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Finding PCI device at "
+ "func %x\n", ha->host_no, __func__, fn);
+ /* Get the pci device given the domain, bus,
+ * slot/function number */
+ other_pdev =
+ pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
+ ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
+ fn));
+
+ if (!other_pdev)
+ continue;
+
+ if (atomic_read(&other_pdev->enable_cnt)) {
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Found PCI "
+ "func in enabled state%x\n", ha->host_no,
+ __func__, fn);
+ pci_dev_put(other_pdev);
+ break;
+ }
+ pci_dev_put(other_pdev);
+ }
+
+ /* The first function on the card, the reset owner will
+ * start & initialize the firmware. The other functions
+ * on the card will reset the firmware context
+ */
+ if (!fn) {
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn being reset "
+ "0x%x is the owner\n", ha->host_no, __func__,
+ ha->pdev->devfn);
+
+ qla4_8xxx_idc_lock(ha);
+ qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+ QLA82XX_DEV_COLD);
+
+ qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_IDC_VERSION,
+ QLA82XX_IDC_VERSION);
+
+ qla4_8xxx_idc_unlock(ha);
+ clear_bit(AF_FW_RECOVERY, &ha->flags);
+ rval = qla4xxx_initialize_adapter(ha, PRESERVE_DDB_LIST);
+ qla4_8xxx_idc_lock(ha);
+
+ if (rval != QLA_SUCCESS) {
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
+ "FAILED\n", ha->host_no, __func__);
+ qla4_8xxx_clear_drv_active(ha);
+ qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+ QLA82XX_DEV_FAILED);
+ } else {
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
+ "READY\n", ha->host_no, __func__);
+ qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+ QLA82XX_DEV_READY);
+ /* Clear driver state register */
+ qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_STATE, 0);
+ qla4_8xxx_set_drv_active(ha);
+ ret = qla4xxx_request_irqs(ha);
+ if (ret) {
+ ql4_printk(KERN_WARNING, ha, "Failed to "
+ "reserve interrupt %d already in use.\n",
+ ha->pdev->irq);
+ rval = QLA_ERROR;
+ } else {
+ ha->isp_ops->enable_intrs(ha);
+ rval = QLA_SUCCESS;
+ }
+ }
+ qla4_8xxx_idc_unlock(ha);
+ } else {
+ ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not "
+ "the reset owner\n", ha->host_no, __func__,
+ ha->pdev->devfn);
+ if ((qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
+ QLA82XX_DEV_READY)) {
+ clear_bit(AF_FW_RECOVERY, &ha->flags);
+ rval = qla4xxx_initialize_adapter(ha,
+ PRESERVE_DDB_LIST);
+ if (rval == QLA_SUCCESS) {
+ ret = qla4xxx_request_irqs(ha);
+ if (ret) {
+ ql4_printk(KERN_WARNING, ha, "Failed to"
+ " reserve interrupt %d already in"
+ " use.\n", ha->pdev->irq);
+ rval = QLA_ERROR;
+ } else {
+ ha->isp_ops->enable_intrs(ha);
+ rval = QLA_SUCCESS;
+ }
+ }
+ qla4_8xxx_idc_lock(ha);
+ qla4_8xxx_set_drv_active(ha);
+ qla4_8xxx_idc_unlock(ha);
+ }
+ }
+ clear_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
+ return rval;
+}
+
+static pci_ers_result_t
+qla4xxx_pci_slot_reset(struct pci_dev *pdev)
+{
+ pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT;
+ struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+ int rc;
+
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: slot_reset\n",
+ ha->host_no, __func__);
+
+ if (!is_aer_supported(ha))
+ return PCI_ERS_RESULT_NONE;
+
+ /* Restore the saved state of PCIe device -
+ * BAR registers, PCI Config space, PCIX, MSI,
+ * IOV states
+ */
+ pci_restore_state(pdev);
+
+ /* pci_restore_state() clears the saved_state flag of the device
+ * save restored state which resets saved_state flag
+ */
+ pci_save_state(pdev);
+
+ /* Initialize device or resume if in suspended state */
+ rc = pci_enable_device(pdev);
+ if (rc) {
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Cant re-enable "
+ "device after reset\n", ha->host_no, __func__);
+ goto exit_slot_reset;
+ }
+
+ ha->isp_ops->disable_intrs(ha);
+
+ if (is_qla8022(ha)) {
+ if (qla4_8xxx_error_recovery(ha) == QLA_SUCCESS) {
+ ret = PCI_ERS_RESULT_RECOVERED;
+ goto exit_slot_reset;
+ } else
+ goto exit_slot_reset;
+ }
+
+exit_slot_reset:
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Return=%x\n"
+ "device after reset\n", ha->host_no, __func__, ret);
+ return ret;
+}
+
+static void
+qla4xxx_pci_resume(struct pci_dev *pdev)
+{
+ struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+ int ret;
+
+ ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: pci_resume\n",
+ ha->host_no, __func__);
+
+ ret = qla4xxx_wait_for_hba_online(ha);
+ if (ret != QLA_SUCCESS) {
+ ql4_printk(KERN_ERR, ha, "scsi%ld: %s: the device failed to "
+ "resume I/O from slot/link_reset\n", ha->host_no,
+ __func__);
+ }
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+ clear_bit(AF_EEH_BUSY, &ha->flags);
+}
+
+static struct pci_error_handlers qla4xxx_err_handler = {
+ .error_detected = qla4xxx_pci_error_detected,
+ .mmio_enabled = qla4xxx_pci_mmio_enabled,
+ .slot_reset = qla4xxx_pci_slot_reset,
+ .resume = qla4xxx_pci_resume,
+};
+
static struct pci_device_id qla4xxx_pci_tbl[] = {
{
.vendor = PCI_VENDOR_ID_QLOGIC,
.id_table = qla4xxx_pci_tbl,
.probe = qla4xxx_probe_adapter,
.remove = qla4xxx_remove_adapter,
+ .err_handler = &qla4xxx_err_handler,
};
static int __init qla4xxx_module_init(void)