]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - drivers/scsi/qla4xxx/ql4_os.c
[SCSI] qla4xxx: Added AER support for ISP82xx
[mv-sheeva.git] / drivers / scsi / qla4xxx / ql4_os.c
index 2bb362c6e6340d4c79ffed996f84c04f768ee42b..370d40ff15296ff8c1c19f3294a011ba94450b75 100644 (file)
@@ -475,6 +475,14 @@ static int qla4xxx_queuecommand(struct scsi_cmnd *cmd,
        struct srb *srb;
        int rval;
 
+       if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+               if (test_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags))
+                       cmd->result = DID_NO_CONNECT << 16;
+               else
+                       cmd->result = DID_REQUEUE << 16;
+               goto qc_fail_command;
+       }
+
        if (!sess) {
                cmd->result = DID_IMM_RETRY << 16;
                goto qc_fail_command;
@@ -655,6 +663,13 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha)
        uint32_t fw_heartbeat_counter, halt_status;
 
        fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
+       /* If PEG_ALIVE_COUNTER is 0xffffffff, AER/EEH is in progress, ignore */
+       if (fw_heartbeat_counter == 0xffffffff) {
+               DEBUG2(printk(KERN_WARNING "scsi%ld: %s: Device in frozen "
+                   "state, QLA82XX_PEG_ALIVE_COUNTER is 0xffffffff\n",
+                   ha->host_no, __func__));
+               return;
+       }
 
        if (ha->fw_heartbeat_counter == fw_heartbeat_counter) {
                ha->seconds_since_last_heartbeat++;
@@ -723,6 +738,19 @@ static void qla4xxx_timer(struct scsi_qla_host *ha)
 {
        struct ddb_entry *ddb_entry, *dtemp;
        int start_dpc = 0;
+       uint16_t w;
+
+       /* If we are in the middle of AER/EEH processing
+        * skip any processing and reschedule the timer
+        */
+       if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+               mod_timer(&ha->timer, jiffies + HZ);
+               return;
+       }
+
+       /* Hardware read to trigger an EEH error during mailbox waits. */
+       if (!pci_channel_offline(ha->pdev))
+               pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
 
        if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) {
                DEBUG2(ql4_printk(KERN_INFO, ha, "%s exited. HBA GOING AWAY\n",
@@ -1213,6 +1241,12 @@ static void qla4xxx_do_dpc(struct work_struct *work)
        if (!test_bit(AF_INIT_DONE, &ha->flags))
                goto do_dpc_exit;
 
+       if (test_bit(AF_EEH_BUSY, &ha->flags)) {
+               DEBUG2(printk(KERN_INFO "scsi%ld: %s: flags = %lx\n",
+                   ha->host_no, __func__, ha->flags));
+               goto do_dpc_exit;
+       }
+
        /* HBA is in the process of being permanently disabled.
         * Don't process anything */
        if (test_bit(AF_HBA_GOING_AWAY, &ha->flags))
@@ -1618,6 +1652,8 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
        ha->host = host;
        ha->host_no = host->host_no;
 
+       pci_enable_pcie_error_reporting(pdev);
+
        /* Setup Runtime configurable options */
        if (is_qla8022(ha)) {
                ha->isp_ops = &qla4_8xxx_isp_ops;
@@ -1636,6 +1672,10 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
                ha->isp_ops = &qla4xxx_isp_ops;
        }
 
+       /* Set EEH reset type to fundamental if required by hba */
+       if (is_qla8022(ha))
+               pdev->needs_freset = 1;
+
        /* Configure PCI I/O space. */
        ret = ha->isp_ops->iospace_config(ha);
        if (ret)
@@ -1732,6 +1772,7 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
                }
        }
 
+       pci_save_state(ha->pdev);
        ha->isp_ops->enable_intrs(ha);
 
        /* Start timer thread. */
@@ -1758,6 +1799,7 @@ probe_failed:
        qla4xxx_free_adapter(ha);
 
 probe_failed_ioconfig:
+       pci_disable_pcie_error_reporting(pdev);
        scsi_host_put(ha->host);
 
 probe_disable_device:
@@ -1787,6 +1829,7 @@ static void __devexit qla4xxx_remove_adapter(struct pci_dev *pdev)
 
        scsi_host_put(ha->host);
 
+       pci_disable_pcie_error_reporting(pdev);
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
 }
@@ -1883,6 +1926,17 @@ static int qla4xxx_eh_wait_on_command(struct scsi_qla_host *ha,
        int done = 0;
        struct srb *rp;
        uint32_t max_wait_time = EH_WAIT_CMD_TOV;
+       int ret = SUCCESS;
+
+       /* Dont wait on command if PCI error is being handled
+        * by PCI AER driver
+        */
+       if (unlikely(pci_channel_offline(ha->pdev)) ||
+           (test_bit(AF_EEH_BUSY, &ha->flags))) {
+               ql4_printk(KERN_WARNING, ha, "scsi%ld: Return from %s\n",
+                   ha->host_no, __func__);
+               return ret;
+       }
 
        do {
                /* Checking to see if its returned to OS */
@@ -2178,6 +2232,252 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
        return return_status;
 }
 
+/* PCI AER driver recovers from all correctable errors w/o
+ * driver intervention. For uncorrectable errors PCI AER
+ * driver calls the following device driver's callbacks
+ *
+ * - Fatal Errors - link_reset
+ * - Non-Fatal Errors - driver's pci_error_detected() which
+ * returns CAN_RECOVER, NEED_RESET or DISCONNECT.
+ *
+ * PCI AER driver calls
+ * CAN_RECOVER - driver's pci_mmio_enabled(), mmio_enabled
+ *               returns RECOVERED or NEED_RESET if fw_hung
+ * NEED_RESET - driver's slot_reset()
+ * DISCONNECT - device is dead & cannot recover
+ * RECOVERED - driver's pci_resume()
+ */
+static pci_ers_result_t
+qla4xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+       struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+
+       ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: error detected:state %x\n",
+           ha->host_no, __func__, state);
+
+       if (!is_aer_supported(ha))
+               return PCI_ERS_RESULT_NONE;
+
+       switch (state) {
+       case pci_channel_io_normal:
+               clear_bit(AF_EEH_BUSY, &ha->flags);
+               return PCI_ERS_RESULT_CAN_RECOVER;
+       case pci_channel_io_frozen:
+               set_bit(AF_EEH_BUSY, &ha->flags);
+               qla4xxx_mailbox_premature_completion(ha);
+               qla4xxx_free_irqs(ha);
+               pci_disable_device(pdev);
+               return PCI_ERS_RESULT_NEED_RESET;
+       case pci_channel_io_perm_failure:
+               set_bit(AF_EEH_BUSY, &ha->flags);
+               set_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags);
+               qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * qla4xxx_pci_mmio_enabled() gets called if
+ * qla4xxx_pci_error_detected() returns PCI_ERS_RESULT_CAN_RECOVER
+ * and read/write to the device still works.
+ **/
+static pci_ers_result_t
+qla4xxx_pci_mmio_enabled(struct pci_dev *pdev)
+{
+       struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+
+       if (!is_aer_supported(ha))
+               return PCI_ERS_RESULT_NONE;
+
+       if (test_bit(AF_FW_RECOVERY, &ha->flags)) {
+               ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: firmware hang  -- "
+                   "mmio_enabled\n", ha->host_no, __func__);
+               return PCI_ERS_RESULT_NEED_RESET;
+       } else
+               return PCI_ERS_RESULT_RECOVERED;
+}
+
+uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
+{
+       uint32_t rval = QLA_ERROR;
+       int fn;
+       struct pci_dev *other_pdev = NULL;
+
+       ql4_printk(KERN_WARNING, ha, "scsi%ld: In %s\n", ha->host_no, __func__);
+
+       set_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
+
+       if (test_bit(AF_ONLINE, &ha->flags)) {
+               clear_bit(AF_ONLINE, &ha->flags);
+               qla4xxx_mark_all_devices_missing(ha);
+               qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
+               qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
+       }
+
+       fn = PCI_FUNC(ha->pdev->devfn);
+       while (fn > 0) {
+               fn--;
+               ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Finding PCI device at "
+                   "func %x\n", ha->host_no, __func__, fn);
+               /* Get the pci device given the domain, bus,
+                * slot/function number */
+               other_pdev =
+                   pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
+                   ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
+                   fn));
+
+               if (!other_pdev)
+                       continue;
+
+               if (atomic_read(&other_pdev->enable_cnt)) {
+                       ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Found PCI "
+                           "func in enabled state%x\n", ha->host_no,
+                           __func__, fn);
+                       pci_dev_put(other_pdev);
+                       break;
+               }
+               pci_dev_put(other_pdev);
+       }
+
+       /* The first function on the card, the reset owner will
+        * start & initialize the firmware. The other functions
+        * on the card will reset the firmware context
+        */
+       if (!fn) {
+               ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn being reset "
+                   "0x%x is the owner\n", ha->host_no, __func__,
+                   ha->pdev->devfn);
+
+               qla4_8xxx_idc_lock(ha);
+               qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+                   QLA82XX_DEV_COLD);
+
+               qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_IDC_VERSION,
+                   QLA82XX_IDC_VERSION);
+
+               qla4_8xxx_idc_unlock(ha);
+               clear_bit(AF_FW_RECOVERY, &ha->flags);
+               rval = qla4xxx_initialize_adapter(ha, PRESERVE_DDB_LIST);
+               qla4_8xxx_idc_lock(ha);
+
+               if (rval != QLA_SUCCESS) {
+                       ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
+                           "FAILED\n", ha->host_no, __func__);
+                       qla4_8xxx_clear_drv_active(ha);
+                       qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+                           QLA82XX_DEV_FAILED);
+               } else {
+                       ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
+                           "READY\n", ha->host_no, __func__);
+                       qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
+                           QLA82XX_DEV_READY);
+                       /* Clear driver state register */
+                       qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_STATE, 0);
+                       qla4_8xxx_set_drv_active(ha);
+                       ha->isp_ops->enable_intrs(ha);
+               }
+               qla4_8xxx_idc_unlock(ha);
+       } else {
+               ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not "
+                   "the reset owner\n", ha->host_no, __func__,
+                   ha->pdev->devfn);
+               if ((qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
+                   QLA82XX_DEV_READY)) {
+                       clear_bit(AF_FW_RECOVERY, &ha->flags);
+                       rval = qla4xxx_initialize_adapter(ha,
+                           PRESERVE_DDB_LIST);
+                       if (rval == QLA_SUCCESS)
+                               ha->isp_ops->enable_intrs(ha);
+                       qla4_8xxx_idc_lock(ha);
+                       qla4_8xxx_set_drv_active(ha);
+                       qla4_8xxx_idc_unlock(ha);
+               }
+       }
+       clear_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
+       return rval;
+}
+
+static pci_ers_result_t
+qla4xxx_pci_slot_reset(struct pci_dev *pdev)
+{
+       pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT;
+       struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+       int rc;
+
+       ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: slot_reset\n",
+           ha->host_no, __func__);
+
+       if (!is_aer_supported(ha))
+               return PCI_ERS_RESULT_NONE;
+
+       /* Restore the saved state of PCIe device -
+        * BAR registers, PCI Config space, PCIX, MSI,
+        * IOV states
+        */
+       pci_restore_state(pdev);
+
+       /* pci_restore_state() clears the saved_state flag of the device
+        * save restored state which resets saved_state flag
+        */
+       pci_save_state(pdev);
+
+       /* Initialize device or resume if in suspended state */
+       rc = pci_enable_device(pdev);
+       if (rc) {
+               ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Cant re-enable "
+                   "device after reset\n", ha->host_no, __func__);
+               goto exit_slot_reset;
+       }
+
+       ret = qla4xxx_request_irqs(ha);
+       if (ret) {
+               ql4_printk(KERN_WARNING, ha, "Failed to reserve interrupt %d"
+                   " already in use.\n", pdev->irq);
+               goto exit_slot_reset;
+       }
+
+       if (is_qla8022(ha)) {
+               if (qla4_8xxx_error_recovery(ha) == QLA_SUCCESS) {
+                       ret = PCI_ERS_RESULT_RECOVERED;
+                       goto exit_slot_reset;
+               } else
+                       goto exit_slot_reset;
+       }
+
+exit_slot_reset:
+       ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Return=%x\n"
+           "device after reset\n", ha->host_no, __func__, ret);
+       return ret;
+}
+
+static void
+qla4xxx_pci_resume(struct pci_dev *pdev)
+{
+       struct scsi_qla_host *ha = pci_get_drvdata(pdev);
+       int ret;
+
+       ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: pci_resume\n",
+           ha->host_no, __func__);
+
+       ret = qla4xxx_wait_for_hba_online(ha);
+       if (ret != QLA_SUCCESS) {
+               ql4_printk(KERN_ERR, ha, "scsi%ld: %s: the device failed to "
+                   "resume I/O from slot/link_reset\n", ha->host_no,
+                    __func__);
+       }
+
+       pci_cleanup_aer_uncorrect_error_status(pdev);
+       clear_bit(AF_EEH_BUSY, &ha->flags);
+}
+
+static struct pci_error_handlers qla4xxx_err_handler = {
+       .error_detected = qla4xxx_pci_error_detected,
+       .mmio_enabled = qla4xxx_pci_mmio_enabled,
+       .slot_reset = qla4xxx_pci_slot_reset,
+       .resume = qla4xxx_pci_resume,
+};
+
 static struct pci_device_id qla4xxx_pci_tbl[] = {
        {
                .vendor         = PCI_VENDOR_ID_QLOGIC,
@@ -2212,6 +2512,7 @@ static struct pci_driver qla4xxx_pci_driver = {
        .id_table       = qla4xxx_pci_tbl,
        .probe          = qla4xxx_probe_adapter,
        .remove         = qla4xxx_remove_adapter,
+       .err_handler = &qla4xxx_err_handler,
 };
 
 static int __init qla4xxx_module_init(void)