]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - arch/powerpc/kernel/eeh_driver.c
powerpc: Correct DSCR during TM context switch
[karo-tx-linux.git] / arch / powerpc / kernel / eeh_driver.c
index 6d91b51a5ddb14c83117facbf1fb2eca7cbcf14c..7100a5b96e7059caf572609dec9d7f9393f0106f 100644 (file)
@@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev)
        }
 }
 
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+       /* Removed if edev is NULL or EEH_DEV_REMOVED is set in edev->mode */
+       if (!edev || (edev->mode & EEH_DEV_REMOVED))
+               return true;
+
+       return false;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
        enum pci_ers_result rc, *res = userdata;
        struct pci_driver *driver;
 
-       /* We might not have the associated PCI device,
-        * then we should continue for next one.
-        */
-       if (!dev) return NULL;
+       if (!dev || eeh_dev_removed(edev))
+               return NULL;
        dev->error_state = pci_channel_io_frozen;
 
        driver = eeh_pcid_get(dev);
@@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
        enum pci_ers_result rc, *res = userdata;
        struct pci_driver *driver;
 
+       if (!dev || eeh_dev_removed(edev))
+               return NULL;
+
        driver = eeh_pcid_get(dev);
        if (!driver) return NULL;
 
@@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)
        enum pci_ers_result rc, *res = userdata;
        struct pci_driver *driver;
 
-       if (!dev) return NULL;
+       if (!dev || eeh_dev_removed(edev))
+               return NULL;
        dev->error_state = pci_channel_io_normal;
 
        driver = eeh_pcid_get(dev);
@@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)
        struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
        struct pci_driver *driver;
 
-       if (!dev) return NULL;
+       if (!dev || eeh_dev_removed(edev))
+               return NULL;
        dev->error_state = pci_channel_io_normal;
 
        driver = eeh_pcid_get(dev);
@@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)
        struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
        struct pci_driver *driver;
 
-       if (!dev) return NULL;
+       if (!dev || eeh_dev_removed(edev))
+               return NULL;
        dev->error_state = pci_channel_io_perm_failure;
 
        driver = eeh_pcid_get(dev);
@@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata)
        if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
                return NULL;
 
+       /*
+        * We rely on count-based pcibios_release_device() to
+        * detach permanently offlined PEs. Unfortunately, that's
+        * not reliable enough. We might have the permanently
+        * offlined PEs attached, but we needn't take care of
+        * them and their child devices.
+        */
+       if (eeh_dev_removed(edev))
+               return NULL;
+
        driver = eeh_pcid_get(dev);
        if (driver) {
                eeh_pcid_put(dev);
@@ -417,6 +440,36 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
        return NULL;
 }
 
+/*
+ * Explicitly clear the PE's frozen state for PowerNV, where
+ * the PE stays frozen until BAR restore has completed. Doing
+ * so is harmless on pSeries. To be consistent with PE reset
+ * (tried up to 3 times), thawing MMIO and then DMA is tried up
+ * to 3 times as well. Returns 0 on success, else the last error.
+ */
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+       int i, rc;
+
+       for (i = 0; i < 3; i++) {
+               rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+               if (rc)
+                       continue;
+               rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+               if (!rc)
+                       break;
+       }
+
+       /* Warn if thawing failed; otherwise drop the EEH_PE_ISOLATED flag */
+       if (rc)
+               pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
+                       __func__, pe->phb->global_number, pe->addr, rc);
+       else
+               eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+       return rc;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -474,6 +527,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
        eeh_pe_restore_bars(pe);
        eeh_pe_state_clear(pe, EEH_PE_RESET);
 
+       /* Clear frozen state */
+       rc = eeh_clear_pe_frozen_state(pe);
+       if (rc)
+               return rc;
+
        /* Give the system 5 seconds to finish running the user-space
         * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
         * this is a hack, but if we don't do this, and try to bring
@@ -582,7 +640,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
                        result = PCI_ERS_RESULT_NEED_RESET;
                } else {
                        pr_info("EEH: Notify device drivers to resume I/O\n");
-                       result = PCI_ERS_RESULT_NONE;
                        eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
                }
        }
@@ -594,10 +651,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 
                if (rc < 0)
                        goto hard_fail;
-               if (rc)
+               if (rc) {
                        result = PCI_ERS_RESULT_NEED_RESET;
-               else
+               } else {
+                       /*
+                        * We didn't do a PE reset in this case, so
+                        * the PE is still in frozen state. Clear it
+                        * before resuming the PE.
+                        */
+                       eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
                        result = PCI_ERS_RESULT_RECOVERED;
+               }
        }
 
        /* If any device has a hard failure, then shut off everything. */
@@ -659,8 +723,17 @@ perm_error:
        /* Notify all devices that they're about to go down. */
        eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
-       /* Shut down the device drivers for good. */
+       /* Mark the PE to be removed permanently */
+       pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+
+       /*
+        * Shut down the device drivers for good. We mark
+        * all removed devices correctly to avoid accessing
+        * their PCI config space any more.
+        */
        if (frozen_bus) {
+               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
                pci_lock_rescan_remove();
                pcibios_remove_pci_devices(frozen_bus);
                pci_unlock_rescan_remove();