2 * The file intends to implement PE based on the information from
3 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
4 * All the PEs should be organized as a hierarchy tree. The first level
5 * of the tree will be associated to existing PHBs since the particular
6 * PE is only meaningful in one PHB domain.
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/delay.h>
26 #include <linux/export.h>
27 #include <linux/gfp.h>
28 #include <linux/init.h>
29 #include <linux/kernel.h>
30 #include <linux/pci.h>
31 #include <linux/string.h>
33 #include <asm/pci-bridge.h>
34 #include <asm/ppc-pci.h>
36 static LIST_HEAD(eeh_phb_pe);
39 * eeh_pe_alloc - Allocate PE
40 * @phb: PCI controller
43 * Allocate PE instance dynamically.
45 static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
50 pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
53 /* Initialize PHB PE */
56 INIT_LIST_HEAD(&pe->child_list);
57 INIT_LIST_HEAD(&pe->child);
58 INIT_LIST_HEAD(&pe->edevs);
64 * eeh_phb_pe_create - Create PHB PE
65 * @phb: PCI controller
67 * The function should be called while the PHB is detected during
68 * system boot or PCI hotplug in order to create PHB PE.
70 int eeh_phb_pe_create(struct pci_controller *phb)
75 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
77 pr_err("%s: out of memory!\n", __func__);
81 /* Put it into the list */
82 list_add_tail(&pe->child, &eeh_phb_pe);
84 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
90 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
91 * @phb: PCI controller
93 * The overall PEs form hierarchy tree. The first layer of the
94 * hierarchy tree is composed of PHB PEs. The function is used
95 * to retrieve the corresponding PHB PE according to the given PHB.
97 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
101 list_for_each_entry(pe, &eeh_phb_pe, child) {
103 * Actually, we needn't check the type since
104 * the PE for PHB has been determined when that
107 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
115 * eeh_pe_next - Retrieve the next PE in the tree
119 * The function is used to retrieve the next PE in the
122 static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
125 struct list_head *next = pe->child_list.next;
127 if (next == &pe->child_list) {
131 next = pe->child.next;
132 if (next != &pe->parent->child_list)
138 return list_entry(next, struct eeh_pe, child);
142 * eeh_pe_traverse - Traverse PEs in the specified PHB
145 * @flag: extra parameter to callback
147 * The function is used to traverse the specified PE and its
148 * child PEs. The traversing is to be terminated once the
149 * callback returns something other than NULL, or no more PEs
152 static void *eeh_pe_traverse(struct eeh_pe *root,
153 eeh_traverse_func fn, void *flag)
158 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
167 * eeh_pe_dev_traverse - Traverse the devices from the PE
169 * @fn: function callback
170 * @flag: extra parameter to callback
172 * The function is used to traverse the devices of the specified
173 * PE and its child PEs.
175 void *eeh_pe_dev_traverse(struct eeh_pe *root,
176 eeh_traverse_func fn, void *flag)
179 struct eeh_dev *edev;
183 pr_warning("%s: Invalid PE %p\n", __func__, root);
187 /* Traverse root PE */
188 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
189 eeh_pe_for_each_dev(pe, edev) {
190 ret = fn(edev, flag);
200 * __eeh_pe_get - Check the PE address
204 * For one particular PE, it can be identified by PE address
205 * or traditional BDF address. BDF address is composed of
206 * Bus/Device/Function number. The extra data referred by flag
207 * indicates which type of address should be used.
209 static void *__eeh_pe_get(void *data, void *flag)
211 struct eeh_pe *pe = (struct eeh_pe *)data;
212 struct eeh_dev *edev = (struct eeh_dev *)flag;
214 /* Unexpected PHB PE */
215 if (pe->type & EEH_PE_PHB)
218 /* We prefer PE address */
219 if (edev->pe_config_addr &&
220 (edev->pe_config_addr == pe->addr))
223 /* Try BDF address */
224 if (edev->config_addr &&
225 (edev->config_addr == pe->config_addr))
232 * eeh_pe_get - Search PE based on the given address
235 * Search the corresponding PE based on the specified address which
236 * is included in the eeh device. The function is used to check if
237 * the associated PE has been created against the PE address. It's
238 * notable that the PE address has 2 formats: traditional PE address
239 * which is composed of PCI bus/device/function number, or unified
242 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
244 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
247 pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
253 * eeh_pe_get_parent - Retrieve the parent PE
256 * The whole PEs existing in the system are organized as hierarchy
257 * tree. The function is used to retrieve the parent PE according
258 * to the parent EEH device.
260 static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
262 struct device_node *dn;
263 struct eeh_dev *parent;
266 * It might be the case that an indirect parent
267 * EEH device already has an associated PE, but
268 * the direct parent EEH device doesn't have one yet.
270 dn = edev->dn->parent;
272 /* We're poking out of PCI territory */
273 if (!PCI_DN(dn)) return NULL;
275 parent = of_node_to_eeh_dev(dn);
276 /* We're poking out of PCI territory */
277 if (!parent) return NULL;
289 * eeh_add_to_parent_pe - Add EEH device to parent PE
292 * Add EEH device to the parent PE. If the parent PE already
293 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
294 * we have to create new PE to hold the EEH device and the new
295 * PE will be linked to its parent PE as well.
297 int eeh_add_to_parent_pe(struct eeh_dev *edev)
299 struct eeh_pe *pe, *parent;
302 * Check whether the PE already exists according
303 * to the PE address. If it already exists, the
304 * PE should be composed of a PCI bus and its subordinate
307 pe = eeh_pe_get(edev);
308 if (pe && !(pe->type & EEH_PE_INVALID)) {
309 if (!edev->pe_config_addr) {
310 pr_err("%s: PE with addr 0x%x already exists\n",
311 __func__, edev->config_addr);
315 /* Mark the PE as type of PCI bus */
316 pe->type = EEH_PE_BUS;
319 /* Put the edev to PE */
320 list_add_tail(&edev->list, &pe->edevs);
321 pr_debug("EEH: Add %s to Bus PE#%x\n",
322 edev->dn->full_name, pe->addr);
325 } else if (pe && (pe->type & EEH_PE_INVALID)) {
326 list_add_tail(&edev->list, &pe->edevs);
329 * We get here because of PCI hotplug caused by
330 * EEH recovery. We need to clear EEH_PE_INVALID all the way up to the top.
334 if (!(parent->type & EEH_PE_INVALID))
336 parent->type &= ~EEH_PE_INVALID;
337 parent = parent->parent;
339 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
340 edev->dn->full_name, pe->addr, pe->parent->addr);
345 /* Create a new EEH PE */
346 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
348 pr_err("%s: out of memory!\n", __func__);
351 pe->addr = edev->pe_config_addr;
352 pe->config_addr = edev->config_addr;
355 * While doing PE reset, we probably hot-reset the
356 * upstream bridge. However, the PCI devices including
357 * the associated EEH devices might be removed when EEH
358 * core is doing recovery. So it isn't safe to retrieve
359 * the bridge through downstream EEH device. We have to
360 * trace the parent PCI bus, then the upstream bridge.
362 if (eeh_probe_mode_dev())
363 pe->bus = eeh_dev_to_pci_dev(edev)->bus;
366 * Put the new EEH PE into hierarchy tree. If the parent
367 * can't be found, the newly created PE will be attached
368 * to PHB directly. Otherwise, we have to associate the
369 * PE with its parent.
371 parent = eeh_pe_get_parent(edev);
373 parent = eeh_phb_pe_get(edev->phb);
375 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
376 __func__, edev->phb->global_number);
385 * Put the newly created PE into the child list and
386 * link the EEH device accordingly.
388 list_add_tail(&pe->child, &parent->child_list);
389 list_add_tail(&edev->list, &pe->edevs);
391 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
392 edev->dn->full_name, pe->addr, pe->parent->addr);
398 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
400 * @purge_pe: remove PE or not
402 * The PE hierarchy tree might be changed when doing PCI hotplug.
403 * Also, the PCI devices or buses could be removed from the system
404 * during EEH recovery. So we have to call the function to remove the
405 * corresponding PE accordingly if necessary.
407 int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
409 struct eeh_pe *pe, *parent, *child;
413 pr_warning("%s: No PE found for EEH device %s\n",
414 __func__, edev->dn->full_name);
418 /* Remove the EEH device */
421 list_del(&edev->list);
424 * Check if the parent PE includes any EEH devices.
425 * If not, we should delete that. Also, we should
426 * delete the parent PE if it doesn't have associated
427 * child PEs and EEH devices.
431 if (pe->type & EEH_PE_PHB)
435 if (list_empty(&pe->edevs) &&
436 list_empty(&pe->child_list)) {
437 list_del(&pe->child);
443 if (list_empty(&pe->edevs)) {
445 list_for_each_entry(child, &pe->child_list, child) {
446 if (!(child->type & EEH_PE_INVALID)) {
453 pe->type |= EEH_PE_INVALID;
466 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
469 * We have time stamp for each PE to trace its time of getting
470 * frozen in last hour. The function should be called to update
471 * the time stamp on first error of the specific PE. On the other
472 * hand, we needn't account for errors that happened more than an hour ago.
474 void eeh_pe_update_time_stamp(struct eeh_pe *pe)
476 struct timeval tstamp;
480 if (pe->freeze_count <= 0) {
481 pe->freeze_count = 0;
482 do_gettimeofday(&pe->tstamp);
484 do_gettimeofday(&tstamp);
485 if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
487 pe->freeze_count = 0;
493 * __eeh_pe_state_mark - Mark the state for the PE
497 * The function is used to mark the indicated state for the given
498 * PE. Also, the associated PCI devices will be put into IO frozen
501 static void *__eeh_pe_state_mark(void *data, void *flag)
503 struct eeh_pe *pe = (struct eeh_pe *)data;
504 int state = *((int *)flag);
506 struct pci_dev *pdev;
509 * Mark the PE with the indicated state. Also,
510 * the associated PCI device will be put into
511 * I/O frozen state to avoid I/O accesses from
512 * the PCI device driver.
515 eeh_pe_for_each_dev(pe, tmp) {
516 pdev = eeh_dev_to_pci_dev(tmp);
518 pdev->error_state = pci_channel_io_frozen;
525 * eeh_pe_state_mark - Mark specified state for PE and its associated device
528 * EEH error affects the current PE and its child PEs. The function
529 * is used to mark appropriate state for the affected PEs and the
530 * associated devices.
532 void eeh_pe_state_mark(struct eeh_pe *pe, int state)
534 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
538 * __eeh_pe_state_clear - Clear state for the PE
542 * The function is used to clear the indicated state from the
543 * given PE. Besides, we also clear the check count of the PE
546 static void *__eeh_pe_state_clear(void *data, void *flag)
548 struct eeh_pe *pe = (struct eeh_pe *)data;
549 int state = *((int *)flag);
558 * eeh_pe_state_clear - Clear state for the PE and its children
560 * @state: state to be cleared
562 * When the PE and its children have been recovered from error,
563 * we need to clear the error state for them. The function is used
566 void eeh_pe_state_clear(struct eeh_pe *pe, int state)
568 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
572 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
573 * buses assigned explicitly by firmware, and we probably have
574 * lost that after reset. So we have to delay the check until
575 * the PCI-CFG registers have been restored for the parent
578 * Don't use normal PCI-CFG accessors, which have probably been
579 * blocked on the normal path during this stage. So we need to utilize
580 * EEH operations, which are always permitted.
582 static void eeh_bridge_check_link(struct pci_dev *pdev,
583 struct device_node *dn)
590 * We only check root port and downstream ports of
593 if (!pci_is_pcie(pdev) ||
594 (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
595 pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
598 pr_debug("%s: Check PCIe link for %s ...\n",
599 __func__, pci_name(pdev));
601 /* Check slot status */
602 cap = pdev->pcie_cap;
603 eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
604 if (!(val & PCI_EXP_SLTSTA_PDS)) {
605 pr_debug(" No card in the slot (0x%04x) !\n", val);
609 /* Check power status if we have the capability */
610 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
611 if (val & PCI_EXP_SLTCAP_PCP) {
612 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
613 if (val & PCI_EXP_SLTCTL_PCC) {
614 pr_debug(" In power-off state, power it on ...\n");
615 val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
616 val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
617 eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
623 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
624 val &= ~PCI_EXP_LNKCTL_LD;
625 eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
628 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
629 if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
630 pr_debug(" No link reporting capability (0x%08x) \n", val);
635 /* Wait for the link to come up, until timeout (5s) */
637 while (timeout < 5000) {
641 eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
642 if (val & PCI_EXP_LNKSTA_DLLLA)
646 if (val & PCI_EXP_LNKSTA_DLLLA)
647 pr_debug(" Link up (%s)\n",
648 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
650 pr_debug(" Link not ready (0x%04x)\n", val);
/*
 * BYTE_SWAP(OFF) maps byte offset OFF to the mirrored position inside its
 * aligned 4-byte group: offsets 0,1,2,3 become 3,2,1,0, offsets 4..7 become
 * 7..4, and so on.
 *
 * SAVED_BYTE(OFF) reads one byte of edev->config_space, viewed as a u8 array,
 * at that swapped index. It expands to a reference to a variable named
 * `edev`, so it can only be used where such a variable is in scope.
 *
 * NOTE(review): presumably the swap compensates for config_space being kept
 * as 32-bit words in the CPU's byte order -- confirm against the code that
 * fills config_space.
 */
653 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
654 #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
656 static void eeh_restore_bridge_bars(struct pci_dev *pdev,
657 struct eeh_dev *edev,
658 struct device_node *dn)
663 * Device BARs: 0x10 - 0x18
664 * Bus numbers and windows: 0x18 - 0x30
666 for (i = 4; i < 13; i++)
667 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
669 eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
671 /* Cache line & Latency timer: 0xC 0xD */
672 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
673 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
674 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
675 SAVED_BYTE(PCI_LATENCY_TIMER));
676 /* Max latency, min grant, interrupt pin and line: 0x3C */
677 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
679 /* PCI Command: 0x4 */
680 eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
682 /* Check the PCIe link is ready */
683 eeh_bridge_check_link(pdev, dn);
686 static void eeh_restore_device_bars(struct eeh_dev *edev,
687 struct device_node *dn)
692 for (i = 4; i < 10; i++)
693 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
694 /* 12 == Expansion ROM Address */
695 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
697 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
698 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
699 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
700 SAVED_BYTE(PCI_LATENCY_TIMER));
702 /* max latency, min grant, interrupt pin and line */
703 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
706 * Restore PERR & SERR bits, some devices require it,
707 * don't touch the other command bits
709 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
710 if (edev->config_space[1] & PCI_COMMAND_PARITY)
711 cmd |= PCI_COMMAND_PARITY;
713 cmd &= ~PCI_COMMAND_PARITY;
714 if (edev->config_space[1] & PCI_COMMAND_SERR)
715 cmd |= PCI_COMMAND_SERR;
717 cmd &= ~PCI_COMMAND_SERR;
718 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
722 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
726 * Loads the PCI configuration space base address registers,
727 * the expansion ROM base address, the latency timer, etc.
728 * from the saved values in the device node.
730 static void *eeh_restore_one_device_bars(void *data, void *flag)
732 struct pci_dev *pdev = NULL;
733 struct eeh_dev *edev = (struct eeh_dev *)data;
734 struct device_node *dn = eeh_dev_to_of_node(edev);
736 /* Trace the PCI bridge */
737 if (eeh_probe_mode_dev()) {
738 pdev = eeh_dev_to_pci_dev(edev);
739 if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
744 eeh_restore_bridge_bars(pdev, edev, dn);
746 eeh_restore_device_bars(edev, dn);
752 * eeh_pe_restore_bars - Restore the PCI config space info
755 * This routine performs a recursive walk to the children
756 * of this device as well.
758 void eeh_pe_restore_bars(struct eeh_pe *pe)
761 * We needn't take the EEH lock since eeh_pe_dev_traverse()
764 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
768 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
771 * Retrieve the PCI bus according to the given PE. Basically,
772 * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
773 * primary PCI bus will be retrieved. The parent bus will be
774 * returned for BUS PE. However, we don't have associated PCI
777 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
779 struct pci_bus *bus = NULL;
780 struct eeh_dev *edev;
781 struct pci_dev *pdev;
783 if (pe->type & EEH_PE_PHB) {
785 } else if (pe->type & EEH_PE_BUS ||
786 pe->type & EEH_PE_DEVICE) {
792 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
793 pdev = eeh_dev_to_pci_dev(edev);