2 * The file intends to implement PE based on the information from
3 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
4 * All the PEs should be organized as hierarchy tree. The first level
5 * of the tree will be associated to existing PHBs since the particular
6 * PE is only meaningful in one PHB domain.
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/delay.h>
26 #include <linux/export.h>
27 #include <linux/gfp.h>
28 #include <linux/kernel.h>
29 #include <linux/pci.h>
30 #include <linux/string.h>
32 #include <asm/pci-bridge.h>
33 #include <asm/ppc-pci.h>
/* Extra size added to each PE allocation for auxiliary data; 0 means no extra area. */
35 static int eeh_pe_aux_size = 0;
/* Head of the list that PHB PEs are chained onto through their child node. */
36 static LIST_HEAD(eeh_phb_pe);
39 * eeh_set_pe_aux_size - Set PE auxiliary data size
40 * @size: PE auxiliary data size
42 * Set PE auxiliary data size
/*
 * Store @size in the file-scope eeh_pe_aux_size, which eeh_pe_alloc()
 * adds to every PE allocation when it is non-zero.
 * NOTE(review): any validation of @size sits on lines that are not
 * visible in this extract - confirm before relying on it.
 */
44 void eeh_set_pe_aux_size(int size)
49 eeh_pe_aux_size = size;
53 * eeh_pe_alloc - Allocate PE
54 * @phb: PCI controller
57 * Allocate PE instance dynamically.
/*
 * Allocate one zero-filled PE with kzalloc().
 *
 * The base size is sizeof(struct eeh_pe). When eeh_pe_aux_size is
 * non-zero, the struct size is first rounded up to a cache line and the
 * auxiliary size is added on top of that.
 */
59 static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
64 alloc_size = sizeof(struct eeh_pe);
65 if (eeh_pe_aux_size) {
66 alloc_size = ALIGN(alloc_size, cache_line_size());
67 alloc_size += eeh_pe_aux_size;
71 pe = kzalloc(alloc_size, GFP_KERNEL);
74 /* Initialize PHB PE */
/* All three list heads start out empty (self-linked) */
77 INIT_LIST_HEAD(&pe->child_list);
78 INIT_LIST_HEAD(&pe->child);
79 INIT_LIST_HEAD(&pe->edevs);
/*
 * Auxiliary data lives past the aligned struct inside the same
 * allocation. The second ALIGN() argument is on a line not shown here;
 * presumably cache_line_size(), matching the size computation above -
 * TODO confirm.
 */
81 pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
87 * eeh_phb_pe_create - Create PHB PE
88 * @phb: PCI controller
90 * The function should be called while the PHB is detected during
91 * system boot or PCI hotplug in order to create PHB PE.
/*
 * Create the PE that represents @phb: allocate it with type EEH_PE_PHB
 * and append it to the file-scope eeh_phb_pe list.
 */
93 int eeh_phb_pe_create(struct pci_controller *phb)
98 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
/* Presumably the allocation-failure path; the guarding test is not visible here - TODO confirm */
100 pr_err("%s: out of memory!\n", __func__);
104 /* Put it into the list */
105 list_add_tail(&pe->child, &eeh_phb_pe);
107 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
113 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
114 * @phb: PCI controller
116 * The overall PEs form hierarchy tree. The first layer of the
117 * hierarchy tree is composed of PHB PEs. The function is used
118 * to retrieve the corresponding PHB PE according to the given PHB.
/*
 * Look up the PHB PE for @phb by walking the eeh_phb_pe list. A match
 * needs both the EEH_PE_PHB bit in pe->type and an identical phb pointer.
 */
120 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
124 list_for_each_entry(pe, &eeh_phb_pe, child) {
126 * Actually, we needn't check the type since
127 * the PE for PHB has been determined when that
130 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
138 * eeh_pe_next - Retrieve the next PE in the tree
142 * The function is used to retrieve the next PE in the
/*
 * Compute the PE that follows @pe in the tree walk: the first child when
 * @pe has children, otherwise a following sibling.
 * NOTE(review): the climb back towards the root and the NULL return are
 * on lines not visible in this extract.
 */
145 static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
/* Candidate: first entry of the child list of @pe */
148 struct list_head *next = pe->child_list.next;
/* An empty child list points back at its own head */
150 if (next == &pe->child_list) {
/* Try the next sibling; reaching the parent list head means there is none */
154 next = pe->child.next;
155 if (next != &pe->parent->child_list)
/* Convert the list node back into its containing PE */
161 return list_entry(next, struct eeh_pe, child);
165 * eeh_pe_traverse - Traverse PEs in the specified PHB
168 * @flag: extra parameter to callback
170 * The function is used to traverse the specified PE and its
171 * child PEs. The traversing is to be terminated once the
172 * callback returns something other than NULL, or no more PEs
/*
 * Walk the PEs starting at @root, advancing with eeh_pe_next() until it
 * yields NULL. @fn is the per-PE callback and @flag its extra argument;
 * the call itself is on lines not visible in this extract.
 */
175 void *eeh_pe_traverse(struct eeh_pe *root,
176 eeh_traverse_func fn, void *flag)
181 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
190 * eeh_pe_dev_traverse - Traverse the devices from the PE
192 * @fn: function callback
193 * @flag: extra parameter to callback
195 * The function is used to traverse the devices of the specified
196 * PE and its child PEs.
/*
 * Walk @root and the PEs reached through eeh_pe_next(), calling
 * fn(edev, flag) for each EEH device attached to every visited PE.
 */
198 void *eeh_pe_dev_traverse(struct eeh_pe *root,
199 eeh_traverse_func fn, void *flag)
202 struct eeh_dev *edev, *tmp;
/* Warning for an unusable @root; the test that triggers it is not visible here */
206 pr_warn("%s: Invalid PE %p\n",
211 /* Traverse root PE */
212 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
213 eeh_pe_for_each_dev(pe, edev, tmp) {
/* How a non-NULL ret is handled is on lines not shown - TODO confirm */
214 ret = fn(edev, flag);
224 * __eeh_pe_get - Check the PE address
228 * For one particular PE, it can be identified by PE address
229 * or traditional BDF address. BDF address is composed of
230 * Bus/Device/Function number. The extra data referred by flag
231 * indicates which type of address should be used.
/*
 * eeh_pe_traverse() callback used by eeh_pe_get(): @data is the PE being
 * visited and @flag is the EEH device whose addresses are compared
 * against that PE.
 */
233 static void *__eeh_pe_get(void *data, void *flag)
235 struct eeh_pe *pe = (struct eeh_pe *)data;
236 struct eeh_dev *edev = (struct eeh_dev *)flag;
238 /* Unexpected PHB PE */
239 if (pe->type & EEH_PE_PHB)
242 /* We prefer PE address */
/* A zero pe_config_addr never matches here */
243 if (edev->pe_config_addr &&
244 (edev->pe_config_addr == pe->addr))
247 /* Try BDF address */
/* Likewise a zero config_addr never matches */
248 if (edev->config_addr &&
249 (edev->config_addr == pe->config_addr))
256 * eeh_pe_get - Search PE based on the given address
259 * Search the corresponding PE based on the specified address which
260 * is included in the eeh device. The function is used to check if
261 * the associated PE has been created against the PE address. It's
262 * notable that the PE address has 2 formats: traditional PE address
263 * which is composed of PCI bus/device/function number, or unified
/*
 * Find the PE matching @edev: fetch the PHB PE of edev->phb and traverse
 * the tree below it with __eeh_pe_get() as the matcher.
 */
266 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
268 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
271 pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
277 * eeh_pe_get_parent - Retrieve the parent PE
280 * The whole PEs existing in the system are organized as hierarchy
281 * tree. The function is used to retrieve the parent PE according
282 * to the parent EEH device.
/*
 * Starting from the device-tree parent of @edev, look for the PE of an
 * ancestor device. Returns NULL when a node has no PCI_DN() data or no
 * EEH device attached.
 * NOTE(review): the loop over further ancestors and the successful
 * return are on lines not visible in this extract.
 */
284 static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
286 struct device_node *dn;
287 struct eeh_dev *parent;
290 * It might have the case for the indirect parent
291 * EEH device already having associated PE, but
292 * the direct parent EEH device doesn't have yet.
294 dn = edev->dn->parent;
296 /* We're poking out of PCI territory */
297 if (!PCI_DN(dn)) return NULL;
299 parent = of_node_to_eeh_dev(dn);
300 /* We're poking out of PCI territory */
301 if (!parent) return NULL;
313 * eeh_add_to_parent_pe - Add EEH device to parent PE
316 * Add EEH device to the parent PE. If the parent PE already
317 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
318 * we have to create new PE to hold the EEH device and the new
319 * PE will be linked to its parent PE as well.
/*
 * Attach @edev to a PE, creating the PE when needed.
 *
 * Three cases are visible below:
 *  - a PE without EEH_PE_INVALID already matches: it is retyped to
 *    EEH_PE_BUS and @edev is appended to its device list (when @edev has
 *    no pe_config_addr the match is reported as an already-existing PE
 *    instead);
 *  - a PE marked EEH_PE_INVALID matches: @edev is appended, and
 *    EEH_PE_INVALID and EEH_PE_KEEP are cleared from the type field of
 *    its ancestors, with each ancestor tested for EEH_PE_INVALID first;
 *  - no PE matches: a new EEH_PE_DEVICE PE is allocated, given the
 *    addresses of @edev and linked under the PE returned by
 *    eeh_pe_get_parent(), or under the PHB PE as a fallback.
 *
 * NOTE(review): EEH_PE_KEEP is cleared from ->type here, while
 * eeh_rmv_from_parent_pe() tests it on ->state - confirm which field is
 * intended.
 * NOTE(review): return values and several conditions are on lines not
 * visible in this extract.
 */
321 int eeh_add_to_parent_pe(struct eeh_dev *edev)
323 struct eeh_pe *pe, *parent;
326 * Search the PE has been existing or not according
327 * to the PE address. If that has been existing, the
328 * PE should be composed of PCI bus and its subordinate
331 pe = eeh_pe_get(edev);
332 if (pe && !(pe->type & EEH_PE_INVALID)) {
333 if (!edev->pe_config_addr) {
334 pr_err("%s: PE with addr 0x%x already exists\n",
335 __func__, edev->config_addr);
339 /* Mark the PE as type of PCI bus */
340 pe->type = EEH_PE_BUS;
343 /* Put the edev to PE */
344 list_add_tail(&edev->list, &pe->edevs);
345 pr_debug("EEH: Add %s to Bus PE#%x\n",
346 edev->dn->full_name, pe->addr);
349 } else if (pe && (pe->type & EEH_PE_INVALID)) {
350 list_add_tail(&edev->list, &pe->edevs);
353 * We're running to here because of PCI hotplug caused by
354 * EEH recovery. We need clear EEH_PE_INVALID until the top.
358 if (!(parent->type & EEH_PE_INVALID))
360 parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
361 parent = parent->parent;
363 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
364 edev->dn->full_name, pe->addr, pe->parent->addr);
369 /* Create a new EEH PE */
370 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
372 pr_err("%s: out of memory!\n", __func__);
375 pe->addr = edev->pe_config_addr;
376 pe->config_addr = edev->config_addr;
379 * Put the new EEH PE into hierarchy tree. If the parent
380 * can't be found, the newly created PE will be attached
381 * to PHB directly. Otherwise, we have to associate the
382 * PE with its parent.
384 parent = eeh_pe_get_parent(edev);
386 parent = eeh_phb_pe_get(edev->phb);
388 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
389 __func__, edev->phb->global_number);
398 * Put the newly created PE into the child list and
399 * link the EEH device accordingly.
401 list_add_tail(&pe->child, &parent->child_list);
402 list_add_tail(&edev->list, &pe->edevs);
404 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
405 edev->dn->full_name, pe->addr, pe->parent->addr);
411 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
414 * The PE hierarchy tree might be changed when doing PCI hotplug.
415 * Also, the PCI devices or buses could be removed from the system
416 * during EEH recovery. So we have to call the function to remove the
417 * corresponding PE accordingly if necessary.
/*
 * Detach @edev from its PE and then prune the PE tree.
 *
 * The device is removed with list_del(). A PE without EEH_PE_KEEP in
 * ->state is unlinked from its parent once it has neither devices nor
 * child PEs. In the other visible path, a PE with no devices has its
 * children checked for EEH_PE_INVALID and may itself be marked
 * EEH_PE_INVALID. A PE with EEH_PE_PHB set in ->type is tested for
 * explicitly before any pruning.
 * NOTE(review): the loop that climbs towards the PHB PE, the else/break
 * structure and the return values are on lines not visible in this
 * extract.
 */
419 int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
421 struct eeh_pe *pe, *parent, *child;
425 pr_debug("%s: No PE found for EEH device %s\n",
426 __func__, edev->dn->full_name);
430 /* Remove the EEH device */
431 pe = eeh_dev_to_pe(edev);
433 list_del(&edev->list);
436 * Check if the parent PE includes any EEH devices.
437 * If not, we should delete that. Also, we should
438 * delete the parent PE if it doesn't have associated
439 * child PEs and EEH devices.
443 if (pe->type & EEH_PE_PHB)
446 if (!(pe->state & EEH_PE_KEEP)) {
447 if (list_empty(&pe->edevs) &&
448 list_empty(&pe->child_list)) {
449 list_del(&pe->child);
455 if (list_empty(&pe->edevs)) {
457 list_for_each_entry(child, &pe->child_list, child) {
458 if (!(child->type & EEH_PE_INVALID)) {
465 pe->type |= EEH_PE_INVALID;
478 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
481 * We have time stamp for each PE to trace its time of getting
482 * frozen in last hour. The function should be called to update
483 * the time stamp on first error of the specific PE. On the other
484 * hand, we needn't account for errors that happened more than an hour ago.
/*
 * Maintain the per-PE error time stamp. With no recorded freezes the
 * stamp is set to the current time; otherwise the freeze count is reset
 * to zero once more than 3600 seconds have passed since the stamp.
 */
486 void eeh_pe_update_time_stamp(struct eeh_pe *pe)
488 struct timeval tstamp;
/* Clamp a non-positive count to zero and record the current time */
492 if (pe->freeze_count <= 0) {
493 pe->freeze_count = 0;
494 do_gettimeofday(&pe->tstamp);
/* Compare the current time with the stored stamp; 3600 s is one hour */
496 do_gettimeofday(&tstamp);
497 if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
499 pe->freeze_count = 0;
505 * __eeh_pe_state_mark - Mark the state for the PE
509 * The function is used to mark the indicated state for the given
510 * PE. Also, the associated PCI devices will be put into IO frozen
/*
 * eeh_pe_traverse() callback used by eeh_pe_state_mark(): @data is the
 * PE and @flag points to an int holding the state bits to apply.
 * NOTE(review): the statement that actually sets pe->state and the
 * early returns are on lines not visible in this extract.
 */
513 static void *__eeh_pe_state_mark(void *data, void *flag)
515 struct eeh_pe *pe = (struct eeh_pe *)data;
516 int state = *((int *)flag);
517 struct eeh_dev *edev, *tmp;
518 struct pci_dev *pdev;
520 /* Keep the state of permanently removed PE intact */
521 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
522 (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
527 /* Offline PCI devices if applicable */
/* Requests without EEH_PE_ISOLATED presumably stop here - TODO confirm */
528 if (!(state & EEH_PE_ISOLATED))
/* Flag every PCI device of this PE as frozen */
531 eeh_pe_for_each_dev(pe, edev, tmp) {
532 pdev = eeh_dev_to_pci_dev(edev);
534 pdev->error_state = pci_channel_io_frozen;
537 /* Block PCI config access if required */
538 if (pe->state & EEH_PE_CFG_RESTRICTED)
539 pe->state |= EEH_PE_CFG_BLOCKED;
545 * eeh_pe_state_mark - Mark specified state for PE and its associated device
548 * EEH error affects the current PE and its child PEs. The function
549 * is used to mark appropriate state for the affected PEs and the
550 * associated devices.
/*
 * Apply @state to @pe and to the PEs reached from it by running
 * __eeh_pe_state_mark() through eeh_pe_traverse(). The callback receives
 * the address of the local @state parameter.
 */
552 void eeh_pe_state_mark(struct eeh_pe *pe, int state)
554 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
/*
 * eeh_pe_dev_traverse() callback used by eeh_pe_dev_mode_mark(): @data is
 * the EEH device and @flag points to an int mode value.
 * NOTE(review): the statement that applies the mode is on lines not
 * visible in this extract.
 */
557 static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
559 struct eeh_dev *edev = data;
560 int mode = *((int *)flag);
568 * eeh_pe_dev_mode_mark - Mark state for all devices under the PE
571 * Mark specific state for all child devices of the PE.
/*
 * Run __eeh_pe_dev_mode_mark() with @mode over the devices of @pe and of
 * the PEs reached from it, using eeh_pe_dev_traverse().
 */
573 void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
575 eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
579 * __eeh_pe_state_clear - Clear state for the PE
583 * The function is used to clear the indicated state from the
584 * given PE. Besides, we also clear the check count of the PE
/*
 * eeh_pe_traverse() callback used by eeh_pe_state_clear(): @data is the
 * PE and @flag points to an int holding the state bits to clear.
 * NOTE(review): the statement that actually clears pe->state, the check
 * count reset and the early returns are on lines not visible in this
 * extract.
 */
587 static void *__eeh_pe_state_clear(void *data, void *flag)
589 struct eeh_pe *pe = (struct eeh_pe *)data;
590 int state = *((int *)flag);
591 struct eeh_dev *edev, *tmp;
592 struct pci_dev *pdev;
594 /* Keep the state of permanently removed PE intact */
595 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
596 (state & EEH_PE_ISOLATED))
602 * Special treatment on clearing isolated state. Clear
603 * check count since last isolation and put all affected
604 * devices to normal state.
/* Requests without EEH_PE_ISOLATED presumably stop here - TODO confirm */
606 if (!(state & EEH_PE_ISOLATED))
/* Put every PCI device of this PE back to the normal channel state */
610 eeh_pe_for_each_dev(pe, edev, tmp) {
611 pdev = eeh_dev_to_pci_dev(edev);
615 pdev->error_state = pci_channel_io_normal;
618 /* Unblock PCI config access if required */
619 if (pe->state & EEH_PE_CFG_RESTRICTED)
620 pe->state &= ~EEH_PE_CFG_BLOCKED;
626 * eeh_pe_state_clear - Clear state for the PE and its children
628 * @state: state to be cleared
630 * When the PE and its children have been recovered from error,
631 * we need to clear the error state for that. The function is used
/*
 * Clear @state from @pe and from the PEs reached from it by running
 * __eeh_pe_state_clear() through eeh_pe_traverse(). The callback receives
 * the address of the local @state parameter.
 */
634 void eeh_pe_state_clear(struct eeh_pe *pe, int state)
636 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
640 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
641 * buses assigned explicitly by firmware, and we probably have
642 * lost that after reset. So we have to delay the check until
643 * the PCI-CFG registers have been restored for the parent
646 * Don't use normal PCI-CFG accessors, which probably have been
647 * blocked on normal path during the stage. So we need to utilize
648 * eeh operations, which are always permitted.
/*
 * Check the PCIe link below a bridge after its config space was restored.
 *
 * Only devices with EEH_DEV_ROOT_PORT or EEH_DEV_DS_PORT in edev->mode
 * are tested for. The visible steps are: read the slot status for card
 * presence, power the slot on when the power controller reports it off,
 * clear the link-disable bit, and - when the port can report link state -
 * poll the link status while the timeout counter is below 5000. Every
 * config access goes through eeh_ops->read_config / write_config with
 * offsets relative to edev->pcie_cap.
 * NOTE(review): early returns, the delay inside the poll loop and the
 * local declarations are on lines not visible in this extract.
 */
650 static void eeh_bridge_check_link(struct eeh_dev *edev,
651 struct device_node *dn)
658 * We only check root port and downstream ports of
661 if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
664 pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
665 __func__, edev->phb->global_number,
666 edev->config_addr >> 8,
667 PCI_SLOT(edev->config_addr & 0xFF),
668 PCI_FUNC(edev->config_addr & 0xFF));
670 /* Check slot status */
671 cap = edev->pcie_cap;
672 eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
673 if (!(val & PCI_EXP_SLTSTA_PDS)) {
674 pr_debug(" No card in the slot (0x%04x) !\n", val);
678 /* Check power status if we have the capability */
679 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
680 if (val & PCI_EXP_SLTCAP_PCP) {
681 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
682 if (val & PCI_EXP_SLTCTL_PCC) {
683 pr_debug(" In power-off state, power it on ...\n");
/* Clear power-control and power-indicator bits, then set the indicator field to 0x0100 (presumably the "on" encoding - TODO confirm) */
684 val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
685 val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
686 eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
/* Read-modify-write of link control: clear the Link Disable bit */
692 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
693 val &= ~PCI_EXP_LNKCTL_LD;
694 eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
/* The link capability register is read as 4 bytes, unlike the 2-byte reads above */
697 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
698 if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
699 pr_debug(" No link reporting capability (0x%08x) \n", val);
704 /* Wait for the link to come up, until timeout (5s) */
706 while (timeout < 5000) {
710 eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
711 if (val & PCI_EXP_LNKSTA_DLLLA)
/* Report the outcome based on the last link status value read */
715 if (val & PCI_EXP_LNKSTA_DLLLA)
716 pr_debug(" Link up (%s)\n",
717 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
719 pr_debug(" Link not ready (0x%04x)\n", val);
/*
 * BYTE_SWAP(OFF) mirrors a byte offset inside its 4-byte word: with
 * OFF = 4*q + r the result is 4*q + (3 - r), e.g. 0xC -> 15, 0xD -> 14.
 */
722 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
/*
 * SAVED_BYTE(OFF) reads the mirrored byte from edev->config_space viewed
 * as a u8 array. It expands to a reference to a variable named edev, so
 * one must be in scope at the point of use.
 */
723 #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
/*
 * Write the saved config space of a bridge back through
 * eeh_ops->write_config, in this order: 32-bit words 4 to 12, word 14,
 * the cache line size and latency timer bytes, word 15 and finally the
 * command word from config_space[1]. Afterwards the PCIe link is checked
 * with eeh_bridge_check_link().
 */
725 static void eeh_restore_bridge_bars(struct eeh_dev *edev,
726 struct device_node *dn)
731 * Device BARs: 0x10 - 0x18
732 * Bus numbers and windows: 0x18 - 0x30
/* i counts 32-bit words, so the byte offset written is i*4 (0x10 up to 0x30) */
734 for (i = 4; i < 13; i++)
735 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
/* Word 14 is byte offset 0x38 */
737 eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
739 /* Cache line & Latency timer: 0xC 0xD */
740 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
741 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
742 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
743 SAVED_BYTE(PCI_LATENCY_TIMER));
744 /* Max latency, min grant, interrupt pin and line: 0x3C */
745 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
747 /* PCI Command: 0x4 */
748 eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
750 /* Check the PCIe link is ready */
751 eeh_bridge_check_link(edev, dn);
/*
 * Write the saved config space of a non-bridge device back through
 * eeh_ops->write_config: 32-bit words 4 to 9, word 12 (expansion ROM
 * address), the cache line size and latency timer bytes and word 15.
 * The command register is handled differently from the bridge path: it
 * is read back and only the PCI_COMMAND_PARITY and PCI_COMMAND_SERR bits
 * are made to match the saved value in config_space[1] before the word
 * is written again.
 */
754 static void eeh_restore_device_bars(struct eeh_dev *edev,
755 struct device_node *dn)
/* i counts 32-bit words, so the byte offset written is i*4 (0x10 up to 0x24) */
760 for (i = 4; i < 10; i++)
761 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
762 /* 12 == Expansion ROM Address */
763 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
765 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
766 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
767 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
768 SAVED_BYTE(PCI_LATENCY_TIMER));
770 /* max latency, min grant, interrupt pin and line */
771 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
774 * Restore PERR & SERR bits, some devices require it,
775 * don't touch the other command bits
777 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
778 if (edev->config_space[1] & PCI_COMMAND_PARITY)
779 cmd |= PCI_COMMAND_PARITY;
781 cmd &= ~PCI_COMMAND_PARITY;
782 if (edev->config_space[1] & PCI_COMMAND_SERR)
783 cmd |= PCI_COMMAND_SERR;
785 cmd &= ~PCI_COMMAND_SERR;
786 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
790 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
794 * Loads the PCI configuration space base address registers,
795 * the expansion ROM base address, the latency timer, and etc.
796 * from the saved values in the device node.
/*
 * eeh_pe_dev_traverse() callback used by eeh_pe_restore_bars(): @data is
 * the EEH device to restore; @flag is not read on the visible lines.
 * Devices with EEH_DEV_BRIDGE in edev->mode take the bridge restore
 * path; eeh_restore_device_bars() is presumably the else branch (the
 * else line itself is not visible here). The optional
 * eeh_ops->restore_config hook runs afterwards when it is set.
 */
798 static void *eeh_restore_one_device_bars(void *data, void *flag)
800 struct eeh_dev *edev = (struct eeh_dev *)data;
801 struct device_node *dn = eeh_dev_to_of_node(edev);
803 /* Do special restore for bridges */
804 if (edev->mode & EEH_DEV_BRIDGE)
805 eeh_restore_bridge_bars(edev, dn);
807 eeh_restore_device_bars(edev, dn);
/* The hook is optional, so test the pointer before calling it */
809 if (eeh_ops->restore_config)
810 eeh_ops->restore_config(dn);
816 * eeh_pe_restore_bars - Restore the PCI config space info
819 * This routine performs a recursive walk to the children
820 * of this device as well.
/*
 * Restore config space for every device of @pe and of the PEs reached
 * from it by running eeh_restore_one_device_bars() through
 * eeh_pe_dev_traverse(); no extra callback argument is passed (NULL).
 */
822 void eeh_pe_restore_bars(struct eeh_pe *pe)
825 * We needn't take the EEH lock since eeh_pe_dev_traverse()
828 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
832 * eeh_pe_loc_get - Retrieve location code binding to the given PE
835 * Retrieve the location code of the given PE. If the primary PE bus
836 * is root bus, we will grab location code from PHB device tree node
837 * or root port. Otherwise, the upstream bridge's device tree node
838 * of the primary PE bus will be checked for the location code.
/*
 * Return the location code string for @pe, or the literal N/A string
 * when no location code was found.
 *
 * The PCI bus of the PE comes from eeh_pe_bus_get() and its device-tree
 * node from pci_bus_to_OF_node(). For a root bus the properties
 * ibm,loc-code and ibm,io-base-loc-code are looked up; on the other
 * visible path ibm,loc-code and ibm,slot-location-code are looked up.
 * NOTE(review): the NULL checks between lookups and the code that moves
 * dn to the root port are on lines not visible in this extract.
 */
840 const char *eeh_pe_loc_get(struct eeh_pe *pe)
842 struct pci_bus *bus = eeh_pe_bus_get(pe);
843 struct device_node *dn = pci_bus_to_OF_node(bus);
844 const char *loc = NULL;
849 /* PHB PE or root PE ? */
850 if (pci_is_root_bus(bus)) {
851 loc = of_get_property(dn, "ibm,loc-code", NULL);
853 loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
857 /* Check the root port */
863 loc = of_get_property(dn, "ibm,loc-code", NULL);
865 loc = of_get_property(dn, "ibm,slot-location-code", NULL);
/* Never hand back NULL: substitute a fixed placeholder string */
868 return loc ? loc : "N/A";
872 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
875 * Retrieve the PCI bus according to the given PE. Basically,
876 * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
877 * primary PCI bus will be retrieved. The parent bus will be
878 * returned for BUS PE. However, we don't have associated PCI
881 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
883 struct pci_bus *bus = NULL;
884 struct eeh_dev *edev;
885 struct pci_dev *pdev;
887 if (pe->type & EEH_PE_PHB) {
889 } else if (pe->type & EEH_PE_BUS ||
890 pe->type & EEH_PE_DEVICE) {
896 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
897 pdev = eeh_dev_to_pci_dev(edev);