]> git.karo-electronics.de Git - karo-tx-linux.git/blob - arch/powerpc/platforms/powernv/pci-ioda.c
f9cb6c53a0cabd02ef537fbf72bf189357113ae3
[karo-tx-linux.git] / arch / powerpc / platforms / powernv / pci-ioda.c
1 /*
2  * Support PCI/PCIe on PowerNV platforms
3  *
4  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #undef DEBUG
13
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/debugfs.h>
17 #include <linux/delay.h>
18 #include <linux/string.h>
19 #include <linux/init.h>
20 #include <linux/bootmem.h>
21 #include <linux/irq.h>
22 #include <linux/io.h>
23 #include <linux/msi.h>
24
25 #include <asm/sections.h>
26 #include <asm/io.h>
27 #include <asm/prom.h>
28 #include <asm/pci-bridge.h>
29 #include <asm/machdep.h>
30 #include <asm/msi_bitmap.h>
31 #include <asm/ppc-pci.h>
32 #include <asm/opal.h>
33 #include <asm/iommu.h>
34 #include <asm/tce.h>
35 #include <asm/xics.h>
36 #include <asm/debug.h>
37
38 #include "powernv.h"
39 #include "pci.h"
40
/*
 * Generate pe_err()/pe_warn()/pe_info(): printk() at the given level
 * with a "pci <name>: [PE# nnn]" prefix identifying the PE.  The
 * prefix is the device name when the PE wraps a single device
 * (pe->pdev set), or "<domain>:<bus>" when it wraps a bus.
 */
#define define_pe_printk_level(func, kern_level)                \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)     \
{                                                               \
        struct va_format vaf;                                   \
        va_list args;                                           \
        char pfix[32];                                          \
        int r;                                                  \
                                                                \
        va_start(args, fmt);                                    \
                                                                \
        vaf.fmt = fmt;                                          \
        vaf.va = &args;                                         \
                                                                \
        if (pe->pdev)                                           \
                strlcpy(pfix, dev_name(&pe->pdev->dev),         \
                        sizeof(pfix));                          \
        else                                                    \
                sprintf(pfix, "%04x:%02x     ",                 \
                        pci_domain_nr(pe->pbus),                \
                        pe->pbus->number);                      \
        r = printk(kern_level "pci %s: [PE# %.3d] %pV",         \
                   pfix, pe->pe_number, &vaf);                  \
                                                                \
        va_end(args);                                           \
                                                                \
        return r;                                               \
}                                                               \

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);
72
73 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
74 {
75         unsigned long pe;
76
77         do {
78                 pe = find_next_zero_bit(phb->ioda.pe_alloc,
79                                         phb->ioda.total_pe, 0);
80                 if (pe >= phb->ioda.total_pe)
81                         return IODA_INVALID_PE;
82         } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
83
84         phb->ioda.pe_array[pe].phb = phb;
85         phb->ioda.pe_array[pe].pe_number = pe;
86         return pe;
87 }
88
89 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
90 {
91         WARN_ON(phb->ioda.pe_array[pe].pdev);
92
93         memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
94         clear_bit(pe, phb->ioda.pe_alloc);
95 }
96
97 /* Currently those 2 are only used when MSIs are enabled, this will change
98  * but in the meantime, we need to protect them to avoid warnings
99  */
100 #ifdef CONFIG_PCI_MSI
101 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
102 {
103         struct pci_controller *hose = pci_bus_to_host(dev->bus);
104         struct pnv_phb *phb = hose->private_data;
105         struct pci_dn *pdn = pci_get_pdn(dev);
106
107         if (!pdn)
108                 return NULL;
109         if (pdn->pe_number == IODA_INVALID_PE)
110                 return NULL;
111         return &phb->ioda.pe_array[pdn->pe_number];
112 }
113 #endif /* CONFIG_PCI_MSI */
114
/*
 * Program @pe into the PHB hardware: set up the RID match in the PELT,
 * add the PE to all parent bridges' PELT-V (so upstream error isolation
 * covers it), fill in the RID -> PE# reverse map, and on IODA1 bind and
 * enable an MVE for MSIs (IODA2 always uses MVE 0).
 *
 * Returns 0 on success, -ENXIO if OPAL rejects the PELT setup.
 */
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
        struct pci_dev *parent;
        uint8_t bcomp, dcomp, fcomp;
        long rc, rid_end, rid;

        /* Bus validation ? */
        if (pe->pbus) {
                int count;

                /* Bus-type PE: ignore device/function in the RID compare */
                dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
                fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
                parent = pe->pbus->self;
                if (pe->flags & PNV_IODA_PE_BUS_ALL)
                        count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
                else
                        count = 1;

                /* Map the covered bus count onto the hardware's bus-number
                 * compare modes (power-of-two ranges only). */
                switch(count) {
                case  1: bcomp = OpalPciBusAll;         break;
                case  2: bcomp = OpalPciBus7Bits;       break;
                case  4: bcomp = OpalPciBus6Bits;       break;
                case  8: bcomp = OpalPciBus5Bits;       break;
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
                        pr_err("%s: Number of subordinate busses %d"
                               " unsupported\n",
                               pci_name(pe->pbus->self), count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
                /* Device-type PE: match bus, device and function exactly */
                parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
                rid_end = pe->rid + 1;
        }

        /* Associate PE in PELT */
        rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                             bcomp, dcomp, fcomp, OPAL_MAP_PE);
        if (rc) {
                pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
                return -ENXIO;
        }
        /* Clear any stale freeze state left over on this PE number */
        opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

        /* Add to all parents PELT-V */
        while (parent) {
                struct pci_dn *pdn = pci_get_pdn(parent);
                if (pdn && pdn->pe_number != IODA_INVALID_PE) {
                        rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
                        /* XXX What to do in case of error ? */
                }
                parent = parent->bus->self;
        }
        /* Setup reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
                phb->ioda.pe_rmap[rid] = pe->pe_number;

        /* Setup one MVTs on IODA1 */
        if (phb->type == PNV_PHB_IODA1) {
                /* MVE number mirrors the PE number on IODA1 */
                pe->mve_number = pe->pe_number;
                rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                                      pe->pe_number);
                if (rc) {
                        pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                               rc, pe->mve_number);
                        pe->mve_number = -1;
                } else {
                        rc = opal_pci_set_mve_enable(phb->opal_id,
                                                     pe->mve_number, OPAL_ENABLE_MVE);
                        if (rc) {
                                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                                       rc, pe->mve_number);
                                pe->mve_number = -1;
                        }
                }
        } else if (phb->type == PNV_PHB_IODA2)
                pe->mve_number = 0;

        return 0;
}
203
204 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
205                                        struct pnv_ioda_pe *pe)
206 {
207         struct pnv_ioda_pe *lpe;
208
209         list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
210                 if (lpe->dma_weight < pe->dma_weight) {
211                         list_add_tail(&pe->dma_link, &lpe->dma_link);
212                         return;
213                 }
214         }
215         list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
216 }
217
218 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
219 {
220         /* This is quite simplistic. The "base" weight of a device
221          * is 10. 0 means no DMA is to be accounted for it.
222          */
223
224         /* If it's a bridge, no DMA */
225         if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
226                 return 0;
227
228         /* Reduce the weight of slow USB controllers */
229         if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
230             dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
231             dev->class == PCI_CLASS_SERIAL_USB_EHCI)
232                 return 3;
233
234         /* Increase the weight of RAID (includes Obsidian) */
235         if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
236                 return 15;
237
238         /* Default */
239         return 10;
240 }
241
242 #if 0
/*
 * Create and configure a PE covering a single device.  Currently
 * compiled out (#if 0) -- kept as the starting point for per-device
 * PEs, which the SR-IOV case will need.  Returns the new PE, or NULL
 * on any failure (device left without a PE).
 */
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!pdn) {
                pr_err("%s: Device tree node not associated properly\n",
                           pci_name(dev));
                return NULL;
        }
        /* Already has a PE -- nothing to do */
        if (pdn->pe_number != IODA_INVALID_PE)
                return NULL;

        /* PE#0 has been pre-set */
        if (dev->bus->number == 0)
                pe_num = 0;
        else
                pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling device\n",
                           pci_name(dev));
                return NULL;
        }

        /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
         * pointer in the PE data structure, both should be destroyed at the
         * same time. However, this needs to be looked at more closely again
         * once we actually start removing things (Hotplug, SR-IOV, ...)
         *
         * At some point we want to remove the PDN completely anyways
         */
        pe = &phb->ioda.pe_array[pe_num];
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe_num;
        pe->pdev = dev;
        pe->pbus = NULL;
        pe->tce32_seg = -1;     /* no 32-bit DMA segment assigned yet */
        pe->mve_number = -1;    /* no MVE bound yet */
        pe->rid = dev->bus->number << 8 | pdn->devfn;   /* RID = bus:devfn */

        pe_info(pe, "Associated device to PE\n");

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)     /* never free the pre-set PE#0 */
                        pnv_ioda_free_pe(phb, pe_num);
                pdn->pe_number = IODA_INVALID_PE;
                pe->pdev = NULL;
                pci_dev_put(dev);
                return NULL;
        }

        /* Assign a DMA weight to the device */
        pe->dma_weight = pnv_ioda_dma_weight(dev);
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);

        return pe;
}
311 #endif /* Useful for SRIOV case */
312
313 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
314 {
315         struct pci_dev *dev;
316
317         list_for_each_entry(dev, &bus->devices, bus_list) {
318                 struct pci_dn *pdn = pci_get_pdn(dev);
319
320                 if (pdn == NULL) {
321                         pr_warn("%s: No device node associated with device !\n",
322                                 pci_name(dev));
323                         continue;
324                 }
325                 pci_dev_get(dev);
326                 pdn->pcidev = dev;
327                 pdn->pe_number = pe->pe_number;
328                 pe->dma_weight += pnv_ioda_dma_weight(dev);
329                 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
330                         pnv_ioda_setup_same_PE(dev->subordinate, pe);
331         }
332 }
333
334 /*
 * There are two types of PCI-bus-sensitive PEs: one comprised of a
 * single PCI bus, and one that contains the primary PCI bus plus its
 * subordinate PCI devices and buses. The second type of PE is normally
 * originated by a PCIe-to-PCI bridge or a PLX switch downstream port.
339  */
/*
 * Create and configure a PE covering @bus.  With @all set the PE also
 * covers every subordinate bus and device below it (PNV_IODA_PE_BUS_ALL,
 * used below PCIe-to-PCI bridges); otherwise it covers @bus alone.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        struct pnv_ioda_pe *pe;
        int pe_num;

        pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                        __func__, pci_domain_nr(bus), bus->number);
                return;
        }

        pe = &phb->ioda.pe_array[pe_num];
        pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
        pe->pbus = bus;
        pe->pdev = NULL;
        pe->tce32_seg = -1;     /* no 32-bit DMA segment assigned yet */
        pe->mve_number = -1;    /* no MVE bound yet */
        pe->rid = bus->busn_res.start << 8;     /* RID of devfn 0 on this bus */
        pe->dma_weight = 0;

        if (all)
                pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
                        bus->busn_res.start, bus->busn_res.end, pe_num);
        else
                pe_info(pe, "Secondary bus %d associated with PE#%d\n",
                        bus->busn_res.start, pe_num);

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)     /* never free the pre-set PE#0 */
                        pnv_ioda_free_pe(phb, pe_num);
                pe->pbus = NULL;
                return;
        }

        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);

        /* Put PE to the list */
        list_add_tail(&pe->list, &phb->ioda.pe_list);

        /* Account for one DMA PE if at least one DMA capable device exist
         * below the bridge
         */
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);
}
395
396 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
397 {
398         struct pci_dev *dev;
399
400         pnv_ioda_setup_bus_PE(bus, 0);
401
402         list_for_each_entry(dev, &bus->devices, bus_list) {
403                 if (dev->subordinate) {
404                         if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
405                                 pnv_ioda_setup_bus_PE(dev->subordinate, 1);
406                         else
407                                 pnv_ioda_setup_PEs(dev->subordinate);
408                 }
409         }
410 }
411
412 /*
413  * Configure PEs so that the downstream PCI buses and devices
 * could have their associated PE#. Unfortunately, we haven't
 * figured out a way to identify PLX bridges yet, so we simply
 * put the PCI bus and the subordinates behind the root
 * port into a PE# here. This scheme is expected to change
 * as soon as we can detect PLX bridges correctly.
419  */
420 static void pnv_pci_ioda_setup_PEs(void)
421 {
422         struct pci_controller *hose, *tmp;
423
424         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
425                 pnv_ioda_setup_PEs(hose->bus);
426         }
427 }
428
429 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
430 {
431         struct pci_dn *pdn = pci_get_pdn(pdev);
432         struct pnv_ioda_pe *pe;
433
434         /*
435          * The function can be called while the PE#
436          * hasn't been assigned. Do nothing for the
437          * case.
438          */
439         if (!pdn || pdn->pe_number == IODA_INVALID_PE)
440                 return;
441
442         pe = &phb->ioda.pe_array[pdn->pe_number];
443         set_iommu_table_base(&pdev->dev, &pe->tce32_table);
444 }
445
446 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
447 {
448         struct pci_dev *dev;
449
450         list_for_each_entry(dev, &bus->devices, bus_list) {
451                 set_iommu_table_base(&dev->dev, &pe->tce32_table);
452                 if (dev->subordinate)
453                         pnv_ioda_setup_bus_dma(pe, dev->subordinate);
454         }
455 }
456
/*
 * IODA1 TCE invalidation: flush the hardware's cached copies of the
 * TCE entries in [startp, endp] by writing their (encoded) real
 * addresses to the "TCE kill" MMIO register, whose mapping the setup
 * code stashed in tbl->it_index.  The encoding and stride depend on
 * the hardware generation, selected via it_busno / it_type below.
 */
static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
                                         u64 *startp, u64 *endp)
{
        __be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index;
        unsigned long start, end, inc;

        /* Work on the real addresses of the TCE entries */
        start = __pa(startp);
        end = __pa(endp);

        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
                start <<= 12;
                end <<= 12;
                inc = 128 << 12;
                start |= tbl->it_busno;
                end |= tbl->it_busno;
        } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
                /* p7ioc-style invalidation, 2 TCEs per write */
                start |= (1ull << 63);
                end |= (1ull << 63);
                inc = 16;
        } else {
                /* Default (older HW) */
                inc = 128;
        }

        end |= inc - 1; /* round up end to be different than start */

        mb(); /* Ensure above stores are visible */
        while (start <= end) {
                __raw_writeq(cpu_to_be64(start), invalidate);
                start += inc;
        }

        /*
         * The iommu layer will do another mb() for us on build()
         * and we don't care on free()
         */
}
496
/*
 * IODA2 (PHB3) TCE invalidation: the kill register takes DMA-space
 * page indices scoped to a PE rather than TCE real addresses.  Each
 * write carries 0x2 in the top nibble, the PE number in the low byte
 * and the page index shifted into bits 12+, and kills one 4K page.
 */
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
                                         struct iommu_table *tbl,
                                         u64 *startp, u64 *endp)
{
        unsigned long start, end, inc;
        __be64 __iomem *invalidate = (__be64 __iomem *)tbl->it_index;

        /* We'll invalidate DMA address in PE scope */
        start = 0x2ul << 60;
        start |= (pe->pe_number & 0xFF);
        end = start;

        /* Figure out the start, end and step */
        inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
        start |= (inc << 12);
        inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
        end |= (inc << 12);
        inc = (0x1ul << 12);    /* one 4K page per write */
        mb();   /* ensure prior TCE stores are visible before the kill */

        while (start <= end) {
                __raw_writeq(cpu_to_be64(start), invalidate);
                start += inc;
        }
}
522
523 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
524                                  u64 *startp, u64 *endp)
525 {
526         struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
527                                               tce32_table);
528         struct pnv_phb *phb = pe->phb;
529
530         if (phb->type == PNV_PHB_IODA1)
531                 pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
532         else
533                 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
534 }
535
/*
 * IODA1: set up the 32-bit DMA (TCE) window for @pe using @segs
 * 256MB segments starting at segment @base.  Allocates one contiguous
 * TCE table covering all segments, maps each segment through OPAL,
 * then registers the resulting iommu_table with the PE's device(s).
 * On failure the PE is simply left without 32-bit DMA.
 */
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
{

        struct page *tce_mem = NULL;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
        void *addr;

        /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE        ((0x10000000 / 0x1000) * 8)

        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
                (base << 28), ((base + segs) << 28) - 1);

        /* XXX Currently, we allocate one big contiguous table for the
         * TCEs. We only really need one chunk per 256M of TCE space
         * (ie per segment) but that's an optimization for later, it
         * requires some added smarts with our get/put_tce implementation
         */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(TCE32_TABLE_SIZE * segs));
        if (!tce_mem) {
                pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, TCE32_TABLE_SIZE * segs);

        /* Configure HW: map each 256M segment to its slice of the table */
        for (i = 0; i < segs; i++) {
                rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                              pe->pe_number,
                                              base + i, 1,
                                              __pa(addr) + TCE32_TABLE_SIZE * i,
                                              TCE32_TABLE_SIZE, 0x1000);
                if (rc) {
                        pe_err(pe, " Failed to configure 32-bit TCE table,"
                               " err %ld\n", rc);
                        goto fail;
                }
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28);

        /* OPAL variant of P7IOC SW invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to or.  Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                /* it_index doubles as the mapped TCE-kill register address */
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
                               TCE_PCI_SWINV_PAIR;
        }
        iommu_init_table(tbl, phb->hose->node);
        iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

        /* Hook the table up to the PE's device, or all devices on its bus */
        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
 fail:
        /* XXX Failure: Try to fallback to 64-bit only ? */
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
626
/*
 * IODA2 (PHB3): give @pe one TCE table covering the entire 32-bit DMA
 * space below m32_pci_base, mapped through a single TVE (index
 * pe_number << 1), then register the iommu_table with the PE's
 * device(s).  On failure the PE is left without 32-bit DMA.
 */
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe)
{
        struct page *tce_mem = NULL;
        void *addr;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int tce_table_size, end;
        int64_t rc;

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* The PE will reserve all possible 32-bits space */
        pe->tce32_seg = 0;
        end = (1 << ilog2(phb->ioda.m32_pci_base));
        tce_table_size = (end / 0x1000) * 8;    /* 8 bytes per 4K TCE page */
        pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
                end);

        /* Allocate TCE table */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(tce_table_size));
        if (!tce_mem) {
                pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, tce_table_size);

        /*
         * Map TCE table through TVT. The TVE index is the PE number
         * shifted by 1 bit for 32-bits DMA space.
         */
        rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
                                        pe->pe_number << 1, 1, __pa(addr),
                                        tce_table_size, 0x1000);
        if (rc) {
                pe_err(pe, "Failed to configure 32-bit TCE table,"
                       " err %ld\n", rc);
                goto fail;
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

        /* OPAL variant of PHB3 invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to or.  Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                /* it_index doubles as the mapped TCE-kill register address */
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
        }
        iommu_init_table(tbl, phb->hose->node);

        /* Hook the table up to the PE's device, or all devices on its bus */
        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
fail:
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(tce_table_size));
}
701
/*
 * Distribute the PHB's 32-bit TCE segments over its DMA-capable PEs.
 * On IODA1 each PE gets one base segment plus a share of the residual
 * segments proportional to its DMA weight; on IODA2 (PHB3) each PE
 * simply gets the whole 32-bit DMA space, so weights are irrelevant.
 */
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        unsigned int residual, remaining, segs, tw, base;
        struct pnv_ioda_pe *pe;

        /* If we have more PE# than segments available, hand out one
         * per PE until we run out and let the rest fail. If not,
         * then we assign at least one segment per PE, plus more based
         * on the amount of devices under that PE
         */
        if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
                residual = 0;
        else
                residual = phb->ioda.tce32_count -
                        phb->ioda.dma_pe_count;

        pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
                hose->global_number, phb->ioda.tce32_count);
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);

        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
         */
        remaining = phb->ioda.tce32_count;
        tw = phb->ioda.dma_weight;
        base = 0;
        list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
                if (!pe->dma_weight)
                        continue;
                if (!remaining) {
                        pe_warn(pe, "No DMA32 resources available\n");
                        continue;
                }
                segs = 1;
                if (residual) {
                        /* weight-proportional share, rounded to nearest */
                        segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
                        if (segs > remaining)
                                segs = remaining;
                }

                /*
                 * For IODA2 compliant PHB3, we needn't care about the weight.
                 * The all available 32-bits DMA space will be assigned to
                 * the specific PE.
                 */
                if (phb->type == PNV_PHB_IODA1) {
                        pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                                pe->dma_weight, segs);
                        pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
                } else {
                        pe_info(pe, "Assign DMA32 space\n");
                        segs = 0;       /* IODA2 consumes no shared segments */
                        pnv_pci_ioda2_setup_dma_pe(phb, pe);
                }

                remaining -= segs;
                base += segs;
        }
}
764
765 #ifdef CONFIG_PCI_MSI
766 static void pnv_ioda2_msi_eoi(struct irq_data *d)
767 {
768         unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
769         struct irq_chip *chip = irq_data_get_irq_chip(d);
770         struct pnv_phb *phb = container_of(chip, struct pnv_phb,
771                                            ioda.irq_chip);
772         int64_t rc;
773
774         rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
775         WARN_ON_ONCE(rc);
776
777         icp_native_eoi(d);
778 }
779
/*
 * Set up one MSI/MSI-X interrupt for @dev: assign the XIVE to the
 * device's PE via OPAL, fetch the MSI address/data pair from firmware
 * and fill in @msg. On IODA2 (PHB3) the IRQ chip is additionally
 * replaced by a copy whose EOI goes through OPAL (pnv_ioda2_msi_eoi).
 *
 * @hwirq is the global hardware IRQ, @virq the Linux virtual IRQ,
 * @is_64 selects 64-bit MSI addressing (may be overridden below).
 * Returns 0 on success, -ENXIO if the device has no PE/MVE, -EIO on
 * OPAL failure.
 */
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int virq,
                                  unsigned int is_64, struct msi_msg *msg)
{
        struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct irq_data *idata;
        struct irq_chip *ichip;
        /* XIVE number is the offset of hwirq within this PHB's MSI range */
        unsigned int xive_num = hwirq - phb->msi_base;
        __be32 data;
        int rc;

        /* No PE assigned ? bail out ... no MSI for you ! */
        if (pe == NULL)
                return -ENXIO;

        /* Check if we have an MVE */
        if (pe->mve_number < 0)
                return -ENXIO;

        /* Force 32-bit MSI on some broken devices */
        if (pdn && pdn->force_32bit_msi)
                is_64 = 0;

        /* Assign XIVE to PE */
        rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
        if (rc) {
                pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
                        pci_name(dev), rc, xive_num);
                return -EIO;
        }

        if (is_64) {
                __be64 addr64;

                rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr64, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                /* Firmware returns big-endian; split into hi/lo CPU-order words */
                msg->address_hi = be64_to_cpu(addr64) >> 32;
                msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
        } else {
                __be32 addr32;

                rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr32, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = 0;
                msg->address_lo = be32_to_cpu(addr32);
        }
        msg->data = be32_to_cpu(data);

        /*
         * Change the IRQ chip for the MSI interrupts on PHB3.
         * The corresponding IRQ chip should be populated for
         * the first time.
         */
        if (phb->type == PNV_PHB_IODA2) {
                if (!phb->ioda.irq_chip_init) {
                        /* Clone the current chip once, overriding only EOI */
                        idata = irq_get_irq_data(virq);
                        ichip = irq_data_get_irq_chip(idata);
                        phb->ioda.irq_chip_init = 1;
                        phb->ioda.irq_chip = *ichip;
                        phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
                }

                irq_set_chip(virq, &phb->ioda.irq_chip);
        }

        pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
                 " address=%x_%08x data=%x PE# %d\n",
                 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
                 msg->address_hi, msg->address_lo, data, pe->pe_number);

        return 0;
}
863
864 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
865 {
866         unsigned int count;
867         const __be32 *prop = of_get_property(phb->hose->dn,
868                                              "ibm,opal-msi-ranges", NULL);
869         if (!prop) {
870                 /* BML Fallback */
871                 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
872         }
873         if (!prop)
874                 return;
875
876         phb->msi_base = be32_to_cpup(prop);
877         count = be32_to_cpup(prop + 1);
878         if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
879                 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
880                        phb->hose->global_number);
881                 return;
882         }
883
884         phb->msi_setup = pnv_pci_ioda_msi_setup;
885         phb->msi32_support = 1;
886         pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
887                 count, phb->msi_base);
888 }
889 #else
/* Stub used when CONFIG_PCI_MSI is disabled */
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
891 #endif /* CONFIG_PCI_MSI */
892
893 /*
894  * This function is supposed to be called on basis of PE from top
895  * to bottom style. So the the I/O or MMIO segment assigned to
896  * parent PE could be overrided by its child PEs if necessary.
897  */
898 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
899                                   struct pnv_ioda_pe *pe)
900 {
901         struct pnv_phb *phb = hose->private_data;
902         struct pci_bus_region region;
903         struct resource *res;
904         int i, index;
905         int rc;
906
907         /*
908          * NOTE: We only care PCI bus based PE for now. For PCI
909          * device based PE, for example SRIOV sensitive VF should
910          * be figured out later.
911          */
912         BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
913
914         pci_bus_for_each_resource(pe->pbus, res, i) {
915                 if (!res || !res->flags ||
916                     res->start > res->end)
917                         continue;
918
919                 if (res->flags & IORESOURCE_IO) {
920                         region.start = res->start - phb->ioda.io_pci_base;
921                         region.end   = res->end - phb->ioda.io_pci_base;
922                         index = region.start / phb->ioda.io_segsize;
923
924                         while (index < phb->ioda.total_pe &&
925                                region.start <= region.end) {
926                                 phb->ioda.io_segmap[index] = pe->pe_number;
927                                 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
928                                         pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
929                                 if (rc != OPAL_SUCCESS) {
930                                         pr_err("%s: OPAL error %d when mapping IO "
931                                                "segment #%d to PE#%d\n",
932                                                __func__, rc, index, pe->pe_number);
933                                         break;
934                                 }
935
936                                 region.start += phb->ioda.io_segsize;
937                                 index++;
938                         }
939                 } else if (res->flags & IORESOURCE_MEM) {
940                         /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
941                          * harden that algorithm when we start supporting M64
942                          */
943                         region.start = res->start -
944                                        hose->mem_offset[0] -
945                                        phb->ioda.m32_pci_base;
946                         region.end   = res->end -
947                                        hose->mem_offset[0] -
948                                        phb->ioda.m32_pci_base;
949                         index = region.start / phb->ioda.m32_segsize;
950
951                         while (index < phb->ioda.total_pe &&
952                                region.start <= region.end) {
953                                 phb->ioda.m32_segmap[index] = pe->pe_number;
954                                 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
955                                         pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
956                                 if (rc != OPAL_SUCCESS) {
957                                         pr_err("%s: OPAL error %d when mapping M32 "
958                                                "segment#%d to PE#%d",
959                                                __func__, rc, index, pe->pe_number);
960                                         break;
961                                 }
962
963                                 region.start += phb->ioda.m32_segsize;
964                                 index++;
965                         }
966                 }
967         }
968 }
969
/*
 * Program the I/O and M32 segment maps for every PE on every PHB.
 * Runs once from the pcibios fixup, after the PEs have been created.
 */
static void pnv_pci_ioda_setup_seg(void)
{
        struct pci_controller *tmp, *hose;
        struct pnv_phb *phb;
        struct pnv_ioda_pe *pe;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
                list_for_each_entry(pe, &phb->ioda.pe_list, list) {
                        pnv_ioda_setup_pe_seg(hose, pe);
                }
        }
}
983
/*
 * Set up DMA (TCE tables) for every PHB, then mark each PHB fully
 * initialized so pnv_pci_enable_device_hook starts enforcing the
 * PE-assignment check.
 */
static void pnv_pci_ioda_setup_DMA(void)
{
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                pnv_ioda_setup_dma(hose->private_data);

                /* Mark the PHB initialization done */
                phb = hose->private_data;
                phb->initialized = 1;
        }
}
997
/*
 * Create a per-PHB debugfs directory named "PCI%04x" under the
 * powerpc debugfs root. Compiles to a no-op when CONFIG_DEBUG_FS
 * is disabled.
 */
static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;
        char name[16];

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;

                /* Bounded formatting instead of sprintf() into a fixed buffer */
                snprintf(name, sizeof(name), "PCI%04x", hose->global_number);
                phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
                if (!phb->dbgfs)
                        /* pr_warn replaces the deprecated pr_warning alias */
                        pr_warn("%s: Error on creating debugfs on PHB#%x\n",
                                __func__, hose->global_number);
        }
#endif /* CONFIG_DEBUG_FS */
}
1016
/*
 * Late pcibios fixup: create the PEs, program their I/O/M32 segment
 * maps and DMA windows, expose the per-PHB debugfs entries and then
 * bring up EEH (device-based probing).
 */
static void pnv_pci_ioda_fixup(void)
{
        pnv_pci_ioda_setup_PEs();
        pnv_pci_ioda_setup_seg();
        pnv_pci_ioda_setup_DMA();

        pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
        /* Probe EEH per device, build the address cache, then init EEH */
        eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
        eeh_addr_cache_build();
        eeh_init();
#endif
}
1031
1032 /*
1033  * Returns the alignment for I/O or memory windows for P2P
1034  * bridges. That actually depends on how PEs are segmented.
1035  * For now, we return I/O or M32 segment size for PE sensitive
1036  * P2P bridges. Otherwise, the default values (4KiB for I/O,
1037  * 1MiB for memory) will be returned.
1038  *
1039  * The current PCI bus might be put into one PE, which was
1040  * create against the parent PCI bridge. For that case, we
1041  * needn't enlarge the alignment so that we can save some
1042  * resources.
1043  */
1044 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
1045                                                 unsigned long type)
1046 {
1047         struct pci_dev *bridge;
1048         struct pci_controller *hose = pci_bus_to_host(bus);
1049         struct pnv_phb *phb = hose->private_data;
1050         int num_pci_bridges = 0;
1051
1052         bridge = bus->self;
1053         while (bridge) {
1054                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
1055                         num_pci_bridges++;
1056                         if (num_pci_bridges >= 2)
1057                                 return 1;
1058                 }
1059
1060                 bridge = bridge->bus->self;
1061         }
1062
1063         /* We need support prefetchable memory window later */
1064         if (type & IORESOURCE_MEM)
1065                 return phb->ioda.m32_segsize;
1066
1067         return phb->ioda.io_segsize;
1068 }
1069
/* Prevent enabling devices for which we couldn't properly
 * assign a PE. Returns 0 when the device may be enabled,
 * -EINVAL otherwise.
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn;

        /* The function is probably called while the PEs have
         * not been created yet. For example, resource reassignment
         * during the PCI probe period. We just skip the check if
         * the PEs aren't ready.
         */
        if (!phb->initialized)
                return 0;

        pdn = pci_get_pdn(dev);
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return -EINVAL;

        return 0;
}
1093
1094 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
1095                                u32 devfn)
1096 {
1097         return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
1098 }
1099
/* Quiesce the PHB for kexec by resetting the IODA tables via OPAL. */
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
        opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
                       OPAL_ASSERT_RESET);
}
1105
1106 void __init pnv_pci_init_ioda_phb(struct device_node *np,
1107                                   u64 hub_id, int ioda_type)
1108 {
1109         struct pci_controller *hose;
1110         struct pnv_phb *phb;
1111         unsigned long size, m32map_off, iomap_off, pemap_off;
1112         const __be64 *prop64;
1113         const __be32 *prop32;
1114         int len;
1115         u64 phb_id;
1116         void *aux;
1117         long rc;
1118
1119         pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
1120
1121         prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
1122         if (!prop64) {
1123                 pr_err("  Missing \"ibm,opal-phbid\" property !\n");
1124                 return;
1125         }
1126         phb_id = be64_to_cpup(prop64);
1127         pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
1128
1129         phb = alloc_bootmem(sizeof(struct pnv_phb));
1130         if (!phb) {
1131                 pr_err("  Out of memory !\n");
1132                 return;
1133         }
1134
1135         /* Allocate PCI controller */
1136         memset(phb, 0, sizeof(struct pnv_phb));
1137         phb->hose = hose = pcibios_alloc_controller(np);
1138         if (!phb->hose) {
1139                 pr_err("  Can't allocate PCI controller for %s\n",
1140                        np->full_name);
1141                 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
1142                 return;
1143         }
1144
1145         spin_lock_init(&phb->lock);
1146         prop32 = of_get_property(np, "bus-range", &len);
1147         if (prop32 && len == 8) {
1148                 hose->first_busno = be32_to_cpu(prop32[0]);
1149                 hose->last_busno = be32_to_cpu(prop32[1]);
1150         } else {
1151                 pr_warn("  Broken <bus-range> on %s\n", np->full_name);
1152                 hose->first_busno = 0;
1153                 hose->last_busno = 0xff;
1154         }
1155         hose->private_data = phb;
1156         phb->hub_id = hub_id;
1157         phb->opal_id = phb_id;
1158         phb->type = ioda_type;
1159
1160         /* Detect specific models for error handling */
1161         if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
1162                 phb->model = PNV_PHB_MODEL_P7IOC;
1163         else if (of_device_is_compatible(np, "ibm,power8-pciex"))
1164                 phb->model = PNV_PHB_MODEL_PHB3;
1165         else
1166                 phb->model = PNV_PHB_MODEL_UNKNOWN;
1167
1168         /* Parse 32-bit and IO ranges (if any) */
1169         pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
1170
1171         /* Get registers */
1172         phb->regs = of_iomap(np, 0);
1173         if (phb->regs == NULL)
1174                 pr_err("  Failed to map registers !\n");
1175
1176         /* Initialize more IODA stuff */
1177         prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
1178         if (!prop32)
1179                 phb->ioda.total_pe = 1;
1180         else
1181                 phb->ioda.total_pe = be32_to_cpup(prop32);
1182
1183         phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
1184         /* FW Has already off top 64k of M32 space (MSI space) */
1185         phb->ioda.m32_size += 0x10000;
1186
1187         phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
1188         phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
1189         phb->ioda.io_size = hose->pci_io_size;
1190         phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
1191         phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
1192
1193         /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
1194         size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
1195         m32map_off = size;
1196         size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
1197         iomap_off = size;
1198         if (phb->type == PNV_PHB_IODA1) {
1199                 iomap_off = size;
1200                 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
1201         }
1202         pemap_off = size;
1203         size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
1204         aux = alloc_bootmem(size);
1205         memset(aux, 0, size);
1206         phb->ioda.pe_alloc = aux;
1207         phb->ioda.m32_segmap = aux + m32map_off;
1208         if (phb->type == PNV_PHB_IODA1)
1209                 phb->ioda.io_segmap = aux + iomap_off;
1210         phb->ioda.pe_array = aux + pemap_off;
1211         set_bit(0, phb->ioda.pe_alloc);
1212
1213         INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
1214         INIT_LIST_HEAD(&phb->ioda.pe_list);
1215
1216         /* Calculate how many 32-bit TCE segments we have */
1217         phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
1218
1219         /* Clear unusable m64 */
1220         hose->mem_resources[1].flags = 0;
1221         hose->mem_resources[1].start = 0;
1222         hose->mem_resources[1].end = 0;
1223         hose->mem_resources[2].flags = 0;
1224         hose->mem_resources[2].start = 0;
1225         hose->mem_resources[2].end = 0;
1226
1227 #if 0 /* We should really do that ... */
1228         rc = opal_pci_set_phb_mem_window(opal->phb_id,
1229                                          window_type,
1230                                          window_num,
1231                                          starting_real_address,
1232                                          starting_pci_address,
1233                                          segment_size);
1234 #endif
1235
1236         pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
1237                 phb->ioda.total_pe,
1238                 phb->ioda.m32_size, phb->ioda.m32_segsize,
1239                 phb->ioda.io_size, phb->ioda.io_segsize);
1240
1241         phb->hose->ops = &pnv_pci_ops;
1242 #ifdef CONFIG_EEH
1243         phb->eeh_ops = &ioda_eeh_ops;
1244 #endif
1245
1246         /* Setup RID -> PE mapping function */
1247         phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
1248
1249         /* Setup TCEs */
1250         phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1251
1252         /* Setup shutdown function for kexec */
1253         phb->shutdown = pnv_pci_ioda_shutdown;
1254
1255         /* Setup MSI support */
1256         pnv_pci_init_ioda_msis(phb);
1257
1258         /*
1259          * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
1260          * to let the PCI core do resource assignment. It's supposed
1261          * that the PCI core will do correct I/O and MMIO alignment
1262          * for the P2P bridge bars so that each PCI bus (excluding
1263          * the child P2P bridges) can form individual PE.
1264          */
1265         ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
1266         ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
1267         ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
1268         pci_add_flags(PCI_REASSIGN_ALL_RSRC);
1269
1270         /* Reset IODA tables to a clean state */
1271         rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
1272         if (rc)
1273                 pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
1274
1275         /*
1276          * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
1277          * has cleared the RTT which has the same effect
1278          */
1279         if (ioda_type == PNV_PHB_IODA1)
1280                 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
1281 }
1282
/* Entry point for probing a stand-alone IODA2 (PHB3) PHB node. */
void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
        pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}
1287
/*
 * Probe an IODA IO-Hub node: read its hub ID from the device tree
 * and initialize every IODA1 PHB child found below it.
 */
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
        struct device_node *phbn;
        const __be64 *prop64;
        u64 hub_id;

        pr_info("Probing IODA IO-Hub %s\n", np->full_name);

        prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
        if (!prop64) {
                pr_err(" Missing \"ibm,opal-hubid\" property !\n");
                return;
        }
        hub_id = be64_to_cpup(prop64);
        pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

        /* Initialize the child PHBs */
        for_each_child_of_node(np, phbn) {
                /* Look for IODA1 PHBs */
                if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
                        pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
        }
}