]> git.karo-electronics.de Git - linux-beck.git/blob - arch/powerpc/platforms/powernv/pci-ioda.c
powerpc/powernv: Supports PHB3
[linux-beck.git] / arch / powerpc / platforms / powernv / pci-ioda.c
1 /*
2  * Support PCI/PCIe on PowerNV platforms
3  *
4  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #undef DEBUG
13
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/delay.h>
17 #include <linux/string.h>
18 #include <linux/init.h>
19 #include <linux/bootmem.h>
20 #include <linux/irq.h>
21 #include <linux/io.h>
22 #include <linux/msi.h>
23
24 #include <asm/sections.h>
25 #include <asm/io.h>
26 #include <asm/prom.h>
27 #include <asm/pci-bridge.h>
28 #include <asm/machdep.h>
29 #include <asm/msi_bitmap.h>
30 #include <asm/ppc-pci.h>
31 #include <asm/opal.h>
32 #include <asm/iommu.h>
33 #include <asm/tce.h>
34
35 #include "powernv.h"
36 #include "pci.h"
37
38 #define define_pe_printk_level(func, kern_level)                \
39 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)     \
40 {                                                               \
41         struct va_format vaf;                                   \
42         va_list args;                                           \
43         char pfix[32];                                          \
44         int r;                                                  \
45                                                                 \
46         va_start(args, fmt);                                    \
47                                                                 \
48         vaf.fmt = fmt;                                          \
49         vaf.va = &args;                                         \
50                                                                 \
51         if (pe->pdev)                                           \
52                 strlcpy(pfix, dev_name(&pe->pdev->dev),         \
53                         sizeof(pfix));                          \
54         else                                                    \
55                 sprintf(pfix, "%04x:%02x     ",                 \
56                         pci_domain_nr(pe->pbus),                \
57                         pe->pbus->number);                      \
58         r = printk(kern_level "pci %s: [PE# %.3d] %pV",         \
59                    pfix, pe->pe_number, &vaf);                  \
60                                                                 \
61         va_end(args);                                           \
62                                                                 \
63         return r;                                               \
64 }                                                               \
65
66 define_pe_printk_level(pe_err, KERN_ERR);
67 define_pe_printk_level(pe_warn, KERN_WARNING);
68 define_pe_printk_level(pe_info, KERN_INFO);
69
70 static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
71 {
72         struct device_node *np;
73
74         np = pci_device_to_OF_node(dev);
75         if (!np)
76                 return NULL;
77         return PCI_DN(np);
78 }
79
80 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
81 {
82         unsigned long pe;
83
84         do {
85                 pe = find_next_zero_bit(phb->ioda.pe_alloc,
86                                         phb->ioda.total_pe, 0);
87                 if (pe >= phb->ioda.total_pe)
88                         return IODA_INVALID_PE;
89         } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
90
91         phb->ioda.pe_array[pe].pe_number = pe;
92         return pe;
93 }
94
95 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
96 {
97         WARN_ON(phb->ioda.pe_array[pe].pdev);
98
99         memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
100         clear_bit(pe, phb->ioda.pe_alloc);
101 }
102
103 /* Currently those 2 are only used when MSIs are enabled, this will change
104  * but in the meantime, we need to protect them to avoid warnings
105  */
106 #ifdef CONFIG_PCI_MSI
107 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
108 {
109         struct pci_controller *hose = pci_bus_to_host(dev->bus);
110         struct pnv_phb *phb = hose->private_data;
111         struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
112
113         if (!pdn)
114                 return NULL;
115         if (pdn->pe_number == IODA_INVALID_PE)
116                 return NULL;
117         return &phb->ioda.pe_array[pdn->pe_number];
118 }
119 #endif /* CONFIG_PCI_MSI */
120
/*
 * Program a PE into the PHB: set up the RID compare in the PELT, clear
 * any stale EEH freeze, link the PE into each parent bridge's PELT-V,
 * build the RID->PE reverse map and, on IODA1, bind an MVE for MSIs.
 * Returns 0 on success or -ENXIO on OPAL error.
 */
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
        struct pci_dev *parent;
        uint8_t bcomp, dcomp, fcomp;
        long rc, rid_end, rid;

        /* Bus validation ? */
        if (pe->pbus) {
                int count;

                /* Bus-based PE: match any device/function on the bus(es) */
                dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
                fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
                parent = pe->pbus->self;
                if (pe->flags & PNV_IODA_PE_BUS_ALL)
                        count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
                else
                        count = 1;

                /* The bus-compare mask supports only power-of-two bus
                 * counts up to 32; anything else degrades to an exact
                 * bus match below. */
                switch(count) {
                case  1: bcomp = OpalPciBusAll;         break;
                case  2: bcomp = OpalPciBus7Bits;       break;
                case  4: bcomp = OpalPciBus6Bits;       break;
                case  8: bcomp = OpalPciBus5Bits;       break;
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
                        pr_err("%s: Number of subordinate busses %d"
                               " unsupported\n",
                               pci_name(pe->pbus->self), count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
                /* Device-based PE: exact bus/device/function match */
                parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
                rid_end = pe->rid + 1;
        }

        /* Associate PE in PELT */
        rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                             bcomp, dcomp, fcomp, OPAL_MAP_PE);
        if (rc) {
                pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
                return -ENXIO;
        }
        /* Drop any freeze state left over from before configuration */
        opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

        /* Add to all parents PELT-V */
        while (parent) {
                struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
                if (pdn && pdn->pe_number != IODA_INVALID_PE) {
                        rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
                        /* XXX What to do in case of error ? */
                }
                parent = parent->bus->self;
        }
        /* Setup reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
                phb->ioda.pe_rmap[rid] = pe->pe_number;

        /* Setup one MVTs on IODA1 */
        if (phb->type == PNV_PHB_IODA1) {
                /* On IODA1 the MVE number mirrors the PE number; -1 on
                 * failure marks "no MVE", which disables MSIs for it */
                pe->mve_number = pe->pe_number;
                rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                                      pe->pe_number);
                if (rc) {
                        pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                               rc, pe->mve_number);
                        pe->mve_number = -1;
                } else {
                        rc = opal_pci_set_mve_enable(phb->opal_id,
                                                     pe->mve_number, OPAL_ENABLE_MVE);
                        if (rc) {
                                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                                       rc, pe->mve_number);
                                pe->mve_number = -1;
                        }
                }
        } else if (phb->type == PNV_PHB_IODA2)
                pe->mve_number = 0;

        return 0;
}
209
210 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
211                                        struct pnv_ioda_pe *pe)
212 {
213         struct pnv_ioda_pe *lpe;
214
215         list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
216                 if (lpe->dma_weight < pe->dma_weight) {
217                         list_add_tail(&pe->dma_link, &lpe->dma_link);
218                         return;
219                 }
220         }
221         list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
222 }
223
224 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
225 {
226         /* This is quite simplistic. The "base" weight of a device
227          * is 10. 0 means no DMA is to be accounted for it.
228          */
229
230         /* If it's a bridge, no DMA */
231         if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
232                 return 0;
233
234         /* Reduce the weight of slow USB controllers */
235         if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
236             dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
237             dev->class == PCI_CLASS_SERIAL_USB_EHCI)
238                 return 3;
239
240         /* Increase the weight of RAID (includes Obsidian) */
241         if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
242                 return 15;
243
244         /* Default */
245         return 10;
246 }
247
248 #if 0
/*
 * Create a device-based PE for @dev.  Currently compiled out (#if 0);
 * kept around for the future SR-IOV case where each VF needs its own PE.
 * Returns the new PE, or NULL on failure or if one already exists.
 */
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!pdn) {
                pr_err("%s: Device tree node not associated properly\n",
                           pci_name(dev));
                return NULL;
        }
        /* Device already belongs to a PE: nothing to do */
        if (pdn->pe_number != IODA_INVALID_PE)
                return NULL;

        /* PE#0 has been pre-set */
        if (dev->bus->number == 0)
                pe_num = 0;
        else
                pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling device\n",
                           pci_name(dev));
                return NULL;
        }

        /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
         * pointer in the PE data structure, both should be destroyed at the
         * same time. However, this needs to be looked at more closely again
         * once we actually start removing things (Hotplug, SR-IOV, ...)
         *
         * At some point we want to remove the PDN completely anyways
         */
        pe = &phb->ioda.pe_array[pe_num];
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe_num;
        pe->pdev = dev;
        pe->pbus = NULL;
        pe->tce32_seg = -1;     /* no 32-bit DMA segment assigned yet */
        pe->mve_number = -1;    /* no MVE bound yet */
        pe->rid = dev->bus->number << 8 | pdn->devfn;

        pe_info(pe, "Associated device to PE\n");

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                /* Undo everything; PE#0 is pre-set and never freed */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pdn->pe_number = IODA_INVALID_PE;
                pe->pdev = NULL;
                pci_dev_put(dev);
                return NULL;
        }

        /* Assign a DMA weight to the device */
        pe->dma_weight = pnv_ioda_dma_weight(dev);
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);

        return pe;
}
317 #endif /* Useful for SRIOV case */
318
319 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
320 {
321         struct pci_dev *dev;
322
323         list_for_each_entry(dev, &bus->devices, bus_list) {
324                 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
325
326                 if (pdn == NULL) {
327                         pr_warn("%s: No device node associated with device !\n",
328                                 pci_name(dev));
329                         continue;
330                 }
331                 pci_dev_get(dev);
332                 pdn->pcidev = dev;
333                 pdn->pe_number = pe->pe_number;
334                 pe->dma_weight += pnv_ioda_dma_weight(dev);
335                 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
336                         pnv_ioda_setup_same_PE(dev->subordinate, pe);
337         }
338 }
339
/*
 * There're 2 types of PCI bus sensitive PEs: One that is comprised of a
 * single PCI bus. Another one that contains the primary PCI bus and its
 * subordinate PCI devices and buses. The second type of PE is normally
 * originated by a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
346 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
347 {
348         struct pci_controller *hose = pci_bus_to_host(bus);
349         struct pnv_phb *phb = hose->private_data;
350         struct pnv_ioda_pe *pe;
351         int pe_num;
352
353         pe_num = pnv_ioda_alloc_pe(phb);
354         if (pe_num == IODA_INVALID_PE) {
355                 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
356                         __func__, pci_domain_nr(bus), bus->number);
357                 return;
358         }
359
360         pe = &phb->ioda.pe_array[pe_num];
361         pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
362         pe->pbus = bus;
363         pe->pdev = NULL;
364         pe->tce32_seg = -1;
365         pe->mve_number = -1;
366         pe->rid = bus->busn_res.start << 8;
367         pe->dma_weight = 0;
368
369         if (all)
370                 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
371                         bus->busn_res.start, bus->busn_res.end, pe_num);
372         else
373                 pe_info(pe, "Secondary bus %d associated with PE#%d\n",
374                         bus->busn_res.start, pe_num);
375
376         if (pnv_ioda_configure_pe(phb, pe)) {
377                 /* XXX What do we do here ? */
378                 if (pe_num)
379                         pnv_ioda_free_pe(phb, pe_num);
380                 pe->pbus = NULL;
381                 return;
382         }
383
384         /* Associate it with all child devices */
385         pnv_ioda_setup_same_PE(bus, pe);
386
387         /* Put PE to the list */
388         list_add_tail(&pe->list, &phb->ioda.pe_list);
389
390         /* Account for one DMA PE if at least one DMA capable device exist
391          * below the bridge
392          */
393         if (pe->dma_weight != 0) {
394                 phb->ioda.dma_weight += pe->dma_weight;
395                 phb->ioda.dma_pe_count++;
396         }
397
398         /* Link the PE */
399         pnv_ioda_link_pe_by_weight(phb, pe);
400 }
401
402 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
403 {
404         struct pci_dev *dev;
405
406         pnv_ioda_setup_bus_PE(bus, 0);
407
408         list_for_each_entry(dev, &bus->devices, bus_list) {
409                 if (dev->subordinate) {
410                         if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
411                                 pnv_ioda_setup_bus_PE(dev->subordinate, 1);
412                         else
413                                 pnv_ioda_setup_PEs(dev->subordinate);
414                 }
415         }
416 }
417
/*
 * Configure PEs so that the downstream PCI buses and devices
 * could have their associated PE#. Unfortunately, we didn't
 * figure out the way to identify the PLX bridge yet. So we
 * simply put the PCI bus and the subordinates behind the root
 * port into PEs here. The game rule here is expected to be changed
 * as soon as we can detect PLX bridges correctly.
 */
426 static void pnv_pci_ioda_setup_PEs(void)
427 {
428         struct pci_controller *hose, *tmp;
429
430         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
431                 pnv_ioda_setup_PEs(hose->bus);
432         }
433 }
434
/* Per-device DMA setup hook: intentionally a no-op here, DMA setup is
 * deferred until all PE numbers have been assigned (see
 * pnv_pci_ioda_setup_DMA()). */
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
{
        /* We delay DMA setup after we have assigned all PE# */
}
439
440 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
441 {
442         struct pci_dev *dev;
443
444         list_for_each_entry(dev, &bus->devices, bus_list) {
445                 set_iommu_table_base(&dev->dev, &pe->tce32_table);
446                 if (dev->subordinate)
447                         pnv_ioda_setup_bus_dma(pe, dev->subordinate);
448         }
449 }
450
/*
 * Allocate and program a 32-bit TCE (DMA) window for @pe covering
 * @segs 256MB segments starting at segment @base, then attach the
 * resulting iommu_table to the PE's device or bus subtree.
 */
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
{

        struct page *tce_mem = NULL;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
        void *addr;

        /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE        ((0x10000000 / 0x1000) * 8)

        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
                (base << 28), ((base + segs) << 28) - 1);

        /* XXX Currently, we allocate one big contiguous table for the
         * TCEs. We only really need one chunk per 256M of TCE space
         * (ie per segment) but that's an optimization for later, it
         * requires some added smarts with our get/put_tce implementation
         */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(TCE32_TABLE_SIZE * segs));
        if (!tce_mem) {
                pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, TCE32_TABLE_SIZE * segs);

        /* Configure HW */
        /* Point each 256MB segment at its slice of the TCE table */
        for (i = 0; i < segs; i++) {
                rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                              pe->pe_number,
                                              base + i, 1,
                                              __pa(addr) + TCE32_TABLE_SIZE * i,
                                              TCE32_TABLE_SIZE, 0x1000);
                if (rc) {
                        pe_err(pe, " Failed to configure 32-bit TCE table,"
                               " err %ld\n", rc);
                        goto fail;
                }
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28);

        /* OPAL variant of P7IOC SW invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to or.  Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
                        | TCE_PCI_SWINV_PAIR;
        }
        iommu_init_table(tbl, phb->hose->node);

        /* Device-based PE: one device to point at the table;
         * bus-based PE: propagate to the whole subtree */
        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
 fail:
        /* XXX Failure: Try to fallback to 64-bit only ? */
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}
540
/*
 * Distribute the PHB's 32-bit TCE segments over its DMA-capable PEs:
 * each PE gets one base segment plus a share of the residual segments
 * proportional to its DMA weight.
 */
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        unsigned int residual, remaining, segs, tw, base;
        struct pnv_ioda_pe *pe;

        /* If we have more PE# than segments available, hand out one
         * per PE until we run out and let the rest fail. If not,
         * then we assign at least one segment per PE, plus more based
         * on the amount of devices under that PE
         */
        if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
                residual = 0;
        else
                residual = phb->ioda.tce32_count -
                        phb->ioda.dma_pe_count;

        pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
                hose->global_number, phb->ioda.tce32_count);
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);

        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
         */
        remaining = phb->ioda.tce32_count;
        tw = phb->ioda.dma_weight;
        base = 0;
        list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
                if (!pe->dma_weight)
                        continue;
                if (!remaining) {
                        pe_warn(pe, "No DMA32 resources available\n");
                        continue;
                }
                segs = 1;
                if (residual) {
                        /* Weighted share of the residual; +tw/2 rounds
                         * to nearest rather than truncating */
                        segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
                        if (segs > remaining)
                                segs = remaining;
                }
                pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                        pe->dma_weight, segs);
                pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
                remaining -= segs;
                base += segs;
        }
}
590
591 #ifdef CONFIG_PCI_MSI
/*
 * Set up one MSI for @dev: bind the XIVE to the device's PE and query
 * OPAL for the MSI address/data to program into @msg.  Returns 0 on
 * success, -ENXIO when the device has no PE or its PE has no MVE, or
 * -EIO on OPAL failure.
 */
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int is_64,
                                  struct msi_msg *msg)
{
        struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
        unsigned int xive_num = hwirq - phb->msi_base;  /* XIVE is relative to msi_base */
        uint64_t addr64;
        uint32_t addr32, data;
        int rc;

        /* No PE assigned ? bail out ... no MSI for you ! */
        if (pe == NULL)
                return -ENXIO;

        /* Check if we have an MVE */
        if (pe->mve_number < 0)
                return -ENXIO;

        /* Assign XIVE to PE */
        rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
        if (rc) {
                pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
                        pci_name(dev), rc, xive_num);
                return -EIO;
        }

        /* Fetch the MSI address/data pair from OPAL, 64- or 32-bit
         * flavour depending on what the device supports */
        if (is_64) {
                rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr64, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = addr64 >> 32;
                msg->address_lo = addr64 & 0xfffffffful;
        } else {
                rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr32, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = 0;
                msg->address_lo = addr32;
        }
        msg->data = data;

        pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
                 " address=%x_%08x data=%x PE# %d\n",
                 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
                 msg->address_hi, msg->address_lo, data, pe->pe_number);

        return 0;
}
648
649 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
650 {
651         unsigned int count;
652         const __be32 *prop = of_get_property(phb->hose->dn,
653                                              "ibm,opal-msi-ranges", NULL);
654         if (!prop) {
655                 /* BML Fallback */
656                 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
657         }
658         if (!prop)
659                 return;
660
661         phb->msi_base = be32_to_cpup(prop);
662         count = be32_to_cpup(prop + 1);
663         if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
664                 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
665                        phb->hose->global_number);
666                 return;
667         }
668
669         phb->msi_setup = pnv_pci_ioda_msi_setup;
670         phb->msi32_support = 1;
671         pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
672                 count, phb->msi_base);
673 }
674 #else
/* Stub used when CONFIG_PCI_MSI is disabled */
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
676 #endif /* CONFIG_PCI_MSI */
677
/*
 * This function is supposed to be called on a per-PE basis, from top
 * to bottom, so the I/O or MMIO segment assigned to a
 * parent PE can be overridden by its child PEs if necessary.
 */
/*
 * Program the I/O and M32 segment maps for a bus-based PE: every
 * segment overlapped by one of the PE's bus resources is pointed at
 * the PE, both in the software segmap and in hardware via OPAL.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
                                  struct pnv_ioda_pe *pe)
{
        struct pnv_phb *phb = hose->private_data;
        struct pci_bus_region region;
        struct resource *res;
        int i, index;
        int rc;

        /*
         * NOTE: We only care PCI bus based PE for now. For PCI
         * device based PE, for example SRIOV sensitive VF should
         * be figured out later.
         */
        BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

        pci_bus_for_each_resource(pe->pbus, res, i) {
                /* Skip missing, flagless or empty resources */
                if (!res || !res->flags ||
                    res->start > res->end)
                        continue;

                if (res->flags & IORESOURCE_IO) {
                        /* Translate to PHB-relative I/O space, then map
                         * each I/O segment the resource overlaps */
                        region.start = res->start - phb->ioda.io_pci_base;
                        region.end   = res->end - phb->ioda.io_pci_base;
                        index = region.start / phb->ioda.io_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.io_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping IO "
                                               "segment #%d to PE#%d\n",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.io_segsize;
                                index++;
                        }
                } else if (res->flags & IORESOURCE_MEM) {
                        /* Same walk for 32-bit memory segments, after
                         * removing the host bridge's memory offset */
                        region.start = res->start -
                                       hose->pci_mem_offset -
                                       phb->ioda.m32_pci_base;
                        region.end   = res->end -
                                       hose->pci_mem_offset -
                                       phb->ioda.m32_pci_base;
                        index = region.start / phb->ioda.m32_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.m32_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping M32 "
                                               "segment#%d to PE#%d",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.m32_segsize;
                                index++;
                        }
                }
        }
}
751
752 static void pnv_pci_ioda_setup_seg(void)
753 {
754         struct pci_controller *tmp, *hose;
755         struct pnv_phb *phb;
756         struct pnv_ioda_pe *pe;
757
758         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
759                 phb = hose->private_data;
760                 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
761                         pnv_ioda_setup_pe_seg(hose, pe);
762                 }
763         }
764 }
765
766 static void pnv_pci_ioda_setup_DMA(void)
767 {
768         struct pci_controller *hose, *tmp;
769         struct pnv_phb *phb;
770
771         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
772                 pnv_ioda_setup_dma(hose->private_data);
773
774                 /* Mark the PHB initialization done */
775                 phb = hose->private_data;
776                 phb->initialized = 1;
777         }
778 }
779
/*
 * Final fixup pass, installed as ppc_md.pcibios_fixup below:
 * once PCI probing and resource assignment are done, create the
 * PEs, program the segment maps, then set up DMA (which also
 * marks each PHB as initialized).  The order matters: segments
 * and DMA both operate on the PE list built by the first step.
 */
static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();
}
786
787 /*
788  * Returns the alignment for I/O or memory windows for P2P
789  * bridges. That actually depends on how PEs are segmented.
790  * For now, we return I/O or M32 segment size for PE sensitive
791  * P2P bridges. Otherwise, the default values (4KiB for I/O,
792  * 1MiB for memory) will be returned.
793  *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. For that case, we
796  * needn't enlarge the alignment so that we can save some
797  * resources.
798  */
799 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
800                                                 unsigned long type)
801 {
802         struct pci_dev *bridge;
803         struct pci_controller *hose = pci_bus_to_host(bus);
804         struct pnv_phb *phb = hose->private_data;
805         int num_pci_bridges = 0;
806
807         bridge = bus->self;
808         while (bridge) {
809                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
810                         num_pci_bridges++;
811                         if (num_pci_bridges >= 2)
812                                 return 1;
813                 }
814
815                 bridge = bridge->bus->self;
816         }
817
818         /* We need support prefetchable memory window later */
819         if (type & IORESOURCE_MEM)
820                 return phb->ioda.m32_segsize;
821
822         return phb->ioda.io_segsize;
823 }
824
825 /* Prevent enabling devices for which we couldn't properly
826  * assign a PE
827  */
828 static int pnv_pci_enable_device_hook(struct pci_dev *dev)
829 {
830         struct pci_controller *hose = pci_bus_to_host(dev->bus);
831         struct pnv_phb *phb = hose->private_data;
832         struct pci_dn *pdn;
833
834         /* The function is probably called while the PEs have
835          * not be created yet. For example, resource reassignment
836          * during PCI probe period. We just skip the check if
837          * PEs isn't ready.
838          */
839         if (!phb->initialized)
840                 return 0;
841
842         pdn = pnv_ioda_get_pdn(dev);
843         if (!pdn || pdn->pe_number == IODA_INVALID_PE)
844                 return -EINVAL;
845
846         return 0;
847 }
848
849 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
850                                u32 devfn)
851 {
852         return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
853 }
854
/*
 * Probe and initialize one IODA PHB described by device node @np.
 *
 * Reads the OPAL PHB id from the device tree, allocates the pnv_phb
 * and its hose, sizes and carves the M32/IO windows into per-PE
 * segments, allocates the PE allocation bitmap and segment maps from
 * bootmem in a single chunk, installs the pcibios hooks, and resets
 * the IODA tables via OPAL.  On any early failure it logs an error
 * and returns without registering the PHB.
 */
void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
	struct pci_controller *hose;
	static int primary = 1;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, iomap_off, pemap_off;
	const u64 *prop64;
	const u32 *prop32;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err("  Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (phb) {
		memset(phb, 0, sizeof(struct pnv_phb));
		phb->hose = hose = pcibios_alloc_controller(np);
	}
	if (!phb || !phb->hose) {
		pr_err("PCI: Failed to allocate PCI controller for %s\n",
		       np->full_name);
		return;
	}

	spin_lock_init(&phb->lock);
	/* XXX Use device-tree */
	hose->first_busno = 0;
	hose->last_busno = 0xff;
	hose->private_data = phb;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,p8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(phb->hose, np, primary);
	/* Only the first PHB probed is the primary controller */
	primary = 0;

	/* Get registers; non-fatal if the map fails, only error paths use them */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err("  Failed to map registers !\n");

	/* Initialize more IODA stuff; default to a single PE if the
	 * firmware doesn't tell us the PE count */
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (!prop32)
		phb->ioda.total_pe = 1;
	else
		phb->ioda.total_pe = *prop32;

	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already carved the top 64K off the M32 window (MSI
	 * space); add it back so segment sizing covers the full window */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start -
		hose->pci_mem_offset;
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays: a single bootmem chunk holding,
	 * in order, the PE allocation bitmap, the M32 segment map, the
	 * IO segment map and the PE array; the *_off variables record
	 * each sub-array's byte offset into the chunk.
	 *
	 * XXX TODO: Don't allocate io segmap on PHB3
	 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	iomap_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	/* Reserve PE#0 (used as the default/reserved PE below) */
	set_bit(0, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have (one per
	 * 256MB of M32 PCI space below the M32 base) */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	phb->hose->ops = &pnv_pci_ops;

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. It's supposed
	 * that the PCI core will do correct I/O and MMIO alignment
	 * for the P2P bridge bars so that each PCI bus (excluding
	 * the child P2P bridges) can form individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state; non-fatal on failure */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);

	/*
	 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
	 * has cleared the RTT which has the same effect
	 */
	if (ioda_type == PNV_PHB_IODA1)
		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
}
1013
/* Probe one IODA2 (PHB3) PHB: thin wrapper selecting PNV_PHB_IODA2 */
void pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
}
1018
1019 void __init pnv_pci_init_ioda_hub(struct device_node *np)
1020 {
1021         struct device_node *phbn;
1022         const u64 *prop64;
1023         u64 hub_id;
1024
1025         pr_info("Probing IODA IO-Hub %s\n", np->full_name);
1026
1027         prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
1028         if (!prop64) {
1029                 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1030                 return;
1031         }
1032         hub_id = be64_to_cpup(prop64);
1033         pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
1034
1035         /* Count child PHBs */
1036         for_each_child_of_node(np, phbn) {
1037                 /* Look for IODA1 PHBs */
1038                 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1039                         pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
1040         }
1041 }