git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - arch/powerpc/platforms/powernv/pci-ioda.c
powerpc/iommu/powernv: Release replaced TCE
[karo-tx-linux.git] / arch / powerpc / platforms / powernv / pci-ioda.c
index 8070dc9921f2e76de332f4ffd01a350698d7946f..ecbc071a143eb50cf39f6bf9eb0de880f7702727 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/msi.h>
 #include <linux/memblock.h>
 #include <linux/iommu.h>
+#include <linux/rculist.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -1087,10 +1088,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
                return;
        }
 
-       pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-                       GFP_KERNEL, hose->node);
-       pe->tce32_table->data = pe;
-
        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);
 
@@ -1296,7 +1293,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
        bus = dev->bus;
        hose = pci_bus_to_host(bus);
        phb = hose->private_data;
-       tbl = pe->tce32_table;
+       tbl = pe->table_group.tables[0];
        addr = tbl->it_base;
 
        opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
@@ -1311,13 +1308,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
        if (rc)
                pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
 
-       if (tbl->it_group) {
-               iommu_group_put(tbl->it_group);
-               BUG_ON(tbl->it_group);
+       pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+       if (pe->table_group.group) {
+               iommu_group_put(pe->table_group.group);
+               BUG_ON(pe->table_group.group);
        }
        iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
        free_pages(addr, get_order(TCE32_TABLE_SIZE));
-       pe->tce32_table = NULL;
 }
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -1465,10 +1462,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
                        continue;
                }
 
-               pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-                               GFP_KERNEL, hose->node);
-               pe->tce32_table->data = pe;
-
                /* Put PE to the list */
                mutex_lock(&phb->ioda.pe_list_mutex);
                list_add_tail(&pe->list, &phb->ioda.pe_list);
@@ -1603,7 +1596,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
        pe = &phb->ioda.pe_array[pdn->pe_number];
        WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-       set_iommu_table_base(&pdev->dev, pe->tce32_table);
+       set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
        /*
         * Note: iommu_add_device() will fail here as
         * for physical PE: the device is already added by now;
@@ -1637,7 +1630,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
        } else {
                dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
                set_dma_ops(&pdev->dev, &dma_iommu_ops);
-               set_iommu_table_base(&pdev->dev, pe->tce32_table);
+               set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
        }
        *pdev->dev.dma_mask = dma_mask;
        return 0;
@@ -1671,7 +1664,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
        struct pci_dev *dev;
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
-               set_iommu_table_base(&dev->dev, pe->tce32_table);
+               set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
                iommu_add_device(&dev->dev);
 
                if (dev->subordinate)
@@ -1679,18 +1672,23 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
        }
 }
 
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
-                                        struct iommu_table *tbl,
-                                        __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+               unsigned long index, unsigned long npages, bool rm)
 {
+       struct iommu_table_group_link *tgl = list_first_entry_or_null(
+                       &tbl->it_group_list, struct iommu_table_group_link,
+                       next);
+       struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+                       struct pnv_ioda_pe, table_group);
        __be64 __iomem *invalidate = rm ?
-               (__be64 __iomem *)pe->tce_inval_reg_phys :
-               (__be64 __iomem *)tbl->it_index;
+               (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+               pe->phb->ioda.tce_inval_reg;
        unsigned long start, end, inc;
        const unsigned shift = tbl->it_page_shift;
 
-       start = __pa(startp);
-       end = __pa(endp);
+       start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+       end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+                       npages - 1);
 
        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
@@ -1726,26 +1724,79 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
         */
 }
 
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
-                                        struct iommu_table *tbl,
-                                        __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+               long npages, unsigned long uaddr,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+                       attrs);
+
+       if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+               pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+       return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret && (tbl->it_type &
+                       (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+               pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);
+
+       return ret;
+}
+#endif
+
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+               long npages)
+{
+       pnv_tce_free(tbl, index, npages);
+
+       if (tbl->it_type & TCE_PCI_SWINV_FREE)
+               pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+       .set = pnv_ioda1_tce_build,
+#ifdef CONFIG_IOMMU_API
+       .exchange = pnv_ioda1_tce_xchg,
+#endif
+       .clear = pnv_ioda1_tce_free,
+       .get = pnv_tce_get,
+};
+
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+       /* 01xb - invalidate TCEs that match the specified PE# */
+       unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+       struct pnv_phb *phb = pe->phb;
+
+       if (!phb->ioda.tce_inval_reg)
+               return;
+
+       mb(); /* Ensure above stores are visible */
+       __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
+               __be64 __iomem *invalidate, unsigned shift,
+               unsigned long index, unsigned long npages)
 {
        unsigned long start, end, inc;
-       __be64 __iomem *invalidate = rm ?
-               (__be64 __iomem *)pe->tce_inval_reg_phys :
-               (__be64 __iomem *)tbl->it_index;
-       const unsigned shift = tbl->it_page_shift;
 
        /* We'll invalidate DMA address in PE scope */
        start = 0x2ull << 60;
-       start |= (pe->pe_number & 0xFF);
+       start |= (pe_number & 0xFF);
        end = start;
 
        /* Figure out the start, end and step */
-       inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
-       start |= (inc << shift);
-       inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
-       end |= (inc << shift);
+       start |= (index << shift);
+       end |= ((index + npages - 1) << shift);
        inc = (0x1ull << shift);
        mb();
 
@@ -1758,25 +1809,76 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
        }
 }
 
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-                                __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+               unsigned long index, unsigned long npages, bool rm)
 {
-       struct pnv_ioda_pe *pe = tbl->data;
-       struct pnv_phb *phb = pe->phb;
+       struct iommu_table_group_link *tgl;
 
-       if (phb->type == PNV_PHB_IODA1)
-               pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
-       else
-               pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+       list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+               struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+                               struct pnv_ioda_pe, table_group);
+               __be64 __iomem *invalidate = rm ?
+                       (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+                       pe->phb->ioda.tce_inval_reg;
+
+               pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
+                       invalidate, tbl->it_page_shift,
+                       index, npages);
+       }
 }
 
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+               long npages, unsigned long uaddr,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+                       attrs);
+
+       if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+               pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+
+       return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret && (tbl->it_type &
+                       (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+               pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+
+       return ret;
+}
+#endif
+
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+               long npages)
+{
+       pnv_tce_free(tbl, index, npages);
+
+       if (tbl->it_type & TCE_PCI_SWINV_FREE)
+               pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+       .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+       .exchange = pnv_ioda2_tce_xchg,
+#endif
+       .clear = pnv_ioda2_tce_free,
+       .get = pnv_tce_get,
+};
+
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
 {
 
        struct page *tce_mem = NULL;
-       const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
@@ -1790,8 +1892,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
        if (WARN_ON(pe->tce32_seg >= 0))
                return;
 
-       tbl = pe->tce32_table;
-       iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
+       tbl = pnv_pci_table_alloc(phb->hose->node);
+       iommu_register_group(&pe->table_group, phb->hose->global_number,
+                       pe->pe_number);
+       pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
 
        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
@@ -1831,20 +1935,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                  base << 28, IOMMU_PAGE_SHIFT_4K);
 
        /* OPAL variant of P7IOC SW invalidated TCEs */
-       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
-       if (swinvp) {
-               /* We need a couple more fields -- an address and a data
-                * to or.  Since the bus is only printed out on table free
-                * errors, and on the first pass the data will be a relative
-                * bus number, print that out instead.
-                */
-               pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
-               tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
-                               8);
+       if (phb->ioda.tce_inval_reg)
                tbl->it_type |= (TCE_PCI_SWINV_CREATE |
                                 TCE_PCI_SWINV_FREE   |
                                 TCE_PCI_SWINV_PAIR);
-       }
+
+       tbl->it_ops = &pnv_ioda1_iommu_ops;
        iommu_init_table(tbl, phb->hose->node);
 
        if (pe->flags & PNV_IODA_PE_DEV) {
@@ -1865,11 +1961,14 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+       if (tbl) {
+               pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+               iommu_free_table(tbl, "pnv");
+       }
 }
 
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
 {
-       struct pnv_ioda_pe *pe = tbl->data;
        uint16_t window_id = (pe->pe_number << 1 ) + 1;
        int64_t rc;
 
@@ -1896,17 +1995,42 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
                pe->tce_bypass_enabled = enable;
 }
 
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
-                                         struct pnv_ioda_pe *pe)
+#ifdef CONFIG_IOMMU_API
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 {
-       /* TVE #1 is selected by PCI address bit 59 */
-       pe->tce_bypass_base = 1ull << 59;
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                                               table_group);
 
-       /* Install set_bypass callback for VFIO */
-       pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+       iommu_take_ownership(table_group->tables[0]);
+       pnv_pci_ioda2_set_bypass(pe, false);
+}
+
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                                               table_group);
 
-       /* Enable bypass by default */
-       pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+       iommu_release_ownership(table_group->tables[0]);
+       pnv_pci_ioda2_set_bypass(pe, true);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+       .take_ownership = pnv_ioda2_take_ownership,
+       .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
+{
+       const __be64 *swinvp;
+
+       /* OPAL variant of PHB3 invalidated TCEs */
+       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+       if (!swinvp)
+               return;
+
+       phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp);
+       phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
 }
 
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
@@ -1914,7 +2038,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 {
        struct page *tce_mem = NULL;
        void *addr;
-       const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int tce_table_size, end;
        int64_t rc;
@@ -1923,8 +2046,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
        if (WARN_ON(pe->tce32_seg >= 0))
                return;
 
-       tbl = pe->tce32_table;
-       iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
+       /* TVE #1 is selected by PCI address bit 59 */
+       pe->tce_bypass_base = 1ull << 59;
+
+       tbl = pnv_pci_table_alloc(phb->hose->node);
+       iommu_register_group(&pe->table_group, phb->hose->global_number,
+                       pe->pe_number);
+       pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
 
        /* The PE will reserve all possible 32-bits space */
        pe->tce32_seg = 0;
@@ -1956,24 +2084,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                goto fail;
        }
 
+       pnv_pci_ioda2_tce_invalidate_entire(pe);
+
        /* Setup linux iommu table */
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
                        IOMMU_PAGE_SHIFT_4K);
 
        /* OPAL variant of PHB3 invalidated TCEs */
-       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
-       if (swinvp) {
-               /* We need a couple more fields -- an address and a data
-                * to or.  Since the bus is only printed out on table free
-                * errors, and on the first pass the data will be a relative
-                * bus number, print that out instead.
-                */
-               pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
-               tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
-                               8);
+       if (phb->ioda.tce_inval_reg)
                tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
-       }
+
+       tbl->it_ops = &pnv_ioda2_iommu_ops;
        iommu_init_table(tbl, phb->hose->node);
+#ifdef CONFIG_IOMMU_API
+       pe->table_group.ops = &pnv_pci_ioda2_ops;
+#endif
 
        if (pe->flags & PNV_IODA_PE_DEV) {
                /*
@@ -1988,7 +2113,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 
        /* Also create a bypass window */
        if (!pnv_iommu_bypass_disabled)
-               pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+               pnv_pci_ioda2_set_bypass(pe, true);
 
        return;
 fail:
@@ -1996,6 +2121,10 @@ fail:
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(tce_table_size));
+       if (tbl) {
+               pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+               iommu_free_table(tbl, "pnv");
+       }
 }
 
 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
@@ -2020,6 +2149,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);
 
+       pnv_pci_ioda_setup_opal_tce_kill(phb);
+
        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight