#include <linux/msi.h>
#include <linux/memblock.h>
#include <linux/iommu.h>
+#include <linux/rculist.h>
#include <asm/sections.h>
#include <asm/io.h>
return;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Associate it with all child devices */
pnv_ioda_setup_same_PE(bus, pe);
bus = dev->bus;
hose = pci_bus_to_host(bus);
phb = hose->private_data;
- tbl = pe->tce32_table;
+ tbl = pe->table_group.tables[0];
addr = tbl->it_base;
opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
if (rc)
pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
- if (tbl->it_group) {
- iommu_group_put(tbl->it_group);
- BUG_ON(tbl->it_group);
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ if (pe->table_group.group) {
+ iommu_group_put(pe->table_group.group);
+ BUG_ON(pe->table_group.group);
}
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
free_pages(addr, get_order(TCE32_TABLE_SIZE));
- pe->tce32_table = NULL;
}
static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
continue;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Put PE to the list */
mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
/*
* Note: iommu_add_device() will fail here as
* for physical PE: the device is already added by now;
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
}
*pdev->dev.dma_mask = dma_mask;
return 0;
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base(&dev->dev, pe->tce32_table);
+ set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
iommu_add_device(&dev->dev);
if (dev->subordinate)
}
}
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
+ struct iommu_table_group_link *tgl = list_first_entry_or_null(
+ &tbl->it_group_list, struct iommu_table_group_link,
+ next);
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
unsigned long start, end, inc;
const unsigned shift = tbl->it_page_shift;
- start = __pa(startp);
- end = __pa(endp);
+ start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+ end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+ npages - 1);
/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
if (tbl->it_busno) {
*/
}
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
+
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+ return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret && (tbl->it_type &
+ (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+ pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);
+
+ return ret;
+}
+#endif
+
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+ long npages)
+{
+ pnv_tce_free(tbl, index, npages);
+
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+ .set = pnv_ioda1_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda1_tce_xchg,
+#endif
+ .clear = pnv_ioda1_tce_free,
+ .get = pnv_tce_get,
+};
+
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+ /* 01xb - invalidate TCEs that match the specified PE# */
+ unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+ struct pnv_phb *phb = pe->phb;
+
+ if (!phb->ioda.tce_inval_reg)
+ return;
+
+ mb(); /* Ensure above stores are visible */
+ __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
+ __be64 __iomem *invalidate, unsigned shift,
+ unsigned long index, unsigned long npages)
{
unsigned long start, end, inc;
- __be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
- const unsigned shift = tbl->it_page_shift;
/* We'll invalidate DMA address in PE scope */
start = 0x2ull << 60;
- start |= (pe->pe_number & 0xFF);
+ start |= (pe_number & 0xFF);
end = start;
/* Figure out the start, end and step */
- inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
- start |= (inc << shift);
- inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
- end |= (inc << shift);
+ start |= (index << shift);
+ end |= ((index + npages - 1) << shift);
inc = (0x1ull << shift);
mb();
}
}
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
- struct pnv_ioda_pe *pe = tbl->data;
- struct pnv_phb *phb = pe->phb;
+ struct iommu_table_group_link *tgl;
- if (phb->type == PNV_PHB_IODA1)
- pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
- else
- pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
+ __be64 __iomem *invalidate = rm ?
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
+
+ pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
+ invalidate, tbl->it_page_shift,
+ index, npages);
+ }
}
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
+
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+
+ return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret && (tbl->it_type &
+ (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+ pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+
+ return ret;
+}
+#endif
+
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+ long npages)
+{
+ pnv_tce_free(tbl, index, npages);
+
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda2_tce_xchg,
+#endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+};
+
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, unsigned int base,
unsigned int segs)
{
struct page *tce_mem = NULL;
- const __be64 *swinvp;
struct iommu_table *tbl;
unsigned int i;
int64_t rc;
if (WARN_ON(pe->tce32_seg >= 0))
return;
- tbl = pe->tce32_table;
- iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* Grab a 32-bit TCE table */
pe->tce32_seg = base;
base << 28, IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
+ if (phb->ioda.tce_inval_reg)
tbl->it_type |= (TCE_PCI_SWINV_CREATE |
TCE_PCI_SWINV_FREE |
TCE_PCI_SWINV_PAIR);
- }
+
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
iommu_init_table(tbl, phb->hose->node);
if (pe->flags & PNV_IODA_PE_DEV) {
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
}
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
- struct pnv_ioda_pe *pe = tbl->data;
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
pe->tce_bypass_enabled = enable;
}
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+#ifdef CONFIG_IOMMU_API
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
{
- /* TVE #1 is selected by PCI address bit 59 */
- pe->tce_bypass_base = 1ull << 59;
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
- /* Install set_bypass callback for VFIO */
- pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+ iommu_take_ownership(table_group->tables[0]);
+ pnv_pci_ioda2_set_bypass(pe, false);
+}
+
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
- /* Enable bypass by default */
- pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+ iommu_release_ownership(table_group->tables[0]);
+ pnv_pci_ioda2_set_bypass(pe, true);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ .take_ownership = pnv_ioda2_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
+{
+ const __be64 *swinvp;
+
+ /* OPAL variant of PHB3 invalidated TCEs */
+ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+ if (!swinvp)
+ return;
+
+ phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp);
+ phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
}
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
{
struct page *tce_mem = NULL;
void *addr;
- const __be64 *swinvp;
struct iommu_table *tbl;
unsigned int tce_table_size, end;
int64_t rc;
if (WARN_ON(pe->tce32_seg >= 0))
return;
- tbl = pe->tce32_table;
- iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
+ /* TVE #1 is selected by PCI address bit 59 */
+ pe->tce_bypass_base = 1ull << 59;
+
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* The PE will reserve all possible 32-bits space */
pe->tce32_seg = 0;
goto fail;
}
+ pnv_pci_ioda2_tce_invalidate_entire(pe);
+
/* Setup linux iommu table */
pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of PHB3 invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
+ if (phb->ioda.tce_inval_reg)
tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
- }
+
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
iommu_init_table(tbl, phb->hose->node);
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+#endif
if (pe->flags & PNV_IODA_PE_DEV) {
/*
/* Also create a bypass window */
if (!pnv_iommu_bypass_disabled)
- pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+ pnv_pci_ioda2_set_bypass(pe, true);
return;
fail:
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(tce_table_size));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
}
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
pr_info("PCI: %d PE# for a total weight of %d\n",
phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pnv_pci_ioda_setup_opal_tce_kill(phb);
+
/* Walk our PE list and configure their DMA segments, hand them
* out one base segment plus any residual segments based on
* weight