#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
#include "powernv.h"
#include "pci.h"
return;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Associate it with all child devices */
pnv_ioda_setup_same_PE(bus, pe);
bus = dev->bus;
hose = pci_bus_to_host(bus);
phb = hose->private_data;
- tbl = pe->tce32_table;
+ tbl = pe->table_group.tables[0];
addr = tbl->it_base;
opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
if (rc)
pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ if (pe->table_group.group) {
+ iommu_group_put(pe->table_group.group);
+ BUG_ON(pe->table_group.group);
+ }
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
free_pages(addr, get_order(TCE32_TABLE_SIZE));
- pe->tce32_table = NULL;
}
static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
continue;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Put PE to the list */
mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+ /*
+ * Note: iommu_add_device() will fail here because:
+ * - for a physical PE, the device has already been added by now;
+ * - for a virtual PE, the sysfs entries are not ready yet, and
+ *   tce_iommu_bus_notifier will add the device to a group later.
+ */
}
-static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
- struct pci_dev *pdev, u64 dma_mask)
+static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
uint64_t top;
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
}
*pdev->dev.dma_mask = dma_mask;
return 0;
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus,
- bool add_to_iommu_group)
+ struct pci_bus *bus) /* walks @bus and every bus below it on behalf of @pe */
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- if (add_to_iommu_group)
- set_iommu_table_base_and_group(&dev->dev,
- pe->tce32_table);
- else
- set_iommu_table_base(&dev->dev, pe->tce32_table);
+ set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); /* each device is pointed at the PE's table 0 */
+ iommu_add_device(&dev->dev); /* now called unconditionally; its return value is not checked */
if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
- add_to_iommu_group);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate); /* recurse into the device's subordinate bus */
}
}
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
+ struct iommu_table_group_link *tgl = list_first_entry_or_null(
+ &tbl->it_group_list, struct iommu_table_group_link,
+ next);
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->tce_inval_reg_phys :
(__be64 __iomem *)tbl->it_index;
unsigned long start, end, inc;
const unsigned shift = tbl->it_page_shift;
- start = __pa(startp);
- end = __pa(endp);
+ start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+ end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+ npages - 1);
/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
if (tbl->it_busno) {
*/
}
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs) /* installed as .set in pnv_ioda1_iommu_ops; returns pnv_tce_build()'s result */
{
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
+ /* Invalidate [index, index + npages) only when the build returned 0 and the table type has TCE_PCI_SWINV_CREATE set; rm is passed as false. */
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+ return ret;
+}
+
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+ long npages) /* installed as .clear in pnv_ioda1_iommu_ops */
+{
+ pnv_tce_free(tbl, index, npages);
+ /* Follow the free with an IODA1 invalidate of the same range when the table type has TCE_PCI_SWINV_FREE set; rm is passed as false. */
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda1_iommu_ops = { /* assigned to tbl->it_ops by the setup path that calls pnv_pci_setup_iommu_table() with TCE32_TABLE_SIZE * segs */
+ .set = pnv_ioda1_tce_build, /* pnv_tce_build() plus conditional IODA1 invalidate */
+ .clear = pnv_ioda1_tce_free, /* pnv_tce_free() plus conditional IODA1 invalidate */
+ .get = pnv_tce_get, /* same .get callback as pnv_ioda2_iommu_ops */
+};
+
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
+{
+ struct iommu_table_group_link *tgl = list_first_entry_or_null(
+ &tbl->it_group_list, struct iommu_table_group_link,
+ next);
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
unsigned long start, end, inc;
__be64 __iomem *invalidate = rm ?
(__be64 __iomem *)pe->tce_inval_reg_phys :
end = start;
/* Figure out the start, end and step */
- inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
- start |= (inc << shift);
- inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
- end |= (inc << shift);
+ start |= (index << shift);
+ end |= ((index + npages - 1) << shift);
inc = (0x1ull << shift);
mb();
}
}
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs) /* installed as .set in pnv_ioda2_iommu_ops; returns pnv_tce_build()'s result */
{
- struct pnv_ioda_pe *pe = tbl->data;
- struct pnv_phb *phb = pe->phb;
+ int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+ attrs);
- if (phb->type == PNV_PHB_IODA1)
- pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
- else
- pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+ if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) /* invalidate only after a successful build on a table that requests it */
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); /* rm is passed as false */
+
+ return ret;
}
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+ long npages) /* installed as .clear in pnv_ioda2_iommu_ops */
+{
+ pnv_tce_free(tbl, index, npages);
+ /* Follow the free with an IODA2 invalidate of the same range when the table type has TCE_PCI_SWINV_FREE set; rm is passed as false. */
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda2_iommu_ops = { /* assigned to tbl->it_ops in pnv_pci_ioda2_setup_dma_pe() */
+ .set = pnv_ioda2_tce_build, /* pnv_tce_build() plus conditional IODA2 invalidate */
+ .clear = pnv_ioda2_tce_free, /* pnv_tce_free() plus conditional IODA2 invalidate */
+ .get = pnv_tce_get, /* same .get callback as pnv_ioda1_iommu_ops */
+};
+
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, unsigned int base,
unsigned int segs)
if (WARN_ON(pe->tce32_seg >= 0))
return;
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
+
/* Grab a 32-bit TCE table */
pe->tce32_seg = base;
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
base << 28, IOMMU_PAGE_SHIFT_4K);
TCE_PCI_SWINV_FREE |
TCE_PCI_SWINV_PAIR);
}
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
iommu_init_table(tbl, phb->hose->node);
if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
+ /*
+ * Set the table base here only to carry the iommu_group
+ * further down so that iommu_add_device() can do the job.
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+ */
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+ iommu_add_device(&pe->pdev->dev);
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
return;
fail:
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
}
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
- struct pnv_ioda_pe *pe = tbl->data;
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
window_id,
pe->tce_bypass_base,
0);
-
- /*
- * EEH needs the mapping between IOMMU table and group
- * of those VFIO/KVM pass-through devices. We can postpone
- * resetting DMA ops until the DMA mask is configured in
- * host side.
- */
- if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
- else
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
if (rc)
pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
pe->tce_bypass_enabled = enable;
}
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+#ifdef CONFIG_IOMMU_API
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) /* installed as .take_ownership in pnv_pci_ioda2_ops */
{
- /* TVE #1 is selected by PCI address bit 59 */
- pe->tce_bypass_base = 1ull << 59;
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group); /* recover the PE that embeds this table_group */
+ /* Take ownership of table 0 first, then call the bypass helper with enable == false for this PE. */
+ iommu_take_ownership(table_group->tables[0]);
+ pnv_pci_ioda2_set_bypass(pe, false);
+}
- /* Install set_bypass callback for VFIO */
- pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) /* installed as .release_ownership in pnv_pci_ioda2_ops */
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group); /* recover the PE that embeds this table_group */
- /* Enable bypass by default */
- pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+ iommu_release_ownership(table_group->tables[0]); /* mirror of take_ownership: release table 0 first */
+ pnv_pci_ioda2_set_bypass(pe, true); /* then call the bypass helper with enable == true */
}
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = { /* only built under CONFIG_IOMMU_API; assigned to pe->table_group.ops in pnv_pci_ioda2_setup_dma_pe() */
+ .take_ownership = pnv_ioda2_take_ownership, /* takes table 0, then set_bypass(pe, false) */
+ .release_ownership = pnv_ioda2_release_ownership, /* releases table 0, then set_bypass(pe, true) */
+};
+#endif
+
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe)
{
if (WARN_ON(pe->tce32_seg >= 0))
return;
+ /* TVE #1 is selected by PCI address bit 59 */
+ pe->tce_bypass_base = 1ull << 59;
+
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
+
/* The PE will reserve all possible 32-bits space */
pe->tce32_seg = 0;
end = (1 << ilog2(phb->ioda.m32_pci_base));
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
IOMMU_PAGE_SHIFT_4K);
8);
tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
}
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
iommu_init_table(tbl, phb->hose->node);
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+#endif
if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
+ /*
+ * Set the table base here only to carry the iommu_group
+ * further down so that iommu_add_device() can do the job.
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+ */
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+ iommu_add_device(&pe->pdev->dev);
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
/* Also create a bypass window */
if (!pnv_iommu_bypass_disabled)
- pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+ pnv_pci_ioda2_set_bypass(pe, true);
return;
fail:
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(tce_table_size));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
}
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
-static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose) /* installed as .shutdown in pnv_pci_ioda_controller_ops */
{
+ struct pnv_phb *phb = hose->private_data; /* the PHB is now looked up from the controller instead of being passed in */
+ /* Ask OPAL to assert an IODA-table reset for this PHB; the OPAL return value is not checked. */
opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
OPAL_ASSERT_RESET);
}
+static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { /* IODA controller callbacks; copied by value into hose->controller_ops */
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+ .enable_device_hook = pnv_pci_enable_device_hook, /* previously patched into the shared pnv_pci_controller_ops at init time */
+ .window_alignment = pnv_pci_window_alignment, /* likewise */
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus, /* likewise */
+ .dma_set_mask = pnv_pci_ioda_dma_set_mask, /* replaces the removed phb->dma_set_mask assignment */
+ .shutdown = pnv_pci_ioda_shutdown, /* replaces the removed phb->shutdown assignment */
+};
+
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
u64 hub_id, int ioda_type)
{
hose->last_busno = 0xff;
}
hose->private_data = phb;
- hose->controller_ops = pnv_pci_controller_ops;
phb->hub_id = hub_id;
phb->opal_id = phb_id;
phb->type = ioda_type;
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
- phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
- /* Setup shutdown function for kexec */
- phb->shutdown = pnv_pci_ioda_shutdown;
-
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
* the child P2P bridges) can form individual PE.
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
- pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
- pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
+ hose->controller_ops = pnv_pci_ioda_controller_ops;
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;