#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EXIT_LOOP_COUNT 10000000
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
return iommu->cap & IOMMU_CAP_NPCACHE;
}
+/****************************************************************************
+ *
+ * Interrupt handling functions
+ *
+ ****************************************************************************/
+
+irqreturn_t amd_iommu_int_handler(int irq, void *data)
+{
+ return IRQ_NONE;
+}
+
/****************************************************************************
*
* IOMMU command queuing functions
u8 *target;
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- target = (iommu->cmd_buf + tail);
+ target = iommu->cmd_buf + tail;
memcpy_toio(target, cmd, sizeof(*cmd));
tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret;
+ int ret, ready = 0;
+ unsigned status = 0;
struct iommu_cmd cmd;
- volatile u64 ready = 0;
- unsigned long ready_phys = virt_to_phys(&ready);
unsigned long i = 0;
memset(&cmd, 0, sizeof(cmd));
- cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- cmd.data[1] = upper_32_bits(ready_phys);
- cmd.data[2] = 1; /* value written to 'ready' */
+ cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
iommu->need_sync = 0;
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
- cpu_relax();
+ /* wait for the bit to become one */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
+ /* set bit back to zero */
+ status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+ writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
- cmd.data[2] = LOW_U32(address);
+ cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
u64 address, size_t size)
{
int s = 0;
- unsigned pages = to_pages(address, size);
+ unsigned pages = iommu_num_pages(address, size);
address &= PAGE_MASK;
return 0;
}
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+{
+ u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+ iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+}
+
/****************************************************************************
*
* The functions below are used the create the page table mappings for
*/
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
- unsigned int pages)
+ unsigned int pages,
+ unsigned long align_mask)
{
unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
unsigned long address;
PAGE_SIZE) >> PAGE_SHIFT;
limit = limit < size ? limit : size;
- if (dom->next_bit >= limit)
+ if (dom->next_bit >= limit) {
dom->next_bit = 0;
+ dom->need_flush = true;
+ }
address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
- 0 , boundary_size, 0);
- if (address == -1)
+ 0 , boundary_size, align_mask);
+ if (address == -1) {
address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
- 0, boundary_size, 0);
+ 0, boundary_size, align_mask);
+ dom->need_flush = true;
+ }
if (likely(address != -1)) {
dom->next_bit = address + pages;
dma_dom->bitmap[0] = 1;
dma_dom->next_bit = 0;
+ dma_dom->need_flush = false;
+
/* Intialize the exclusion range if necessary */
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = to_pages(iommu->exclusion_start,
- iommu->exclusion_length);
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
*
*****************************************************************************/
+/*
+ * This function checks if the driver got a valid device from the caller to
+ * avoid dereferencing invalid pointers.
+ */
+static bool check_device(struct device *dev)
+{
+ if (!dev || !dev->dma_mask)
+ return false;
+
+ return true;
+}
+
/*
* In the dma_ops path we only have the struct device. This function
* finds the corresponding IOMMU, the protection domain and the
struct pci_dev *pcidev;
u16 _bdf;
- BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
+ *iommu = NULL;
+ *domain = NULL;
+ *bdf = 0xffff;
+
+ if (dev->bus != &pci_bus_type)
+ return 0;
pcidev = to_pci_dev(dev);
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
/* device not translated by any IOMMU in the system? */
- if (_bdf > amd_iommu_last_bdf) {
- *iommu = NULL;
- *domain = NULL;
- *bdf = 0xffff;
+ if (_bdf > amd_iommu_last_bdf)
return 0;
- }
*bdf = amd_iommu_alias_table[_bdf];
struct dma_ops_domain *dma_dom,
phys_addr_t paddr,
size_t size,
- int dir)
+ int dir,
+ bool align)
{
dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start;
unsigned int pages;
+ unsigned long align_mask = 0;
int i;
- pages = to_pages(paddr, size);
+ pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
- address = dma_ops_alloc_addresses(dev, dma_dom, pages);
+ if (align)
+ align_mask = (1UL << get_order(size)) - 1;
+
+ address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask);
if (unlikely(address == bad_dma_address))
goto out;
}
address += offset;
+ if (unlikely(dma_dom->need_flush && !iommu_fullflush)) {
+ iommu_flush_tlb(iommu, dma_dom->domain.id);
+ dma_dom->need_flush = false;
+ } else if (unlikely(iommu_has_npcache(iommu)))
+ iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
+
out:
return address;
}
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
- pages = to_pages(dma_addr, size);
+ pages = iommu_num_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = dma_addr;
}
dma_ops_free_addresses(dma_dom, dma_addr, pages);
+
+ if (iommu_fullflush)
+ iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
}
/*
u16 devid;
dma_addr_t addr;
+ if (!check_device(dev))
+ return bad_dma_address;
+
get_device_resources(dev, &iommu, &domain, &devid);
if (iommu == NULL || domain == NULL)
return (dma_addr_t)paddr;
spin_lock_irqsave(&domain->lock, flags);
- addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
+ addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false);
if (addr == bad_dma_address)
goto out;
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
struct protection_domain *domain;
u16 devid;
- if (!get_device_resources(dev, &iommu, &domain, &devid))
+ if (!check_device(dev) ||
+ !get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- iommu_flush_pages(iommu, domain->id, dma_addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
phys_addr_t paddr;
int mapped_elems = 0;
+ if (!check_device(dev))
+ return 0;
+
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain)
paddr = sg_phys(s);
s->dma_address = __map_single(dev, iommu, domain->priv,
- paddr, s->length, dir);
+ paddr, s->length, dir, false);
if (s->dma_address) {
s->dma_length = s->length;
mapped_elems++;
} else
goto unmap;
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, s->dma_address,
- s->dma_length);
}
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
u16 devid;
int i;
- if (!get_device_resources(dev, &iommu, &domain, &devid))
+ if (!check_device(dev) ||
+ !get_device_resources(dev, &iommu, &domain, &devid))
return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
__unmap_single(iommu, domain->priv, s->dma_address,
s->dma_length, dir);
- iommu_flush_pages(iommu, domain->id, s->dma_address,
- s->dma_length);
s->dma_address = s->dma_length = 0;
}
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
u16 devid;
phys_addr_t paddr;
+ if (!check_device(dev))
+ return NULL;
+
virt_addr = (void *)__get_free_pages(flag, get_order(size));
if (!virt_addr)
return 0;
spin_lock_irqsave(&domain->lock, flags);
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
- size, DMA_BIDIRECTIONAL);
+ size, DMA_BIDIRECTIONAL, true);
if (*dma_addr == bad_dma_address) {
free_pages((unsigned long)virt_addr, get_order(size));
goto out;
}
- if (iommu_has_npcache(iommu))
- iommu_flush_pages(iommu, domain->id, *dma_addr, size);
-
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
out:
/*
* The exported free_coherent function for dma_ops.
- * FIXME: fix the generic x86 DMA layer so that it actually calls that
- * function.
*/
static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr)
struct protection_domain *domain;
u16 devid;
+ if (!check_device(dev))
+ return;
+
get_device_resources(dev, &iommu, &domain, &devid);
if (!iommu || !domain)
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- iommu_flush_pages(iommu, domain->id, dma_addr, size);
- if (iommu->need_sync)
+ if (unlikely(iommu->need_sync))
iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);