#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
+#include <linux/io.h>
#include <asm/atomic.h>
-#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
static u32 *iommu_gatt_base; /* Remapping table */
+/*
+ * If this is disabled the IOMMU will use an optimized flushing strategy
+ * of only flushing when a mapping is reused. With it true the GART is
+ * flushed for every mapping. Problem is that doing the lazy flush seems
+ * to trigger bugs with some popular PCI cards, in particular 3ware (but
+ * has also been seen with Qlogic at least).
+ */
+int iommu_fullflush = 1;
+
/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
/* Guarded by iommu_bitmap_lock: */
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
-static int need_flush; /* global flush state. set for each gart wrap */
+static bool need_flush; /* global flush state. set for each gart wrap */
static unsigned long alloc_iommu(struct device *dev, int size,
- unsigned long align_mask, u64 dma_mask)
+ unsigned long align_mask)
{
unsigned long offset, flags;
unsigned long boundary_size;
unsigned long base_index;
- unsigned long limit;
base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
PAGE_SIZE) >> PAGE_SHIFT;
boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
- limit = iommu_device_max_index(iommu_pages,
- DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE),
- dma_mask >> PAGE_SHIFT);
-
spin_lock_irqsave(&iommu_bitmap_lock, flags);
-
- if (limit <= next_bit) {
- need_flush = 1;
- next_bit = 0;
- }
-
- offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit,
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
size, base_index, boundary_size, align_mask);
- if (offset == -1 && next_bit) {
- need_flush = 1;
- offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0,
+ if (offset == -1) {
+ need_flush = true;
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
size, base_index, boundary_size,
align_mask);
}
next_bit = offset+size;
if (next_bit >= iommu_pages) {
next_bit = 0;
- need_flush = 1;
+ need_flush = true;
}
}
if (iommu_fullflush)
- need_flush = 1;
+ need_flush = true;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
return offset;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
k8_flush_garts();
- need_flush = 0;
+ need_flush = false;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
iommu_leak_pages);
for (i = 0; i < iommu_leak_pages; i += 2) {
printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+ printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
+ 0);
printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
}
printk(KERN_DEBUG "\n");
* Caller needs to check if the iommu is needed and flush.
*/
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- size_t size, int dir, unsigned long align_mask,
- u64 dma_mask)
+ size_t size, int dir, unsigned long align_mask)
{
unsigned long npages = iommu_num_pages(phys_mem, size);
- unsigned long iommu_page;
+ unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
int i;
- iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask);
if (iommu_page == -1) {
if (!nonforced_iommu(dev, phys_mem, size))
return phys_mem;
if (!need_iommu(dev, paddr, size))
return paddr;
- bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev));
+ bus = dma_map_area(dev, paddr, size, dir, 0);
flush_gart();
return bus;
{
struct scatterlist *s;
int i;
- u64 dma_mask = dma_get_mask(dev);
#ifdef CONFIG_IOMMU_DEBUG
printk(KERN_DEBUG "dma_map_sg overflow\n");
unsigned long addr = sg_phys(s);
if (nonforced_iommu(dev, addr, s->length)) {
- addr = dma_map_area(dev, addr, s->length, dir, 0,
- dma_mask);
+ addr = dma_map_area(dev, addr, s->length, dir, 0);
if (addr == bad_dma_address) {
if (i > 0)
gart_unmap_sg(dev, sg, i, dir);
int nelems, struct scatterlist *sout,
unsigned long pages)
{
- unsigned long iommu_start;
- unsigned long iommu_page;
+ unsigned long iommu_start = alloc_iommu(dev, pages, 0);
+ unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
- iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev));
if (iommu_start == -1)
return -1;
- iommu_page = iommu_start;
for_each_sg(start, s, nelems, i) {
unsigned long pages, addr;
unsigned long phys_addr = s->dma_address;
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
gfp_t flag)
{
- void *vaddr;
dma_addr_t paddr;
unsigned long align_mask;
- u64 dma_mask = dma_alloc_coherent_mask(dev, flag);
-
- vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size));
- if (!vaddr)
- return NULL;
-
- paddr = virt_to_phys(vaddr);
- if (is_buffer_dma_capable(dma_mask, paddr, size)) {
- *dma_addr = paddr;
- return vaddr;
- }
-
- align_mask = (1UL << get_order(size)) - 1;
-
- *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL,
- align_mask, dma_mask);
- flush_gart();
-
- if (*dma_addr != bad_dma_address)
- return vaddr;
-
- free_pages((unsigned long)vaddr, get_order(size));
+ struct page *page;
+
+ if (force_iommu && !(flag & GFP_DMA)) {
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ page = alloc_pages(flag | __GFP_ZERO, get_order(size));
+ if (!page)
+ return NULL;
+
+ align_mask = (1UL << get_order(size)) - 1;
+ paddr = dma_map_area(dev, page_to_phys(page), size,
+ DMA_BIDIRECTIONAL, align_mask);
+
+ flush_gart();
+ if (paddr != bad_dma_address) {
+ *dma_addr = paddr;
+ return page_address(page);
+ }
+ __free_pages(page, get_order(size));
+ } else
+ return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
return NULL;
}
struct pci_dev *dev;
void *gatt;
int i, error;
- unsigned long start_pfn, end_pfn;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
info->aper_size = aper_size >> 20;
gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+ gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(gatt_size));
if (!gatt)
panic("Cannot allocate GATT table");
if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
panic("Could not set GART PTEs to uncacheable pages");
- memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
enable_gart_translations();
if (!error)
error = sysdev_register(&device_gart);
if (error)
- panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+ panic("Could not register gart_sysdev -- "
+ "would corrupt data on next suspend");
flush_gart();
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
aper_base, aper_size>>10);
- /* need to map that range */
- end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
- if (end_pfn > max_low_pfn_mapped) {
- start_pfn = (aper_base>>PAGE_SHIFT);
- init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
- }
return 0;
nommu:
return -1;
}
-extern int agp_amd64_init(void);
-
static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
.unmap_single = gart_unmap_single,
- .sync_single_for_cpu = NULL,
- .sync_single_for_device = NULL,
- .sync_single_range_for_cpu = NULL,
- .sync_single_range_for_device = NULL,
- .sync_sg_for_cpu = NULL,
- .sync_sg_for_device = NULL,
.map_sg = gart_map_sg,
.unmap_sg = gart_unmap_sg,
.alloc_coherent = gart_alloc_coherent,
{
struct agp_kern_info info;
unsigned long iommu_start;
- unsigned long aper_size;
+ unsigned long aper_base, aper_size;
+ unsigned long start_pfn, end_pfn;
unsigned long scratch;
long i;
(no_agp && init_k8_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
printk(KERN_WARNING "More than 4GB of memory "
- "but GART IOMMU not available.\n"
- KERN_WARNING "falling back to iommu=soft.\n");
+ "but GART IOMMU not available.\n");
+ printk(KERN_WARNING "falling back to iommu=soft.\n");
}
return;
}
+ /* need to map that range */
+ aper_size = info.aper_size << 20;
+ aper_base = info.aper_base;
+ end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+ if (end_pfn > max_low_pfn_mapped) {
+ start_pfn = (aper_base>>PAGE_SHIFT);
+ init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ }
+
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
- aper_size = info.aper_size * 1024 * 1024;
iommu_size = check_iommu_size(info.aper_base, aper_size);
iommu_pages = iommu_size >> PAGE_SHIFT;
- iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+ iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(iommu_pages/8));
if (!iommu_gart_bitmap)
panic("Cannot allocate iommu bitmap\n");
- memset(iommu_gart_bitmap, 0, iommu_pages/8);
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+ iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
- else
+ if (!iommu_leak_tab)
printk(KERN_DEBUG
"PCI-DMA: Cannot allocate leak trace area\n");
}
if (!strncmp(p, "leak", 4)) {
leak_trace = 1;
p += 4;
- if (*p == '=') ++p;
+ if (*p == '=')
+ ++p;
if (isdigit(*p) && get_option(&p, &arg))
iommu_leak_pages = arg;
}
#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
+ if (!strncmp(p, "fullflush", 9)) /* compare all 9 chars of the token */
+ iommu_fullflush = 1;
+ if (!strncmp(p, "nofullflush", 11))
+ iommu_fullflush = 0;
if (!strncmp(p, "noagp", 5))
no_agp = 1;
if (!strncmp(p, "noaperture", 10))