]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - drivers/gpu/drm/nouveau/nouveau_mem.c
drm/nv50: enable 4KiB pages for small vram allocations
[mv-sheeva.git] / drivers / gpu / drm / nouveau / nouveau_mem.c
index fe4a30dc4b42e929f283d35e1094b26a422f4f2e..3a531d53de4d86c323236cdbdaac718c6656cf02 100644 (file)
 
 #include "nouveau_drv.h"
 #include "nouveau_pm.h"
+#include "nouveau_mm.h"
+#include "nouveau_vm.h"
 
 /*
  * NV10-NV40 tiling helpers
  */
 
 static void
-nv10_mem_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
-                          uint32_t size, uint32_t pitch)
+nv10_mem_update_tile_region(struct drm_device *dev,
+                           struct nouveau_tile_reg *tile, uint32_t addr,
+                           uint32_t size, uint32_t pitch, uint32_t flags)
 {
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
        struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
        struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-       struct nouveau_tile_reg *tile = &dev_priv->tile[i];
+       int i = tile - dev_priv->tile.reg;
+       unsigned long save;
 
-       tile->addr = addr;
-       tile->size = size;
-       tile->used = !!pitch;
-       nouveau_fence_unref((void **)&tile->fence);
+       nouveau_fence_unref(&tile->fence);
 
+       if (tile->pitch)
+               pfb->free_tile_region(dev, i);
+
+       if (pitch)
+               pfb->init_tile_region(dev, i, addr, size, pitch, flags);
+
+       spin_lock_irqsave(&dev_priv->context_switch_lock, save);
        pfifo->reassign(dev, false);
        pfifo->cache_pull(dev, false);
 
        nouveau_wait_for_idle(dev);
 
-       pgraph->set_region_tiling(dev, i, addr, size, pitch);
-       pfb->set_region_tiling(dev, i, addr, size, pitch);
+       pfb->set_tile_region(dev, i);
+       pgraph->set_tile_region(dev, i);
 
        pfifo->cache_pull(dev, true);
        pfifo->reassign(dev, true);
+       spin_unlock_irqrestore(&dev_priv->context_switch_lock, save);
 }
 
-struct nouveau_tile_reg *
-nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
-                   uint32_t pitch)
+static struct nouveau_tile_reg *
+nv10_mem_get_tile_region(struct drm_device *dev, int i)
 {
        struct drm_nouveau_private *dev_priv = dev->dev_private;
-       struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
-       struct nouveau_tile_reg *found = NULL;
-       unsigned long i, flags;
+       struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
 
-       spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-
-       for (i = 0; i < pfb->num_tiles; i++) {
-               struct nouveau_tile_reg *tile = &dev_priv->tile[i];
-
-               if (tile->used)
-                       /* Tile region in use. */
-                       continue;
-
-               if (tile->fence &&
-                   !nouveau_fence_signalled(tile->fence, NULL))
-                       /* Pending tile region. */
-                       continue;
-
-               if (max(tile->addr, addr) <
-                   min(tile->addr + tile->size, addr + size))
-                       /* Kill an intersecting tile region. */
-                       nv10_mem_set_region_tiling(dev, i, 0, 0, 0);
-
-               if (pitch && !found) {
-                       /* Free tile region. */
-                       nv10_mem_set_region_tiling(dev, i, addr, size, pitch);
-                       found = tile;
-               }
-       }
+       spin_lock(&dev_priv->tile.lock);
 
-       spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+       if (!tile->used &&
+           (!tile->fence || nouveau_fence_signalled(tile->fence)))
+               tile->used = true;
+       else
+               tile = NULL;
 
-       return found;
+       spin_unlock(&dev_priv->tile.lock);
+       return tile;
 }
 
 void
-nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile,
-                      struct nouveau_fence *fence)
-{
-       if (fence) {
-               /* Mark it as pending. */
-               tile->fence = fence;
-               nouveau_fence_ref(fence);
-       }
-
-       tile->used = false;
-}
-
-/*
- * NV50 VM helpers
- */
-int
-nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
-                       uint32_t flags, uint64_t phys)
+nv10_mem_put_tile_region(struct drm_device *dev, struct nouveau_tile_reg *tile,
+                        struct nouveau_fence *fence)
 {
        struct drm_nouveau_private *dev_priv = dev->dev_private;
-       struct nouveau_gpuobj *pgt;
-       unsigned block;
-       int i;
-
-       virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1;
-       size = (size >> 16) << 1;
-
-       phys |= ((uint64_t)flags << 32);
-       phys |= 1;
-       if (dev_priv->vram_sys_base) {
-               phys += dev_priv->vram_sys_base;
-               phys |= 0x30;
-       }
 
-       while (size) {
-               unsigned offset_h = upper_32_bits(phys);
-               unsigned offset_l = lower_32_bits(phys);
-               unsigned pte, end;
-
-               for (i = 7; i >= 0; i--) {
-                       block = 1 << (i + 1);
-                       if (size >= block && !(virt & (block - 1)))
-                               break;
-               }
-               offset_l |= (i << 7);
-
-               phys += block << 15;
-               size -= block;
-
-               while (block) {
-                       pgt = dev_priv->vm_vram_pt[virt >> 14];
-                       pte = virt & 0x3ffe;
-
-                       end = pte + block;
-                       if (end > 16384)
-                               end = 16384;
-                       block -= (end - pte);
-                       virt  += (end - pte);
-
-                       while (pte < end) {
-                               nv_wo32(pgt, (pte * 4) + 0, offset_l);
-                               nv_wo32(pgt, (pte * 4) + 4, offset_h);
-                               pte += 2;
-                       }
+       if (tile) {
+               spin_lock(&dev_priv->tile.lock);
+               if (fence) {
+                       /* Mark it as pending. */
+                       tile->fence = fence;
+                       nouveau_fence_ref(fence);
                }
-       }
 
-       dev_priv->engine.instmem.flush(dev);
-       dev_priv->engine.fifo.tlb_flush(dev);
-       dev_priv->engine.graph.tlb_flush(dev);
-       nv50_vm_flush(dev, 6);
-       return 0;
+               tile->used = false;
+               spin_unlock(&dev_priv->tile.lock);
+       }
 }
 
-void
-nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
+struct nouveau_tile_reg *
+nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+                   uint32_t pitch, uint32_t flags)
 {
        struct drm_nouveau_private *dev_priv = dev->dev_private;
-       struct nouveau_gpuobj *pgt;
-       unsigned pages, pte, end;
-
-       virt -= dev_priv->vm_vram_base;
-       pages = (size >> 16) << 1;
+       struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+       struct nouveau_tile_reg *tile, *found = NULL;
+       int i;
 
-       while (pages) {
-               pgt = dev_priv->vm_vram_pt[virt >> 29];
-               pte = (virt & 0x1ffe0000ULL) >> 15;
+       for (i = 0; i < pfb->num_tiles; i++) {
+               tile = nv10_mem_get_tile_region(dev, i);
 
-               end = pte + pages;
-               if (end > 16384)
-                       end = 16384;
-               pages -= (end - pte);
-               virt  += (end - pte) << 15;
+               if (pitch && !found) {
+                       found = tile;
+                       continue;
 
-               while (pte < end) {
-                       nv_wo32(pgt, (pte * 4), 0);
-                       pte++;
+               } else if (tile && tile->pitch) {
+                       /* Kill an unused tile region. */
+                       nv10_mem_update_tile_region(dev, tile, 0, 0, 0, 0);
                }
+
+               nv10_mem_put_tile_region(dev, tile, NULL);
        }
 
-       dev_priv->engine.instmem.flush(dev);
-       dev_priv->engine.fifo.tlb_flush(dev);
-       dev_priv->engine.graph.tlb_flush(dev);
-       nv50_vm_flush(dev, 6);
+       if (found)
+               nv10_mem_update_tile_region(dev, found, addr, size,
+                                           pitch, flags);
+       return found;
 }
 
 /*
@@ -312,61 +241,6 @@ nouveau_mem_detect_nforce(struct drm_device *dev)
        return 0;
 }
 
-static void
-nv50_vram_preinit(struct drm_device *dev)
-{
-       struct drm_nouveau_private *dev_priv = dev->dev_private;
-       int i, parts, colbits, rowbitsa, rowbitsb, banks;
-       u64 rowsize, predicted;
-       u32 r0, r4, rt, ru;
-
-       r0 = nv_rd32(dev, 0x100200);
-       r4 = nv_rd32(dev, 0x100204);
-       rt = nv_rd32(dev, 0x100250);
-       ru = nv_rd32(dev, 0x001540);
-       NV_DEBUG(dev, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru);
-
-       for (i = 0, parts = 0; i < 8; i++) {
-               if (ru & (0x00010000 << i))
-                       parts++;
-       }
-
-       colbits  =  (r4 & 0x0000f000) >> 12;
-       rowbitsa = ((r4 & 0x000f0000) >> 16) + 8;
-       rowbitsb = ((r4 & 0x00f00000) >> 20) + 8;
-       banks    = ((r4 & 0x01000000) ? 8 : 4);
-
-       rowsize = parts * banks * (1 << colbits) * 8;
-       predicted = rowsize << rowbitsa;
-       if (r0 & 0x00000004)
-               predicted += rowsize << rowbitsb;
-
-       if (predicted != dev_priv->vram_size) {
-               NV_WARN(dev, "memory controller reports %dMiB VRAM\n",
-                       (u32)(dev_priv->vram_size >> 20));
-               NV_WARN(dev, "we calculated %dMiB VRAM\n",
-                       (u32)(predicted >> 20));
-       }
-
-       dev_priv->vram_rblock_size = rowsize >> 12;
-       if (rt & 1)
-               dev_priv->vram_rblock_size *= 3;
-
-       NV_DEBUG(dev, "rblock %lld bytes\n",
-                (u64)dev_priv->vram_rblock_size << 12);
-}
-
-static void
-nvaa_vram_preinit(struct drm_device *dev)
-{
-       struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-       /* To our knowledge, there's no large scale reordering of pages
-        * that occurs on IGP chipsets.
-        */
-       dev_priv->vram_rblock_size = 1;
-}
-
 static int
 nouveau_mem_detect(struct drm_device *dev)
 {
@@ -383,22 +257,8 @@ nouveau_mem_detect(struct drm_device *dev)
                dev_priv->vram_size &= NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_MASK;
        } else
        if (dev_priv->card_type < NV_C0) {
-               dev_priv->vram_size = nv_rd32(dev, NV04_PFB_FIFO_DATA);
-               dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32;
-               dev_priv->vram_size &= 0xffffffff00ll;
-
-               switch (dev_priv->chipset) {
-               case 0xaa:
-               case 0xac:
-               case 0xaf:
-                       dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10);
-                       dev_priv->vram_sys_base <<= 12;
-                       nvaa_vram_preinit(dev);
-                       break;
-               default:
-                       nv50_vram_preinit(dev);
-                       break;
-               }
+               if (nv50_vram_init(dev))
+                       return -ENOMEM;
        } else {
                dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
                dev_priv->vram_size *= nv_rd32(dev, 0x121c74);
@@ -547,10 +407,6 @@ nouveau_mem_vram_init(struct drm_device *dev)
        if (ret)
                return ret;
 
-       ret = nouveau_mem_detect(dev);
-       if (ret)
-               return ret;
-
        dev_priv->fb_phys = pci_resource_start(dev->pdev, 1);
 
        ret = nouveau_ttm_global_init(dev_priv);
@@ -566,13 +422,6 @@ nouveau_mem_vram_init(struct drm_device *dev)
                return ret;
        }
 
-       dev_priv->fb_available_size = dev_priv->vram_size;
-       dev_priv->fb_mappable_pages = dev_priv->fb_available_size;
-       if (dev_priv->fb_mappable_pages > pci_resource_len(dev->pdev, 1))
-               dev_priv->fb_mappable_pages =
-                       pci_resource_len(dev->pdev, 1);
-       dev_priv->fb_mappable_pages >>= PAGE_SHIFT;
-
        /* reserve space at end of VRAM for PRAMIN */
        if (dev_priv->chipset == 0x40 || dev_priv->chipset == 0x47 ||
            dev_priv->chipset == 0x49 || dev_priv->chipset == 0x4b)
@@ -583,6 +432,17 @@ nouveau_mem_vram_init(struct drm_device *dev)
        else
                dev_priv->ramin_rsvd_vram = (512 * 1024);
 
+       /* initialise gpu-specific vram backend */
+       ret = nouveau_mem_detect(dev);
+       if (ret)
+               return ret;
+
+       dev_priv->fb_available_size = dev_priv->vram_size;
+       dev_priv->fb_mappable_pages = dev_priv->fb_available_size;
+       if (dev_priv->fb_mappable_pages > pci_resource_len(dev->pdev, 1))
+               dev_priv->fb_mappable_pages = pci_resource_len(dev->pdev, 1);
+       dev_priv->fb_mappable_pages >>= PAGE_SHIFT;
+
        dev_priv->fb_available_size -= dev_priv->ramin_rsvd_vram;
        dev_priv->fb_aper_free = dev_priv->fb_available_size;
 
@@ -799,3 +659,109 @@ nouveau_mem_timing_fini(struct drm_device *dev)
 
        kfree(mem->timing);
 }
+
+static int
+nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long p_size)
+{
+       struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
+       struct nouveau_mm *mm;
+       u32 b_size;
+       int ret;
+
+       p_size = (p_size << PAGE_SHIFT) >> 12;
+       b_size = dev_priv->vram_rblock_size >> 12;
+
+       ret = nouveau_mm_init(&mm, 0, p_size, b_size);
+       if (ret)
+               return ret;
+
+       man->priv = mm;
+       return 0;
+}
+
+static int
+nouveau_vram_manager_fini(struct ttm_mem_type_manager *man)
+{
+       struct nouveau_mm *mm = man->priv;
+       int ret;
+
+       ret = nouveau_mm_fini(&mm);
+       if (ret)
+               return ret;
+
+       man->priv = NULL;
+       return 0;
+}
+
+static void
+nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
+                        struct ttm_mem_reg *mem)
+{
+       struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
+       struct drm_device *dev = dev_priv->dev;
+
+       nv50_vram_del(dev, (struct nouveau_vram **)&mem->mm_node);
+}
+
+static int
+nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
+                        struct ttm_buffer_object *bo,
+                        struct ttm_placement *placement,
+                        struct ttm_mem_reg *mem)
+{
+       struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev);
+       struct drm_device *dev = dev_priv->dev;
+       struct nouveau_bo *nvbo = nouveau_bo(bo);
+       struct nouveau_vram *vram;
+       int ret;
+
+       ret = nv50_vram_new(dev, mem->num_pages << PAGE_SHIFT,
+                           mem->page_alignment << PAGE_SHIFT, 0,
+                           (nvbo->tile_flags >> 8) & 0x7f, &vram);
+       if (ret)
+               return ret;
+
+       mem->mm_node = vram;
+       mem->start   = vram->offset >> PAGE_SHIFT;
+       return 0;
+}
+
+void
+nouveau_vram_manager_debug(struct ttm_mem_type_manager *man, const char *prefix)
+{
+       struct ttm_bo_global *glob = man->bdev->glob;
+       struct nouveau_mm *mm = man->priv;
+       struct nouveau_mm_node *r;
+       u64 total = 0, ttotal[3] = {}, tused[3] = {}, tfree[3] = {};
+       int i;
+
+       mutex_lock(&mm->mutex);
+       list_for_each_entry(r, &mm->nodes, nl_entry) {
+               printk(KERN_DEBUG "%s %s-%d: 0x%010llx 0x%010llx\n",
+                      prefix, r->free ? "free" : "used", r->type,
+                      ((u64)r->offset << 12),
+                      (((u64)r->offset + r->length) << 12));
+               total += r->length;
+               ttotal[r->type] += r->length;
+               if (r->free)
+                       tfree[r->type] += r->length;
+               else
+                       tused[r->type] += r->length;
+       }
+       mutex_unlock(&mm->mutex);
+
+       printk(KERN_DEBUG "%s  total: 0x%010llx\n", prefix, total << 12);
+       for (i = 0; i < 3; i++) {
+               printk(KERN_DEBUG "%s type %d: 0x%010llx, "
+                                 "used 0x%010llx, free 0x%010llx\n", prefix,
+                      i, ttotal[i] << 12, tused[i] << 12, tfree[i] << 12);
+       }
+}
+
+const struct ttm_mem_type_manager_func nouveau_vram_manager = {
+       nouveau_vram_manager_init,
+       nouveau_vram_manager_fini,
+       nouveau_vram_manager_new,
+       nouveau_vram_manager_del,
+       nouveau_vram_manager_debug
+};