From: Alex Deucher Date: Fri, 1 Feb 2013 16:32:42 +0000 (+0100) Subject: drm/radeon: use IBs for VM page table updates v2 X-Git-Tag: next-20130218~64^2~15^2~7 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=43f1214aa094e46efdfc0255d9601be0e5ea0f62;p=karo-tx-linux.git drm/radeon: use IBs for VM page table updates v2 For very large page table updates, we can exceed the size of the ring. To avoid this, use an IB to perform the page table update. v2(ck): cleanup the IB infrastructure and the use it instead of filling the struct ourself. Signed-off-by: Alex Deucher Signed-off-by: Christian König --- diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 170bd03d4dd8..7cead763be9e 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags) * cayman_vm_set_page - update the page tables using the CP * * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * - * Update the page tables using the CP (cayman-si). + * Update the page tables using the CP (cayman/TN). */ -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void cayman_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); uint64_t value; unsigned ndw; @@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, if (ndw > 0x3FFF) ndw = 0x3FFF; - radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 1; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); } } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 6539d6cb4bc7..307d681b3443 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); @@ -1188,7 +1189,9 @@ struct radeon_asic { void (*fini)(struct radeon_device *rdev); u32 pt_ring_index; - void (*set_page)(struct radeon_device *rdev, uint64_t pe, + void (*set_page)(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); } vm; @@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) -#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags))) +#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags))) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp)) #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e429e2574cae..f4134a823958 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void cayman_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); @@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void si_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 1d214b66650f..70d38241b083 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; } -static void radeon_cs_sync_to(struct radeon_cs_parser *p, - struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = p->ib.sync_to[fence->ring]; - p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other); -} - static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { int i; @@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) if (!p->relocs[i].robj) continue; - radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj); + radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); } } @@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_cs_sync_to(parser, vm->fence); - radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring)); + radeon_ib_sync_to(&parser->ib, vm->fence); + radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( + rdev, vm, parser->ring)); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 6e24f84755b5..2c1341f63dc5 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) */ static int radeon_vm_update_pdes(struct radeon_device *rdev, struct radeon_vm *vm, + struct radeon_ib *ib, uint64_t start, uint64_t end) { static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; @@ -971,7 +972,7 @@ retry: ((last_pt + incr * count) != pt)) { if (count) { - radeon_asic_vm_set_page(rdev, last_pde, + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, RADEON_VM_PAGE_VALID); } @@ -985,7 +986,7 @@ retry: } if (count) { - radeon_asic_vm_set_page(rdev, last_pde, last_pt, count, + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, RADEON_VM_PAGE_VALID); } @@ -1009,6 +1010,7 @@ retry: */ static void radeon_vm_update_ptes(struct radeon_device *rdev, struct radeon_vm *vm, + struct radeon_ib *ib, uint64_t start, uint64_t end, uint64_t dst, uint32_t flags) { @@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, if ((last_pte + 8 * count) != pte) { if (count) { - radeon_asic_vm_set_page(rdev, last_pte, + radeon_asic_vm_set_page(rdev, ib, last_pte, last_dst, count, RADEON_GPU_PAGE_SIZE, flags); @@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, } if (count) { - radeon_asic_vm_set_page(rdev, last_pte, last_dst, count, + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, RADEON_GPU_PAGE_SIZE, flags); } } @@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct ttm_mem_reg *mem) { unsigned ridx = rdev->asic->vm.pt_ring_index; - struct radeon_ring *ring = &rdev->ring[ridx]; - struct radeon_semaphore *sem = NULL; + struct radeon_ib ib; struct radeon_bo_va *bo_va; unsigned nptes, npdes, ndw; uint64_t addr; @@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, bo_va->valid = false; } - if (vm->fence && radeon_fence_signaled(vm->fence)) { - radeon_fence_unref(&vm->fence); - } - - if (vm->fence && vm->fence->ring != ridx) { - r = radeon_semaphore_create(rdev, &sem); - if (r) { - return r; - } - } - nptes = radeon_bo_ngpu_pages(bo); /* assume two extra pdes in case the mapping overlaps the borders */ npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - /* estimate number of dw needed */ - /* semaphore, fence and padding */ - ndw = 32; + /* padding, etc. */ + ndw = 64; if (RADEON_VM_BLOCK_SIZE > 11) /* reserve space for one header for every 2k dwords */ @@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, /* reserve space for pde addresses */ ndw += npdes * 2; - r = radeon_ring_lock(rdev, ring, ndw); - if (r) { - return r; - } + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; - if (sem && radeon_fence_need_sync(vm->fence, ridx)) { - radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx); - radeon_fence_note_sync(vm->fence, ridx); - } + r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4); + ib.length_dw = 0; - r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset); + r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); if (r) { - radeon_ring_unlock_undo(rdev, ring); + radeon_ib_free(rdev, &ib); return r; } - radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset, + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, bo_va->flags); - radeon_fence_unref(&vm->fence); - r = radeon_fence_emit(rdev, &vm->fence, ridx); + radeon_ib_sync_to(&ib, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { - radeon_ring_unlock_undo(rdev, ring); + radeon_ib_free(rdev, &ib); return r; } - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, vm->fence); + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); radeon_fence_unref(&vm->last_flush); return 0; diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index cd72062d5a91..8d58e268ff6d 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -108,6 +108,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) radeon_fence_unref(&ib->fence); } +/** + * radeon_ib_sync_to - sync to fence before executing the IB + * + * @ib: IB object to add fence to + * @fence: fence to sync to + * + * Sync to the fence before executing the IB + */ +void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = ib->sync_to[fence->ring]; + ib->sync_to[fence->ring] = radeon_fence_later(fence, other); +} + /** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index cd83bc5bd235..a910cb92cfd0 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3043,19 +3043,21 @@ void si_vm_fini(struct radeon_device *rdev) * si_vm_set_page - update the page tables using the CP * * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * - * Update the page tables using the CP (cayman-si). + * Update the page tables using the CP (SI). */ -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void si_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); uint64_t value; unsigned ndw; @@ -3066,11 +3068,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, if (ndw > 0x3FFE) ndw = 0x3FFE; - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(1))); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe)); + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); + ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(1)); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); for (; ndw > 2; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -3082,8 +3084,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -3095,9 +3097,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -3109,8 +3111,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -3124,20 +3126,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, else value = 0; /* for physically contiguous pages (vram) */ - radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw)); - radeon_ring_write(ring, pe); /* dst addr */ - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); - radeon_ring_write(ring, r600_flags); /* mask */ - radeon_ring_write(ring, 0); - radeon_ring_write(ring, value); /* value */ - radeon_ring_write(ring, upper_32_bits(value)); - radeon_ring_write(ring, incr); /* increment size */ - radeon_ring_write(ring, 0); + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; pe += ndw * 4; addr += (ndw / 2) * incr; count -= ndw / 2; } } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); } }