From 4ff37a83f19dab4e67299325ee22e98346eee857 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Fri, 26 Feb 2016 16:18:26 +0100 Subject: [PATCH] drm/amdgpu: fix VM faults caused by vm_grab_id() v4 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The owner must be per ring as long as we don't support sharing VMIDs per process. Also move the assigned VMID and page directory address into the IB structure. v3: assign the VMID to all IBs, not just the first one. v4: use correct pointer for owner Signed-off-by: Christian König Reviewed-by: Chunming Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 115 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- 9 files changed, 91 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f5bac97a438b..0c42a85ca5a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -769,8 +769,9 @@ struct amdgpu_ib { uint32_t *ptr; struct amdgpu_fence *fence; struct amdgpu_user_fence *user; - bool grabbed_vmid; struct amdgpu_vm *vm; + unsigned vm_id; + uint64_t vm_pd_addr; struct amdgpu_ctx *ctx; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; @@ -877,10 +878,10 @@ struct amdgpu_vm_pt { }; struct amdgpu_vm_id { - unsigned id; - uint64_t pd_gpu_addr; + struct amdgpu_vm_manager_id *mgr_id; + uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct fence *flushed_updates; + struct fence *flushed_updates; }; struct amdgpu_vm { @@ -954,10 +955,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates); void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct fence *fence); + struct amdgpu_sync *sync, struct fence *fence, + unsigned *vm_id, uint64_t *vm_pd_addr); void amdgpu_vm_flush(struct amdgpu_ring *ring, - struct amdgpu_vm *vm, - struct fence *updates); + unsigned vmid, + uint64_t pd_addr); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index b5bdd5d59b58..db14a7bbb8f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -75,6 +75,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, } ib->vm = vm; + ib->vm_id = 0; return 0; } @@ -139,7 +140,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - if (vm && !ibs->grabbed_vmid) { + if (vm && !ibs->vm_id) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -152,10 +153,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (vm) { /* do context switch */ - amdgpu_vm_flush(ring, vm, last_vm_update); + amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr); if (ring->funcs->emit_gds_switch) - amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, + amdgpu_ring_emit_gds_switch(ring, ib->vm_id, ib->gds_base, ib->gds_size, ib->gws_base, ib->gws_size, ib->oa_base, ib->oa_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index f29bbb96a881..90e52f7e17a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -105,16 +105,23 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct fence *fence = amdgpu_sync_get_fence(&job->sync); - if (fence == NULL && vm && !job->ibs->grabbed_vmid) { + if (fence == NULL && vm && !job->ibs->vm_id) { struct amdgpu_ring *ring = job->ring; + unsigned i, vm_id; + uint64_t vm_pd_addr; int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, - &job->base.s_fence->base); + &job->base.s_fence->base, + &vm_id, &vm_pd_addr); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - else - job->ibs->grabbed_vmid = true; + else { + for (i = 0; i < job->num_ibs; ++i) { + job->ibs[i].vm_id = vm_id; + job->ibs[i].vm_pd_addr = vm_pd_addr; + } + } fence = amdgpu_sync_get_fence(&job->sync); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 264c5968a1d3..ba909245fef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -50,6 +50,9 @@ * SI supports 16. */ +/* Special value that no flush is necessary */ +#define AMDGPU_VM_NO_FLUSH (~0ll) + /** * amdgpu_vm_num_pde - return the number of page directory entries * @@ -157,50 +160,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, * Allocate an id for the vm, adding fences to the sync obj as necessary. */ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct fence *fence) + struct amdgpu_sync *sync, struct fence *fence, + unsigned *vm_id, uint64_t *vm_pd_addr) { - struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; + uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_manager_id *id; + struct amdgpu_vm_id *id = &vm->ids[ring->idx]; + struct fence *updates = sync->last_vm_update; int r; mutex_lock(&adev->vm_manager.lock); /* check if the id is still valid */ - if (vm_id->id) { + if (id->mgr_id) { + struct fence *flushed = id->flushed_updates; + bool is_later; long owner; - id = &adev->vm_manager.ids[vm_id->id]; - owner = atomic_long_read(&id->owner); - if (owner == (long)vm) { - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); + if (!flushed) + is_later = true; + else if (!updates) + is_later = false; + else + is_later = fence_is_later(updates, flushed); + + owner = atomic_long_read(&id->mgr_id->owner); + if (!is_later && owner == (long)id && + pd_addr == id->pd_gpu_addr) { + + fence_put(id->mgr_id->active); + id->mgr_id->active = fence_get(fence); + + list_move_tail(&id->mgr_id->list, + &adev->vm_manager.ids_lru); - fence_put(id->active); - id->active = fence_get(fence); + *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_pd_addr = AMDGPU_VM_NO_FLUSH; + trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx); mutex_unlock(&adev->vm_manager.lock); return 0; } } - /* we definately need to flush */ - vm_id->pd_gpu_addr = ~0ll; + id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_manager_id, + list); - id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_manager_id, - list); - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - atomic_long_set(&id->owner, (long)vm); + r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active); + if (!r) { + fence_put(id->mgr_id->active); + id->mgr_id->active = fence_get(fence); - vm_id->id = id - adev->vm_manager.ids; - trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); + fence_put(id->flushed_updates); + id->flushed_updates = fence_get(updates); - r = amdgpu_sync_fence(ring->adev, sync, id->active); + id->pd_gpu_addr = pd_addr; - if (!r) { - fence_put(id->active); - id->active = fence_get(fence); + list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); + atomic_long_set(&id->mgr_id->owner, (long)id); + + *vm_id = id->mgr_id - adev->vm_manager.ids; + *vm_pd_addr = pd_addr; + trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx); } mutex_unlock(&adev->vm_manager.lock); @@ -211,35 +233,18 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vm: vm we want to flush - * @updates: last vm update that we waited for + * @vmid: vmid number to use + * @pd_addr: address of the page directory * - * Flush the vm. + * Emit a VM flush when it is necessary. */ void amdgpu_vm_flush(struct amdgpu_ring *ring, - struct amdgpu_vm *vm, - struct fence *updates) + unsigned vmid, + uint64_t pd_addr) { - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); - struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; - struct fence *flushed_updates = vm_id->flushed_updates; - bool is_later; - - if (!flushed_updates) - is_later = true; - else if (!updates) - is_later = false; - else - is_later = fence_is_later(updates, flushed_updates); - - if (pd_addr != vm_id->pd_gpu_addr || is_later) { - trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); - if (is_later) { - vm_id->flushed_updates = fence_get(updates); - fence_put(flushed_updates); - } - vm_id->pd_gpu_addr = pd_addr; - amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); + if (pd_addr != AMDGPU_VM_NO_FLUSH) { + trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid); + amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr); } } @@ -1284,7 +1289,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - vm->ids[i].id = 0; + vm->ids[i].mgr_id = NULL; vm->ids[i].flushed_updates = NULL; } vm->va = RB_ROOT; @@ -1381,13 +1386,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - unsigned id = vm->ids[i].id; + struct amdgpu_vm_id *id = &vm->ids[i]; - atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner, - (long)vm, 0); - fence_put(vm->ids[i].flushed_updates); + if (id->mgr_id) + atomic_long_cmpxchg(&id->mgr_id->owner, + (long)id, 0); + fence_put(id->flushed_updates); } - } /** diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 675f34916aab..e4e4b2ac77b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; + u32 extra_bits = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 4) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index bc5bdaf3d2bb..9cdf59518533 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2043,8 +2043,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | - (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + control |= ib->length_dw | (ib->vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2072,8 +2071,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | - (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + control |= ib->length_dw | (ib->vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 71d536e595a2..5f67a189bce9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4619,8 +4619,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | - (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + control |= ib->length_dw | (ib->vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -4649,8 +4648,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | - (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + control |= ib->length_dw | (ib->vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 29ec986dd6fc..dddb8d6a81f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; + u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 6f064d7076e6..19e02f7a06f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; + u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) -- 2.39.5