From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Jul 2015 17:24:49 +0200 Subject: [PATCH] drm/amdgpu: add user fence context map v2 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution. v2: properly implement the locking Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 +++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +- 4 files changed, 110 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70e783a849ed..0220d98ba8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -415,6 +415,8 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; + /* resulting sequence number */ + uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -985,9 +987,18 @@ struct amdgpu_vm_manager { * context related structures */ +#define AMDGPU_CTX_MAX_CS_PENDING 16 + +struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; +}; + struct amdgpu_ctx { struct kref refcount; unsigned reset_counter; + spinlock_t ring_lock; + struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; }; struct amdgpu_ctx_mgr { @@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence); +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq); + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 53e6a10fe9f9..cef8360698be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, sizeof(struct drm_amdgpu_cs_chunk_dep); for (j = 0; j < num_deps; ++j) { - struct amdgpu_fence *fence; struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct fence *fence; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (ctx == NULL) return -EINVAL; - r = amdgpu_fence_recreate(ring, p->filp, - deps[j].handle, - &fence); - if (r) { + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[j].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); amdgpu_ctx_put(ctx); return r; - } - - r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); - amdgpu_fence_unref(&fence); - amdgpu_ctx_put(ctx); - if (r) - return r; + } else if (fence) { + r = amdgpu_sync_fence(adev, &ib->sync, fence); + fence_put(fence); + amdgpu_ctx_put(ctx); + if (r) + return r; + } } } @@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_ib_fill(adev, &parser); } - if (!r) + if (!r) { r = amdgpu_cs_dependencies(adev, &parser); + if (r) + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + } if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; + cs->out.handle = parser.uf.sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); @@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_fence *fence = NULL; struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; + struct fence *fence; long r; + r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) + return r; + ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); - r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + + } else + r = 1; - r = fence_wait_timeout(&fence->base, true, timeout); - amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e63cfb7fa390..c23bfd8fe414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,17 +28,22 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + unsigned i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) + fence_put(ctx->rings[i].fences[j]); kfree(ctx); } int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id) { - int r; struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + int i, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, memset(ctx, 0, sizeof(*ctx)); kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + ctx->rings[i].sequence = 1; mutex_unlock(&mgr->lock); return 0; @@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) kref_put(&ctx->refcount, amdgpu_ctx_do_release); return 0; } + +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + uint64_t seq = cring->sequence; + unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; + struct fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + } + + fence_get(fence); + + spin_lock(&ctx->ring_lock); + cring->fences[idx] = fence; + cring->sequence++; + spin_unlock(&ctx->ring_lock); + + fence_put(other); + + return seq; +} + +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + struct fence *fence; + + spin_lock(&ctx->ring_lock); + if (seq >= cring->sequence) { + spin_unlock(&ctx->ring_lock); + return ERR_PTR(-EINVAL); + } + + if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { + spin_unlock(&ctx->ring_lock); + return NULL; + } + + fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); + spin_unlock(&ctx->ring_lock); + + return fence; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2722815eddbb..95d533422a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); + ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, + amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- 2.39.5