From b6295f9a38fc3f7d59ec0caa45aa803461c6cea5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 15 Mar 2016 18:26:28 -0400 Subject: [PATCH] drm/msm: 'struct fence' conversion Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 +- drivers/gpu/drm/msm/msm_atomic.c | 42 ++++---- drivers/gpu/drm/msm/msm_drv.h | 5 +- drivers/gpu/drm/msm/msm_fence.c | 71 +++++++++++++- drivers/gpu/drm/msm/msm_fence.h | 4 + drivers/gpu/drm/msm/msm_gem.c | 124 ++++++++++++++++++------ drivers/gpu/drm/msm/msm_gem.h | 16 +-- drivers/gpu/drm/msm/msm_gem_submit.c | 22 ++++- drivers/gpu/drm/msm/msm_gpu.c | 27 ++++-- drivers/gpu/drm/msm/msm_rd.c | 2 +- 10 files changed, 233 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 9aab871f4def..95aed377b933 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -168,7 +168,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT2(ring); OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence); + OUT_RING(ring, submit->fence->seqno); if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -185,7 +185,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); OUT_RING(ring, rbmemptr(adreno_gpu, fence)); - OUT_RING(ring, submit->fence); + OUT_RING(ring, submit->fence->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 6ac7192d0ad6..e3892c263f27 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -107,27 +107,6 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev, } } -static void wait_fences(struct msm_commit *c, bool async) -{ - int nplanes = c->dev->mode_config.num_total_plane; - ktime_t timeout = ktime_add_ms(ktime_get(), 1000); - int i; - - for (i = 0; i < nplanes; i++) { - struct drm_plane *plane = c->state->planes[i]; - struct drm_plane_state *new_state = c->state->plane_states[i]; - - if (!plane) - continue; - - if ((plane->state->fb != new_state->fb) && new_state->fb) { - struct drm_gem_object *obj = - msm_framebuffer_bo(new_state->fb, 0); - msm_gem_cpu_sync(obj, MSM_PREP_READ, &timeout); - } - } -} - /* The (potentially) asynchronous part of the commit. At this point * nothing can fail short of armageddon. */ @@ -138,7 +117,7 @@ static void complete_commit(struct msm_commit *c, bool async) struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; - wait_fences(c, async); + drm_atomic_helper_wait_for_fences(dev, state); kms->funcs->prepare_commit(kms, state); @@ -213,6 +192,7 @@ int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock) { struct msm_drm_private *priv = dev->dev_private; + int nplanes = dev->mode_config.num_total_plane; int ncrtcs = dev->mode_config.num_crtc; struct msm_commit *c; int i, ret; @@ -237,6 +217,24 @@ int msm_atomic_commit(struct drm_device *dev, c->crtc_mask |= (1 << drm_crtc_index(crtc)); } + /* + * Figure out what fence to wait for: + */ + for (i = 0; i < nplanes; i++) { + struct drm_plane *plane = state->planes[i]; + struct drm_plane_state *new_state = state->plane_states[i]; + + if (!plane) + continue; + + if ((plane->state->fb != new_state->fb) && new_state->fb) { + struct drm_gem_object *obj = msm_framebuffer_bo(new_state->fb, 0); + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + new_state->fence = reservation_object_get_excl_rcu(msm_obj->resv); + } + } + /* * Wait for pending updates on any of the same crtc's and then * mark our set of crtc's as busy: diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 7124c7f262ec..4114a40e4a09 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -190,10 +190,11 @@ int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); void *msm_gem_vaddr_locked(struct drm_gem_object *obj); void *msm_gem_vaddr(struct drm_gem_object *obj); +int msm_gem_sync_object(struct drm_gem_object *obj, + struct msm_fence_context *fctx, bool exclusive); void msm_gem_move_to_active(struct drm_gem_object *obj, - struct msm_gpu *gpu, bool write, uint32_t fence); + struct msm_gpu *gpu, bool exclusive, struct fence *fence); void msm_gem_move_to_inactive(struct drm_gem_object *obj); -int msm_gem_cpu_sync(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_fini(struct drm_gem_object *obj); void msm_gem_free_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 088610ca80ca..a9b9b1c95a2e 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -32,7 +32,9 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) fctx->dev = dev; fctx->name = name; + fctx->context = fence_context_alloc(1); init_waitqueue_head(&fctx->event); + spin_lock_init(&fctx->spinlock); return fctx; } @@ -47,6 +49,7 @@ static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fenc return (int32_t)(fctx->completed_fence - fence) >= 0; } +/* legacy path for WAIT_FENCE ioctl: */ int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, ktime_t *timeout, bool interruptible) { @@ -88,9 +91,73 @@ int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, /* called from workqueue */ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) { - mutex_lock(&fctx->dev->struct_mutex); + spin_lock(&fctx->spinlock); fctx->completed_fence = max(fence, fctx->completed_fence); - mutex_unlock(&fctx->dev->struct_mutex); + spin_unlock(&fctx->spinlock); wake_up_all(&fctx->event); } + +struct msm_fence { + struct msm_fence_context *fctx; + struct fence base; +}; + +static inline struct msm_fence *to_msm_fence(struct fence *fence) +{ + return container_of(fence, struct msm_fence, base); +} + +static const char *msm_fence_get_driver_name(struct fence *fence) +{ + return "msm"; +} + +static const char *msm_fence_get_timeline_name(struct fence *fence) +{ + struct msm_fence *f = to_msm_fence(fence); + return f->fctx->name; +} + +static bool msm_fence_enable_signaling(struct fence *fence) +{ + return true; +} + +static bool msm_fence_signaled(struct fence *fence) +{ + struct msm_fence *f = to_msm_fence(fence); + return fence_completed(f->fctx, f->base.seqno); +} + +static void msm_fence_release(struct fence *fence) +{ + struct msm_fence *f = to_msm_fence(fence); + kfree_rcu(f, base.rcu); +} + +static const struct fence_ops msm_fence_ops = { + .get_driver_name = msm_fence_get_driver_name, + .get_timeline_name = msm_fence_get_timeline_name, + .enable_signaling = msm_fence_enable_signaling, + .signaled = msm_fence_signaled, + .wait = fence_default_wait, + .release = msm_fence_release, +}; + +struct fence * +msm_fence_alloc(struct msm_fence_context *fctx) +{ + struct msm_fence *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return ERR_PTR(-ENOMEM); + + f->fctx = fctx; + + fence_init(&f->base, &msm_fence_ops, &fctx->spinlock, + fctx->context, ++fctx->last_fence); + + return &f->base; +} diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 2820781d974a..ceb5b3d314b4 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -23,10 +23,12 @@ struct msm_fence_context { struct drm_device *dev; const char *name; + unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned fence */ uint32_t completed_fence; /* last completed fence */ wait_queue_head_t event; + spinlock_t spinlock; }; struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev, @@ -39,4 +41,6 @@ int msm_queue_fence_cb(struct msm_fence_context *fctx, struct msm_fence_cb *cb, uint32_t fence); void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence); +struct fence * msm_fence_alloc(struct msm_fence_context *fctx); + #endif diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 9080ed13998a..9bbe1c5e7151 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -411,15 +411,62 @@ void *msm_gem_vaddr(struct drm_gem_object *obj) return ret; } +/* must be called before _move_to_active().. */ +int msm_gem_sync_object(struct drm_gem_object *obj, + struct msm_fence_context *fctx, bool exclusive) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct reservation_object_list *fobj; + struct fence *fence; + int i, ret; + + if (!exclusive) { + /* NOTE: _reserve_shared() must happen before _add_shared_fence(), + * which makes this a slightly strange place to call it. OTOH this + * is a convenient can-fail point to hook it in. (And similar to + * how etnaviv and nouveau handle this.) + */ + ret = reservation_object_reserve_shared(msm_obj->resv); + if (ret) + return ret; + } + + fobj = reservation_object_get_list(msm_obj->resv); + if (!fobj || (fobj->shared_count == 0)) { + fence = reservation_object_get_excl(msm_obj->resv); + /* don't need to wait on our own fences, since ring is fifo */ + if (fence && (fence->context != fctx->context)) { + ret = fence_wait(fence, true); + if (ret) + return ret; + } + } + + if (!exclusive || !fobj) + return 0; + + for (i = 0; i < fobj->shared_count; i++) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(msm_obj->resv)); + if (fence->context != fctx->context) { + ret = fence_wait(fence, true); + if (ret) + return ret; + } + } + + return 0; +} + void msm_gem_move_to_active(struct drm_gem_object *obj, - struct msm_gpu *gpu, bool write, uint32_t fence) + struct msm_gpu *gpu, bool exclusive, struct fence *fence) { struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_obj->gpu = gpu; - if (write) - msm_obj->write_fence = fence; + if (exclusive) + reservation_object_add_excl_fence(msm_obj->resv, fence); else - msm_obj->read_fence = fence; + reservation_object_add_shared_fence(msm_obj->resv, fence); list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &gpu->active_list); } @@ -433,39 +480,30 @@ void msm_gem_move_to_inactive(struct drm_gem_object *obj) WARN_ON(!mutex_is_locked(&dev->struct_mutex)); msm_obj->gpu = NULL; - msm_obj->read_fence = 0; - msm_obj->write_fence = 0; list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); } -int msm_gem_cpu_sync(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) +int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) { - struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); - int ret = 0; - - if (is_active(msm_obj)) { - uint32_t fence = msm_gem_fence(msm_obj, op); + bool write = !!(op & MSM_PREP_WRITE); - if (op & MSM_PREP_NOSYNC) - timeout = NULL; + if (op & MSM_PREP_NOSYNC) { + if (!reservation_object_test_signaled_rcu(msm_obj->resv, write)) + return -EBUSY; + } else { + int ret; - if (priv->gpu) - ret = msm_wait_fence(priv->gpu->fctx, fence, timeout, true); + ret = reservation_object_wait_timeout_rcu(msm_obj->resv, write, + true, timeout_to_jiffies(timeout)); + if (ret <= 0) + return ret == 0 ? -ETIMEDOUT : ret; } - return ret; -} - -int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) -{ - int ret = msm_gem_cpu_sync(obj, op, timeout); - /* TODO cache maintenance */ - return ret; + return 0; } int msm_gem_cpu_fini(struct drm_gem_object *obj) @@ -475,18 +513,46 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj) } #ifdef CONFIG_DEBUG_FS +static void describe_fence(struct fence *fence, const char *type, + struct seq_file *m) +{ + if (!fence_is_signaled(fence)) + seq_printf(m, "\t%9s: %s %s seq %u\n", type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); +} + void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) { - struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct reservation_object *robj = msm_obj->resv; + struct reservation_object_list *fobj; + struct fence *fence; uint64_t off = drm_vma_node_start(&obj->vma_node); - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %zu\n", + WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + + seq_printf(m, "%08x: %c %2d (%2d) %08llx %p %zu\n", msm_obj->flags, is_active(msm_obj) ? 'A' : 'I', - msm_obj->read_fence, msm_obj->write_fence, obj->name, obj->refcount.refcount.counter, off, msm_obj->vaddr, obj->size); + + rcu_read_lock(); + fobj = rcu_dereference(robj->fence); + if (fobj) { + unsigned int i, shared_count = fobj->shared_count; + + for (i = 0; i < shared_count; i++) { + fence = rcu_dereference(fobj->shared[i]); + describe_fence(fence, "Shared", m); + } + } + + fence = rcu_dereference(robj->fence_excl); + if (fence) + describe_fence(fence, "Exclusive", m); + rcu_read_unlock(); } void msm_gem_describe_objects(struct list_head *list, struct seq_file *m) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index b8904517117d..2771d155587a 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -39,7 +39,6 @@ struct msm_gem_object { */ struct list_head mm_list; struct msm_gpu *gpu; /* non-null if active */ - uint32_t read_fence, write_fence; /* Transiently in the process of submit ioctl, objects associated * with the submit are on submit->bo_list.. this only lasts for @@ -73,19 +72,6 @@ static inline bool is_active(struct msm_gem_object *msm_obj) return msm_obj->gpu != NULL; } -static inline uint32_t msm_gem_fence(struct msm_gem_object *msm_obj, - uint32_t op) -{ - uint32_t fence = 0; - - if (op & MSM_PREP_READ) - fence = msm_obj->write_fence; - if (op & MSM_PREP_WRITE) - fence = max(fence, msm_obj->read_fence); - - return fence; -} - #define MAX_CMDS 4 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, @@ -99,7 +85,7 @@ struct msm_gem_submit { struct list_head node; /* node in gpu submit_list */ struct list_head bo_list; struct ww_acquire_ctx ticket; - uint32_t fence; + struct fence *fence; bool valid; /* true if no cmdstream patching needed */ unsigned int nr_cmds; unsigned int nr_bos; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b231544b3f52..2661a7cb23c8 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -184,6 +184,22 @@ fail: return ret; } +static int submit_fence_sync(struct msm_gem_submit *submit) +{ + int i, ret = 0; + + for (i = 0; i < submit->nr_bos; i++) { + struct msm_gem_object *msm_obj = submit->bos[i].obj; + bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; + + ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + if (ret) + break; + } + + return ret; +} + static int submit_pin_objects(struct msm_gem_submit *submit) { int i, ret = 0; @@ -358,6 +374,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; + ret = submit_fence_sync(submit); + if (ret) + goto out; + ret = submit_pin_objects(submit); if (ret) goto out; @@ -424,7 +444,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, ret = msm_gpu_submit(gpu, submit, ctx); - args->fence = submit->fence; + args->fence = submit->fence->seqno; out: submit_cleanup(submit, !!ret); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 8f0b29548cfc..f94d01099e19 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -266,22 +266,24 @@ static void inactive_start(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ -static void retire_submits(struct msm_gpu *gpu, uint32_t fence); +static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + uint32_t fence = gpu->funcs->last_fence(gpu); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); + msm_update_fence(gpu->fctx, fence + 1); + mutex_lock(&dev->struct_mutex); if (msm_gpu_active(gpu)) { struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); /* retire completed submits, plus the one that hung: */ - retire_submits(gpu, fence + 1); + retire_submits(gpu); inactive_cancel(gpu); gpu->funcs->recover(gpu); @@ -444,11 +446,12 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) drm_gem_object_unreference(&msm_obj->base); } + fence_put(submit->fence); list_del(&submit->node); kfree(submit); } -static void retire_submits(struct msm_gpu *gpu, uint32_t fence) +static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; @@ -460,7 +463,7 @@ static void retire_submits(struct msm_gpu *gpu, uint32_t fence) submit = list_first_entry(&gpu->submit_list, struct msm_gem_submit, node); - if (submit->fence <= fence) { + if (fence_is_signaled(submit->fence)) { retire_submit(gpu, submit); } else { break; @@ -477,7 +480,7 @@ static void retire_worker(struct work_struct *work) msm_update_fence(gpu->fctx, fence); mutex_lock(&dev->struct_mutex); - retire_submits(gpu, fence); + retire_submits(gpu); mutex_unlock(&dev->struct_mutex); if (!msm_gpu_active(gpu)) @@ -502,7 +505,12 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - submit->fence = ++gpu->fctx->last_fence; + submit->fence = msm_fence_alloc(gpu->fctx); + if (IS_ERR(submit->fence)) { + ret = PTR_ERR(submit->fence); + submit->fence = NULL; + return ret; + } inactive_cancel(gpu); @@ -526,11 +534,10 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gem_get_iova_locked(&msm_obj->base, submit->gpu->id, &iova); - if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) - msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence); - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); + else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) + msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence); } ret = gpu->funcs->submit(gpu, submit, ctx); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 9a78c48817c6..b48f73ac6389 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -296,7 +296,7 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", TASK_COMM_LEN, current->comm, task_pid_nr(current), - submit->fence); + submit->fence->seqno); rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); -- 2.39.5