From bf7dc5b70952eb58c4fd57c9b964488650303a32 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 29 May 2015 17:43:24 +0100 Subject: [PATCH] drm/i915: i915_add_request must not fail The i915_add_request() function is called to keep track of work that has been written to the ring buffer. It adds epilogue commands to track progress (seqno updates and such), moves the request structure onto the right list and other such house keeping tasks. However, the work itself has already been written to the ring and will get executed whether or not the add request call succeeds. So no matter what goes wrong, there isn't a whole lot of point in failing the call. At the moment, this is fine(ish). If the add request does bail early on and not do the housekeeping, the request will still float around in the ring->outstanding_lazy_request field and be picked up next time. It means multiple pieces of work will be tagged as the same request and driver can't actually wait for the first piece of work until something else has been submitted. But it all sort of hangs together. This patch series is all about removing the OLR and guaranteeing that each piece of work gets its own personal request. That means that there is no more 'hoovering up of forgotten requests'. If the request does not get tracked then it will be leaked. Thus the add request call _must_ not fail. The previous patch should have already ensured that it _will_ not fail by removing the potential for running out of ring space. This patch enforces the rule by actually removing the early exit paths and the return code. Note that if something does manage to fail and the epilogue commands don't get written to the ring, the driver will still hang together. The request will be added to the tracking lists. And as in the old case, any subsequent work will generate a new seqno which will suffice for marking the old one as complete. v2: Improved WARNings (Tomas Elf review request). For: VIZ-5115 Signed-off-by: John Harrison Reviewed-by: Tomas Elf Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 +-- drivers/gpu/drm/i915/i915_gem.c | 43 +++++++++----------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 8 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +--- 7 files changed, 31 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6446911077d0..b687e9f4dcf9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2887,9 +2887,9 @@ void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); -int __i915_add_request(struct intel_engine_cs *ring, - struct drm_file *file, - struct drm_i915_gem_object *batch_obj); +void __i915_add_request(struct intel_engine_cs *ring, + struct drm_file *file, + struct drm_i915_gem_object *batch_obj); #define i915_add_request(ring) \ __i915_add_request(ring, NULL, NULL) int __i915_wait_request(struct drm_i915_gem_request *req, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b9e0989063b4..23ee54dbc9c5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1155,15 +1155,12 @@ i915_gem_check_wedge(struct i915_gpu_error *error, int i915_gem_check_olr(struct drm_i915_gem_request *req) { - int ret; - WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); - ret = 0; if (req == req->ring->outstanding_lazy_request) - ret = i915_add_request(req->ring); + i915_add_request(req->ring); - return ret; + return 0; } static void fake_irq(unsigned long data) @@ -2466,9 +2463,14 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) return 0; } -int __i915_add_request(struct intel_engine_cs *ring, - struct drm_file *file, - struct drm_i915_gem_object *obj) +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct intel_engine_cs *ring, + struct drm_file *file, + struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; @@ -2478,7 +2480,7 @@ int __i915_add_request(struct intel_engine_cs *ring, request = ring->outstanding_lazy_request; if (WARN_ON(request == NULL)) - return -ENOMEM; + return; if (i915.enable_execlists) { ringbuf = request->ctx->engine[ring->id].ringbuf; @@ -2500,15 +2502,12 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - if (i915.enable_execlists) { + if (i915.enable_execlists) ret = logical_ring_flush_all_caches(ringbuf, request->ctx); - if (ret) - return ret; - } else { + else ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; - } + /* Not allowed to fail! */ + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the @@ -2517,17 +2516,15 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->postfix = intel_ring_get_tail(ringbuf); - if (i915.enable_execlists) { + if (i915.enable_execlists) ret = ring->emit_request(ringbuf, request); - if (ret) - return ret; - } else { + else { ret = ring->add_request(ring); - if (ret) - return ret; request->tail = intel_ring_get_tail(ringbuf); } + /* Not allowed to fail! */ + WARN(ret, "emit|add_request failed: %d!\n", ret); request->head = request_start; @@ -2576,8 +2573,6 @@ int __i915_add_request(struct intel_engine_cs *ring, /* Sanity check that the reserved size was large enough. */ intel_ring_reserved_space_end(ringbuf); - - return 0; } static bool i915_context_is_banned(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 21f8eb659251..8ebc10d0527d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1069,7 +1069,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev, ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - (void)__i915_add_request(ring, file, obj); + __i915_add_request(ring, file, obj); } static int diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 521548a08578..ce4788ff3df5 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -173,7 +173,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring) i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); - ret = __i915_add_request(ring, NULL, so.obj); + __i915_add_request(ring, NULL, so.obj); /* __i915_add_request moves object to inactive if it fails */ out: i915_gem_render_state_fini(&so); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7451f38b2ef8..5373b0d1068a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1601,7 +1601,7 @@ static int intel_lr_context_render_state_init(struct intel_engine_cs *ring, i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); - ret = __i915_add_request(ring, file, so.obj); + __i915_add_request(ring, file, so.obj); /* intel_logical_ring_add_request moves object to inactive if it * fails */ out: diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 25c8ec697da1..e7534b946695 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -220,9 +220,7 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, WARN_ON(overlay->last_flip_req); i915_gem_request_assign(&overlay->last_flip_req, ring->outstanding_lazy_request); - ret = i915_add_request(ring); - if (ret) - return ret; + i915_add_request(ring); overlay->flip_tail = tail; ret = i915_wait_request(overlay->last_flip_req); @@ -291,7 +289,9 @@ static int intel_overlay_continue(struct intel_overlay *overlay, WARN_ON(overlay->last_flip_req); i915_gem_request_assign(&overlay->last_flip_req, ring->outstanding_lazy_request); - return i915_add_request(ring); + i915_add_request(ring); + + return 0; } static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0c2bf0ed633d..b48aea12acc4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2166,14 +2166,10 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) int intel_ring_idle(struct intel_engine_cs *ring) { struct drm_i915_gem_request *req; - int ret; /* We need to add any requests required to flush the objects and ring */ - if (ring->outstanding_lazy_request) { - ret = i915_add_request(ring); - if (ret) - return ret; - } + if (ring->outstanding_lazy_request) + i915_add_request(ring); /* Wait upon the last request to be completed */ if (list_empty(&ring->request_list)) -- 2.39.5