From 9c8e1bdb958661c3925225dc19e2c32ea4c62612 Mon Sep 17 00:00:00 2001 From: Tomas Elf Date: Mon, 19 Oct 2015 17:51:57 +0100 Subject: [PATCH] drm/i915: Cope with request list state change during error state capture Since we're not synchronizing the ring request list during error state capture the request list state might change between the time the corresponding error request list was allocated and dimensioned to the time when the ring request list is actually captured into the error state. If this happens then do an early exit and be aware that the captured error state might not be fully reliable. * v2: - Chris Wilson: Removed WARN_ON from size check since having the error state request list and the live driver request list diverge like this is a legitimate behaviour. - Tomas Elf: Removed update of num_request field since this made no sense. Just exit and move on. * v3: - Chris Wilson: Removed error message at the point of early exit. The user is not interested in any state changes happening during the error state capture, only in the state that we're trying to capture at the point of the error. Signed-off-by: Tomas Elf Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gpu_error.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2f04e4f2ff35..f3dc67b33d94 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1071,6 +1071,25 @@ static void i915_gem_record_rings(struct drm_device *dev, list_for_each_entry(request, &ring->request_list, list) { struct drm_i915_error_request *erq; + if (count >= error->ring[i].num_requests) { + /* + * If the ring request list was changed in + * between the point where the error request + * list was created and dimensioned and this + * point then just exit early to avoid crashes. + * + * We don't need to communicate that the + * request list changed state during error + * state capture and that the error state is + * slightly incorrect as a consequence since we + * are typically only interested in the request + * list state at the point of error state + * capture, not in any changes happening during + * the capture. + */ + break; + } + erq = &error->ring[i].requests[count++]; erq->seqno = request->seqno; erq->jiffies = request->emitted_jiffies; -- 2.39.5