From 0e178aef8f13ff11dc9dec82c2cd849981cb1ad1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Jan 2017 17:59:06 +0200 Subject: [PATCH] drm/i915: Detect a failed GPU reset+recovery If we can't recover the GPU after the reset, mark it as wedged to cancel the outstanding tasks and to prevent new users from trying to use the broken GPU. v2: Check the same ring is hung again before declaring the reset broken. v3: use engine_stalled (Mika) Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-6-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 7 ++++++- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fbad2b64b890..348dec482502 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv) pr_notice("drm/i915: Resetting chip after gpu hang\n"); disable_irq(dev_priv->drm.irq); - i915_gem_reset_prepare(dev_priv); + ret = i915_gem_reset_prepare(dev_priv); + if (ret) { + DRM_ERROR("GPU recovery failed\n"); + intel_gpu_reset(dev_priv, ALL_ENGINES); + goto error; + } ret = intel_gpu_reset(dev_priv, ALL_ENGINES); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1cd485c314e6..e75e367f253a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3327,7 +3327,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) return READ_ONCE(error->reset_count); } -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 832ad092f462..3e10e8101014 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine) return true; } -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) +int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* Ensure irq handler finishes, and not run again. */ - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct drm_i915_gem_request *request; + tasklet_kill(&engine->irq_tasklet); + if (engine_stalled(engine)) { + request = i915_gem_find_active_request(engine); + if (request && request->fence.error == -EIO) + err = -EIO; /* Previous reset failed! */ + } + } + i915_gem_revoke_fences(dev_priv); + + return err; } static void skip_request(struct drm_i915_gem_request *request) -- 2.39.5