enum intel_engine_id id;
int j;
+ if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+ seq_printf(m, "Wedged\n");
+ if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
+ seq_printf(m, "Reset in progress\n");
+ if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
+ seq_printf(m, "Waiter holding struct mutex\n");
+ if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
+ seq_printf(m, "struct_mutex blocked for reset\n");
+
if (!i915.enable_hangcheck) {
seq_printf(m, "Hangcheck disabled\n");
return 0;
mutex_lock(&dev->struct_mutex);
if (i915_gem_init_hw(dev)) {
DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
- atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+ set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
}
mutex_unlock(&dev->struct_mutex);
{
struct drm_device *dev = &dev_priv->drm;
struct i915_gpu_error *error = &dev_priv->gpu_error;
- unsigned reset_counter;
int ret;
mutex_lock(&dev->struct_mutex);
/* Clear any previous failed attempts at recovery. Time to try again. */
- atomic_andnot(I915_WEDGED, &error->reset_counter);
-
- /* Clear the reset-in-progress flag and increment the reset epoch. */
- reset_counter = atomic_inc_return(&error->reset_counter);
- if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
- ret = -EIO;
- goto error;
- }
+ __clear_bit(I915_WEDGED, &error->flags);
+ error->reset_count++;
pr_notice("drm/i915: Resetting chip after gpu hang\n");
goto error;
}
+ clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
mutex_unlock(&dev->struct_mutex);
/*
return 0;
error:
- atomic_or(I915_WEDGED, &error->reset_counter);
+ set_bit(I915_WEDGED, &error->flags);
mutex_unlock(&dev->struct_mutex);
return ret;
}
* State variable controlling the reset flow and count
*
* This is a counter which gets incremented when reset is triggered,
- * and again when reset has been handled. So odd values (lowest bit set)
- * means that reset is in progress and even values that
- * (reset_counter >> 1):th reset was successfully completed.
+ *
+ * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+ * meaning that any waiters holding onto the struct_mutex should
+ * relinquish the lock immediately in order for the reset to start.
*
* If reset is not completed succesfully, the I915_WEDGE bit is
* set meaning that hardware is terminally sour and there is no
* naturally enforces the correct ordering between the bail-out of the
* waiter and the gpu reset work code.
*/
- atomic_t reset_counter;
+ unsigned long reset_count;
-#define I915_RESET_IN_PROGRESS_FLAG 1
-#define I915_WEDGED (1 << 31)
+ unsigned long flags;
+#define I915_RESET_IN_PROGRESS 0
+#define I915_WEDGED (BITS_PER_LONG - 1)
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
- return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
- return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
- return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
- return unlikely(reset & I915_WEDGED);
-}
-
static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
{
- return __i915_reset_in_progress(i915_reset_counter(error));
+ return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
}
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
{
- return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+ return unlikely(test_bit(I915_WEDGED, &error->flags));
}
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
{
- return __i915_terminally_wedged(i915_reset_counter(error));
+ return i915_reset_in_progress(error) | i915_terminally_wedged(error);
}
static inline u32 i915_reset_count(struct i915_gpu_error *error)
{
- return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+ return READ_ONCE(error->reset_count);
}
void i915_gem_reset(struct drm_device *dev);
* for all other failure, such as an allocation failure, bail.
*/
DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
- atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+ set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
ret = 0;
}
} while (tmp != req);
}
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
{
- if (__i915_terminally_wedged(reset_counter))
+ struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+ if (i915_terminally_wedged(error))
return -EIO;
- if (__i915_reset_in_progress(reset_counter)) {
+ if (i915_reset_in_progress(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
* -EIO unconditionally for these.
*/
- if (!interruptible)
+ if (!dev_priv->mm.interruptible)
return -EIO;
return -EAGAIN;
struct i915_gem_context *ctx)
{
struct drm_i915_private *dev_priv = engine->i915;
- unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
struct drm_i915_gem_request *req;
u32 seqno;
int ret;
* EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
* and restart.
*/
- ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+ ret = i915_gem_check_wedge(dev_priv);
if (ret)
return ERR_PTR(ret);
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+ DRM_DEBUG_DRIVER("resetting chip\n");
+ kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
/*
- * Note that there's only one work item which does gpu resets, so we
- * need not worry about concurrent gpu resets potentially incrementing
- * error->reset_counter twice. We only need to take care of another
- * racing irq/hangcheck declaring the gpu dead for a second time. A
- * quick check for that is good enough: schedule_work ensures the
- * correct ordering between hang detection and this work item, and since
- * the reset in-progress bit is only ever set by code outside of this
- * work we don't need to worry about any other races.
+ * In most cases it's guaranteed that we get here with an RPM
+ * reference held, for example because there is a pending GPU
+ * request that won't finish until the reset is done. This
+ * isn't the case at least when we get here by doing a
+ * simulated reset via debugs, so get an RPM reference.
*/
- if (i915_reset_in_progress(&dev_priv->gpu_error)) {
- DRM_DEBUG_DRIVER("resetting chip\n");
- kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
- /*
- * In most cases it's guaranteed that we get here with an RPM
- * reference held, for example because there is a pending GPU
- * request that won't finish until the reset is done. This
- * isn't the case at least when we get here by doing a
- * simulated reset via debugs, so get an RPM reference.
- */
- intel_runtime_pm_get(dev_priv);
+ intel_runtime_pm_get(dev_priv);
- intel_prepare_reset(dev_priv);
+ intel_prepare_reset(dev_priv);
- /*
- * All state reset _must_ be completed before we update the
- * reset counter, for otherwise waiters might miss the reset
- * pending state and not properly drop locks, resulting in
- * deadlocks with the reset work.
- */
- ret = i915_reset(dev_priv);
+ /*
+ * All state reset _must_ be completed before we update the
+ * reset counter, for otherwise waiters might miss the reset
+ * pending state and not properly drop locks, resulting in
+ * deadlocks with the reset work.
+ */
+ ret = i915_reset(dev_priv);
- intel_finish_reset(dev_priv);
+ intel_finish_reset(dev_priv);
- intel_runtime_pm_put(dev_priv);
+ intel_runtime_pm_put(dev_priv);
- if (ret == 0)
- kobject_uevent_env(kobj,
- KOBJ_CHANGE, reset_done_event);
+ if (ret == 0)
+ kobject_uevent_env(kobj,
+ KOBJ_CHANGE, reset_done_event);
- /*
- * Note: The wake_up also serves as a memory barrier so that
- * waiters see the update value of the reset counter atomic_t.
- */
- wake_up_all(&dev_priv->gpu_error.reset_queue);
- }
+ /*
+ * Note: The wake_up also serves as a memory barrier so that
+ * waiters see the updated value of the dev_priv->gpu_error.
+ */
+ wake_up_all(&dev_priv->gpu_error.reset_queue);
}
static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
i915_capture_error_state(dev_priv, engine_mask, error_msg);
i915_report_and_clear_eir(dev_priv);
- if (engine_mask) {
- atomic_or(I915_RESET_IN_PROGRESS_FLAG,
- &dev_priv->gpu_error.reset_counter);
+ if (!engine_mask)
+ return;
- /*
- * Wakeup waiting processes so that the reset function
- * i915_reset_and_wakeup doesn't deadlock trying to grab
- * various locks. By bumping the reset counter first, the woken
- * processes will see a reset in progress and back off,
- * releasing their locks and then wait for the reset completion.
- * We must do this for _all_ gpu waiters that might hold locks
- * that the reset work needs to acquire.
- *
- * Note: The wake_up serves as the required memory barrier to
- * ensure that the waiters see the updated value of the reset
- * counter atomic_t.
- */
- i915_error_wake_up(dev_priv);
- }
+ if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+ &dev_priv->gpu_error.flags))
+ return;
+
+ /*
+ * Wakeup waiting processes so that the reset function
+ * i915_reset_and_wakeup doesn't deadlock trying to grab
+ * various locks. By bumping the reset counter first, the woken
+ * processes will see a reset in progress and back off,
+ * releasing their locks and then wait for the reset completion.
+ * We must do this for _all_ gpu waiters that might hold locks
+ * that the reset work needs to acquire.
+ *
+ * Note: The wake_up also provides a memory barrier to ensure that the
+ * waiters see the updated value of the reset flags.
+ */
+ i915_error_wake_up(dev_priv);
i915_reset_and_wakeup(dev_priv);
}
mutex_unlock(&dev->mode_config.mutex);
}
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+ struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+ if (i915_reset_in_progress(error))
+ return true;
+
+ if (crtc->reset_count != i915_reset_count(error))
+ return true;
+
+ return false;
+}
+
static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- unsigned reset_counter;
bool pending;
- reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
- if (intel_crtc->reset_counter != reset_counter)
+ if (abort_flip_on_reset(intel_crtc))
return false;
spin_lock_irq(&dev->event_lock);
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- unsigned reset_counter;
- reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- if (crtc->reset_counter != reset_counter)
+ if (abort_flip_on_reset(crtc))
return true;
/*
if (ret)
goto cleanup;
- intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+ intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+ if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
ret = -EIO;
goto cleanup;
}
struct intel_crtc_state *config;
- /* reset counter value when the last flip was submitted */
- unsigned int reset_counter;
+ /* global reset count when the last flip was submitted */
+ unsigned int reset_count;
/* Access to these should be protected by dev_priv->irq_lock. */
bool cpu_fifo_underrun_disabled;