simulated = dev_priv->gpu_error.stop_rings != 0;
- if (!simulated && get_seconds() - dev_priv->gpu_error.last_reset < 5) {
- DRM_ERROR("GPU hanging too fast, declaring wedged!\n");
- ret = -ENODEV;
- } else {
- ret = intel_gpu_reset(dev);
-
- /* Also reset the gpu hangman. */
- if (simulated) {
- DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
- dev_priv->gpu_error.stop_rings = 0;
- if (ret == -ENODEV) {
- DRM_ERROR("Reset not implemented, but ignoring "
- "error for simulated gpu hangs\n");
- ret = 0;
- }
- } else
- dev_priv->gpu_error.last_reset = get_seconds();
+ ret = intel_gpu_reset(dev);
+
+ /* Also reset the gpu hangman. */
+ if (simulated) {
+ DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
+ dev_priv->gpu_error.stop_rings = 0;
+ if (ret == -ENODEV) {
+ DRM_ERROR("Reset not implemented, but ignoring "
+ "error for simulated gpu hangs\n");
+ ret = 0;
+ }
}
+
if (ret) {
DRM_ERROR("Failed to reset chip.\n");
mutex_unlock(&dev->struct_mutex);
/* This context had batch active when hang was declared */
unsigned batch_active;
+
+ /* Time when this context was last blamed for a GPU reset */
+ unsigned long guilty_ts;
+
+ /* This context is banned to submit more work */
+ bool banned;
};
/* This must match up with the value previously used for execbuf2.rsvd1. */
/* For hangcheck timer */
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
+ /* Hang gpu twice in this window and your context gets banned */
+#define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
+
struct timer_list hangcheck_timer;
/* For reset and error_state handling. */
struct drm_i915_error_state *first_error;
struct work_struct work;
- unsigned long last_reset;
-
/**
* State variable and reset counter controlling the reset flow
*
return false;
}
+static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
+{
+ const unsigned long elapsed = get_seconds() - hs->guilty_ts;
+
+ if (hs->banned)
+ return true;
+
+ if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
+ DRM_ERROR("context hanging too fast, declaring banned!\n");
+ return true;
+ }
+
+ return false;
+}
+
static void i915_set_reset_status(struct intel_ring_buffer *ring,
struct drm_i915_gem_request *request,
u32 acthd)
hs = &request->file_priv->hang_stats;
if (hs) {
- if (guilty)
+ if (guilty) {
+ hs->banned = i915_context_is_banned(hs);
hs->batch_active++;
- else
+ hs->guilty_ts = get_seconds();
+ } else {
hs->batch_pending++;
+ }
}
}
struct drm_i915_gem_object *batch_obj;
struct drm_clip_rect *cliprects = NULL;
struct intel_ring_buffer *ring;
+ struct i915_ctx_hang_stats *hs;
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
u32 mask, flags;
if (ret)
goto err;
+ hs = i915_gem_context_get_hang_stats(dev, file, ctx_id);
+ if (IS_ERR(hs)) {
+ ret = PTR_ERR(hs);
+ goto err;
+ }
+
+ if (hs->banned) {
+ ret = -EIO;
+ goto err;
+ }
+
ret = i915_switch_context(ring, file, ctx_id);
if (ret)
goto err;