drm/i915: Unify active context tracking between legacy/execlists/guc

author Chris Wilson <chris@chris-wilson.co.uk>

Sun, 18 Dec 2016 15:37:20 +0000 (15:37 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Sun, 18 Dec 2016 16:18:50 +0000 (16:18 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Sun, 18 Dec 2016 15:37:20 +0000 (15:37 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Sun, 18 Dec 2016 16:18:50 +0000 (16:18 +0000)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 085c8ecd09b54c48208251f1b29089626e6e9094..c89b012af914b1030ea1c7025ffab0d0eed8b2f7 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2446,7 +2446,6 @@ struct drm_i915_private {
                         struct i915_perf_stream *exclusive_stream;
  
                         u32 specific_ctx_id;
-                       struct i915_vma *pinned_rcs_vma;
  
                         struct hrtimer poll_check_timer;
                         wait_queue_head_t poll_wq;
@@ -3488,9 +3487,6 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
  int i915_switch_context(struct drm_i915_gem_request *req);
  int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
-struct i915_vma *
-i915_gem_context_pin_legacy(struct i915_gem_context *ctx,
-                           unsigned int flags);
  void i915_gem_context_free(struct kref *ctx_ref);
  struct i915_gem_context *
  i915_gem_context_create_gvt(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 782be625d37d520bd837e72acdab553faf81037e..9b308af89ada449204628ca53fcf63d274a19ad9 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4206,7 +4206,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
         enum intel_engine_id id;
  
         for_each_engine(engine, dev_priv, id)
-               GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
+               GEM_BUG_ON(engine->last_retired_context != dev_priv->kernel_context);
  }
  
  int i915_gem_suspend(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index a57c22659a3c1c0b09f7e1f90c538a8093c3bb6e..598a70d2b695402829f7f9591576e7d10e64c3f3 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -416,21 +416,6 @@ out:
         return ctx;
  }
  
-static void i915_gem_context_unpin(struct i915_gem_context *ctx,
-                                  struct intel_engine_cs *engine)
-{
-       if (i915.enable_execlists) {
-               intel_lr_context_unpin(ctx, engine);
-       } else {
-               struct intel_context *ce = &ctx->engine[engine->id];
-
-               if (ce->state)
-                       i915_vma_unpin(ce->state);
-
-               i915_gem_context_put(ctx);
-       }
-}
-
  int i915_gem_context_init(struct drm_i915_private *dev_priv)
  {
         struct i915_gem_context *ctx;
@@ -490,10 +475,13 @@ void i915_gem_context_lost(struct drm_i915_private *dev_priv)
         lockdep_assert_held(&dev_priv->drm.struct_mutex);
  
         for_each_engine(engine, dev_priv, id) {
-               if (engine->last_context) {
-                       i915_gem_context_unpin(engine->last_context, engine);
-                       engine->last_context = NULL;
-               }
+               engine->legacy_active_context = NULL;
+
+               if (!engine->last_retired_context)
+                       continue;
+
+               engine->context_unpin(engine, engine->last_retired_context);
+               engine->last_retired_context = NULL;
         }
  
         /* Force the GPU state to be restored on enabling */
@@ -715,7 +703,7 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
         if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
                 return false;
  
-       return to == engine->last_context;
+       return to == engine->legacy_active_context;
  }
  
  static bool
@@ -727,11 +715,11 @@ needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
                 return false;
  
         /* Always load the ppgtt on first use */
-       if (!engine->last_context)
+       if (!engine->legacy_active_context)
                 return true;
  
         /* Same context without new entries, skip */
-       if (engine->last_context == to &&
+       if (engine->legacy_active_context == to &&
             !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
                 return false;
  
@@ -761,57 +749,20 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
         return false;
  }
  
-struct i915_vma *
-i915_gem_context_pin_legacy(struct i915_gem_context *ctx,
-                           unsigned int flags)
-{
-       struct i915_vma *vma = ctx->engine[RCS].state;
-       int ret;
-
-       /* Clear this page out of any CPU caches for coherent swap-in/out.
-        * We only want to do this on the first bind so that we do not stall
-        * on an active context (which by nature is already on the GPU).
-        */
-       if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-               ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
-               if (ret)
-                       return ERR_PTR(ret);
-       }
-
-       ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
-       if (ret)
-               return ERR_PTR(ret);
-
-       return vma;
-}
-
  static int do_rcs_switch(struct drm_i915_gem_request *req)
  {
         struct i915_gem_context *to = req->ctx;
         struct intel_engine_cs *engine = req->engine;
         struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-       struct i915_vma *vma;
-       struct i915_gem_context *from;
+       struct i915_gem_context *from = engine->legacy_active_context;
         u32 hw_flags;
         int ret, i;
  
+       GEM_BUG_ON(engine->id != RCS);
+
         if (skip_rcs_switch(ppgtt, engine, to))
                 return 0;
  
-       /* Trying to pin first makes error handling easier. */
-       vma = i915_gem_context_pin_legacy(to, 0);
-       if (IS_ERR(vma))
-               return PTR_ERR(vma);
-
-       /*
-        * Pin can switch back to the default context if we end up calling into
-        * evict_everything - as a last ditch gtt defrag effort that also
-        * switches to the default context. Hence we need to reload from here.
-        *
-        * XXX: Doing so is painfully broken!
-        */
-       from = engine->last_context;
-
         if (needs_pd_load_pre(ppgtt, engine, to)) {
                 /* Older GENs and non render rings still want the load first,
                  * "PP_DCLV followed by PP_DIR_BASE register through Load
@@ -820,7 +771,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
                 trace_switch_mm(engine, to);
                 ret = ppgtt->switch_mm(ppgtt, req);
                 if (ret)
-                       goto err;
+                       return ret;
         }
  
         if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@@ -837,29 +788,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
         if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
                 ret = mi_set_context(req, hw_flags);
                 if (ret)
-                       goto err;
-       }
+                       return ret;
  
-       /* The backing object for the context is done after switching to the
-        * *next* context. Therefore we cannot retire the previous context until
-        * the next context has already started running. In fact, the below code
-        * is a bit suboptimal because the retiring can occur simply after the
-        * MI_SET_CONTEXT instead of when the next seqno has completed.
-        */
-       if (from != NULL) {
-               /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
-                * whole damn pipeline, we don't need to explicitly mark the
-                * object dirty. The only exception is that the context must be
-                * correct in case the object gets swapped out. Ideally we'd be
-                * able to defer doing this until we know the object would be
-                * swapped, but there is no way to do that yet.
-                */
-               i915_vma_move_to_active(from->engine[RCS].state, req, 0);
-               /* state is kept alive until the next request */
-               i915_vma_unpin(from->engine[RCS].state);
-               i915_gem_context_put(from);
+               engine->legacy_active_context = to;
         }
-       engine->last_context = i915_gem_context_get(to);
  
         /* GEN8 does *not* require an explicit reload if the PDPs have been
          * setup, and we do not wish to move them.
@@ -900,10 +832,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
         }
  
         return 0;
-
-err:
-       i915_vma_unpin(vma);
-       return ret;
  }
  
  /**
@@ -943,12 +871,6 @@ int i915_switch_context(struct drm_i915_gem_request *req)
                         ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
                 }
  
-               if (to != engine->last_context) {
-                       if (engine->last_context)
-                               i915_gem_context_put(engine->last_context);
-                       engine->last_context = i915_gem_context_get(to);
-               }
-
                 return 0;
         }
  
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index fcf22b0e2967dde12f75c3785033c3a2e3c9ce7b..475d557f2301ffd3dc37006241d9e351b373105c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -206,6 +206,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
  
  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
  {
+       struct intel_engine_cs *engine = request->engine;
         struct i915_gem_active *active, *next;
  
         lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -216,9 +217,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
  
         trace_i915_gem_request_retire(request);
  
-       spin_lock_irq(&request->engine->timeline->lock);
+       spin_lock_irq(&engine->timeline->lock);
         list_del_init(&request->link);
-       spin_unlock_irq(&request->engine->timeline->lock);
+       spin_unlock_irq(&engine->timeline->lock);
  
         /* We know the GPU must have read the request to have
          * sent us the seqno + interrupt, so use the position
@@ -266,17 +267,20 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
  
         i915_gem_request_remove_from_client(request);
  
-       if (request->previous_context) {
-               if (i915.enable_execlists)
-                       intel_lr_context_unpin(request->previous_context,
-                                              request->engine);
-       }
-
         /* Retirement decays the ban score as it is a sign of ctx progress */
         if (request->ctx->ban_score > 0)
                 request->ctx->ban_score--;
  
-       i915_gem_context_put(request->ctx);
+       /* The backing object for the context is done after switching to the
+        * *next* context. Therefore we cannot retire the previous context until
+        * the next context has already started running. However, since we
+        * cannot take the required locks at i915_gem_request_submit() we
+        * defer the unpinning of the active context to now, retirement of
+        * the subsequent request.
+        */
+       if (engine->last_retired_context)
+               engine->context_unpin(engine, engine->last_retired_context);
+       engine->last_retired_context = request->ctx;
  
         dma_fence_signal(&request->fence);
  
@@ -524,10 +528,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         if (ret)
                 return ERR_PTR(ret);
  
-       ret = reserve_global_seqno(dev_priv);
+       /* Pinning the contexts may generate requests in order to acquire
+        * GGTT space, so do this first before we reserve a seqno for
+        * ourselves.
+        */
+       ret = engine->context_pin(engine, ctx);
         if (ret)
                 return ERR_PTR(ret);
  
+       ret = reserve_global_seqno(dev_priv);
+       if (ret)
+               goto err_unpin;
+
         /* Move the oldest request to the slab-cache (if not in use!) */
         req = list_first_entry_or_null(&engine->timeline->requests,
                                        typeof(*req), link);
@@ -593,11 +605,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         INIT_LIST_HEAD(&req->active_list);
         req->i915 = dev_priv;
         req->engine = engine;
-       req->ctx = i915_gem_context_get(ctx);
+       req->ctx = ctx;
  
         /* No zalloc, must clear what we need by hand */
         req->global_seqno = 0;
-       req->previous_context = NULL;
         req->file_priv = NULL;
         req->batch = NULL;
  
@@ -633,10 +644,11 @@ err_ctx:
         GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
         GEM_BUG_ON(!list_empty(&req->priotree.waiters_list));
  
-       i915_gem_context_put(ctx);
         kmem_cache_free(dev_priv->requests, req);
  err_unreserve:
         dev_priv->gt.active_requests--;
+err_unpin:
+       engine->context_unpin(engine, ctx);
         return ERR_PTR(ret);
  }
  
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h

index e2b077df2da023107c92a818d3c74042dcf73bac..8569b35a332a219ddd8424d853118c9db6745f6d 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -170,17 +170,6 @@ struct drm_i915_gem_request {
         /** Preallocate space in the ring for the emitting the request */
         u32 reserved_space;
  
-       /**
-        * Context related to the previous request.
-        * As the contexts are accessed by the hardware until the switch is
-        * completed to a new context, the hardware may still be writing
-        * to the context object after the breadcrumb is visible. We must
-        * not unpin/unbind/prune that object whilst still active and so
-        * we keep the previous context pinned until the following (this)
-        * request is retired.
-        */
-       struct i915_gem_context *previous_context;
-
         /** Batch buffer related to this request if any (used for
          * error state dump only).
          */
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c

index a1232034f37a87cce8499b4ea26c0b26490b65b8..3e20fe2be8111d7f24264eb58d2c532b87783430 100644 (file)
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -534,17 +534,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
  
  static void i915_guc_submit(struct drm_i915_gem_request *rq)
  {
-       struct intel_engine_cs *engine = rq->engine;
-
-       /* We keep the previous context alive until we retire the following
-        * request. This ensures that any the context object is still pinned
-        * for any residual writes the HW makes into it on the context switch
-        * into the next object following the breadcrumb. Otherwise, we may
-        * retire the context too early.
-        */
-       rq->previous_context = engine->last_context;
-       engine->last_context = rq->ctx;
-
         i915_gem_request_submit(rq);
         __i915_guc_submit(rq);
  }
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c

index ae7bd0ed7b1afb60e436fb2428652c47308835f0..da8537cb8136a4ca457fa0b468e4d83d944903ef 100644 (file)
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -743,7 +743,7 @@ static int i915_oa_read(struct i915_perf_stream *stream,
  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  {
         struct drm_i915_private *dev_priv = stream->dev_priv;
-       struct i915_vma *vma;
+       struct intel_engine_cs *engine = dev_priv->engine[RCS];
         int ret;
  
         ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -755,19 +755,16 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
          *
          * NB: implied RCS engine...
          */
-       vma = i915_gem_context_pin_legacy(stream->ctx, 0);
-       if (IS_ERR(vma)) {
-               ret = PTR_ERR(vma);
+       ret = engine->context_pin(engine, stream->ctx);
+       if (ret)
                 goto unlock;
-       }
-
-       dev_priv->perf.oa.pinned_rcs_vma = vma;
  
         /* Explicitly track the ID (instead of calling i915_ggtt_offset()
          * on the fly) considering the difference with gen8+ and
          * execlists
          */
-       dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
+       dev_priv->perf.oa.specific_ctx_id =
+               i915_ggtt_offset(stream->ctx->engine[engine->id].state);
  
  unlock:
         mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -785,13 +782,12 @@ unlock:
  static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
  {
         struct drm_i915_private *dev_priv = stream->dev_priv;
+       struct intel_engine_cs *engine = dev_priv->engine[RCS];
  
         mutex_lock(&dev_priv->drm.struct_mutex);
  
-       i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
-       dev_priv->perf.oa.pinned_rcs_vma = NULL;
-
         dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+       engine->context_unpin(engine, stream->ctx);
  
         mutex_unlock(&dev_priv->drm.struct_mutex);
  }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index bc1af87789bc9d2507185743922176f4bcb9d0f9..9f356ad5329e1864c8b23a79c5da94e07ac735f0 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12295,7 +12295,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                 INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
                 queue_work(system_unbound_wq, &work->mmio_work);
         } else {
-               request = i915_gem_request_alloc(engine, engine->last_context);
+               request = i915_gem_request_alloc(engine,
+                                                dev_priv->kernel_context);
                 if (IS_ERR(request)) {
                         ret = PTR_ERR(request);
                         goto cleanup_unpin;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c

index e8afe11858314406dd1947d2dc3bfc9d6255a740..97bbbc3d6aa8c3407ff76a60e498a723dd13a03d 100644 (file)
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -304,15 +304,30 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  {
         int ret;
  
-       ret = intel_engine_init_breadcrumbs(engine);
+       /* We may need to do things with the shrinker which
+        * require us to immediately switch back to the default
+        * context. This can cause a problem as pinning the
+        * default context also requires GTT space which may not
+        * be available. To avoid this we always pin the default
+        * context.
+        */
+       ret = engine->context_pin(engine, engine->i915->kernel_context);
         if (ret)
                 return ret;
  
+       ret = intel_engine_init_breadcrumbs(engine);
+       if (ret)
+               goto err_unpin;
+
         ret = i915_gem_render_state_init(engine);
         if (ret)
-               return ret;
+               goto err_unpin;
  
         return 0;
+
+err_unpin:
+       engine->context_unpin(engine, engine->i915->kernel_context);
+       return ret;
  }
  
  /**
@@ -330,6 +345,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
         intel_engine_fini_breadcrumbs(engine);
         intel_engine_cleanup_cmd_parser(engine);
         i915_gem_batch_pool_fini(&engine->batch_pool);
+
+       engine->context_unpin(engine, engine->i915->kernel_context);
  }
  
  u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index b848b5f205ce50e82f8d1b930630b1c0110b5af3..599afedc449415a410b6c27e2e50b0a38ae0354e 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -512,15 +512,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 RB_CLEAR_NODE(&cursor->priotree.node);
                 cursor->priotree.priority = INT_MAX;
  
-               /* We keep the previous context alive until we retire the
-                * following request. This ensures that any the context object
-                * is still pinned for any residual writes the HW makes into it
-                * on the context switch into the next object following the
-                * breadcrumb. Otherwise, we may retire the context too early.
-                */
-               cursor->previous_context = engine->last_context;
-               engine->last_context = cursor->ctx;
-
                 __i915_gem_request_submit(cursor);
                 last = cursor;
                 submit = true;
@@ -772,8 +763,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
         /* XXX Do we need to preempt to make room for us and our deps? */
  }
  
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
-                               struct intel_engine_cs *engine)
+static int execlists_context_pin(struct intel_engine_cs *engine,
+                                struct i915_gem_context *ctx)
  {
         struct intel_context *ce = &ctx->engine[engine->id];
         void *vaddr;
@@ -784,6 +775,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
         if (ce->pin_count++)
                 return 0;
  
+       if (!ce->state) {
+               ret = execlists_context_deferred_alloc(ctx, engine);
+               if (ret)
+                       goto err;
+       }
+
         ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
                            PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
         if (ret)
@@ -825,8 +822,8 @@ err:
         return ret;
  }
  
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
-                           struct intel_engine_cs *engine)
+static void execlists_context_unpin(struct intel_engine_cs *engine,
+                                   struct i915_gem_context *ctx)
  {
         struct intel_context *ce = &ctx->engine[engine->id];
  
@@ -850,24 +847,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
         struct intel_context *ce = &request->ctx->engine[engine->id];
         int ret;
  
+       GEM_BUG_ON(!ce->pin_count);
+
         /* Flush enough space to reduce the likelihood of waiting after
          * we start building the request - in which case we will just
          * have to repeat work.
          */
         request->reserved_space += EXECLISTS_REQUEST_SIZE;
  
-       if (!ce->state) {
-               ret = execlists_context_deferred_alloc(request->ctx, engine);
-               if (ret)
-                       return ret;
-       }
-
+       GEM_BUG_ON(!ce->ring);
         request->ring = ce->ring;
  
-       ret = intel_lr_context_pin(request->ctx, engine);
-       if (ret)
-               return ret;
-
         if (i915.enable_guc_submission) {
                 /*
                  * Check that the GuC has space for the request before
@@ -876,7 +866,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
                  */
                 ret = i915_guc_wq_reserve(request);
                 if (ret)
-                       goto err_unpin;
+                       goto err;
         }
  
         ret = intel_ring_begin(request, 0);
@@ -904,8 +894,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
  err_unreserve:
         if (i915.enable_guc_submission)
                 i915_guc_wq_unreserve(request);
-err_unpin:
-       intel_lr_context_unpin(request->ctx, engine);
+err:
         return ret;
  }
  
@@ -1789,13 +1778,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
         if (engine->cleanup)
                 engine->cleanup(engine);
  
-       intel_engine_cleanup_common(engine);
-
         if (engine->status_page.vma) {
                 i915_gem_object_unpin_map(engine->status_page.vma->obj);
                 engine->status_page.vma = NULL;
         }
-       intel_lr_context_unpin(dev_priv->kernel_context, engine);
+
+       intel_engine_cleanup_common(engine);
  
         lrc_destroy_wa_ctx_obj(engine);
         engine->i915 = NULL;
@@ -1820,6 +1808,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
         /* Default vfuncs which can be overriden by each engine. */
         engine->init_hw = gen8_init_common_ring;
         engine->reset_hw = reset_common_ring;
+
+       engine->context_pin = execlists_context_pin;
+       engine->context_unpin = execlists_context_unpin;
+
         engine->emit_flush = gen8_emit_flush;
         engine->emit_breadcrumb = gen8_emit_breadcrumb;
         engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
@@ -1902,18 +1894,6 @@ logical_ring_init(struct intel_engine_cs *engine)
         if (ret)
                 goto error;
  
-       ret = execlists_context_deferred_alloc(dctx, engine);
-       if (ret)
-               goto error;
-
-       /* As this is the default context, always pin it */
-       ret = intel_lr_context_pin(dctx, engine);
-       if (ret) {
-               DRM_ERROR("Failed to pin context for %s: %d\n",
-                         engine->name, ret);
-               goto error;
-       }
-
         /* And setup the hardware status page. */
         ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
         if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h

index 7c640324339491134ed6ed7155abbd2dfc2c4777..b5630331086ae61c1f7c5ef00e72a4a85896fc7a 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -79,13 +79,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv);
  #define LRC_PPHWSP_PN  (LRC_GUCSHR_PN + 1)
  #define LRC_STATE_PN   (LRC_PPHWSP_PN + 1)
  
+struct drm_i915_private;
  struct i915_gem_context;
  
  uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
-                           struct intel_engine_cs *engine);
-
-struct drm_i915_private;
  
  void intel_lr_context_resume(struct drm_i915_private *dev_priv);
  uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c

index 06e55967f1809a91a22283d28733a930c087c22d..1d4699d8fb10d9adc2142624174c6433134e32fb 100644 (file)
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6792,7 +6792,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work)
                 goto out;
  
         rcs = dev_priv->engine[RCS];
-       if (rcs->last_context)
+       if (rcs->last_retired_context)
                 goto out;
  
         if (!rcs->init_context)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 0b7d13b6e2286f070877753990c1543e1001e28e..00ff572541b64a5b50cf84bdda11b42b9d94354b 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1939,8 +1939,26 @@ intel_ring_free(struct intel_ring *ring)
         kfree(ring);
  }
  
-static int intel_ring_context_pin(struct i915_gem_context *ctx,
-                                 struct intel_engine_cs *engine)
+static int context_pin(struct i915_gem_context *ctx, unsigned int flags)
+{
+       struct i915_vma *vma = ctx->engine[RCS].state;
+       int ret;
+
+       /* Clear this page out of any CPU caches for coherent swap-in/out.
+        * We only want to do this on the first bind so that we do not stall
+        * on an active context (which by nature is already on the GPU).
+        */
+       if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+               ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+               if (ret)
+                       return ret;
+       }
+
+       return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
+}
+
+static int intel_ring_context_pin(struct intel_engine_cs *engine,
+                                 struct i915_gem_context *ctx)
  {
         struct intel_context *ce = &ctx->engine[engine->id];
         int ret;
@@ -1951,13 +1969,15 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
                 return 0;
  
         if (ce->state) {
-               struct i915_vma *vma;
+               unsigned int flags;
+
+               flags = 0;
+               if (ctx == ctx->i915->kernel_context)
+                       flags = PIN_HIGH;
  
-               vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH);
-               if (IS_ERR(vma)) {
-                       ret = PTR_ERR(vma);
+               ret = context_pin(ctx, flags);
+               if (ret)
                         goto error;
-               }
         }
  
         /* The kernel context is only used as a placeholder for flushing the
@@ -1978,12 +1998,13 @@ error:
         return ret;
  }
  
-static void intel_ring_context_unpin(struct i915_gem_context *ctx,
-                                    struct intel_engine_cs *engine)
+static void intel_ring_context_unpin(struct intel_engine_cs *engine,
+                                    struct i915_gem_context *ctx)
  {
         struct intel_context *ce = &ctx->engine[engine->id];
  
         lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+       GEM_BUG_ON(ce->pin_count == 0);
  
         if (--ce->pin_count)
                 return;
@@ -2008,17 +2029,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
         if (ret)
                 goto error;
  
-       /* We may need to do things with the shrinker which
-        * require us to immediately switch back to the default
-        * context. This can cause a problem as pinning the
-        * default context also requires GTT space which may not
-        * be available. To avoid this we always pin the default
-        * context.
-        */
-       ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
-       if (ret)
-               goto error;
-
         ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
         if (IS_ERR(ring)) {
                 ret = PTR_ERR(ring);
@@ -2077,8 +2087,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
  
         intel_engine_cleanup_common(engine);
  
-       intel_ring_context_unpin(dev_priv->kernel_context, engine);
-
         engine->i915 = NULL;
         dev_priv->engine[engine->id] = NULL;
         kfree(engine);
@@ -2099,12 +2107,15 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
  {
         int ret;
  
+       GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
+
         /* Flush enough space to reduce the likelihood of waiting after
          * we start building the request - in which case we will just
          * have to repeat work.
          */
         request->reserved_space += LEGACY_REQUEST_SIZE;
  
+       GEM_BUG_ON(!request->engine->buffer);
         request->ring = request->engine->buffer;
  
         ret = intel_ring_begin(request, 0);
@@ -2584,6 +2595,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
         engine->init_hw = init_ring_common;
         engine->reset_hw = reset_ring_common;
  
+       engine->context_pin = intel_ring_context_pin;
+       engine->context_unpin = intel_ring_context_unpin;
+
         engine->emit_breadcrumb = i9xx_emit_breadcrumb;
         engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
         if (i915.semaphores) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index 3f43adefd1c0f13617249cf5da8834d3dbbc4e55..4f1271821fa9f9c18e0906852bf6347fcb1dda5c 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -266,6 +266,10 @@ struct intel_engine_cs {
         void            (*reset_hw)(struct intel_engine_cs *engine,
                                     struct drm_i915_gem_request *req);
  
+       int             (*context_pin)(struct intel_engine_cs *engine,
+                                      struct i915_gem_context *ctx);
+       void            (*context_unpin)(struct intel_engine_cs *engine,
+                                        struct i915_gem_context *ctx);
         int             (*init_context)(struct drm_i915_gem_request *req);
  
         int             (*emit_flush)(struct drm_i915_gem_request *request,
@@ -379,7 +383,24 @@ struct intel_engine_cs {
         bool preempt_wa;
         u32 ctx_desc_template;
  
-       struct i915_gem_context *last_context;
+       /* Contexts are pinned whilst they are active on the GPU. The last
+        * context executed remains active whilst the GPU is idle - the
+        * switch away and write to the context object only occurs on the
+        * next execution.  Contexts are only unpinned on retirement of the
+        * following request ensuring that we can always write to the object
+        * on the context switch even after idling. Across suspend, we switch
+        * to the kernel context and trash it as the save may not happen
+        * before the hardware is powered down.
+        */
+       struct i915_gem_context *last_retired_context;
+
+       /* We track the current MI_SET_CONTEXT in order to eliminate
+        * redudant context switches. This presumes that requests are not
+        * reordered! Or when they are the tracking is updated along with
+        * the emission of individual requests into the legacy command
+        * stream (ring).
+        */
+       struct i915_gem_context *legacy_active_context;
  
         struct intel_engine_hangcheck hangcheck;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Sun, 18 Dec 2016 15:37:20 +0000 (15:37 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Sun, 18 Dec 2016 16:18:50 +0000 (16:18 +0000)
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_context.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_request.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_request.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_guc_submission.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_perf.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_display.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_engine_cs.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_lrc.h		patch \| blob \| history
drivers/gpu/drm/i915/intel_pm.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_ringbuffer.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_ringbuffer.h		patch \| blob \| history