drm/i915: Reserve space in the global seqno during request allocation
Author:     Chris Wilson <chris@chris-wilson.co.uk>
AuthorDate: Fri, 28 Oct 2016 12:58:56 +0000 (13:58 +0100)
Commit:     Chris Wilson <chris@chris-wilson.co.uk>
CommitDate: Fri, 28 Oct 2016 19:53:56 +0000 (20:53 +0100)
A restriction on our global seqnos is that they cannot wrap and that
we cannot use the value 0. This allows us to detect when a request
has not yet been submitted (its global seqno is still 0), and it
ensures that hardware semaphores are monotonic, as required by older
hardware. To meet these restrictions when we defer the assignment of
the global seqno, we must check during request construction that
there is an available slot in the global seqno space. If that test
fails, we wait for all outstanding requests to complete and reset the
hardware seqno back to 0.
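
The check itself is cheap. As an illustrative sketch (the helper name
is ours; the patch open-codes this test in reserve_global_seqno()),
it relies on well-defined unsigned wrap-around: once adding the
number of in-flight requests to the next seqno overflows u32, the sum
no longer compares greater than the starting value, and we must idle
the GPU and reset the timeline:

	/* Hypothetical helper; mirrors the check open-coded in
	 * reserve_global_seqno() below.
	 */
	static bool seqno_space_available(u32 next_seqno, u32 active_requests)
	{
		/* False once next_seqno + active_requests wraps past ~0u */
		return next_seqno + active_requests > next_seqno;
	}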

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-33-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_timeline.h

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1723a1f5b20edb24f6ee87ca5ee37bcbcc0e91f7..9bef6f55f99dc8c6e731ee2c60cc40b0d85b9fd5 100644
@@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
                                seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
                                           engine->name,
                                           i915_gem_request_get_seqno(work->flip_queued_req),
-                                          dev_priv->gt.global_timeline.next_seqno,
+                                          atomic_read(&dev_priv->gt.global_timeline.next_seqno),
                                           intel_engine_get_seqno(engine),
                                           i915_gem_request_completed(work->flip_queued_req));
                        } else
@@ -1046,7 +1046,7 @@ i915_next_seqno_get(void *data, u64 *val)
 {
        struct drm_i915_private *dev_priv = data;
 
-       *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
+       *val = atomic_read(&dev_priv->gt.global_timeline.next_seqno);
        return 0;
 }
 
@@ -2277,8 +2277,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
        struct drm_file *file;
 
        seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-       seq_printf(m, "GPU busy? %s [%x]\n",
-                  yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
+       seq_printf(m, "GPU busy? %s [%d requests]\n",
+                  yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
        seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
        seq_printf(m, "Frequency requested %d\n",
                   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
@@ -2313,7 +2313,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
        if (INTEL_GEN(dev_priv) >= 6 &&
            dev_priv->rps.enabled &&
-           dev_priv->gt.active_engines) {
+           dev_priv->gt.active_requests) {
                u32 rpup, rpupei;
                u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8b987f772d4710e3728a12163a9751d1b6b219cc..eacb144af29e28703e48cccf63bf61b9c5bae4ff 100644
@@ -2092,6 +2092,7 @@ struct drm_i915_private {
 
                struct list_head timelines;
                struct i915_gem_timeline global_timeline;
+               u32 active_requests;
 
                /**
                 * Is the GPU currently considered idle, or busy executing
@@ -2100,7 +2101,6 @@ struct drm_i915_private {
                 * In order to reduce the effect on performance, there
                 * is a slight delay before we do so.
                 */
-               unsigned int active_engines;
                bool awake;
 
                /**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4cfb88bd8048c1a438216367bd06c5b0ee7b369..8a5d20715e5f75083771c73b6434741ee4f09bd5 100644
@@ -2688,8 +2688,6 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
                spin_unlock(&engine->execlist_lock);
        }
-
-       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
@@ -2746,7 +2744,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
        if (!READ_ONCE(dev_priv->gt.awake))
                return;
 
-       if (READ_ONCE(dev_priv->gt.active_engines))
+       if (READ_ONCE(dev_priv->gt.active_requests))
                return;
 
        rearm_hangcheck =
@@ -2760,7 +2758,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
                goto out_rearm;
        }
 
-       if (dev_priv->gt.active_engines)
+       if (dev_priv->gt.active_requests)
                goto out_unlock;
 
        for_each_engine(engine, dev_priv, id)
@@ -4399,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev)
                goto err;
 
        i915_gem_retire_requests(dev_priv);
+       GEM_BUG_ON(dev_priv->gt.active_requests);
 
        assert_kernel_context_is_current(dev_priv);
        i915_gem_context_lost(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9c34a4c540b503d884564f8cf4f9f9ad34850141..9b22f66464f08a5994f696afa3bd8d414390a5a7 100644
@@ -159,6 +159,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
         */
        list_del(&request->ring_link);
        request->ring->last_retired_head = request->postfix;
+       request->i915->gt.active_requests--;
 
        /* Walk through the active list, calling retire on each. This allows
         * objects to track their GPU activity and mark themselves as idle
@@ -253,13 +254,15 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
                return ret;
 
        i915_gem_retire_requests(i915);
+       GEM_BUG_ON(i915->gt.active_requests > 1);
 
        /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-       if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
+       if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
                while (intel_kick_waiters(i915) || intel_kick_signalers(i915))
                        yield();
                yield();
        }
+       atomic_set(&timeline->next_seqno, seqno);
 
        /* Finally reset hw state */
        for_each_engine(engine, i915, id)
@@ -279,7 +282,6 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
-       int ret;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
@@ -289,34 +291,33 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
        /* HWS page needs to be set less than what we
         * will inject to ring
         */
-       ret = i915_gem_init_global_seqno(dev_priv, seqno - 1);
-       if (ret)
-               return ret;
-
-       dev_priv->gt.global_timeline.next_seqno = seqno;
-       return 0;
+       return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
 
-static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv,
-                                    u32 *seqno)
+static int reserve_global_seqno(struct drm_i915_private *i915)
 {
-       struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline;
-
-       /* reserve 0 for non-seqno */
-       if (unlikely(tl->next_seqno == 0)) {
-               int ret;
+       u32 active_requests = ++i915->gt.active_requests;
+       u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
+       int ret;
 
-               ret = i915_gem_init_global_seqno(dev_priv, 0);
-               if (ret)
-                       return ret;
+       /* Reservation is fine until we need to wrap around */
+       if (likely(next_seqno + active_requests > next_seqno))
+               return 0;
 
-               tl->next_seqno = 1;
+       ret = i915_gem_init_global_seqno(i915, 0);
+       if (ret) {
+               i915->gt.active_requests--;
+               return ret;
        }
 
-       *seqno = tl->next_seqno++;
        return 0;
 }
 
+static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+       return atomic_inc_return(&tl->next_seqno);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -356,9 +357,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
        struct drm_i915_private *dev_priv = engine->i915;
        struct drm_i915_gem_request *req;
-       u32 seqno;
        int ret;
 
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
        /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
         * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
         * and restart.
@@ -367,6 +369,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
        if (ret)
                return ERR_PTR(ret);
 
+       ret = reserve_global_seqno(dev_priv);
+       if (ret)
+               return ERR_PTR(ret);
+
        /* Move the oldest request to the slab-cache (if not in use!) */
        req = list_first_entry_or_null(&engine->timeline->requests,
                                       typeof(*req), link);
@@ -402,12 +408,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * Do not use kmem_cache_zalloc() here!
         */
        req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-       if (!req)
-               return ERR_PTR(-ENOMEM);
-
-       ret = i915_gem_get_global_seqno(dev_priv, &seqno);
-       if (ret)
-               goto err;
+       if (!req) {
+               ret = -ENOMEM;
+               goto err_unreserve;
+       }
 
        req->timeline = engine->timeline;
 
@@ -416,14 +420,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
                       &i915_fence_ops,
                       &req->lock,
                       req->timeline->fence_context,
-                      seqno);
+                      timeline_get_seqno(req->timeline->common));
 
        i915_sw_fence_init(&req->submit, submit_notify);
 
        INIT_LIST_HEAD(&req->active_list);
        req->i915 = dev_priv;
        req->engine = engine;
-       req->global_seqno = seqno;
+       req->global_seqno = req->fence.seqno;
        req->ctx = i915_gem_context_get(ctx);
 
        /* No zalloc, must clear what we need by hand */
@@ -459,8 +463,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 err_ctx:
        i915_gem_context_put(ctx);
-err:
        kmem_cache_free(dev_priv->requests, req);
+err_unreserve:
+       dev_priv->gt.active_requests--;
        return ERR_PTR(ret);
 }
 
@@ -624,7 +629,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
-       dev_priv->gt.active_engines |= intel_engine_flag(engine);
        if (dev_priv->gt.awake)
                return;
 
@@ -700,6 +704,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
                i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
                                             &request->submitq);
 
+       GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+                                    request->fence.seqno));
+
        request->emitted_jiffies = jiffies;
        request->previous_seqno = timeline->last_pending_seqno;
        timeline->last_pending_seqno = request->fence.seqno;
@@ -962,38 +969,35 @@ complete:
        return timeout;
 }
 
-static bool engine_retire_requests(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *request, *next;
 
        list_for_each_entry_safe(request, next,
                                 &engine->timeline->requests, link) {
                if (!i915_gem_request_completed(request))
-                       return false;
+                       return;
 
                i915_gem_request_retire(request);
        }
-
-       return true;
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
-       unsigned int tmp;
+       enum intel_engine_id id;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-       if (dev_priv->gt.active_engines == 0)
+       if (!dev_priv->gt.active_requests)
                return;
 
        GEM_BUG_ON(!dev_priv->gt.awake);
 
-       for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
-               if (engine_retire_requests(engine))
-                       dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
+       for_each_engine(engine, dev_priv, id)
+               engine_retire_requests(engine);
 
-       if (dev_priv->gt.active_engines == 0)
+       if (!dev_priv->gt.active_requests)
                queue_delayed_work(dev_priv->wq,
                                   &dev_priv->gt.idle_work,
                                   msecs_to_jiffies(100));
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 767b23914ec5884a762a87757db0c6d2a906c228..18e603980dd93e938ab8523107e27f2a0f813415 100644
@@ -55,7 +55,7 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
        struct list_head link;
-       u32 next_seqno;
+       atomic_t next_seqno;
 
        struct drm_i915_private *i915;
        const char *name;
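
With next_seqno converted to an atomic_t above, the debugfs reader
(i915_next_seqno_get) samples it with atomic_read() while
timeline_get_seqno() advances it with atomic_inc_return(). A minimal
sketch of that pairing, using example_* names of our own invention
rather than the driver's:

	#include <linux/atomic.h>

	static atomic_t example_next_seqno = ATOMIC_INIT(0);

	/* Allocation path: yields 1, 2, 3, ... so the reserved value 0
	 * is never handed out after the timeline is reset back to 0.
	 */
	static u32 example_get_seqno(void)
	{
		return atomic_inc_return(&example_next_seqno);
	}

	/* Observer path (e.g. debugfs): a coherent snapshot of the counter */
	static u32 example_peek_seqno(void)
	{
		return atomic_read(&example_next_seqno);
	}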