]> git.karo-electronics.de Git - linux-beck.git/commitdiff
drm/i915: Introduce a global_seqno for each request
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 28 Oct 2016 12:58:49 +0000 (13:58 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 28 Oct 2016 19:53:53 +0000 (20:53 +0100)
Though we will have multiple timelines, we still have a single timeline
of execution. This we can use to provide an execution and retirement order
of requests. This keeps tracking execution of requests simple, and vital
for preserving a single waiter (i.e. so that we can order the waiters so
that only the earliest to wakeup need be woken). To accomplish this we
distinguish the seqno used to order requests per-context (external) and
that used internally for execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-26-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_request.h
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/i915_trace.h
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c

index 3a0ea5eace3771962413177c669caf4ce4ad3602..90bc4a89e0d5c5e1487877bb6a396df6d64b06a7 100644 (file)
@@ -637,7 +637,7 @@ static void print_request(struct seq_file *m,
        rcu_read_lock();
        task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
        seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
-                  rq->fence.seqno, rq->ctx->hw_id, rq->fence.seqno,
+                  rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
                   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
                   task ? task->comm : "<unknown>",
                   task ? task->pid : -1);
index f0f68f64d09caa6e585b86964f409b2651cc6574..217674bb149527f6229e8e6ddb13b5fe46976fd0 100644 (file)
@@ -4050,7 +4050,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
        /* Before we do the heavier coherent read of the seqno,
         * check the value (hopefully) in the CPU cacheline.
         */
-       if (i915_gem_request_completed(req))
+       if (__i915_gem_request_completed(req))
                return true;
 
        /* Ensure our read of the seqno is coherent so that we
@@ -4101,7 +4101,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
                        wake_up_process(tsk);
                rcu_read_unlock();
 
-               if (i915_gem_request_completed(req))
+               if (__i915_gem_request_completed(req))
                        return true;
        }
 
index 5ea46a7d991f2c6dfde7ac479c51d11cc63182fc..f4cfb88bd8048c1a438216367bd06c5b0ee7b369 100644 (file)
@@ -2615,7 +2615,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
                return;
 
        DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-                        engine->name, request->fence.seqno);
+                        engine->name, request->global_seqno);
 
        /* Setup the CS to resume from the breadcrumb of the hung request */
        engine->reset_hw(engine, request);
index 03ae85a1eefb018720c4cb66cfbbece31b1d3c81..311cf3fac2e09ae8696dfb40690920028b9686ae 100644 (file)
@@ -376,7 +376,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * of being read by __i915_gem_active_get_rcu(). As such,
         * we have to be very careful when overwriting the contents. During
         * the RCU lookup, we change chase the request->engine pointer,
-        * read the request->fence.seqno and increment the reference count.
+        * read the request->global_seqno and increment the reference count.
         *
         * The reference count is incremented atomically. If it is zero,
         * the lookup knows the request is unallocated and complete. Otherwise,
@@ -418,6 +418,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
        INIT_LIST_HEAD(&req->active_list);
        req->i915 = dev_priv;
        req->engine = engine;
+       req->global_seqno = seqno;
        req->ctx = i915_gem_context_get(ctx);
 
        /* No zalloc, must clear what we need by hand */
@@ -475,8 +476,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
                return ret < 0 ? ret : 0;
        }
 
+       if (!from->global_seqno) {
+               ret = i915_sw_fence_await_dma_fence(&to->submit,
+                                                   &from->fence, 0,
+                                                   GFP_KERNEL);
+               return ret < 0 ? ret : 0;
+       }
+
        idx = intel_engine_sync_index(from->engine, to->engine);
-       if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+       if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx])
                return 0;
 
        trace_i915_gem_ring_sync_to(to, from);
@@ -494,7 +502,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
                        return ret;
        }
 
-       from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+       from->engine->semaphore.sync_seqno[idx] = from->global_seqno;
        return 0;
 }
 
@@ -774,7 +782,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 
        timeout_us += local_clock_us(&cpu);
        do {
-               if (i915_gem_request_completed(req))
+               if (__i915_gem_request_completed(req))
                        return true;
 
                if (signal_pending_state(state, current))
@@ -883,6 +891,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
                GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
        }
+       GEM_BUG_ON(!req->global_seqno);
 
        /* Optimistic short spin before touching IRQs */
        if (i915_spin_request(req, state, 5))
@@ -892,7 +901,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
        if (flags & I915_WAIT_LOCKED)
                add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
-       intel_wait_init(&wait, req->fence.seqno);
+       intel_wait_init(&wait, req->global_seqno);
        if (intel_engine_add_wait(req->engine, &wait))
                /* In order to check that we haven't missed the interrupt
                 * as we enabled it, we need to kick ourselves to do a
index 4ac30ae93e499c6cb06c5c6a049a63a2dce220f7..75f8360b3421b1e6477edd548b09563d693656c5 100644 (file)
@@ -87,6 +87,8 @@ struct drm_i915_gem_request {
        struct i915_sw_fence submit;
        wait_queue_t submitq;
 
+       u32 global_seqno;
+
        /** GEM sequence number associated with the previous request,
         * when the HWS breadcrumb is equal to this the GPU is processing
         * this request.
@@ -163,7 +165,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
 static inline u32
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-       return req ? req->fence.seqno : 0;
+       return req ? req->global_seqno : 0;
 }
 
 static inline struct intel_engine_cs *
@@ -248,17 +250,37 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+       GEM_BUG_ON(!req->global_seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
                                 req->previous_seqno);
 }
 
 static inline bool
-i915_gem_request_completed(const struct drm_i915_gem_request *req)
+i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+       if (!req->global_seqno)
+               return false;
+
+       return __i915_gem_request_started(req);
+}
+
+static inline bool
+__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+       GEM_BUG_ON(!req->global_seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-                                req->fence.seqno);
+                                req->global_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+       if (!req->global_seqno)
+               return false;
+
+       return __i915_gem_request_completed(req);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
@@ -266,7 +288,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *request,
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
                                     int state, unsigned long timeout_us)
 {
-       return (i915_gem_request_started(request) &&
+       return (__i915_gem_request_started(request) &&
                __i915_spin_request(request, state, timeout_us));
 }
 
index 12fea57d41fb7c6254066030542227e0aca2b7ee..9aa197ca6210b18f35c70bd2f0613872755f81f0 100644 (file)
@@ -1176,7 +1176,7 @@ static void record_request(struct drm_i915_gem_request *request,
                           struct drm_i915_error_request *erq)
 {
        erq->context = request->ctx->hw_id;
-       erq->seqno = request->fence.seqno;
+       erq->seqno = request->global_seqno;
        erq->jiffies = request->emitted_jiffies;
        erq->head = request->head;
        erq->tail = request->tail;
index cca250e90845516225583a931db0911bcccd96ba..857ef914cae755cbc56db191d3d035763bfb017a 100644 (file)
@@ -554,7 +554,7 @@ static void guc_wq_item_append(struct i915_guc_client *gc,
        wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
        wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-       wqi->fence_id = rq->fence.seqno;
+       wqi->fence_id = rq->global_seqno;
 
        kunmap_atomic(base);
 }
@@ -655,7 +655,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
                client->b_fail += 1;
 
        guc->submissions[engine_id] += 1;
-       guc->last_seqno[engine_id] = rq->fence.seqno;
+       guc->last_seqno[engine_id] = rq->global_seqno;
        spin_unlock(&client->wq_lock);
 }
 
index 5c912c25f7d301ccb0bead7b4867bbfb2627127c..c5d210ebaa9a21f268bced6661c3cb5a49497f9c 100644 (file)
@@ -466,7 +466,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
                           __entry->dev = from->i915->drm.primary->index;
                           __entry->sync_from = from->engine->id;
                           __entry->sync_to = to->engine->id;
-                          __entry->seqno = from->fence.seqno;
+                          __entry->seqno = from->global_seqno;
                           ),
 
            TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -489,7 +489,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           __entry->flags = flags;
                           dma_fence_enable_sw_signaling(&req->fence);
                           ),
@@ -534,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           ),
 
            TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -596,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
            TP_fast_assign(
                           __entry->dev = req->i915->drm.primary->index;
                           __entry->ring = req->engine->id;
-                          __entry->seqno = req->fence.seqno;
+                          __entry->seqno = req->global_seqno;
                           __entry->blocking =
                                     mutex_is_locked(&req->i915->drm.struct_mutex);
                           ),
index 56efcc507ea2c181d53beea696025fa59d052f44..0d5def0d2dfe0465e3a32f4727ff7aad0dbb70b9 100644 (file)
@@ -504,9 +504,11 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
        /* locked by dma_fence_enable_sw_signaling() */
        assert_spin_locked(&request->lock);
+       if (!request->global_seqno)
+               return;
 
        request->signaling.wait.tsk = b->signaler;
-       request->signaling.wait.seqno = request->fence.seqno;
+       request->signaling.wait.seqno = request->global_seqno;
        i915_gem_request_get(request);
 
        spin_lock(&b->lock);
@@ -530,8 +532,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
        p = &b->signals.rb_node;
        while (*p) {
                parent = *p;
-               if (i915_seqno_passed(request->fence.seqno,
-                                     to_signaler(parent)->fence.seqno)) {
+               if (i915_seqno_passed(request->global_seqno,
+                                     to_signaler(parent)->global_seqno)) {
                        p = &parent->rb_right;
                        first = false;
                } else {
index cb30549dfd40a9d3ec562ea11818c15062034c55..e0a9bf81774b2feabc9053d9814acefd074d1c52 100644 (file)
@@ -1584,7 +1584,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
                        intel_hws_seqno_address(request->engine) |
                        MI_FLUSH_DW_USE_GTT);
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, request->fence.seqno);
+       intel_ring_emit(ring, request->global_seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
        intel_ring_emit(ring, MI_NOOP);
        return intel_logical_ring_advance(request);
@@ -1613,7 +1613,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
                         PIPE_CONTROL_QW_WRITE));
        intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+       intel_ring_emit(ring, request->global_seqno);
        /* We're thrashing one dword of HWS. */
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
index aaa46d9ffbc13560e4f5ace4e0b84128af899bfb..76c6b70303fbc5db6061e0f3186771905620749a 100644 (file)
@@ -1238,7 +1238,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
                                PIPE_CONTROL_CS_STALL);
                intel_ring_emit(ring, lower_32_bits(gtt_offset));
                intel_ring_emit(ring, upper_32_bits(gtt_offset));
-               intel_ring_emit(ring, req->fence.seqno);
+               intel_ring_emit(ring, req->global_seqno);
                intel_ring_emit(ring, 0);
                intel_ring_emit(ring,
                                MI_SEMAPHORE_SIGNAL |
@@ -1274,7 +1274,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
                                lower_32_bits(gtt_offset) |
                                MI_FLUSH_DW_USE_GTT);
                intel_ring_emit(ring, upper_32_bits(gtt_offset));
-               intel_ring_emit(ring, req->fence.seqno);
+               intel_ring_emit(ring, req->global_seqno);
                intel_ring_emit(ring,
                                MI_SEMAPHORE_SIGNAL |
                                MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1308,7 +1308,7 @@ static int gen6_signal(struct drm_i915_gem_request *req)
                if (i915_mmio_reg_valid(mbox_reg)) {
                        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                        intel_ring_emit_reg(ring, mbox_reg);
-                       intel_ring_emit(ring, req->fence.seqno);
+                       intel_ring_emit(ring, req->global_seqno);
                }
        }
 
@@ -1339,7 +1339,7 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req)
 
        intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-       intel_ring_emit(ring, req->fence.seqno);
+       intel_ring_emit(ring, req->global_seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
        intel_ring_advance(ring);
 
@@ -1389,7 +1389,7 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req)
                               PIPE_CONTROL_QW_WRITE));
        intel_ring_emit(ring, intel_hws_seqno_address(engine));
        intel_ring_emit(ring, 0);
-       intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+       intel_ring_emit(ring, req->global_seqno);
        /* We're thrashing one dword of HWS. */
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
@@ -1427,7 +1427,7 @@ gen8_ring_sync_to(struct drm_i915_gem_request *req,
                        MI_SEMAPHORE_WAIT |
                        MI_SEMAPHORE_GLOBAL_GTT |
                        MI_SEMAPHORE_SAD_GTE_SDD);
-       intel_ring_emit(ring, signal->fence.seqno);
+       intel_ring_emit(ring, signal->global_seqno);
        intel_ring_emit(ring, lower_32_bits(offset));
        intel_ring_emit(ring, upper_32_bits(offset));
        intel_ring_advance(ring);
@@ -1465,7 +1465,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
         * seqno is >= the last seqno executed. However for hardware the
         * comparison is strictly greater than.
         */
-       intel_ring_emit(ring, signal->fence.seqno - 1);
+       intel_ring_emit(ring, signal->global_seqno - 1);
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);