drm/i915: Avoid accessing request->timeline outside of its lifetime

[karo-tx-linux.git] / drivers / gpu / drm / i915 / intel_ringbuffer.h
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index ec0b4a0c605db2f1df88ff7e567cc51d196f2d19..642b54692d0d9697b3c8c8117dc96f84ac18a957 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -4,6 +4,7 @@
  #include <linux/hashtable.h>
  #include "i915_gem_batch_pool.h"
  #include "i915_gem_request.h"
+#include "i915_gem_timeline.h"
  
  #define I915_CMD_HASH_ORDER 9
  
@@ -73,13 +74,40 @@ enum intel_engine_hangcheck_action {
  
  #define HANGCHECK_SCORE_RING_HUNG 31
  
+#define I915_MAX_SLICES        3
+#define I915_MAX_SUBSLICES 3
+
+#define instdone_slice_mask(dev_priv__) \
+       (INTEL_GEN(dev_priv__) == 7 ? \
+        1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)
+
+#define instdone_subslice_mask(dev_priv__) \
+       (INTEL_GEN(dev_priv__) == 7 ? \
+        1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+
+#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
+       for ((slice__) = 0, (subslice__) = 0; \
+            (slice__) < I915_MAX_SLICES; \
+            (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
+              (slice__) += ((subslice__) == 0)) \
+               for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
+                           (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+
+struct intel_instdone {
+       u32 instdone;
+       /* The following exist only in the RCS engine */
+       u32 slice_common;
+       u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+       u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+};
+
  struct intel_engine_hangcheck {
         u64 acthd;
         u32 seqno;
         int score;
         enum intel_engine_hangcheck_action action;
         int deadlock;
-       u32 instdone[I915_NUM_INSTDONE_REG];
+       struct intel_instdone instdone;
  };
  
  struct intel_ring {
@@ -130,6 +158,7 @@ struct i915_ctx_workarounds {
  };
  
  struct drm_i915_gem_request;
+struct intel_render_state;
  
  struct intel_engine_cs {
         struct drm_i915_private *i915;
@@ -141,7 +170,6 @@ struct intel_engine_cs {
                 VCS2,   /* Keep instances of the same type engine together. */
                 VECS
         } id;
-#define I915_NUM_ENGINES 5
  #define _VCS(n) (VCS + (n))
         unsigned int exec_id;
         enum intel_engine_hw_id {
@@ -152,10 +180,12 @@ struct intel_engine_cs {
                 VCS2_HW
         } hw_id;
         enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
-       u64 fence_context;
         u32             mmio_base;
         unsigned int irq_shift;
         struct intel_ring *buffer;
+       struct intel_timeline *timeline;
+
+       struct intel_render_state *render_state;
  
         /* Rather than have every client wait upon all user interrupts,
          * with the herd waking after every interrupt and each doing the
@@ -177,7 +207,7 @@ struct intel_engine_cs {
                 struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
                 bool irq_posted;
  
-               spinlock_t lock; /* protects the lists of requests */
+               spinlock_t lock; /* protects the lists of requests; irqsafe */
                 struct rb_root waiters; /* sorted by retirement, priority */
                 struct rb_root signals; /* sorted by retirement */
                 struct intel_wait *first_wait; /* oldest waiter by retirement */
@@ -225,7 +255,9 @@ struct intel_engine_cs {
  #define I915_DISPATCH_SECURE BIT(0)
  #define I915_DISPATCH_PINNED BIT(1)
  #define I915_DISPATCH_RS     BIT(2)
-       int             (*emit_request)(struct drm_i915_gem_request *req);
+       void            (*emit_breadcrumb)(struct drm_i915_gem_request *req,
+                                          u32 *out);
+       int             emit_breadcrumb_sz;
  
         /* Pass the request to the hardware queue (e.g. directly into
          * the legacy ringbuffer or to the end of an execlist).
@@ -282,8 +314,6 @@ struct intel_engine_cs {
          *  ie. transpose of f(x, y)
          */
         struct {
-               u32     sync_seqno[I915_NUM_ENGINES-1];
-
                 union {
  #define GEN6_SEMAPHORE_LAST    VECS_HW
  #define GEN6_NUM_SEMAPHORES    (GEN6_SEMAPHORE_LAST + 1)
@@ -300,7 +330,7 @@ struct intel_engine_cs {
                 /* AKA wait() */
                 int     (*sync_to)(struct drm_i915_gem_request *req,
                                    struct drm_i915_gem_request *signal);
-               int     (*signal)(struct drm_i915_gem_request *req);
+               u32     *(*signal)(struct drm_i915_gem_request *req, u32 *out);
         } semaphore;
  
         /* Execlists */
@@ -316,27 +346,6 @@ struct intel_engine_cs {
         bool preempt_wa;
         u32 ctx_desc_template;
  
-       /**
-        * List of breadcrumbs associated with GPU requests currently
-        * outstanding.
-        */
-       struct list_head request_list;
-
-       /**
-        * Seqno of request most recently submitted to request_list.
-        * Used exclusively by hang checker to avoid grabbing lock while
-        * inspecting request list.
-        */
-       u32 last_submitted_seqno;
-       u32 last_pending_seqno;
-
-       /* An RCU guarded pointer to the last request. No reference is
-        * held to the request, users must carefully acquire a reference to
-        * the request using i915_gem_active_get_rcu(), or hold the
-        * struct_mutex.
-        */
-       struct i915_gem_active last_request;
-
         struct i915_gem_context *last_context;
  
         struct intel_engine_hangcheck hangcheck;
@@ -368,39 +377,12 @@ struct intel_engine_cs {
         u32 (*get_cmd_length_mask)(u32 cmd_header);
  };
  
-static inline bool
-intel_engine_initialized(const struct intel_engine_cs *engine)
-{
-       return engine->i915 != NULL;
-}
-
  static inline unsigned
  intel_engine_flag(const struct intel_engine_cs *engine)
  {
         return 1 << engine->id;
  }
  
-static inline u32
-intel_engine_sync_index(struct intel_engine_cs *engine,
-                       struct intel_engine_cs *other)
-{
-       int idx;
-
-       /*
-        * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
-        * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
-        * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
-        * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
-        * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
-        */
-
-       idx = (other - engine) - 1;
-       if (idx < 0)
-               idx += I915_NUM_ENGINES;
-
-       return idx;
-}
-
  static inline void
  intel_flush_status_page(struct intel_engine_cs *engine, int reg)
  {
@@ -483,30 +465,23 @@ static inline void intel_ring_advance(struct intel_ring *ring)
          */
  }
  
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
  {
         /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-       return value & (ring->size - 1);
+       u32 offset = addr - ring->vaddr;
+       return offset & (ring->size - 1);
  }
  
  int __intel_ring_space(int head, int tail, int size);
  void intel_ring_update_space(struct intel_ring *ring);
  
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
  
  void intel_engine_setup_common(struct intel_engine_cs *engine);
  int intel_engine_init_common(struct intel_engine_cs *engine);
  int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
  void intel_engine_cleanup_common(struct intel_engine_cs *engine);
  
-static inline int intel_engine_idle(struct intel_engine_cs *engine,
-                                   unsigned int flags)
-{
-       /* Wait upon the last request to be completed */
-       return i915_gem_active_wait_unlocked(&engine->last_request,
-                                            flags, NULL, NULL);
-}
-
  int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
  int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
  int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
@@ -514,13 +489,30 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
  int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
  
  u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+
  static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
  {
         return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
  }
  
+static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
+{
+       /* We are only peeking at the tail of the submit queue (and not the
+        * queue itself) in order to gain a hint as to the current active
+        * state of the engine. Callers are not expected to be taking
+        * engine->timeline->lock, nor are they expected to be concerned
+        * with serialising this hint with anything, so document it as
+        * a hint and nothing more.
+        */
+       return READ_ONCE(engine->timeline->last_submitted_seqno);
+}
+
  int init_workarounds_ring(struct intel_engine_cs *engine);
  
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+                              struct intel_instdone *instdone);
+
  /*
   * Arbitrary size for largest possible 'add request' sequence. The code paths
   * are complex and variable. Empirical measurement shows that the worst case
@@ -589,9 +581,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
  unsigned int intel_kick_waiters(struct drm_i915_private *i915);
  unsigned int intel_kick_signalers(struct drm_i915_private *i915);
  
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
-       return i915_gem_active_isset(&engine->last_request);
-}
-
  #endif /* _INTEL_RINGBUFFER_H_ */