From f787a5f59e1b0e320a6b0a37e9a2e306551d1e40 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 24 Sep 2010 16:02:42 +0100
Subject: [PATCH] drm/i915: Only hold a process-local lock whilst throttling.

Avoid causing latency in other clients by not taking the global struct
mutex, instead moving the per-client request manipulation under a local
per-client mutex. For example, this allows a compositor to schedule a
page-flip (through X) whilst an OpenGL application is monopolising the
GPU.

Signed-off-by: Chris Wilson
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   6 +-
 drivers/gpu/drm/i915/i915_dma.c         |  22 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  15 ++-
 drivers/gpu/drm/i915/i915_gem.c         | 120 ++++++++++++++----
 drivers/gpu/drm/i915/i915_irq.c         |  29 +++---
 drivers/gpu/drm/i915/intel_display.c    |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  16 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +-
 8 files changed, 123 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 36f0e3630f74..eb5dd52847a9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -261,7 +261,7 @@ static int i915_gem_seqno_info(struct seq_file *m, void *data)

 	if (dev_priv->render_ring.status_page.page_addr != NULL) {
 		seq_printf(m, "Current sequence: %d\n",
-			   i915_get_gem_seqno(dev, &dev_priv->render_ring));
+			   dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring));
 	} else {
 		seq_printf(m, "Current sequence: hws uninitialized\n");
 	}
@@ -321,7 +321,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		   atomic_read(&dev_priv->irq_received));
 	if (dev_priv->render_ring.status_page.page_addr != NULL) {
 		seq_printf(m, "Current sequence: %d\n",
-			   i915_get_gem_seqno(dev, &dev_priv->render_ring));
+			   dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring));
 	} else {
 		seq_printf(m, "Current sequence: hws uninitialized\n");
 	}
@@ -932,7 +932,7 @@ i915_wedged_write(struct file *filp,

 	atomic_set(&dev_priv->mm.wedged, val);
 	if (val) {
-		DRM_WAKEUP(&dev_priv->irq_queue);
+		wake_up_all(&dev_priv->irq_queue);
 		queue_work(dev_priv->wq, &dev_priv->error_work);
 	}

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 048c54bdfd4c..a3aea17c964b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2162,20 +2162,19 @@ int i915_driver_unload(struct drm_device *dev)
 	return 0;
 }

-int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
+int i915_driver_open(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv;
+	struct drm_i915_file_private *file_priv;

 	DRM_DEBUG_DRIVER("\n");
-	i915_file_priv = (struct drm_i915_file_private *)
-	    kmalloc(sizeof(*i915_file_priv), GFP_KERNEL);
-
-	if (!i915_file_priv)
+	file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL);
+	if (!file_priv)
 		return -ENOMEM;

-	file_priv->driver_priv = i915_file_priv;
+	file->driver_priv = file_priv;

-	INIT_LIST_HEAD(&i915_file_priv->mm.request_list);
+	INIT_LIST_HEAD(&file_priv->mm.request_list);
+	mutex_init(&file_priv->mutex);

 	return 0;
 }
@@ -2218,11 +2217,12 @@ void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
 		i915_mem_release(dev, file_priv, dev_priv->agp_heap);
 }

-void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
+void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+	struct drm_i915_file_private *file_priv = file->driver_priv;

-	kfree(i915_file_priv);
+	mutex_destroy(&file_priv->mutex);
+	kfree(file_priv);
 }

 struct drm_ioctl_desc i915_ioctls[] = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbfb99dce6aa..2611e85bdd3d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -844,11 +844,13 @@ struct drm_i915_gem_request {
 	/** global list entry for this request */
 	struct list_head list;

+	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 };

 struct drm_i915_file_private {
+	struct mutex mutex;
 	struct {
 		struct list_head request_list;
 	} mm;
@@ -1005,9 +1007,16 @@ void i915_gem_object_unpin(struct drm_gem_object *obj);
 int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
-uint32_t i915_get_gem_seqno(struct drm_device *dev,
-			    struct intel_ring_buffer *ring);
-bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool
+i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
 				  bool interruptible);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dec7bbc81cb6..9185f098822d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1592,17 +1592,17 @@ i915_gem_process_flushing_list(struct drm_device *dev,

 uint32_t
 i915_add_request(struct drm_device *dev,
-		 struct drm_file *file_priv,
+		 struct drm_file *file,
 		 struct drm_i915_gem_request *request,
 		 struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_file_private *i915_file_priv = NULL;
+	struct drm_i915_file_private *file_priv = NULL;
 	uint32_t seqno;
 	int was_empty;

-	if (file_priv != NULL)
-		i915_file_priv = file_priv->driver_priv;
+	if (file != NULL)
+		file_priv = file->driver_priv;

 	if (request == NULL) {
 		request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -1610,7 +1610,7 @@ i915_add_request(struct drm_device *dev,
 			return 0;
 	}

-	seqno = ring->add_request(dev, ring, file_priv, 0);
+	seqno = ring->add_request(dev, ring, 0);

 	request->seqno = seqno;
 	request->ring = ring;
@@ -1618,11 +1618,12 @@ i915_add_request(struct drm_device *dev,
 	was_empty = list_empty(&ring->request_list);
 	list_add_tail(&request->list, &ring->request_list);

-	if (i915_file_priv) {
+	if (file_priv) {
+		mutex_lock(&file_priv->mutex);
+		request->file_priv = file_priv;
 		list_add_tail(&request->client_list,
-			      &i915_file_priv->mm.request_list);
-	} else {
-		INIT_LIST_HEAD(&request->client_list);
+			      &file_priv->mm.request_list);
+		mutex_unlock(&file_priv->mutex);
 	}

 	if (!dev_priv->mm.suspended) {
@@ -1654,20 +1655,14 @@ i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
 			    I915_GEM_DOMAIN_COMMAND, flush_domains);
 }

-/**
- * Returns true if seq1 is later than seq2.
- */
-bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
-
-uint32_t
-i915_get_gem_seqno(struct drm_device *dev,
-		   struct intel_ring_buffer *ring)
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 {
-	return ring->get_gem_seqno(dev, ring);
+	if (request->file_priv) {
+		mutex_lock(&request->file_priv->mutex);
+		list_del(&request->client_list);
+		mutex_unlock(&request->file_priv->mutex);
+	}
 }

 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
@@ -1681,7 +1676,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 				       list);

 		list_del(&request->list);
-		list_del(&request->client_list);
+		i915_gem_request_remove_from_client(request);
 		kfree(request);
 	}

@@ -1746,7 +1741,7 @@ i915_gem_retire_requests_ring(struct drm_device *dev,
 	    list_empty(&ring->request_list))
 		return;

-	seqno = i915_get_gem_seqno(dev, ring);
+	seqno = ring->get_seqno(dev, ring);
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;

@@ -1760,7 +1755,7 @@ i915_gem_retire_requests_ring(struct drm_device *dev,
 		trace_i915_gem_request_retire(dev, request->seqno);

 		list_del(&request->list);
-		list_del(&request->client_list);
+		i915_gem_request_remove_from_client(request);
 		kfree(request);
 	}

@@ -1862,7 +1857,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
 	if (atomic_read(&dev_priv->mm.wedged))
 		return -EIO;

-	if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
+	if (!i915_seqno_passed(ring->get_seqno(dev, ring), seqno)) {
 		if (HAS_PCH_SPLIT(dev))
 			ier = I915_READ(DEIER) | I915_READ(GTIER);
 		else
@@ -1881,12 +1876,12 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
 		if (interruptible)
 			ret = wait_event_interruptible(ring->irq_queue,
 				i915_seqno_passed(
-					ring->get_gem_seqno(dev, ring), seqno)
+					ring->get_seqno(dev, ring), seqno)
 				|| atomic_read(&dev_priv->mm.wedged));
 		else
 			wait_event(ring->irq_queue,
 				i915_seqno_passed(
-					ring->get_gem_seqno(dev, ring), seqno)
+					ring->get_seqno(dev, ring), seqno)
 				|| atomic_read(&dev_priv->mm.wedged));

 		ring->user_irq_put(dev, ring);
@@ -1899,7 +1894,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno,

 	if (ret && ret != -ERESTARTSYS)
 		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
-			  __func__, ret, seqno, ring->get_gem_seqno(dev, ring),
+			  __func__, ret, seqno, ring->get_seqno(dev, ring),
 			  dev_priv->next_seqno);

 	/* Directly dispatch request retiring. While we have the work queue
@@ -3384,28 +3379,48 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 * relatively low latency when blocking on a particular request to finish.
 */
 static int
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
+i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
-	int ret = 0;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
+	struct drm_i915_gem_request *request;
+	struct intel_ring_buffer *ring = NULL;
+	u32 seqno = 0;
+	int ret;

-	mutex_lock(&dev->struct_mutex);
-	while (!list_empty(&i915_file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&i915_file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-
+	mutex_lock(&file_priv->mutex);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
 		if (time_after_eq(request->emitted_jiffies, recent_enough))
 			break;

-		ret = i915_wait_request(dev, request->seqno, request->ring);
-		if (ret != 0)
-			break;
+		ring = request->ring;
+		seqno = request->seqno;
 	}
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&file_priv->mutex);
+
+	if (seqno == 0)
+		return 0;
+
+	ret = 0;
+	if (!i915_seqno_passed(ring->get_seqno(dev, ring), seqno)) {
+		/* And wait for the seqno passing without holding any locks and
+		 * causing extra latency for others. This is safe as the irq
+		 * generation is designed to be run atomically and so is
+		 * lockless.
+		 */
+		ring->user_irq_get(dev, ring);
+		ret = wait_event_interruptible(ring->irq_queue,
+					       i915_seqno_passed(ring->get_seqno(dev, ring), seqno)
+					       || atomic_read(&dev_priv->mm.wedged));
+		ring->user_irq_put(dev, ring);
+
+		if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
+			ret = -EIO;
+	}
+
+	if (ret == 0)
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);

 	return ret;
 }
@@ -4857,17 +4872,26 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	return 0;
 }

-void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+	struct drm_i915_file_private *file_priv = file->driver_priv;

 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
 	 */
 	mutex_lock(&dev->struct_mutex);
-	while (!list_empty(&i915_file_priv->mm.request_list))
-		list_del_init(i915_file_priv->mm.request_list.next);
+	mutex_lock(&file_priv->mutex);
+	while (!list_empty(&file_priv->mm.request_list)) {
+		struct drm_i915_gem_request *request;
+
+		request = list_first_entry(&file_priv->mm.request_list,
+					   struct drm_i915_gem_request,
+					   client_list);
+		list_del(&request->client_list);
+		request->file_priv = NULL;
+	}
+	mutex_unlock(&file_priv->mutex);
 	mutex_unlock(&dev->struct_mutex);
 }

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d4c053e1c376..245a07e6f1a4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -327,16 +327,16 @@ static irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 	}

 	if (gt_iir & GT_PIPE_NOTIFY) {
-		u32 seqno = render_ring->get_gem_seqno(dev, render_ring);
+		u32 seqno = render_ring->get_seqno(dev, render_ring);
 		render_ring->irq_gem_seqno = seqno;
 		trace_i915_gem_request_complete(dev, seqno);
-		DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+		wake_up_all(&dev_priv->render_ring.irq_queue);
 		dev_priv->hangcheck_count = 0;
 		mod_timer(&dev_priv->hangcheck_timer,
 			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
 	}
 	if (gt_iir & bsd_usr_interrupt)
-		DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+		wake_up_all(&dev_priv->bsd_ring.irq_queue);

 	if (de_iir & DE_GSE)
 		intel_opregion_gse_intr(dev);
@@ -573,7 +573,8 @@ static void i915_capture_error_state(struct drm_device *dev)
 		return;
 	}

-	error->seqno = i915_get_gem_seqno(dev, &dev_priv->render_ring);
+	error->seqno =
+		dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring);
 	error->eir = I915_READ(EIR);
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	error->pipeastat = I915_READ(PIPEASTAT);
@@ -873,7 +874,9 @@ static void i915_handle_error(struct drm_device *dev, bool wedged)
 		/*
 		 * Wakeup waiting processes so they don't hang
 		 */
-		DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+		wake_up_all(&dev_priv->render_ring.irq_queue);
+		if (HAS_BSD(dev))
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);
 	}

 	queue_work(dev_priv->wq, &dev_priv->error_work);
@@ -1012,18 +1015,17 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		}

 		if (iir & I915_USER_INTERRUPT) {
-			u32 seqno =
-				render_ring->get_gem_seqno(dev, render_ring);
+			u32 seqno = render_ring->get_seqno(dev, render_ring);
 			render_ring->irq_gem_seqno = seqno;
 			trace_i915_gem_request_complete(dev, seqno);
-			DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+			wake_up_all(&dev_priv->render_ring.irq_queue);
 			dev_priv->hangcheck_count = 0;
 			mod_timer(&dev_priv->hangcheck_timer,
 				  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
 		}

 		if (HAS_BSD(dev) && (iir & I915_BSD_USER_INTERRUPT))
-			DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);

 		if (iir & I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT) {
 			intel_prepare_page_flip(dev, 0);
@@ -1333,9 +1335,8 @@ void i915_hangcheck_elapsed(unsigned long data)

 	/* If all work is done then ACTHD clearly hasn't advanced. */
 	if (list_empty(&dev_priv->render_ring.request_list) ||
-		i915_seqno_passed(i915_get_gem_seqno(dev,
-				&dev_priv->render_ring),
-			i915_get_tail_request(dev)->seqno)) {
+		i915_seqno_passed(dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring),
+				  i915_get_tail_request(dev)->seqno)) {
 		bool missed_wakeup = false;

 		dev_priv->hangcheck_count = 0;
@@ -1343,13 +1344,13 @@ void i915_hangcheck_elapsed(unsigned long data)

 		/* Issue a wake-up to catch stuck h/w. */
 		if (dev_priv->render_ring.waiting_gem_seqno &&
 		    waitqueue_active(&dev_priv->render_ring.irq_queue)) {
-			DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+			wake_up_all(&dev_priv->render_ring.irq_queue);
 			missed_wakeup = true;
 		}

 		if (dev_priv->bsd_ring.waiting_gem_seqno &&
 		    waitqueue_active(&dev_priv->bsd_ring.irq_queue)) {
-			DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);
 			missed_wakeup = true;
 		}

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4843d027aaad..00214c123ec2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4983,7 +4983,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
 	/* Initial scanout buffer will have a 0 pending flip count */
 	if ((atomic_read(&obj_priv->pending_flip) == 0) ||
 	    atomic_dec_and_test(&obj_priv->pending_flip))
-		DRM_WAKEUP(&dev_priv->pending_flip_queue);
+		wake_up(&dev_priv->pending_flip_queue);
 	schedule_work(&work->work);

 	trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1bcea7c85238..9b848be40572 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -239,7 +239,6 @@ do { \
 static u32
 render_ring_add_request(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
-			struct drm_file *file_priv,
 			u32 flush_domains)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -303,8 +302,8 @@ render_ring_add_request(struct drm_device *dev,
 }

 static u32
-render_ring_get_gem_seqno(struct drm_device *dev,
-			  struct intel_ring_buffer *ring)
+render_ring_get_seqno(struct drm_device *dev,
+		      struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	if (HAS_PIPE_CONTROL(dev))
@@ -390,7 +389,6 @@ static int init_bsd_ring(struct drm_device *dev,
 static u32
 bsd_ring_add_request(struct drm_device *dev,
 		     struct intel_ring_buffer *ring,
-		     struct drm_file *file_priv,
 		     u32 flush_domains)
 {
 	u32 seqno;
@@ -432,8 +430,8 @@ bsd_ring_put_user_irq(struct drm_device *dev,
 }

 static u32
-bsd_ring_get_gem_seqno(struct drm_device *dev,
-		       struct intel_ring_buffer *ring)
+bsd_ring_get_seqno(struct drm_device *dev,
+		   struct intel_ring_buffer *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -773,7 +771,7 @@ static const struct intel_ring_buffer render_ring = {
 	.get_active_head = render_ring_get_active_head,
 	.flush = render_ring_flush,
 	.add_request = render_ring_add_request,
-	.get_gem_seqno = render_ring_get_gem_seqno,
+	.get_seqno = render_ring_get_seqno,
 	.user_irq_get = render_ring_get_user_irq,
 	.user_irq_put = render_ring_put_user_irq,
 	.dispatch_gem_execbuffer = render_ring_dispatch_gem_execbuffer,
@@ -792,7 +790,7 @@ static const struct intel_ring_buffer bsd_ring = {
 	.get_active_head = bsd_ring_get_active_head,
 	.flush = bsd_ring_flush,
 	.add_request = bsd_ring_add_request,
-	.get_gem_seqno = bsd_ring_get_gem_seqno,
+	.get_seqno = bsd_ring_get_seqno,
 	.user_irq_get = bsd_ring_get_user_irq,
 	.user_irq_put = bsd_ring_put_user_irq,
 	.dispatch_gem_execbuffer = bsd_ring_dispatch_gem_execbuffer,
@@ -883,7 +881,7 @@ static const struct intel_ring_buffer gen6_bsd_ring = {
 	.get_active_head = gen6_bsd_ring_get_active_head,
 	.flush = gen6_bsd_ring_flush,
 	.add_request = bsd_ring_add_request,
-	.get_gem_seqno = bsd_ring_get_gem_seqno,
+	.get_seqno = bsd_ring_get_seqno,
 	.user_irq_get = bsd_ring_get_user_irq,
 	.user_irq_put = bsd_ring_put_user_irq,
 	.dispatch_gem_execbuffer = gen6_bsd_ring_dispatch_gem_execbuffer,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ce521010ce96..d506da1605b4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -58,10 +58,9 @@ struct intel_ring_buffer {
 			u32 flush_domains);
 	u32 (*add_request)(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
-			struct drm_file *file_priv,
 			u32 flush_domains);
-	u32 (*get_gem_seqno)(struct drm_device *dev,
-			struct intel_ring_buffer *ring);
+	u32 (*get_seqno)(struct drm_device *dev,
+			struct intel_ring_buffer *ring);
 	int (*dispatch_gem_execbuffer)(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
 			struct drm_i915_gem_execbuffer2 *exec,
-- 
2.39.5
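
For illustration, the locking pattern this patch adopts can be modelled in a
few lines of userspace C, with pthreads standing in for the kernel
primitives. Everything below is a sketch, not driver code: the names
(struct client, fake_gpu_advance, throttle) are invented for the example,
and the real 20ms "recent enough" cut-off is simplified to waiting on the
client's newest request. What it demonstrates is the point of the commit
message: request bookkeeping is touched only under the owning client's
mutex, and the sleep itself holds no lock that any other client needs.

/* Build: cc -std=c11 -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct request {
	uint32_t seqno;
	struct request *next;
};

struct client {
	pthread_mutex_t lock;		/* guards only this client's list */
	struct request *requests;	/* oldest first, like mm.request_list */
};

static atomic_uint hw_seqno;	/* last seqno the "GPU" has completed */
static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t irq_queue = PTHREAD_COND_INITIALIZER;

/* Same wrap-safe comparison as i915_seqno_passed(). */
static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

/* Stand-in for the ring interrupt: retire one seqno, wake all waiters. */
static void fake_gpu_advance(void)
{
	atomic_fetch_add(&hw_seqno, 1);
	pthread_mutex_lock(&irq_lock);
	pthread_cond_broadcast(&irq_queue);
	pthread_mutex_unlock(&irq_lock);
}

/*
 * Throttle: pick the target seqno under the client's own lock only,
 * then drop that lock before blocking. No lock shared with other
 * clients is held while sleeping, so they keep running freely.
 */
static void throttle(struct client *c)
{
	struct request *r;
	uint32_t seqno = 0;

	pthread_mutex_lock(&c->lock);
	for (r = c->requests; r != NULL; r = r->next)
		seqno = r->seqno;	/* remember the newest request */
	pthread_mutex_unlock(&c->lock);

	if (seqno == 0)
		return;			/* nothing outstanding */

	pthread_mutex_lock(&irq_lock);
	while (!seqno_passed(atomic_load(&hw_seqno), seqno))
		pthread_cond_wait(&irq_queue, &irq_lock);
	pthread_mutex_unlock(&irq_lock);
}

int main(void)
{
	struct request reqs[3] = {
		{ .seqno = 1, .next = &reqs[1] },
		{ .seqno = 2, .next = &reqs[2] },
		{ .seqno = 3, .next = NULL },
	};
	struct client c = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.requests = &reqs[0],
	};

	for (int i = 0; i < 3; i++)
		fake_gpu_advance();	/* the "GPU" retires all three */

	throttle(&c);			/* returns at once: seqno 3 passed */
	printf("throttled: hw_seqno=%u\n", atomic_load(&hw_seqno));
	return 0;
}

In the driver the sleep is wait_event_interruptible() bracketed by
ring->user_irq_get()/user_irq_put() plus an mm.wedged check; the condition
variable above collapses that machinery but preserves the property that
matters here: a throttling client never holds a lock that stalls anyone
else.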