From f6168e330438a264123d2e0b502526f06594bb51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:55 +0100 Subject: [PATCH] drm/i915: Convert breadcrumbs spinlock to be irqsafe The breadcrumbs are about to be used from within IRQ context sections (e.g. nouveau signals a fence from an interrupt handler causing us to submit a new request) and/or from bottom-half tasklets (i.e. intel_lrc_irq_handler), therefore we need to employ the irqsafe spinlock variants. For example, deferring the request submission to the intel_lrc_irq_handler generates this trace: [ 66.388639] ================================= [ 66.388650] [ INFO: inconsistent lock state ] [ 66.388663] 4.9.0-rc2+ #56 Not tainted [ 66.388672] --------------------------------- [ 66.388682] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 66.388695] swapper/1/0 [HC0[0]:SC1[1]:HE0:SE0] takes: [ 66.388706] (&(&b->lock)->rlock){+.?...} , at: [] intel_engine_enable_signaling+0x78/0x150 [ 66.388761] {SOFTIRQ-ON-W} state was registered at: [ 66.388772] [ 66.388783] [] __lock_acquire+0x682/0x1870 [ 66.388795] [ 66.388803] [] lock_acquire+0x6c/0xb0 [ 66.388814] [ 66.388824] [] _raw_spin_lock+0x2a/0x40 [ 66.388835] [ 66.388845] [] intel_engine_reset_breadcrumbs+0x21/0xb0 [ 66.388857] [ 66.388866] [] gen8_init_common_ring+0x67/0x100 [ 66.388878] [ 66.388887] [] gen8_init_render_ring+0x12/0x60 [ 66.388903] [ 66.388912] [] i915_gem_init_hw+0xf7/0x2a0 [ 66.388927] [ 66.388936] [] i915_gem_init+0xbb/0xf0 [ 66.388950] [ 66.388959] [] i915_driver_load+0x7e0/0x1330 [ 66.388978] [ 66.388988] [] i915_pci_probe+0x28/0x40 [ 66.389003] [ 66.389013] [] pci_device_probe+0x8b/0xf0 [ 66.389028] [ 66.389037] [] driver_probe_device+0x21e/0x430 [ 66.389056] [ 66.389065] [] __driver_attach+0xde/0xe0 [ 66.389080] [ 66.389090] [] bus_for_each_dev+0x5d/0x90 [ 66.389105] [ 66.389113] [] driver_attach+0x19/0x20 [ 66.389134] [ 66.389144] [] bus_add_driver+0x15d/0x260 [ 66.389159] [ 66.389168] [] driver_register+0x5b/0xd0 [ 66.389183] [ 66.389281] [] __pci_register_driver+0x5b/0x60 [ 66.389301] [ 66.389312] [] i915_init+0x3e/0x45 [ 66.389326] [ 66.389336] [] do_one_initcall+0x8b/0x118 [ 66.389350] [ 66.389359] [] kernel_init_freeable+0x1b3/0x23b [ 66.389378] [ 66.389387] [] kernel_init+0x9/0x100 [ 66.389402] [ 66.389411] [] ret_from_fork+0x27/0x40 [ 66.389426] irq event stamp: 315865 [ 66.389438] hardirqs last enabled at (315864): [] _raw_spin_unlock_irqrestore+0x31/0x50 [ 66.389469] hardirqs last disabled at (315865): [] _raw_spin_lock_irqsave+0x13/0x50 [ 66.389499] softirqs last enabled at (315818): [] _local_bh_enable+0x1c/0x50 [ 66.389530] softirqs last disabled at (315819): [] irq_exit+0xbe/0xd0 [ 66.389559] [ 66.389559] other info that might help us debug this: [ 66.389580] Possible unsafe locking scenario: [ 66.389580] [ 66.389598] CPU0 [ 66.389609] ---- [ 66.389620] lock(&(&b->lock)->rlock); [ 66.389650] [ 66.389661] lock(&(&b->lock)->rlock); [ 66.389690] [ 66.389690] *** DEADLOCK *** [ 66.389690] [ 66.389715] 2 locks held by swapper/1/0: [ 66.389728] #0: (&(&tl->lock)->rlock){..-...}, at: [] intel_lrc_irq_handler+0x201/0x3c0 [ 66.389785] #1: (&(&req->lock)->rlock/1){..-...}, at: [] __i915_gem_request_submit+0x8f/0x170 [ 66.389854] [ 66.389854] stack backtrace: [ 66.389959] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.9.0-rc2+ #56 [ 66.389976] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 66.389999] ffff88027fd03c58 ffffffff812beae5 ffff88027696e680 ffffffff822afe20 [ 66.390036] ffff88027fd03ca8 ffffffff810bb420 0000000000000001 0000000000000000 [ 66.390070] 0000000000000000 0000000000000006 0000000000000004 ffff88027696ee10 [ 66.390104] Call Trace: [ 66.390117] [ 66.390128] [] dump_stack+0x68/0x93 [ 66.390147] [] print_usage_bug+0x1d0/0x1e0 [ 66.390164] [] mark_lock+0x470/0x4f0 [ 66.390181] [] ? print_shortest_lock_dependencies+0x1b0/0x1b0 [ 66.390203] [] __lock_acquire+0x59d/0x1870 [ 66.390221] [] lock_acquire+0x6c/0xb0 [ 66.390237] [] ? lock_acquire+0x6c/0xb0 [ 66.390255] [] ? intel_engine_enable_signaling+0x78/0x150 [ 66.390273] [] _raw_spin_lock+0x2a/0x40 [ 66.390291] [] ? intel_engine_enable_signaling+0x78/0x150 [ 66.390309] [] intel_engine_enable_signaling+0x78/0x150 [ 66.390327] [] __i915_gem_request_submit+0x150/0x170 [ 66.390345] [] intel_lrc_irq_handler+0x28b/0x3c0 [ 66.390363] [] tasklet_action+0x57/0xc0 [ 66.390380] [] __do_softirq+0x119/0x240 [ 66.390396] [] irq_exit+0xbe/0xd0 [ 66.390414] [] do_IRQ+0x65/0x110 [ 66.390431] [] common_interrupt+0x86/0x86 [ 66.390446] [ 66.390457] [] ? cpuidle_enter_state+0x151/0x200 [ 66.390480] [] cpuidle_enter+0x12/0x20 [ 66.390498] [] call_cpuidle+0x1e/0x40 [ 66.390516] [] cpu_startup_entry+0x10e/0x1f0 [ 66.390534] [] start_secondary+0x103/0x130 (This is split out of the defer global seqno allocation patch due to realisation that we need a more complete conversion if we want to defer request submission even further.) v2: lockdep was warning about mixed SOFTIRQ contexts not HARDIRQ contexts so we only need to use spin_lock_bh and not disable interrupts. v3: We need full irq protection as we may be called from a third party interrupt handler (via fences). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-32-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 12 ++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +++--- drivers/gpu/drm/i915/intel_breadcrumbs.c | 35 +++++++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f8604a09c101..1723a1f5b20e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -683,14 +683,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -1355,14 +1355,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings))); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3265,14 +3265,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); seq_puts(m, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ef3698120d92..7ba40487e345 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1043,7 +1043,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock(&b->lock)) { + if (!spin_trylock_irq(&b->lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1051,7 +1051,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); waiter = NULL; if (count) @@ -1061,7 +1061,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock(&b->lock)) { + if (!spin_trylock_irq(&b->lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1079,7 +1079,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static void error_record_engine_registers(struct drm_i915_error_state *error, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 0d5def0d2dfe..c410d3d6465f 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -83,16 +83,18 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; - spin_lock_irq(&engine->i915->irq_lock); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); engine->irq_enable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { - spin_lock_irq(&engine->i915->irq_lock); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock(&engine->i915->irq_lock); engine->breadcrumbs.irq_posted = false; } @@ -293,9 +295,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock(&b->lock); + spin_lock_irq(&b->lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); return first; } @@ -326,7 +328,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, if (RB_EMPTY_NODE(&wait->node)) return; - spin_lock(&b->lock); + spin_lock_irq(&b->lock); if (RB_EMPTY_NODE(&wait->node)) goto out_unlock; @@ -400,7 +402,7 @@ out_unlock: GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static bool signal_complete(struct drm_i915_gem_request *request) @@ -473,14 +475,14 @@ static int intel_breadcrumbs_signaler(void *arg) * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock(&b->lock); + spin_lock_irq(&b->lock); if (request == b->first_signal) { struct rb_node *rb = rb_next(&request->signaling.node); b->first_signal = rb ? to_signaler(rb) : NULL; } rb_erase(&request->signaling.node, &b->signals); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); i915_gem_request_put(request); } else { @@ -502,7 +504,14 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - /* locked by dma_fence_enable_sw_signaling() */ + /* Note that we may be called from an interrupt handler on another + * device (e.g. nouveau signaling a fence completion causing us + * to submit a request, and so enable signaling). As such, + * we need to make sure that all other users of b->lock protect + * against interrupts, i.e. use spin_lock_irqsave. + */ + + /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ assert_spin_locked(&request->lock); if (!request->global_seqno) return; @@ -594,7 +603,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); __intel_breadcrumbs_disable_irq(b); if (intel_engine_has_waiter(engine)) { @@ -607,7 +616,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) irq_disable(engine); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 891629caab6c..d16c74ae8f54 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -207,7 +207,7 @@ struct intel_engine_cs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ bool irq_posted; - spinlock_t lock; /* protects the lists of requests */ + spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ -- 2.39.5