Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 551ecf09c8dd820be865ebbbc22fa6b5f608dd98..9a53c929f017d16527270bc2244352edf1d34cd8 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         The default value of this parameter is determined by
                         the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
  
+       workqueue.debug_force_rr_cpu
+                       Workqueue used to implicitly guarantee that work
+                       items queued without explicit CPU specified are put
+                       on the local CPU.  This guarantee is no longer true
+                       and while local CPU is still preferred work items
+                       may be put on foreign CPUs.  This debug option
+                       forces round-robin CPU selection to flush out
+                       usages which depend on the now broken guarantee.
+                       When enabled, memory and cache locality will be
+                       impacted.
+
         x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                         default x2apic cluster mode on platforms
                         supporting x2apic.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h

index 0e32bc71245ef46b90aa21112e4d2bef42cc5950..ca73c503b92a758ad5ce9b6d022c53c0758951c5 100644 (file)
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
  
         __WQ_DRAINING           = 1 << 16, /* internal: workqueue is draining */
         __WQ_ORDERED            = 1 << 17, /* internal: workqueue is ordered */
+       __WQ_LEGACY             = 1 << 18, /* internal: create*_workqueue() */
  
         WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
         WQ_MAX_UNBOUND_PER_CPU  = 4,      /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
         alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
  
  #define create_workqueue(name)                                         \
-       alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
  #define create_freezable_workqueue(name)                               \
-       alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-                       1, (name))
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+                       WQ_MEM_RECLAIM, 1, (name))
  #define create_singlethread_workqueue(name)                            \
-       alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+       alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
  
  extern void destroy_workqueue(struct workqueue_struct *wq);
  
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 61a0264e28f9b5917c0e8a60bb9668b21e59c4b2..7ff5dc7d2ac5f47395bc3be8484c642c5d577b6b 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);    /* protects wq->maydays list */
  static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
  static bool workqueue_freezing;                /* PL: have wqs started freezing? */
  
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
  
  /* the per-cpu worker pools */
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                   int node)
  {
         assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+       /*
+        * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+        * delayed item is pending.  The plan is to keep CPU -> NODE
+        * mapping valid and stable across CPU on/offlines.  Once that
+        * happens, this workaround can be removed.
+        */
+       if (unlikely(node == NUMA_NO_NODE))
+               return wq->dfl_pwq;
+
         return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
  }
  
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
         return worker && worker->current_pwq->wq == wq;
  }
  
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+       static bool printed_dbg_warning;
+       int new_cpu;
+
+       if (likely(!wq_debug_force_rr_cpu)) {
+               if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+                       return cpu;
+       } else if (!printed_dbg_warning) {
+               pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+               printed_dbg_warning = true;
+       }
+
+       if (cpumask_empty(wq_unbound_cpumask))
+               return cpu;
+
+       new_cpu = __this_cpu_read(wq_rr_cpu_last);
+       new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+       if (unlikely(new_cpu >= nr_cpu_ids)) {
+               new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+               if (unlikely(new_cpu >= nr_cpu_ids))
+                       return cpu;
+       }
+       __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+       return new_cpu;
+}
+
  static void __queue_work(int cpu, struct workqueue_struct *wq,
                          struct work_struct *work)
  {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
                 return;
  retry:
         if (req_cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
+               cpu = wq_select_unbound_cpu(raw_smp_processor_id());
  
         /* pwq which will be used unless @work is executing elsewhere */
         if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
         timer_stats_timer_set_start_info(&dwork->timer);
  
         dwork->wq = wq;
-       /* timer isn't guaranteed to run in this cpu, record earlier */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
         dwork->cpu = cpu;
         timer->expires = jiffies + delay;
  
-       add_timer_on(timer, cpu);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
  }
  
  /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
         WARN_ONCE(current->flags & PF_MEMALLOC,
                   "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
                   current->pid, current->comm, target_wq->name, target_func);
-       WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+       WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+                             (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                   "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
                   worker->current_pwq->wq->name, worker->current_func,
                   target_wq->name, target_func);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index ecb9e75614bf87f1e368074d6ef84bfe3f1003b8..8bfd1aca7a3d01a9887700b3f90d1d5697a9dab4 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
  
  endmenu # "RCU Debugging"
  
+config DEBUG_WQ_FORCE_RR_CPU
+       bool "Force round-robin CPU selection for unbound work items"
+       depends on DEBUG_KERNEL
+       default n
+       help
+         Workqueue used to implicitly guarantee that work items queued
+         without explicit CPU specified are put on the local CPU.  This
+         guarantee is no longer true and while local CPU is still
+         preferred work items may be put on foreign CPUs.  Kernel
+         parameter "workqueue.debug_force_rr_cpu" is added to force
+         round-robin CPU selection to flush out usages which depend on the
+         now broken guarantee.  This config option enables the debug
+         feature by default.  When enabled, memory and cache locality will
+         be impacted.
+
  config DEBUG_BLOCK_EXT_DEVT
          bool "Force extended block device numbers and spread them"
         depends on DEBUG_KERNEL
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 10 Feb 2016 19:04:05 +0000 (11:04 -0800)
Documentation/kernel-parameters.txt		patch \| blob \| history
include/linux/workqueue.h		patch \| blob \| history
kernel/workqueue.c		patch \| blob \| history
lib/Kconfig.debug		patch \| blob \| history