workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to...
author    Tejun Heo <tj@kernel.org>
          Mon, 1 Apr 2013 18:23:38 +0000 (11:23 -0700)
committer Tejun Heo <tj@kernel.org>
          Mon, 1 Apr 2013 18:23:38 +0000 (11:23 -0700)
Unbound workqueues are now NUMA aware.  Let's add some control knobs
and update sysfs interface accordingly.

* Add kernel param workqueue.disable_numa which disables NUMA affinity
  globally.

* Replace sysfs file "pool_id" with "pool_ids" which contain
  node:pool_id pairs.  This change is userland-visible but "pool_id"
  hasn't seen a release yet, so this is okay.

* Add a new sysfs file "numa" which can toggle NUMA affinity on
  individual workqueues.  This is implemented as attrs->no_numa, which
  is special in that it isn't part of a pool's attributes.  It only
  affects how apply_workqueue_attrs() picks which pools to use (see the
  sketch at the end of this description).
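
For illustration only (none of the following appears in the patch itself):
the global knob is set at boot time, e.g. workqueue.disable_numa=1 on the
kernel command line; on a hypothetical two-node machine the new "pool_ids"
file might read "0:4 1:6", one node:pool_id pair per NUMA node; and for a
workqueue registered with sysfs, writing 0 or 1 to
/sys/bus/workqueue/devices/<name>/numa disables or re-enables its NUMA
affinity.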

After "pool_ids" change, first_pwq() doesn't have any user left.
Removed.
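
The per-workqueue knob is also reachable from kernel code through the
attrs interface this patch extends.  Below is a minimal sketch, not part
of the patch: the helper name is made up, and a real caller would normally
start from the workqueue's current attributes (the way wq_sysfs_prep_attrs()
does) instead of the defaults that alloc_workqueue_attrs() provides.

#include <linux/workqueue.h>

/*
 * Illustrative sketch only: disable NUMA affinity for one unbound
 * workqueue from kernel code, the same way the new "numa" sysfs file
 * does it.  Applies default nice/cpumask values alongside no_numa.
 */
static int example_disable_wq_numa(struct workqueue_struct *wq)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	attrs->no_numa = true;	/* only affects which pools get picked */
	ret = apply_workqueue_attrs(wq, attrs);

	free_workqueue_attrs(attrs);
	return ret;
}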

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Documentation/kernel-parameters.txt
include/linux/workqueue.h
kernel/workqueue.c

index 4609e81dbc37fc2dbfa005ff607890df3a8bbc6b..c75ea0b8ec59c999123e391ec0094f52de40d12a 100644 (file)
@@ -3222,6 +3222,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        or other driver-specific files in the
                        Documentation/watchdog/ directory.
 
+       workqueue.disable_numa
+                       By default, all work items queued to unbound
+                       workqueues are affine to the NUMA nodes they're
+                       issued on, which results in better behavior in
+                       general.  If NUMA affinity needs to be disabled for
+                       whatever reason, this option can be used.  Note
+                       that this also can be controlled per-workqueue for
+                       workqueues visible under /sys/bus/workqueue/.
+
        x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                        default x2apic cluster mode on platforms
                        supporting x2apic.
index 835d12b769601ad87360b723b754c136978643a0..7179756393781e3346d5a6ee7445403a63590f3f 100644 (file)
@@ -119,10 +119,15 @@ struct delayed_work {
 /*
  * A struct for workqueue attributes.  This can be used to change
  * attributes of an unbound workqueue.
+ *
+ * Unlike other fields, ->no_numa isn't a property of a worker_pool.  It
+ * only modifies how apply_workqueue_attrs() select pools and thus doesn't
+ * participate in pool hash calculations or equality comparisons.
  */
 struct workqueue_attrs {
        int                     nice;           /* nice level */
        cpumask_var_t           cpumask;        /* allowed CPUs */
+       bool                    no_numa;        /* disable NUMA affinity */
 };
 
 static inline struct delayed_work *to_delayed_work(struct work_struct *work)
index 57cd77de4a4fdd8ead040bffab82f2e5614ecbb8..729ac6a448605feb2e981371db5fda5ece3b9d82 100644 (file)
@@ -268,6 +268,9 @@ static int wq_numa_tbl_len;         /* highest possible NUMA node id + 1 */
 static cpumask_var_t *wq_numa_possible_cpumask;
                                        /* possible CPUs of each node */
 
+static bool wq_disable_numa;
+module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+
 static bool wq_numa_enabled;           /* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -516,21 +519,6 @@ static int worker_pool_assign_id(struct worker_pool *pool)
        return ret;
 }
 
-/**
- * first_pwq - return the first pool_workqueue of the specified workqueue
- * @wq: the target workqueue
- *
- * This must be called either with wq->mutex held or sched RCU read locked.
- * If the pwq needs to be used beyond the locking in effect, the caller is
- * responsible for guaranteeing that the pwq stays online.
- */
-static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
-{
-       assert_rcu_or_wq_mutex(wq);
-       return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue,
-                                     pwqs_node);
-}
-
 /**
  * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
  * @wq: the target workqueue
@@ -3114,16 +3102,21 @@ static struct device_attribute wq_sysfs_attrs[] = {
        __ATTR_NULL,
 };
 
-static ssize_t wq_pool_id_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t wq_pool_ids_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
 {
        struct workqueue_struct *wq = dev_to_wq(dev);
-       struct worker_pool *pool;
-       int written;
+       const char *delim = "";
+       int node, written = 0;
 
        rcu_read_lock_sched();
-       pool = first_pwq(wq)->pool;
-       written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id);
+       for_each_node(node) {
+               written += scnprintf(buf + written, PAGE_SIZE - written,
+                                    "%s%d:%d", delim, node,
+                                    unbound_pwq_by_node(wq, node)->pool->id);
+               delim = " ";
+       }
+       written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
        rcu_read_unlock_sched();
 
        return written;
@@ -3212,10 +3205,46 @@ static ssize_t wq_cpumask_store(struct device *dev,
        return ret ?: count;
 }
 
+static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct workqueue_struct *wq = dev_to_wq(dev);
+       int written;
+
+       mutex_lock(&wq->mutex);
+       written = scnprintf(buf, PAGE_SIZE, "%d\n",
+                           !wq->unbound_attrs->no_numa);
+       mutex_unlock(&wq->mutex);
+
+       return written;
+}
+
+static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+       struct workqueue_struct *wq = dev_to_wq(dev);
+       struct workqueue_attrs *attrs;
+       int v, ret;
+
+       attrs = wq_sysfs_prep_attrs(wq);
+       if (!attrs)
+               return -ENOMEM;
+
+       ret = -EINVAL;
+       if (sscanf(buf, "%d", &v) == 1) {
+               attrs->no_numa = !v;
+               ret = apply_workqueue_attrs(wq, attrs);
+       }
+
+       free_workqueue_attrs(attrs);
+       return ret ?: count;
+}
+
 static struct device_attribute wq_sysfs_unbound_attrs[] = {
-       __ATTR(pool_id, 0444, wq_pool_id_show, NULL),
+       __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
        __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
        __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+       __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
        __ATTR_NULL,
 };
 
@@ -3750,7 +3779,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
 static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
                                 int cpu_going_down, cpumask_t *cpumask)
 {
-       if (!wq_numa_enabled)
+       if (!wq_numa_enabled || attrs->no_numa)
                goto use_dfl;
 
        /* does @node have any online CPUs @attrs wants? */
@@ -3951,6 +3980,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
        cpumask = target_attrs->cpumask;
 
        mutex_lock(&wq->mutex);
+       if (wq->unbound_attrs->no_numa)
+               goto out_unlock;
 
        copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
        pwq = unbound_pwq_by_node(wq, node);
@@ -4763,6 +4794,11 @@ static void __init wq_numa_init(void)
        if (num_possible_nodes() <= 1)
                return;
 
+       if (wq_disable_numa) {
+               pr_info("workqueue: NUMA affinity support disabled\n");
+               return;
+       }
+
        wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
        BUG_ON(!wq_update_unbound_numa_attrs_buf);