/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
};
static struct kmem_cache *pwq_cache;
pwqs_node);
}
+/**
+ * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
+ * @wq: the target workqueue
+ * @node: the node ID
+ *
+ * Call with wq->mutex held or sched RCU read locked, as asserted below.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
+ */
+static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
+ int node)
+{
+ assert_rcu_or_wq_mutex(wq);
+ return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
+}
+
static unsigned int work_color_to_flags(int color)
{
return color << WORK_STRUCT_COLOR_SHIFT;
WARN_ON_ONCE(!is_chained_work(wq)))
return;
retry:
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
+
/* pwq which will be used unless @work is executing elsewhere */
- if (!(wq->flags & WQ_UNBOUND)) {
- if (cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
+ if (!(wq->flags & WQ_UNBOUND))
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
- } else {
- pwq = first_pwq(wq);
- }
+ else
+ pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
/*
* If @work was previously on a different pool, it might still be
* pwq is determined and locked. For unbound pools, we could have
* raced with pwq release and it could already be dead. If its
* refcnt is zero, repeat pwq selection. Note that pwqs never die
- * without another pwq replacing it as the first pwq or while a
- * work item is executing on it, so the retying is guaranteed to
+ * without another pwq replacing it in the numa_pwq_tbl or while
+ * work items are executing on it, so the retrying is guaranteed to
* make forward-progress.
*/
if (unlikely(!pwq->refcnt)) {
struct worker_pool *pool,
struct pool_workqueue **p_last_pwq)
{
+ int node;
+
BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
pwq->pool = pool;
/* link in @pwq */
list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
- if (wq->flags & WQ_UNBOUND)
+ if (wq->flags & WQ_UNBOUND) {
copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
+ for_each_node(node)
+ rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
+ }
mutex_unlock(&wq->mutex);
}
struct lock_class_key *key,
const char *lock_name, ...)
{
+ size_t tbl_size = 0;
va_list args;
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
/* allocate wq and format name */
- wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (flags & WQ_UNBOUND)
+ tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+
+ wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
if (!wq)
return NULL;
if (!(wq->flags & WQ_UNBOUND))
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
else
- pwq = first_pwq(wq);
+ pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
ret = !list_empty(&pwq->delayed_works);
rcu_read_unlock_sched();