workqueue: use irqsafe timer for delayed_work
index 0f50f4078e367a177675d7fc921a44f33e0aece4..b394df8beaee6ea6288c351fbabb806208a62cb7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -183,7 +183,8 @@ struct global_cwq {
        struct hlist_head       busy_hash[BUSY_WORKER_HASH_SIZE];
                                                /* L: hash of busy workers */
 
-       struct worker_pool      pools[2];       /* normal and highpri pools */
+       struct worker_pool      pools[NR_WORKER_POOLS];
+                                               /* normal and highpri pools */
 
        wait_queue_head_t       rebind_hold;    /* rebind hold wait */
 } ____cacheline_aligned_in_smp;
@@ -268,17 +269,15 @@ struct workqueue_struct {
 };
 
 struct workqueue_struct *system_wq __read_mostly;
-struct workqueue_struct *system_long_wq __read_mostly;
-struct workqueue_struct *system_nrt_wq __read_mostly;
-struct workqueue_struct *system_unbound_wq __read_mostly;
-struct workqueue_struct *system_freezable_wq __read_mostly;
-struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
+struct workqueue_struct *system_highpri_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_highpri_wq);
+struct workqueue_struct *system_long_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_long_wq);
-EXPORT_SYMBOL_GPL(system_nrt_wq);
+struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
-EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -533,19 +532,24 @@ static int work_next_color(int color)
 }
 
 /*
- * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
- * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
- * cleared and the work data contains the cpu number it was last on.
+ * While queued, %WORK_STRUCT_CWQ is set and the non-flag bits of a work's
+ * data contain the pointer to the queued cwq.  Once execution starts, the
+ * flag is cleared and the high bits contain the OFFQ flags and CPU number.
  *
- * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
- * can be used to set the cwq, cpu or clear work->data.  These functions
- * should only be called while the work is owned - ie. while the PENDING
- * bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, cpu or clear
+ * work->data.  These functions should only be called while the work is
+ * owned - i.e. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq
- * corresponding to a work.  gcwq is available once the work has been
- * queued anywhere after initialization.  cwq is available only from
- * queueing until execution starts.
+ * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
+ * a work.  gcwq is available once the work has been queued anywhere after
+ * initialization until it is sync canceled.  cwq is available only while
+ * the work item is queued.
+ *
+ * %WORK_OFFQ_CANCELING is used to mark a work item which is being
+ * canceled.  While being canceled, a work item may have its PENDING bit
+ * set but stay off the timer and worklist for an arbitrarily long time,
+ * and nobody should try to steal the PENDING bit.
  */
 static inline void set_work_data(struct work_struct *work, unsigned long data,
                                 unsigned long flags)
@@ -565,11 +569,19 @@ static void set_work_cwq(struct work_struct *work,
 static void set_work_cpu_and_clear_pending(struct work_struct *work,
                                           unsigned int cpu)
 {
-       set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
+       /*
+        * The following wmb is paired with the implied mb in
+        * test_and_set_bit(PENDING) and ensures all updates to @work made
+        * here are visible to and precede any updates by the next PENDING
+        * owner.
+        */
+       smp_wmb();
+       set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
 {
+       smp_wmb();      /* see set_work_cpu_and_clear_pending() */
        set_work_data(work, WORK_STRUCT_NO_CPU, 0);
 }
 
@@ -592,7 +604,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
                return ((struct cpu_workqueue_struct *)
                        (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 
-       cpu = data >> WORK_STRUCT_FLAG_BITS;
+       cpu = data >> WORK_OFFQ_CPU_SHIFT;
        if (cpu == WORK_CPU_NONE)
                return NULL;
 
@@ -600,6 +612,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
        return get_gcwq(cpu);
 }
 
+static void mark_work_canceling(struct work_struct *work)
+{
+       struct global_cwq *gcwq = get_work_gcwq(work);
+       unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+
+       set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
+                     WORK_STRUCT_PENDING);
+}
+
+static bool work_is_canceling(struct work_struct *work)
+{
+       unsigned long data = atomic_long_read(&work->data);
+
+       return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+}
+
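
To keep the new encoding in one place: while a work item is off queue, its data word carries the last CPU in the high bits plus OFFQ flag bits, as the helpers above show.  A small decoding sketch (editorial, not part of the patch; it only restates get_work_gcwq() and work_is_canceling() using the same constants):

static bool offq_decode(unsigned long data, unsigned int *cpu, bool *canceling)
{
	/* still queued: the non-flag bits hold a cwq pointer, not a CPU */
	if (data & WORK_STRUCT_CWQ)
		return false;

	*cpu = data >> WORK_OFFQ_CPU_SHIFT;	/* last CPU, or WORK_CPU_NONE */
	*canceling = data & WORK_OFFQ_CANCELING;
	return true;
}
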
 /*
  * Policy functions.  These define the policies on how the global worker
  * pools are managed.  Unless noted otherwise, these functions assume that
@@ -1004,15 +1032,54 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
                complete(&cwq->wq->first_flusher->done);
 }
 
-/*
- * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
- * so this work can't be re-armed in any way.
+/**
+ * try_to_grab_pending - steal work item from worklist and disable irq
+ * @work: work item to steal
+ * @is_dwork: @work is a delayed_work
+ * @flags: place to store irq state
+ *
+ * Try to grab PENDING bit of @work.  This function can handle @work in any
+ * stable state - idle, on timer or on worklist.  Return values are
+ *
+ *  1          if @work was pending and we successfully stole PENDING
+ *  0          if @work was idle and we claimed PENDING
+ *  -EAGAIN    if PENDING couldn't be grabbed at the moment, safe to busy-retry
+ *  -ENOENT    if someone else is canceling @work; this state may persist
+ *             for an arbitrarily long time
+ *
+ * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
+ * interrupted while holding PENDING and @work off queue, irq must be
+ * disabled on entry.  This, combined with delayed_work->timer being
+ * irqsafe, ensures that we return -EAGAIN only for a finite, short time.
+ *
+ * On a successful (>= 0) return, irq remains disabled and the caller is
+ * responsible for releasing it using local_irq_restore(*@flags).
+ *
+ * This function is safe to call from any context including IRQ handler.
  */
-static int try_to_grab_pending(struct work_struct *work)
+static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
+                              unsigned long *flags)
 {
        struct global_cwq *gcwq;
-       int ret = -1;
 
+       WARN_ON_ONCE(in_irq());
+
+       local_irq_save(*flags);
+
+       /* try to steal the timer if it exists */
+       if (is_dwork) {
+               struct delayed_work *dwork = to_delayed_work(work);
+
+               /*
+                * dwork->timer is irqsafe.  If del_timer() fails, it's
+                * guaranteed that the timer is not queued anywhere and not
+                * running on the local CPU.
+                */
+               if (likely(del_timer(&dwork->timer)))
+                       return 1;
+       }
+
+       /* try to claim PENDING the normal way */
        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
                return 0;
 
@@ -1022,9 +1089,9 @@ static int try_to_grab_pending(struct work_struct *work)
         */
        gcwq = get_work_gcwq(work);
        if (!gcwq)
-               return ret;
+               goto fail;
 
-       spin_lock_irq(&gcwq->lock);
+       spin_lock(&gcwq->lock);
        if (!list_empty(&work->entry)) {
                /*
                 * This work is queued, but perhaps we locked the wrong gcwq.
@@ -1038,12 +1105,18 @@ static int try_to_grab_pending(struct work_struct *work)
                        cwq_dec_nr_in_flight(get_work_cwq(work),
                                get_work_color(work),
                                *work_data_bits(work) & WORK_STRUCT_DELAYED);
-                       ret = 1;
+
+                       spin_unlock(&gcwq->lock);
+                       return 1;
                }
        }
-       spin_unlock_irq(&gcwq->lock);
-
-       return ret;
+       spin_unlock(&gcwq->lock);
+fail:
+       local_irq_restore(*flags);
+       if (work_is_canceling(work))
+               return -ENOENT;
+       cpu_relax();
+       return -EAGAIN;
 }
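
The intended caller pattern, sketched for orientation (this restates the retry loops added to __cancel_work_timer() and mod_delayed_work_on() later in this patch; it is not additional kernel code):

	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
		/* -ENOENT: someone else is canceling; wait it out, don't spin */
		if (unlikely(ret == -ENOENT))
			flush_work(work);
	} while (unlikely(ret < 0));

	/* PENDING is now ours and irqs are off: requeue or mark canceling */
	local_irq_restore(flags);
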
 
 /**
@@ -1126,6 +1199,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
        struct cpu_workqueue_struct *cwq;
        struct list_head *worklist;
        unsigned int work_flags;
+       unsigned int req_cpu = cpu;
 
        /*
         * While a work item is PENDING && off queue, a task trying to
@@ -1150,14 +1224,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                        cpu = raw_smp_processor_id();
 
                /*
-                * It's multi cpu.  If @wq is non-reentrant and @work
-                * was previously on a different cpu, it might still
-                * be running there, in which case the work needs to
-                * be queued on that cpu to guarantee non-reentrance.
+                * It's multi cpu.  If @work was previously on a different
+                * cpu, it might still be running there, in which case the
+                * work needs to be queued on that cpu to guarantee
+                * non-reentrancy.
                 */
                gcwq = get_gcwq(cpu);
-               if (wq->flags & WQ_NON_REENTRANT &&
-                   (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
+               last_gcwq = get_work_gcwq(work);
+
+               if (last_gcwq && last_gcwq != gcwq) {
                        struct worker *worker;
 
                        spin_lock(&last_gcwq->lock);
@@ -1181,7 +1256,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
        /* gcwq determined, get cwq and queue */
        cwq = get_cwq(gcwq->cpu, wq);
-       trace_workqueue_queue_work(cpu, cwq, work);
+       trace_workqueue_queue_work(req_cpu, cwq, work);
 
        if (WARN_ON(!list_empty(&work->entry))) {
                spin_unlock(&gcwq->lock);
@@ -1255,12 +1330,58 @@ void delayed_work_timer_fn(unsigned long __data)
        struct delayed_work *dwork = (struct delayed_work *)__data;
        struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
-       local_irq_disable();
-       __queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work);
-       local_irq_enable();
+       /* should have been called from irqsafe timer with irq already off */
+       __queue_work(dwork->cpu, cwq->wq, &dwork->work);
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
+static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
+                               struct delayed_work *dwork, unsigned long delay)
+{
+       struct timer_list *timer = &dwork->timer;
+       struct work_struct *work = &dwork->work;
+       unsigned int lcpu;
+
+       WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
+                    timer->data != (unsigned long)dwork);
+       BUG_ON(timer_pending(timer));
+       BUG_ON(!list_empty(&work->entry));
+
+       timer_stats_timer_set_start_info(&dwork->timer);
+
+       /*
+        * This stores cwq for the moment, for the timer_fn.  Note that the
+        * work's gcwq is preserved to allow reentrance detection for
+        * delayed works.
+        */
+       if (!(wq->flags & WQ_UNBOUND)) {
+               struct global_cwq *gcwq = get_work_gcwq(work);
+
+               /*
+                * If we cannot get the last gcwq from @work directly,
+                * select the last CPU such that it avoids unnecessarily
+                * triggering non-reentrancy check in __queue_work().
+                */
+               lcpu = cpu;
+               if (gcwq)
+                       lcpu = gcwq->cpu;
+               if (lcpu == WORK_CPU_UNBOUND)
+                       lcpu = raw_smp_processor_id();
+       } else {
+               lcpu = WORK_CPU_UNBOUND;
+       }
+
+       set_work_cwq(work, get_cwq(lcpu, wq), 0);
+
+       dwork->cpu = cpu;
+       timer->expires = jiffies + delay;
+
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
+}
+
 /**
  * queue_delayed_work_on - queue work on specific CPU after delay
  * @cpu: CPU number to execute work on
@@ -1275,7 +1396,6 @@ EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                           struct delayed_work *dwork, unsigned long delay)
 {
-       struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
        bool ret = false;
        unsigned long flags;
@@ -1287,38 +1407,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
        local_irq_save(flags);
 
        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-               unsigned int lcpu;
-
-               WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
-                            timer->data != (unsigned long)dwork);
-               BUG_ON(timer_pending(timer));
-               BUG_ON(!list_empty(&work->entry));
-
-               timer_stats_timer_set_start_info(&dwork->timer);
-
-               /*
-                * This stores cwq for the moment, for the timer_fn.
-                * Note that the work's gcwq is preserved to allow
-                * reentrance detection for delayed works.
-                */
-               if (!(wq->flags & WQ_UNBOUND)) {
-                       struct global_cwq *gcwq = get_work_gcwq(work);
-
-                       if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
-                               lcpu = gcwq->cpu;
-                       else
-                               lcpu = raw_smp_processor_id();
-               } else
-                       lcpu = WORK_CPU_UNBOUND;
-
-               set_work_cwq(work, get_cwq(lcpu, wq), 0);
-
-               timer->expires = jiffies + delay;
-
-               if (unlikely(cpu != WORK_CPU_UNBOUND))
-                       add_timer_on(timer, cpu);
-               else
-                       add_timer(timer);
+               __queue_delayed_work(cpu, wq, dwork, delay);
                ret = true;
        }
 
@@ -1342,6 +1431,59 @@ bool queue_delayed_work(struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
 
+/**
+ * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
+ * modify @dwork's timer so that it expires after @delay.  If @delay is
+ * zero, @work is guaranteed to be scheduled immediately regardless of its
+ * current state.
+ *
+ * Returns %false if @dwork was idle and queued, %true if @dwork was
+ * pending and its timer was modified.
+ *
+ * This function is safe to call from any context including IRQ handler.
+ * See try_to_grab_pending() for details.
+ */
+bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
+                        struct delayed_work *dwork, unsigned long delay)
+{
+       unsigned long flags;
+       int ret;
+
+       do {
+               ret = try_to_grab_pending(&dwork->work, true, &flags);
+       } while (unlikely(ret == -EAGAIN));
+
+       if (likely(ret >= 0)) {
+               __queue_delayed_work(cpu, wq, dwork, delay);
+               local_irq_restore(flags);
+       }
+
+       /* -ENOENT from try_to_grab_pending() becomes %true */
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mod_delayed_work_on);
+
+/**
+ * mod_delayed_work - modify delay of or queue a delayed work
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * mod_delayed_work_on() on local CPU.
+ */
+bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
+                     unsigned long delay)
+{
+       return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(mod_delayed_work);
+
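
As a usage illustration (editorial sketch, not part of the patch): the new helpers collapse the usual cancel_delayed_work() + queue_delayed_work() pair into one call, and because delayed_work->timer is irqsafe this is also callable from an interrupt handler.  The foo_dev structure and names below are hypothetical.

#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct foo_dev {
	struct workqueue_struct	*wq;
	struct delayed_work	poll_work;
};

/* (re)arm polling: an idle work is queued, a pending one has its timer pushed out */
static void foo_kick_poll(struct foo_dev *dev)
{
	mod_delayed_work(dev->wq, &dev->poll_work, msecs_to_jiffies(100));
}
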
 /**
  * worker_enter_idle - enter idle state
  * @worker: worker which is entering idle state
@@ -1598,6 +1740,7 @@ retry:
        /* rebind busy workers */
        for_each_busy_worker(worker, i, pos, gcwq) {
                struct work_struct *rebind_work = &worker->rebind_work;
+               struct workqueue_struct *wq;
 
                /* morph UNBOUND to REBIND */
                worker->flags &= ~WORKER_UNBOUND;
@@ -1607,11 +1750,20 @@ retry:
                                     work_data_bits(rebind_work)))
                        continue;
 
-               /* wq doesn't matter, use the default one */
                debug_work_activate(rebind_work);
-               insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-                           worker->scheduled.next,
-                           work_color_to_flags(WORK_NO_COLOR));
+
+               /*
+                * wq doesn't really matter but let's keep @worker->pool
+                * and @cwq->pool consistent for sanity.
+                */
+               if (worker_pool_pri(worker->pool))
+                       wq = system_highpri_wq;
+               else
+                       wq = system_wq;
+
+               insert_work(get_cwq(gcwq->cpu, wq), rebind_work,
+                           worker->scheduled.next,
+                           work_color_to_flags(WORK_NO_COLOR));
        }
 }
 
@@ -2057,14 +2209,11 @@ __acquires(&gcwq->lock)
                wake_up_worker(pool);
 
        /*
-        * Record the last CPU and clear PENDING.  The following wmb is
-        * paired with the implied mb in test_and_set_bit(PENDING) and
-        * ensures all updates to @work made here are visible to and
-        * precede any updates by the next PENDING owner.  Also, clear
-        * PENDING inside @gcwq->lock so that PENDING and queued state
-        * changes happen together while IRQ is disabled.
+        * Record the last CPU and clear PENDING which should be the last
+        * update to @work.  Also, do this inside @gcwq->lock so that
+        * PENDING and queued state changes happen together while IRQ is
+        * disabled.
         */
-       smp_wmb();
        set_work_cpu_and_clear_pending(work, gcwq->cpu);
 
        spin_unlock_irq(&gcwq->lock);
@@ -2082,11 +2231,9 @@ __acquires(&gcwq->lock)
        lock_map_release(&cwq->wq->lockdep_map);
 
        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
-               printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
-                      "%s/0x%08x/%d\n",
-                      current->comm, preempt_count(), task_pid_nr(current));
-               printk(KERN_ERR "    last function: ");
-               print_symbol("%s\n", (unsigned long)f);
+               pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
+                      "     last function: %pf\n",
+                      current->comm, preempt_count(), task_pid_nr(current), f);
                debug_show_held_locks(current);
                dump_stack();
        }
@@ -2640,8 +2787,8 @@ reflush:
 
                if (++flush_cnt == 10 ||
                    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
-                       pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
-                                  wq->name, flush_cnt);
+                       pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n",
+                               wq->name, flush_cnt);
                goto reflush;
        }
 
@@ -2652,8 +2799,7 @@ reflush:
 }
 EXPORT_SYMBOL_GPL(drain_workqueue);
 
-static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
-                            bool wait_executing)
+static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 {
        struct worker *worker = NULL;
        struct global_cwq *gcwq;
@@ -2675,13 +2821,12 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
                cwq = get_work_cwq(work);
                if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
                        goto already_gone;
-       } else if (wait_executing) {
+       } else {
                worker = find_worker_executing_work(gcwq, work);
                if (!worker)
                        goto already_gone;
                cwq = worker->current_cwq;
-       } else
-               goto already_gone;
+       }
 
        insert_wq_barrier(cwq, barr, work, worker);
        spin_unlock_irq(&gcwq->lock);
@@ -2708,15 +2853,8 @@ already_gone:
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
  *
- * Wait until @work has finished execution.  This function considers
- * only the last queueing instance of @work.  If @work has been
- * enqueued across different CPUs on a non-reentrant workqueue or on
- * multiple workqueues, @work might still be executing on return on
- * some of the CPUs from earlier queueing.
- *
- * If @work was queued only on a non-reentrant, ordered or unbound
- * workqueue, @work is guaranteed to be idle on return if it hasn't
- * been requeued since flush started.
+ * Wait until @work has finished execution.  @work is guaranteed to be idle
+ * on return if it hasn't been requeued since flush started.
  *
  * RETURNS:
  * %true if flush_work() waited for the work to finish execution,
@@ -2729,98 +2867,36 @@ bool flush_work(struct work_struct *work)
        lock_map_acquire(&work->lockdep_map);
        lock_map_release(&work->lockdep_map);
 
-       if (start_flush_work(work, &barr, true)) {
+       if (start_flush_work(work, &barr)) {
                wait_for_completion(&barr.done);
                destroy_work_on_stack(&barr.work);
                return true;
-       } else
-               return false;
-}
-EXPORT_SYMBOL_GPL(flush_work);
-
-static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
-{
-       struct wq_barrier barr;
-       struct worker *worker;
-
-       spin_lock_irq(&gcwq->lock);
-
-       worker = find_worker_executing_work(gcwq, work);
-       if (unlikely(worker))
-               insert_wq_barrier(worker->current_cwq, &barr, work, worker);
-
-       spin_unlock_irq(&gcwq->lock);
-
-       if (unlikely(worker)) {
-               wait_for_completion(&barr.done);
-               destroy_work_on_stack(&barr.work);
-               return true;
-       } else
+       } else {
                return false;
-}
-
-static bool wait_on_work(struct work_struct *work)
-{
-       bool ret = false;
-       int cpu;
-
-       might_sleep();
-
-       lock_map_acquire(&work->lockdep_map);
-       lock_map_release(&work->lockdep_map);
-
-       for_each_gcwq_cpu(cpu)
-               ret |= wait_on_cpu_work(get_gcwq(cpu), work);
-       return ret;
-}
-
-/**
- * flush_work_sync - wait until a work has finished execution
- * @work: the work to flush
- *
- * Wait until @work has finished execution.  On return, it's
- * guaranteed that all queueing instances of @work which happened
- * before this function is called are finished.  In other words, if
- * @work hasn't been requeued since this function was called, @work is
- * guaranteed to be idle on return.
- *
- * RETURNS:
- * %true if flush_work_sync() waited for the work to finish execution,
- * %false if it was already idle.
- */
-bool flush_work_sync(struct work_struct *work)
-{
-       struct wq_barrier barr;
-       bool pending, waited;
-
-       /* we'll wait for executions separately, queue barr only if pending */
-       pending = start_flush_work(work, &barr, false);
-
-       /* wait for executions to finish */
-       waited = wait_on_work(work);
-
-       /* wait for the pending one */
-       if (pending) {
-               wait_for_completion(&barr.done);
-               destroy_work_on_stack(&barr.work);
        }
-
-       return pending || waited;
 }
-EXPORT_SYMBOL_GPL(flush_work_sync);
+EXPORT_SYMBOL_GPL(flush_work);
 
-static bool __cancel_work_timer(struct work_struct *work,
-                               struct timer_list* timer)
+static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+       unsigned long flags;
        int ret;
 
        do {
-               ret = (timer && likely(del_timer(timer)));
-               if (!ret)
-                       ret = try_to_grab_pending(work);
-               wait_on_work(work);
+               ret = try_to_grab_pending(work, is_dwork, &flags);
+               /*
+                * If someone else is canceling, wait for the same event it
+                * would be waiting for before retrying.
+                */
+               if (unlikely(ret == -ENOENT))
+                       flush_work(work);
        } while (unlikely(ret < 0));
 
+       /* tell other tasks trying to grab @work to back off */
+       mark_work_canceling(work);
+       local_irq_restore(flags);
+
+       flush_work(work);
        clear_work_data(work);
        return ret;
 }
@@ -2845,7 +2921,7 @@ static bool __cancel_work_timer(struct work_struct *work,
  */
 bool cancel_work_sync(struct work_struct *work)
 {
-       return __cancel_work_timer(work, NULL);
+       return __cancel_work_timer(work, false);
 }
 EXPORT_SYMBOL_GPL(cancel_work_sync);
 
@@ -2865,36 +2941,13 @@ bool flush_delayed_work(struct delayed_work *dwork)
 {
        local_irq_disable();
        if (del_timer_sync(&dwork->timer))
-               __queue_work(WORK_CPU_UNBOUND,
+               __queue_work(dwork->cpu,
                             get_work_cwq(&dwork->work)->wq, &dwork->work);
        local_irq_enable();
        return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
 
-/**
- * flush_delayed_work_sync - wait for a dwork to finish
- * @dwork: the delayed work to flush
- *
- * Delayed timer is cancelled and the pending work is queued for
- * execution immediately.  Other than timer handling, its behavior
- * is identical to flush_work_sync().
- *
- * RETURNS:
- * %true if flush_work_sync() waited for the work to finish execution,
- * %false if it was already idle.
- */
-bool flush_delayed_work_sync(struct delayed_work *dwork)
-{
-       local_irq_disable();
-       if (del_timer_sync(&dwork->timer))
-               __queue_work(WORK_CPU_UNBOUND,
-                            get_work_cwq(&dwork->work)->wq, &dwork->work);
-       local_irq_enable();
-       return flush_work_sync(&dwork->work);
-}
-EXPORT_SYMBOL(flush_delayed_work_sync);
-
 /**
  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
  * @dwork: the delayed work to cancel
@@ -2906,7 +2959,7 @@ EXPORT_SYMBOL(flush_delayed_work_sync);
  */
 bool cancel_delayed_work_sync(struct delayed_work *dwork)
 {
-       return __cancel_work_timer(&dwork->work, &dwork->timer);
+       return __cancel_work_timer(&dwork->work, true);
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
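
A minimal teardown sketch against the updated cancel path (editorial; the foo_ helper is hypothetical).  After cancel_delayed_work_sync() returns, the work item is guaranteed to be off the timer, off any worklist and not running on any CPU:

#include <linux/printk.h>
#include <linux/workqueue.h>

static void foo_stop_polling(struct delayed_work *dwork)
{
	/* returns true if @dwork was still pending when it was canceled */
	if (cancel_delayed_work_sync(dwork))
		pr_debug("foo: delayed work was pending at shutdown\n");
}
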
 
@@ -3117,9 +3170,8 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
        int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
 
        if (max_active < 1 || max_active > lim)
-               printk(KERN_WARNING "workqueue: max_active %d requested for %s "
-                      "is out of range, clamping between %d and %d\n",
-                      max_active, name, 1, lim);
+               pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
+                       max_active, name, 1, lim);
 
        return clamp_val(max_active, 1, lim);
 }
@@ -3522,7 +3574,7 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
        case CPU_DOWN_PREPARE:
                /* unbinding should happen on the local CPU */
                INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn);
-               schedule_work_on(cpu, &unbind_work);
+               queue_work_on(cpu, system_highpri_wq, &unbind_work);
                flush_work(&unbind_work);
                break;
        }
@@ -3724,6 +3776,10 @@ static int __init init_workqueues(void)
        unsigned int cpu;
        int i;
 
+       /* make sure we have enough bits for OFFQ CPU number */
+       BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
+                    WORK_CPU_LAST);
+
        cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
        cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
 
@@ -3778,17 +3834,14 @@ static int __init init_workqueues(void)
        }
 
        system_wq = alloc_workqueue("events", 0, 0);
+       system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
        system_long_wq = alloc_workqueue("events_long", 0, 0);
-       system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
        system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
                                            WQ_UNBOUND_MAX_ACTIVE);
        system_freezable_wq = alloc_workqueue("events_freezable",
                                              WQ_FREEZABLE, 0);
-       system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
-                       WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
-       BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
-              !system_unbound_wq || !system_freezable_wq ||
-               !system_nrt_freezable_wq);
+       BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+              !system_unbound_wq || !system_freezable_wq);
        return 0;
 }
 early_initcall(init_workqueues);
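
Finally, a user-side sketch of the new system_highpri_wq (editorial; the foo_ names are hypothetical): work queued on it is executed by the per-CPU highpri worker pool introduced above rather than the normal one.

#include <linux/workqueue.h>

static void foo_urgent_fn(struct work_struct *work)
{
	/* runs from a highpri worker on the local CPU */
}

static DECLARE_WORK(foo_urgent_work, foo_urgent_fn);

static void foo_report_event(void)
{
	queue_work(system_highpri_wq, &foo_urgent_work);
}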