Merge remote-tracking branch 'security/next'

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 068de3a93606980268351ccaf6b19fc45573b4ea..32618b3fe4e6aa375b3cd3bde1acd12bdd4237b3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
 #include <linux/delay.h>
 #include <linux/stop_machine.h>
 #include <linux/random.h>
+#include <linux/ftrace_event.h>
+#include <linux/suspend.h>
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
 
 #include "rcu.h"
 
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x) tracepoint_string(x)
+
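
For orientation, a sketch of what the underlying tracepoint_string() helper
does; the real definition lives in <linux/ftrace_event.h> (newly included
above) and may differ in detail. It plants a pointer to the literal in a
dedicated __tracepoint_str section that perf and trace-cmd can read to map
recorded addresses back to text:

    /* Sketch only; see <linux/ftrace_event.h> for the authoritative macro. */
    #define __tracepoint_string __attribute__((section("__tracepoint_str")))
    #define tracepoint_string(str)                                          \
            ({                                                              \
                    static const char *___tp_str __tracepoint_string = str; \
                    ___tp_str;                                              \
            })
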
 /* Data structures. */
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
+/*
+ * In order to export the rcu_state name to the tracing tools, it
+ * needs to be added in the __tracepoint_string section.
+ * This requires defining a separate variable tp_<sname>_varname
+ * that points to the string being used, which lets the userspace
+ * tracing tools map the recorded string address back to the
+ * matching text.
+ */
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+static char sname##_varname[] = #sname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
+struct rcu_state sname##_state = { \
        .level = { &sname##_state.node[0] }, \
        .call = cr, \
        .fqs_state = RCU_GP_IDLE, \
@@ -75,16 +95,13 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
        .orphan_donetail = &sname##_state.orphan_donelist, \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
-       .name = #sname, \
+       .name = sname##_varname, \
        .abbr = sabbr, \
-}
-
-struct rcu_state rcu_sched_state =
-       RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
+}; \
+DEFINE_PER_CPU(struct rcu_data, sname##_data)
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
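
Expanded by hand for the rcu_sched flavor, the reworked initializer emits
roughly the following (sketch; the unchanged field initializers are elided):

    static char rcu_sched_varname[] = "rcu_sched";
    static const char *tp_rcu_sched_varname __used __tracepoint_string =
            rcu_sched_varname;
    struct rcu_state rcu_sched_state = {
            /* ... field initializers as in the macro body above ... */
            .name = rcu_sched_varname,
            .abbr = 's',
    };
    DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
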
 
 static struct rcu_state *rcu_state;
 LIST_HEAD(rcu_struct_flavors);
@@ -178,7 +195,7 @@ void rcu_sched_qs(int cpu)
        struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
 
        if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
+               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
        rdp->passed_quiesce = 1;
 }
 
@@ -187,7 +204,7 @@ void rcu_bh_qs(int cpu)
        struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
 
        if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
        rdp->passed_quiesce = 1;
 }
 
@@ -198,16 +215,20 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_note_context_switch(int cpu)
 {
-       trace_rcu_utilization("Start context switch");
+       trace_rcu_utilization(TPS("Start context switch"));
        rcu_sched_qs(cpu);
        rcu_preempt_note_context_switch(cpu);
-       trace_rcu_utilization("End context switch");
+       trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+       .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+       .dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
 static long blimit = 10;       /* Maximum callbacks per rcu_do_batch. */
@@ -226,7 +247,10 @@ module_param(jiffies_till_next_fqs, ulong, 0644);
 
 static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                                  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj);
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
 
@@ -345,11 +369,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                                bool user)
 {
-       trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
+       trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
-               trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+               trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
                          current->pid, current->comm,
@@ -411,6 +435,7 @@ void rcu_idle_enter(void)
 
        local_irq_save(flags);
        rcu_eqs_enter(false);
+       rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -428,27 +453,6 @@ void rcu_user_enter(void)
 {
        rcu_eqs_enter(1);
 }
-
-/**
- * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
- * after the current irq returns.
- *
- * This is similar to rcu_user_enter() but in the context of a non-nesting
- * irq. After this call, RCU enters into idle mode when the interrupt
- * returns.
- */
-void rcu_user_enter_after_irq(void)
-{
-       unsigned long flags;
-       struct rcu_dynticks *rdtp;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       /* Ensure this irq is interrupting a non-idle RCU state.  */
-       WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
-       rdtp->dynticks_nesting = 1;
-       local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -479,9 +483,10 @@ void rcu_irq_exit(void)
        rdtp->dynticks_nesting--;
        WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
        if (rdtp->dynticks_nesting)
-               trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+               trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_enter_common(rdtp, oldval, true);
+       rcu_sysidle_enter(rdtp, 1);
        local_irq_restore(flags);
 }
 
@@ -501,11 +506,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
        smp_mb__after_atomic_inc();  /* See above. */
        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
        rcu_cleanup_after_idle(smp_processor_id());
-       trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+       trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
-               trace_rcu_dyntick("Error on exit: not idle task",
+               trace_rcu_dyntick(TPS("Error on exit: not idle task"),
                                  oldval, rdtp->dynticks_nesting);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -550,6 +555,7 @@ void rcu_idle_exit(void)
 
        local_irq_save(flags);
        rcu_eqs_exit(false);
+       rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -565,28 +571,6 @@ void rcu_user_exit(void)
 {
        rcu_eqs_exit(1);
 }
-
-/**
- * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
- * idle mode after the current non-nesting irq returns.
- *
- * This is similar to rcu_user_exit() but in the context of an irq.
- * This is called when the irq has interrupted a userspace RCU idle mode
- * context. When the current non-nesting interrupt returns after this call,
- * the CPU won't restore the RCU idle mode.
- */
-void rcu_user_exit_after_irq(void)
-{
-       unsigned long flags;
-       struct rcu_dynticks *rdtp;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       /* Ensure we are interrupting an RCU idle mode. */
-       WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
-       rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
-       local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -620,9 +604,10 @@ void rcu_irq_enter(void)
        rdtp->dynticks_nesting++;
        WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
        if (oldval)
-               trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+               trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_exit_common(rdtp, oldval, true);
+       rcu_sysidle_exit(rdtp, 1);
        local_irq_restore(flags);
 }
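
The second argument to the new rcu_sysidle_enter()/rcu_sysidle_exit() calls
records which path is transitioning, matching the four call sites added in
this diff:

    rcu_sysidle_enter(rdtp, 0); /* rcu_idle_enter(): the idle loop itself */
    rcu_sysidle_enter(rdtp, 1); /* rcu_irq_exit(): returning from an irq */
    rcu_sysidle_exit(rdtp, 0);  /* rcu_idle_exit(): leaving the idle loop */
    rcu_sysidle_exit(rdtp, 1);  /* rcu_irq_enter(): entering an irq handler */
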
 
@@ -746,9 +731,11 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state.  Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
+static int dyntick_save_progress_counter(struct rcu_data *rdp,
+                                        bool *isidle, unsigned long *maxj)
 {
        rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+       rcu_sysidle_check_cpu(rdp, isidle, maxj);
        return (rdp->dynticks_snap & 0x1) == 0;
 }
 
@@ -758,7 +745,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
+                                   bool *isidle, unsigned long *maxj)
 {
        unsigned int curr;
        unsigned int snap;
@@ -775,7 +763,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
         * of the current RCU grace period.
         */
        if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
                rdp->dynticks_fqs++;
                return 1;
        }
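
The test above decodes the dynticks counter convention: the counter is
incremented on every idle transition and, per the WARN_ON_ONCE() in
rcu_eqs_exit_common(), is odd while the CPU is non-idle. A worked example:

    /*
     * snap = 5 (odd): the CPU was non-idle when the snapshot was taken.
     *   curr = 5            -> no transition seen, no quiescent state
     *   curr = 6 (even)     -> CPU is idle right now, quiescent state
     *   curr = 7 = snap + 2 -> CPU entered and left idle, quiescent state
     */
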
@@ -795,7 +783,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
                return 0;  /* Grace period is not old enough. */
        barrier();
        if (cpu_is_offline(rdp->cpu)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
                rdp->offline_fqs++;
                return 1;
        }
@@ -1032,7 +1020,7 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
  * rcu_nocb_wait_gp().
  */
 static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-                               unsigned long c, char *s)
+                               unsigned long c, const char *s)
 {
        trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
                                      rnp->completed, c, rnp->level,
@@ -1058,9 +1046,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * grace period is already marked as needed, return to the caller.
         */
        c = rcu_cbs_completed(rdp->rsp, rnp);
-       trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+       trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
        if (rnp->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
                return c;
        }
 
@@ -1074,7 +1062,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
        if (rnp->gpnum != rnp->completed ||
            ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
                rnp->need_future_gp[c & 0x1]++;
-               trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
                return c;
        }
 
@@ -1102,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * recorded, trace and leave.
         */
        if (rnp_root->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
                goto unlock_out;
        }
 
@@ -1111,9 +1099,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 
        /* If a grace period is not already in progress, start one. */
        if (rnp_root->gpnum != rnp_root->completed) {
-               trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
        } else {
-               trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
                rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
        }
 unlock_out:
@@ -1137,7 +1125,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
        rcu_nocb_gp_cleanup(rsp, rnp);
        rnp->need_future_gp[c & 0x1] = 0;
        needmore = rnp->need_future_gp[(c + 1) & 0x1];
-       trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+       trace_rcu_future_gp(rnp, rdp, c,
+                           needmore ? TPS("CleanupMore") : TPS("Cleanup"));
        return needmore;
 }
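
need_future_gp[] has two elements indexed by the low-order bit of the
grace-period number, so "c & 0x1" selects the slot for the just-completed
grace period and "(c + 1) & 0x1" the slot for its successor:

    /* For c = 7: clear need_future_gp[1], then needmore reads
     * need_future_gp[0], i.e. whether grace period 8 was requested. */
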
 
@@ -1205,9 +1194,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 
        /* Trace depending on how much we were able to accelerate. */
        if (!*rdp->nxttail[RCU_WAIT_TAIL])
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
        else
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
 }
 
 /*
@@ -1273,7 +1262,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
 
                /* Remember that we saw this grace-period completion. */
                rdp->completed = rnp->completed;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
        }
 
        if (rdp->gpnum != rnp->gpnum) {
@@ -1283,7 +1272,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
                 * go looking for one.
                 */
                rdp->gpnum = rnp->gpnum;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
                rdp->passed_quiesce = 0;
                rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
                zero_cpu_stall_ticks(rdp);
@@ -1315,6 +1304,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
        struct rcu_data *rdp;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
+       rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
        rsp->gp_flags = 0; /* Clear all flags: New grace period. */
 
@@ -1326,7 +1316,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
        /* Advance to a new grace period and initialize state. */
        rsp->gpnum++;
-       trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+       trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
        record_gp_stall_check_time(rsp);
        raw_spin_unlock_irq(&rnp->lock);
 
@@ -1379,16 +1369,25 @@ static int rcu_gp_init(struct rcu_state *rsp)
 int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 {
        int fqs_state = fqs_state_in;
+       bool isidle = false;
+       unsigned long maxj;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        rsp->n_force_qs++;
        if (fqs_state == RCU_SAVE_DYNTICK) {
                /* Collect dyntick-idle snapshots. */
-               force_qs_rnp(rsp, dyntick_save_progress_counter);
+               if (is_sysidle_rcu_state(rsp)) {
+                       isidle = 1;
+                       maxj = jiffies - ULONG_MAX / 4;
+               }
+               force_qs_rnp(rsp, dyntick_save_progress_counter,
+                            &isidle, &maxj);
+               rcu_sysidle_report_gp(rsp, isidle, maxj);
                fqs_state = RCU_FORCE_QS;
        } else {
                /* Handle dyntick-idle and offline CPUs. */
-               force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
+               isidle = 0;
+               force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
        }
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
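
A hedged sketch of the contract between this code and the rcu_sysidle_*()
helpers, whose implementations land in rcutree_plugin.h as part of this
series:

    /*
     * isidle in:  true only when scanning snapshots for the sysidle flavor
     * isidle out: still true iff every scanned CPU was in dyntick idle
     * maxj out:   the most recent time (jiffies) a scanned CPU went idle;
     *             rcu_sysidle_report_gp() feeds both into the full-system-
     *             idle state machine.
     */
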
@@ -1448,7 +1447,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
-       trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+       trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
        rsp->fqs_state = RCU_GP_IDLE;
        rdp = this_cpu_ptr(rsp->rda);
        rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
@@ -1558,10 +1557,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 
        /*
         * We can't do wakeups while holding the rnp->lock, as that
-        * could cause possible deadlocks with the rq->lock. Deter
-        * the wakeup to interrupt context.
+        * could cause possible deadlocks with the rq->lock. Defer
+        * the wakeup to interrupt context.  And don't bother waking
+        * up the running kthread.
         */
-       irq_work_queue(&rsp->wakeup_work);
+       if (current != rsp->gp_kthread)
+               irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
@@ -1857,7 +1858,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
        RCU_TRACE(mask = rdp->grpmask);
        trace_rcu_grace_period(rsp->name,
                               rnp->gpnum + 1 - !!(rnp->qsmask & mask),
-                              "cpuofl");
+                              TPS("cpuofl"));
 }
 
 /*
@@ -2044,7 +2045,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-       trace_rcu_utilization("Start scheduler-tick");
+       trace_rcu_utilization(TPS("Start scheduler-tick"));
        increment_cpu_stall_ticks();
        if (user || rcu_is_cpu_rrupt_from_idle()) {
 
@@ -2077,7 +2078,7 @@ void rcu_check_callbacks(int cpu, int user)
        rcu_preempt_check_callbacks(cpu);
        if (rcu_pending(cpu))
                invoke_rcu_core();
-       trace_rcu_utilization("End scheduler-tick");
+       trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
 /*
@@ -2087,7 +2088,10 @@ void rcu_check_callbacks(int cpu, int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj)
 {
        unsigned long bit;
        int cpu;
@@ -2110,9 +2114,12 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
                cpu = rnp->grplo;
                bit = 1;
                for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
-                       if ((rnp->qsmask & bit) != 0 &&
-                           f(per_cpu_ptr(rsp->rda, cpu)))
-                               mask |= bit;
+                       if ((rnp->qsmask & bit) != 0) {
+                               if ((rnp->qsmaskinit & bit) != 0)
+                                       *isidle = 0;
+                               if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+                                       mask |= bit;
+                       }
                }
                if (mask != 0) {
 
@@ -2208,10 +2215,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
        if (cpu_is_offline(smp_processor_id()))
                return;
-       trace_rcu_utilization("Start RCU core");
+       trace_rcu_utilization(TPS("Start RCU core"));
        for_each_rcu_flavor(rsp)
                __rcu_process_callbacks(rsp);
-       trace_rcu_utilization("End RCU core");
+       trace_rcu_utilization(TPS("End RCU core"));
 }
 
 /*
@@ -2286,6 +2293,13 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
        }
 }
 
+/*
+ * RCU callback function used to "leak" a callback: deliberately do
+ * nothing, because invoking or freeing a doubly-queued rcu_head would
+ * corrupt the callback lists, so losing the memory is the safe
+ * failure mode.
+ */
+static void rcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
 /*
  * Helper function for call_rcu() and friends.  The cpu argument will
  * normally be -1, indicating "currently running CPU".  It may specify
@@ -2300,7 +2314,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
        struct rcu_data *rdp;
 
        WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
-       debug_rcu_head_queue(head);
+       if (debug_rcu_head_queue(head)) {
+               /* Probable double call_rcu(), so leak the callback. */
+               ACCESS_ONCE(head->func) = rcu_leak_callback;
+               WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
+               return;
+       }
        head->func = func;
        head->next = NULL;
 
@@ -2720,7 +2739,7 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
  * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
  * the compiler is expected to optimize this away.
  */
-static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
                               int cpu, unsigned long done)
 {
        trace_rcu_barrier(rsp->name, s, cpu,
@@ -2785,9 +2804,20 @@ static void _rcu_barrier(struct rcu_state *rsp)
         * transition.  The "if" expression below therefore rounds the old
         * value up to the next even number and adds two before comparing.
         */
-       snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+       snap_done = rsp->n_barrier_done;
        _rcu_barrier_trace(rsp, "Check", -1, snap_done);
-       if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+
+       /*
+        * If the value in snap is odd, we needed to wait for the current
+        * rcu_barrier() to complete, then wait for the next one, in other
+        * words, we need the value of snap_done to be three larger than
+        * the value of snap.  On the other hand, if the value in snap is
+        * even, we only had to wait for the next rcu_barrier() to complete,
+        * in other words, we need the value of snap_done to be only two
+        * greater than the value of snap.  The "(snap + 3) & ~0x1" computes
+        * this for us (thank you, Linus!).
+        */
+       if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
                _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
                smp_mb(); /* caller's subsequent code after above check. */
                mutex_unlock(&rsp->barrier_mutex);
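
Plugging numbers into the check above:

    /*
     * snap = 4 (even, no barrier in flight): (4 + 3) & ~0x1 = 6 = snap + 2
     * snap = 5 (odd, barrier in flight):     (5 + 3) & ~0x1 = 8 = snap + 3
     */
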
@@ -2930,6 +2960,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
        rdp->blimit = blimit;
        init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
        rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_sysidle_init_percpu_data(rdp->dynticks);
        atomic_set(&rdp->dynticks->dynticks,
                   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
        raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
@@ -2952,7 +2983,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
                        rdp->completed = rnp->completed;
                        rdp->passed_quiesce = 0;
                        rdp->qs_pending = 0;
-                       trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
+                       trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
                }
                raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
                rnp = rnp->parent;
@@ -2982,7 +3013,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
        struct rcu_node *rnp = rdp->mynode;
        struct rcu_state *rsp;
 
-       trace_rcu_utilization("Start CPU hotplug");
+       trace_rcu_utilization(TPS("Start CPU hotplug"));
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
@@ -3011,7 +3042,26 @@ static int rcu_cpu_notify(struct notifier_block *self,
        default:
                break;
        }
-       trace_rcu_utilization("End CPU hotplug");
+       trace_rcu_utilization(TPS("End CPU hotplug"));
+       return NOTIFY_OK;
+}
+
+static int rcu_pm_notify(struct notifier_block *self,
+                        unsigned long action, void *hcpu)
+{
+       switch (action) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+                       rcu_expedited = 1;
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_SUSPEND:
+               rcu_expedited = 0;
+               break;
+       default:
+               break;
+       }
        return NOTIFY_OK;
 }
 
@@ -3256,6 +3306,7 @@ void __init rcu_init(void)
         * or the scheduler are operational.
         */
        cpu_notifier(rcu_cpu_notify, 0);
+       pm_notifier(rcu_pm_notify, 0);
        for_each_online_cpu(cpu)
                rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
 }