Merge remote-tracking branch 'security/next'

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 068de3a93606980268351ccaf6b19fc45573b4ea..32618b3fe4e6aa375b3cd3bde1acd12bdd4237b3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
 #include <linux/delay.h>
 #include <linux/stop_machine.h>
 #include <linux/random.h>
+#include <linux/ftrace_event.h>
+#include <linux/suspend.h>
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
 
 #include "rcu.h"
 
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x) tracepoint_string(x)
+
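
For orientation, a sketch of what the underlying tracepoint_string() helper
does; the real definition lives in <linux/ftrace_event.h> (newly included
above) and may differ in detail. It plants a pointer to the literal in a
dedicated __tracepoint_str section that perf and trace-cmd can read to map
recorded addresses back to text:

    /* Sketch only; see <linux/ftrace_event.h> for the authoritative macro. */
    #define __tracepoint_string __attribute__((section("__tracepoint_str")))
    #define tracepoint_string(str)                                          \
            ({                                                              \
                    static const char *___tp_str __tracepoint_string = str; \
                    ___tp_str;                                              \
            })
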
 /* Data structures. */
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
+/*
+ * In order to export the rcu_state name to the tracing tools, it
+ * needs to be added in the __tracepoint_string section.
+ * This requires defining a separate variable tp_<sname>_varname
+ * that points to the string being used, which lets the userspace
+ * tracing tools map the recorded string address back to the
+ * matching text.
+ */
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+static char sname##_varname[] = #sname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
+struct rcu_state sname##_state = { \
        .level = { &sname##_state.node[0] }, \
        .call = cr, \
        .fqs_state = RCU_GP_IDLE, \
@@ -75,16 +95,13 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
        .orphan_donetail = &sname##_state.orphan_donelist, \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
-       .name = #sname, \
+       .name = sname##_varname, \
        .abbr = sabbr, \
-}
-
-struct rcu_state rcu_sched_state =
-       RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
+}; \
+DEFINE_PER_CPU(struct rcu_data, sname##_data)
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
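
Expanded by hand for the rcu_sched flavor, the reworked initializer emits
roughly the following (sketch; the unchanged field initializers are elided):

    static char rcu_sched_varname[] = "rcu_sched";
    static const char *tp_rcu_sched_varname __used __tracepoint_string =
            rcu_sched_varname;
    struct rcu_state rcu_sched_state = {
            /* ... field initializers as in the macro body above ... */
            .name = rcu_sched_varname,
            .abbr = 's',
    };
    DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
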
 
 static struct rcu_state *rcu_state;
 LIST_HEAD(rcu_struct_flavors);
@@ -178,7 +195,7 @@ void rcu_sched_qs(int cpu)
        struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
 
        if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
+               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
        rdp->passed_quiesce = 1;
 }
 
@@ -187,7 +204,7 @@ void rcu_bh_qs(int cpu)
        struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
 
        if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
        rdp->passed_quiesce = 1;
 }
 
@@ -198,16 +215,20 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_note_context_switch(int cpu)
 {
-       trace_rcu_utilization("Start context switch");
+       trace_rcu_utilization(TPS("Start context switch"));
        rcu_sched_qs(cpu);
        rcu_preempt_note_context_switch(cpu);
-       trace_rcu_utilization("End context switch");
+       trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+       .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+       .dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
 static long blimit = 10;       /* Maximum callbacks per rcu_do_batch. */
@@ -226,7 +247,10 @@ module_param(jiffies_till_next_fqs, ulong, 0644);
 
 static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                                  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj);
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
 
@@ -345,11 +369,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                                bool user)
 {
-       trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
+       trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
-               trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+               trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
                          current->pid, current->comm,
@@ -411,6 +435,7 @@ void rcu_idle_enter(void)
 
        local_irq_save(flags);
        rcu_eqs_enter(false);
+       rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -428,27 +453,6 @@ void rcu_user_enter(void)
 {
        rcu_eqs_enter(1);
 }
-
-/**
- * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
- * after the current irq returns.
- *
- * This is similar to rcu_user_enter() but in the context of a non-nesting
- * irq. After this call, RCU enters into idle mode when the interrupt
- * returns.
- */
-void rcu_user_enter_after_irq(void)
-{
-       unsigned long flags;
-       struct rcu_dynticks *rdtp;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       /* Ensure this irq is interrupting a non-idle RCU state.  */
-       WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
-       rdtp->dynticks_nesting = 1;
-       local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -479,9 +483,10 @@ void rcu_irq_exit(void)
        rdtp->dynticks_nesting--;
        WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
        if (rdtp->dynticks_nesting)
-               trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+               trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_enter_common(rdtp, oldval, true);
+       rcu_sysidle_enter(rdtp, 1);
        local_irq_restore(flags);
 }
 
@@ -501,11 +506,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
        smp_mb__after_atomic_inc();  /* See above. */
        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
        rcu_cleanup_after_idle(smp_processor_id());
-       trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+       trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
-               trace_rcu_dyntick("Error on exit: not idle task",
+               trace_rcu_dyntick(TPS("Error on exit: not idle task"),
                                  oldval, rdtp->dynticks_nesting);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -550,6 +555,7 @@ void rcu_idle_exit(void)
 
        local_irq_save(flags);
        rcu_eqs_exit(false);
+       rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -565,28 +571,6 @@ void rcu_user_exit(void)
 {
        rcu_eqs_exit(1);
 }
-
-/**
- * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
- * idle mode after the current non-nesting irq returns.
- *
- * This is similar to rcu_user_exit() but in the context of an irq.
- * This is called when the irq has interrupted a userspace RCU idle mode
- * context. When the current non-nesting interrupt returns after this call,
- * the CPU won't restore the RCU idle mode.
- */
-void rcu_user_exit_after_irq(void)
-{
-       unsigned long flags;
-       struct rcu_dynticks *rdtp;
-
-       local_irq_save(flags);
-       rdtp = &__get_cpu_var(rcu_dynticks);
-       /* Ensure we are interrupting an RCU idle mode. */
-       WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
-       rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
-       local_irq_restore(flags);
-}
 #endif /* CONFIG_RCU_USER_QS */
 
 /**
@@ -620,9 +604,10 @@ void rcu_irq_enter(void)
        rdtp->dynticks_nesting++;
        WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
        if (oldval)
-               trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+               trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_exit_common(rdtp, oldval, true);
+       rcu_sysidle_exit(rdtp, 1);
        local_irq_restore(flags);
 }
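
The second argument to the new rcu_sysidle_enter()/rcu_sysidle_exit() calls
records which path is transitioning, matching the four call sites added in
this diff:

    rcu_sysidle_enter(rdtp, 0); /* rcu_idle_enter(): the idle loop itself */
    rcu_sysidle_enter(rdtp, 1); /* rcu_irq_exit(): returning from an irq */
    rcu_sysidle_exit(rdtp, 0);  /* rcu_idle_exit(): leaving the idle loop */
    rcu_sysidle_exit(rdtp, 1);  /* rcu_irq_enter(): entering an irq handler */
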
 
@@ -746,9 +731,11 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state.  Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
+static int dyntick_save_progress_counter(struct rcu_data *rdp,
+                                        bool *isidle, unsigned long *maxj)
 {
        rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+       rcu_sysidle_check_cpu(rdp, isidle, maxj);
        return (rdp->dynticks_snap & 0x1) == 0;
 }
 
@@ -758,7 +745,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
+                                   bool *isidle, unsigned long *maxj)
 {
        unsigned int curr;
        unsigned int snap;
@@ -775,7 +763,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
         * of the current RCU grace period.
         */
        if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
                rdp->dynticks_fqs++;
                return 1;
        }
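
The test above decodes the dynticks counter convention: the counter is
incremented on every idle transition and, per the WARN_ON_ONCE() in
rcu_eqs_exit_common(), is odd while the CPU is non-idle. A worked example:

    /*
     * snap = 5 (odd): the CPU was non-idle when the snapshot was taken.
     *   curr = 5            -> no transition seen, no quiescent state
     *   curr = 6 (even)     -> CPU is idle right now, quiescent state
     *   curr = 7 = snap + 2 -> CPU entered and left idle, quiescent state
     */
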
@@ -795,7 +783,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
                return 0;  /* Grace period is not old enough. */
        barrier();
        if (cpu_is_offline(rdp->cpu)) {
-               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
+               trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
                rdp->offline_fqs++;
                return 1;
        }
@@ -1032,7 +1020,7 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
  * rcu_nocb_wait_gp().
  */
 static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-                               unsigned long c, char *s)
+                               unsigned long c, const char *s)
 {
        trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
                                      rnp->completed, c, rnp->level,
@@ -1058,9 +1046,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * grace period is already marked as needed, return to the caller.
         */
        c = rcu_cbs_completed(rdp->rsp, rnp);
-       trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+       trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
        if (rnp->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
                return c;
        }
 
@@ -1074,7 +1062,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
        if (rnp->gpnum != rnp->completed ||
            ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
                rnp->need_future_gp[c & 0x1]++;
-               trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
                return c;
        }
 
@@ -1102,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
         * recorded, trace and leave.
         */
        if (rnp_root->need_future_gp[c & 0x1]) {
-               trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
                goto unlock_out;
        }
 
@@ -1111,9 +1099,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 
        /* If a grace period is not already in progress, start one. */
        if (rnp_root->gpnum != rnp_root->completed) {
-               trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
        } else {
-               trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+               trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
                rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
        }
 unlock_out:
@@ -1137,7 +1125,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
        rcu_nocb_gp_cleanup(rsp, rnp);
        rnp->need_future_gp[c & 0x1] = 0;
        needmore = rnp->need_future_gp[(c + 1) & 0x1];
-       trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+       trace_rcu_future_gp(rnp, rdp, c,
+                           needmore ? TPS("CleanupMore") : TPS("Cleanup"));
        return needmore;
 }
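
need_future_gp[] has two elements indexed by the low-order bit of the
grace-period number, so "c & 0x1" selects the slot for the just-completed
grace period and "(c + 1) & 0x1" the slot for its successor:

    /* For c = 7: clear need_future_gp[1], then needmore reads
     * need_future_gp[0], i.e. whether grace period 8 was requested. */
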
 
@@ -1205,9 +1194,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 
        /* Trace depending on how much we were able to accelerate. */
        if (!*rdp->nxttail[RCU_WAIT_TAIL])
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
        else
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
 }
 
 /*
@@ -1273,7 +1262,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
 
                /* Remember that we saw this grace-period completion. */
                rdp->completed = rnp->completed;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
        }
 
        if (rdp->gpnum != rnp->gpnum) {
@@ -1283,7 +1272,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
                 * go looking for one.
                 */
                rdp->gpnum = rnp->gpnum;
-               trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
+               trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
                rdp->passed_quiesce = 0;
                rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
                zero_cpu_stall_ticks(rdp);
@@ -1315,6 +1304,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
        struct rcu_data *rdp;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
+       rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
        rsp->gp_flags = 0; /* Clear all flags: New grace period. */
 
@@ -1326,7 +1316,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
        /* Advance to a new grace period and initialize state. */
        rsp->gpnum++;
-       trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+       trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
        record_gp_stall_check_time(rsp);
        raw_spin_unlock_irq(&rnp->lock);
 
@@ -1379,16 +1369,25 @@ static int rcu_gp_init(struct rcu_state *rsp)
 int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 {
        int fqs_state = fqs_state_in;
+       bool isidle = false;
+       unsigned long maxj;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        rsp->n_force_qs++;
        if (fqs_state == RCU_SAVE_DYNTICK) {
                /* Collect dyntick-idle snapshots. */
-               force_qs_rnp(rsp, dyntick_save_progress_counter);
+               if (is_sysidle_rcu_state(rsp)) {
+                       isidle = 1;
+                       maxj = jiffies - ULONG_MAX / 4;
+               }
+               force_qs_rnp(rsp, dyntick_save_progress_counter,
+                            &isidle, &maxj);
+               rcu_sysidle_report_gp(rsp, isidle, maxj);
                fqs_state = RCU_FORCE_QS;
        } else {
                /* Handle dyntick-idle and offline CPUs. */
-               force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
+               isidle = 0;
+               force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
        }
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
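
A hedged sketch of the contract between this code and the rcu_sysidle_*()
helpers, whose implementations land in rcutree_plugin.h as part of this
series:

    /*
     * isidle in:  true only when scanning snapshots for the sysidle flavor
     * isidle out: still true iff every scanned CPU was in dyntick idle
     * maxj out:   the most recent time (jiffies) a scanned CPU went idle;
     *             rcu_sysidle_report_gp() feeds both into the full-system-
     *             idle state machine.
     */
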
@@ -1448,7 +1447,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
-       trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+       trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
        rsp->fqs_state = RCU_GP_IDLE;
        rdp = this_cpu_ptr(rsp->rda);
        rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
@@ -1558,10 +1557,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 
        /*
         * We can't do wakeups while holding the rnp->lock, as that
-        * could cause possible deadlocks with the rq->lock. Deter
-        * the wakeup to interrupt context.
+        * could cause possible deadlocks with the rq->lock. Defer
+        * the wakeup to interrupt context.  And don't bother waking
+        * up the running kthread.
         */
-       irq_work_queue(&rsp->wakeup_work);
+       if (current != rsp->gp_kthread)
+               irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
@@ -1857,7 +1858,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
        RCU_TRACE(mask = rdp->grpmask);
        trace_rcu_grace_period(rsp->name,
                               rnp->gpnum + 1 - !!(rnp->qsmask & mask),
-                              "cpuofl");
+                              TPS("cpuofl"));
 }
 
 /*
@@ -2044,7 +2045,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-       trace_rcu_utilization("Start scheduler-tick");
+       trace_rcu_utilization(TPS("Start scheduler-tick"));
        increment_cpu_stall_ticks();
        if (user || rcu_is_cpu_rrupt_from_idle()) {
 
@@ -2077,7 +2078,7 @@ void rcu_check_callbacks(int cpu, int user)
        rcu_preempt_check_callbacks(cpu);
        if (rcu_pending(cpu))
                invoke_rcu_core();
-       trace_rcu_utilization("End scheduler-tick");
+       trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
 /*
@@ -2087,7 +2088,10 @@ void rcu_check_callbacks(int cpu, int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp,
+                        int (*f)(struct rcu_data *rsp, bool *isidle,
+                                 unsigned long *maxj),
+                        bool *isidle, unsigned long *maxj)
 {
        unsigned long bit;
        int cpu;
@@ -2110,9 +2114,12 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
                cpu = rnp->grplo;
                bit = 1;
                for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
-                       if ((rnp->qsmask & bit) != 0 &&
-                           f(per_cpu_ptr(rsp->rda, cpu)))
-                               mask |= bit;
+                       if ((rnp->qsmask & bit) != 0) {
+                               if ((rnp->qsmaskinit & bit) != 0)
+                                       *isidle = 0;
+                               if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+                                       mask |= bit;
+                       }
                }
                if (mask != 0) {
 
@@ -2208,10 +2215,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
        if (cpu_is_offline(smp_processor_id()))
                return;
-       trace_rcu_utilization("Start RCU core");
+       trace_rcu_utilization(TPS("Start RCU core"));
        for_each_rcu_flavor(rsp)
                __rcu_process_callbacks(rsp);
-       trace_rcu_utilization("End RCU core");
+       trace_rcu_utilization(TPS("End RCU core"));
 }
 
 /*
@@ -2286,6 +2293,13 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
        }
 }
 
+/*
+ * RCU callback function used to "leak" a callback: deliberately do
+ * nothing, because invoking or freeing a doubly-queued rcu_head would
+ * corrupt the callback lists, so losing the memory is the safe
+ * failure mode.
+ */
+static void rcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
 /*
  * Helper function for call_rcu() and friends.  The cpu argument will
  * normally be -1, indicating "currently running CPU".  It may specify
@@ -2300,7 +2314,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
        struct rcu_data *rdp;
 
        WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
-       debug_rcu_head_queue(head);
+       if (debug_rcu_head_queue(head)) {
+               /* Probable double call_rcu(), so leak the callback. */
+               ACCESS_ONCE(head->func) = rcu_leak_callback;
+               WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
+               return;
+       }
        head->func = func;
        head->next = NULL;
 
@@ -2720,7 +2739,7 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
  * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
  * the compiler is expected to optimize this away.
  */
-static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
                               int cpu, unsigned long done)
 {
        trace_rcu_barrier(rsp->name, s, cpu,
@@ -2785,9 +2804,20 @@ static void _rcu_barrier(struct rcu_state *rsp)
         * transition.  The "if" expression below therefore rounds the old
         * value up to the next even number and adds two before comparing.
         */
-       snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+       snap_done = rsp->n_barrier_done;
        _rcu_barrier_trace(rsp, "Check", -1, snap_done);
-       if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+
+       /*
+        * If the value in snap is odd, we needed to wait for the current
+        * rcu_barrier() to complete, then wait for the next one, in other
+        * words, we need the value of snap_done to be three larger than
+        * the value of snap.  On the other hand, if the value in snap is
+        * even, we only had to wait for the next rcu_barrier() to complete,
+        * in other words, we need the value of snap_done to be only two
+        * greater than the value of snap.  The "(snap + 3) & ~0x1" computes
+        * this for us (thank you, Linus!).
+        */
+       if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
                _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
                smp_mb(); /* caller's subsequent code after above check. */
                mutex_unlock(&rsp->barrier_mutex);
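
Plugging numbers into the check above:

    /*
     * snap = 4 (even, no barrier in flight): (4 + 3) & ~0x1 = 6 = snap + 2
     * snap = 5 (odd, barrier in flight):     (5 + 3) & ~0x1 = 8 = snap + 3
     */
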
@@ -2930,6 +2960,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
        rdp->blimit = blimit;
        init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
        rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_sysidle_init_percpu_data(rdp->dynticks);
        atomic_set(&rdp->dynticks->dynticks,
                   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
        raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
@@ -2952,7 +2983,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
                        rdp->completed = rnp->completed;
                        rdp->passed_quiesce = 0;
                        rdp->qs_pending = 0;
-                       trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
+                       trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
                }
                raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
                rnp = rnp->parent;
@@ -2982,7 +3013,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
        struct rcu_node *rnp = rdp->mynode;
        struct rcu_state *rsp;
 
-       trace_rcu_utilization("Start CPU hotplug");
+       trace_rcu_utilization(TPS("Start CPU hotplug"));
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
@@ -3011,7 +3042,26 @@ static int rcu_cpu_notify(struct notifier_block *self,
        default:
                break;
        }
-       trace_rcu_utilization("End CPU hotplug");
+       trace_rcu_utilization(TPS("End CPU hotplug"));
+       return NOTIFY_OK;
+}
+
+static int rcu_pm_notify(struct notifier_block *self,
+                        unsigned long action, void *hcpu)
+{
+       switch (action) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+                       rcu_expedited = 1;
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_SUSPEND:
+               rcu_expedited = 0;
+               break;
+       default:
+               break;
+       }
        return NOTIFY_OK;
 }
 
@@ -3256,6 +3306,7 @@ void __init rcu_init(void)
         * or the scheduler are operational.
         */
        cpu_notifier(rcu_cpu_notify, 0);
+       pm_notifier(rcu_pm_notify, 0);
        for_each_online_cpu(cpu)
                rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
 }