rcu: increase synchronize_sched_expedited() batching

author Tejun Heo <tj@kernel.org>

Tue, 23 Nov 2010 05:36:11 +0000 (21:36 -0800)

committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Fri, 17 Dec 2010 20:34:08 +0000 (12:34 -0800)
author Tejun Heo <tj@kernel.org>
Tue, 23 Nov 2010 05:36:11 +0000 (21:36 -0800)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Fri, 17 Dec 2010 20:34:08 +0000 (12:34 -0800)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h

index 49e8e16308e15dd36dbd8f1481e68af8e58ee87d..af5614856285d32e0f07d3ca7e7294b03b9b27b7 100644 (file)
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,6 +47,8 @@
  extern int rcutorture_runnable; /* for sysctl */
  #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
  
  extern int rcutorture_runnable; /* for sysctl */
  #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
  
+#define UINT_CMP_GE(a, b)      (UINT_MAX / 2 >= (a) - (b))
+#define UINT_CMP_LT(a, b)      (UINT_MAX / 2 < (a) - (b))
  #define ULONG_CMP_GE(a, b)     (ULONG_MAX / 2 >= (a) - (b))
  #define ULONG_CMP_LT(a, b)     (ULONG_MAX / 2 < (a) - (b))
  
  #define ULONG_CMP_GE(a, b)     (ULONG_MAX / 2 >= (a) - (b))
  #define ULONG_CMP_LT(a, b)     (ULONG_MAX / 2 < (a) - (b))
  
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h

index c22c4ef2a0d0a7b44f23e8e9f681ff0a1f3c3005..a3638710dc67f4627f5cdb88e1cafb43b500d24a 100644 (file)
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1025,7 +1025,8 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
  
  #else /* #ifndef CONFIG_SMP */
  
  
  #else /* #ifndef CONFIG_SMP */
  
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
  
  static int synchronize_sched_expedited_cpu_stop(void *data)
  {
  
  static int synchronize_sched_expedited_cpu_stop(void *data)
  {
@@ -1041,8 +1042,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
          * robustness against future implementation changes.
          */
         smp_mb(); /* See above comment block. */
          * robustness against future implementation changes.
          */
         smp_mb(); /* See above comment block. */
-       if (cpumask_first(cpu_online_mask) == smp_processor_id())
-               atomic_inc(&synchronize_sched_expedited_count);
         return 0;
  }
  
         return 0;
  }
  
@@ -1056,43 +1055,86 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
   * lock that is acquired by a CPU-hotplug notifier.  Failing to
   * observe this restriction will result in deadlock.
   *
   * lock that is acquired by a CPU-hotplug notifier.  Failing to
   * observe this restriction will result in deadlock.
   *
- * The synchronize_sched_expedited_cpu_stop() function is called
- * in stop-CPU context, but in order to keep overhead down to a dull
- * roar, we don't force this function to wait for its counterparts
- * on other CPUs.  One instance of this function will increment the
- * synchronize_sched_expedited_count variable per call to
- * try_stop_cpus(), but there is no guarantee what order this instance
- * will occur in.  The worst case is that it is last on one call
- * to try_stop_cpus(), and the first on the next call.  This means
- * that piggybacking requires that synchronize_sched_expedited_count
- * be incremented by 3: this guarantees that the piggybacking
- * task has waited through an entire cycle of context switches,
- * even in the worst case.
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word.  Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
   */
  void synchronize_sched_expedited(void)
  {
   */
  void synchronize_sched_expedited(void)
  {
-       int snap, trycount = 0;
+       int firstsnap, s, snap, trycount = 0;
  
  
-       smp_mb();  /* ensure prior mod happens before capturing snap. */
-       snap = atomic_read(&synchronize_sched_expedited_count) + 2;
+       /* Note that atomic_inc_return() implies full memory barrier. */
+       firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
         get_online_cpus();
         get_online_cpus();
+
+       /*
+        * Each pass through the following loop attempts to force a
+        * context switch on each CPU.
+        */
         while (try_stop_cpus(cpu_online_mask,
                              synchronize_sched_expedited_cpu_stop,
                              NULL) == -EAGAIN) {
                 put_online_cpus();
         while (try_stop_cpus(cpu_online_mask,
                              synchronize_sched_expedited_cpu_stop,
                              NULL) == -EAGAIN) {
                 put_online_cpus();
+
+               /* No joy, try again later.  Or just synchronize_sched(). */
                 if (trycount++ < 10)
                         udelay(trycount * num_online_cpus());
                 else {
                         synchronize_sched();
                         return;
                 }
                 if (trycount++ < 10)
                         udelay(trycount * num_online_cpus());
                 else {
                         synchronize_sched();
                         return;
                 }
-               if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
+
+               /* Check to see if someone else did our work for us. */
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
                         smp_mb(); /* ensure test happens before caller kfree */
                         return;
                 }
                         smp_mb(); /* ensure test happens before caller kfree */
                         return;
                 }
+
+               /*
+                * Refetching sync_sched_expedited_started allows later
+                * callers to piggyback on our grace period.  We subtract
+                * 1 to get the same token that the last incrementer got.
+                * We retry after they started, so our grace period works
+                * for them, and they started after our first try, so their
+                * grace period works for us.
+                */
                 get_online_cpus();
                 get_online_cpus();
+               snap = atomic_read(&sync_sched_expedited_started) - 1;
+               smp_mb(); /* ensure read is before try_stop_cpus(). */
         }
         }
-       smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
+
+       /*
+        * Everyone up to our most recent fetch is covered by our grace
+        * period.  Update the counter, but only if our work is still
+        * relevant -- which it won't be if someone who started later
+        * than we did beat us to the punch.
+        */
+       do {
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       break;
+               }
+       } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
         put_online_cpus();
  }
  EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
         put_online_cpus();
  }
  EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
author	Tejun Heo <tj@kernel.org>
	Tue, 23 Nov 2010 05:36:11 +0000 (21:36 -0800)
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	Fri, 17 Dec 2010 20:34:08 +0000 (12:34 -0800)
include/linux/rcupdate.h		patch \| blob \| history
kernel/rcutree_plugin.h		patch \| blob \| history