git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
rcu: Throttle NOCB kthread grace-period rate
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Fri, 9 May 2014 21:30:53 +0000 (14:30 -0700)
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Thu, 26 Jun 2014 17:59:36 +0000 (10:59 -0700)
On a system with high CPU utilization and high context-switch rates,
grace periods tend to complete quite quickly.  If all CPUs are no-CBs
CPUs, and all are producing at least one callback per grace period, all
the rcuo kthreads will need to be awakened on every grace period, which
on large systems can be an excessive number of wakeups.  This commit
therefore throttles the rcuo kthreads to prevent them from needing
a grace-period wakeup more often than the interval between a pair of
force-quiescent-state scans.

Reported-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
kernel/rcu/tree.c
kernel/rcu/tree_plugin.h

index ebd99af2214ee14226ac95ea520d5b87f865f287..987fd64f70dc1ecd3a95180f7b5b7611498c5d34 100644 (file)
@@ -1578,6 +1578,22 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
                rcu_gp_kthread_wake(rsp);
 }
 
+/*
+ * Read from the jiffies_till_next_fqs boot/sysfs parameter, applying
+ * limits and updating as needed.
+ */
+static unsigned long read_jiffies_till_next_fqs(void)
+{
+       unsigned long j;
+
+       j = jiffies_till_next_fqs;
+       if (j > HZ)
+               j = jiffies_till_next_fqs = HZ;
+       else if (j < 1)
+               j = jiffies_till_next_fqs = 1;
+       return j;
+}
+
 /*
  * Initialize a new grace period.  Return 0 if no grace period required.
  */
@@ -1840,14 +1856,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                                       ACCESS_ONCE(rsp->gpnum),
                                                       TPS("fqswaitsig"));
                        }
-                       j = jiffies_till_next_fqs;
-                       if (j > HZ) {
-                               j = HZ;
-                               jiffies_till_next_fqs = HZ;
-                       } else if (j < 1) {
-                               j = 1;
-                               jiffies_till_next_fqs = 1;
-                       }
+                       j = read_jiffies_till_next_fqs();
                }
 
                /* Handle grace-period end. */
index 5da9f9b3abc91d7c42821f2fb878ded391e45abd..55bbf0fd4f7eaadc4891ea226c1fb0bc7aa079f4 100644 (file)
@@ -2220,6 +2220,8 @@ static int rcu_nocb_kthread(void *arg)
 {
        int c, cl;
        bool firsttime = 1;
+       unsigned long gp_next_start = jiffies;
+       unsigned long j;
        struct rcu_head *list;
        struct rcu_head *next;
        struct rcu_head **tail;
@@ -2227,6 +2229,11 @@ static int rcu_nocb_kthread(void *arg)
 
        /* Each pass through this loop invokes one batch of callbacks */
        for (;;) {
+               /* Avoid excessive wakeups due to short grace periods. */
+               j = jiffies;
+               if (time_before(j, gp_next_start))
+                       schedule_timeout_uninterruptible(gp_next_start - j);
+
                /* If not polling, wait for next batch of callbacks. */
                if (!rcu_nocb_poll) {
                        trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -2250,6 +2257,14 @@ static int rcu_nocb_kthread(void *arg)
                firsttime = 1;
                trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
                                    TPS("WokeNonEmpty"));
+               /*
+                * Time of next grace-period start.  The idea is that
+                * we are willing to take a jiffies_till_next_fqs delay
+                * if any CPUs are idle, so we should be willing to take
+                * a similar delay when all are busy context-switching
+                * their little brains out.
+                */
+               gp_next_start = jiffies + read_jiffies_till_next_fqs();
 
                /*
                 * Extract queued callbacks, update counts, and wait