sched/fair: Beef up wake_wide()

author Mike Galbraith <umgwanakikbuti@gmail.com>

Tue, 14 Jul 2015 15:39:50 +0000 (17:39 +0200)

committer Ingo Molnar <mingo@kernel.org>

Mon, 3 Aug 2015 10:21:23 +0000 (12:21 +0200)
author Mike Galbraith <umgwanakikbuti@gmail.com>
Tue, 14 Jul 2015 15:39:50 +0000 (17:39 +0200)
committer Ingo Molnar <mingo@kernel.org>
Mon, 3 Aug 2015 10:21:23 +0000 (12:21 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 7412070a25ccc906558e5ddd2328ff94f28ad3f4..65a8a8651596f600024307c38825e7977d6fa73a 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1359,9 +1359,9 @@ struct task_struct {
  #ifdef CONFIG_SMP
         struct llist_node wake_entry;
         int on_cpu;
-       struct task_struct *last_wakee;
-       unsigned long wakee_flips;
+       unsigned int wakee_flips;
         unsigned long wakee_flip_decay_ts;
+       struct task_struct *last_wakee;
  
         int wake_cpu;
  #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 8b384b8d2f1db681f0a62e2e745e06c963a6e20a..ea23f9f1b51bf93621fa966a2af4ebac13b75a38 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4726,26 +4726,29 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
  
  #endif
  
+/*
+ * Detect M:N waker/wakee relationships via a switching-frequency heuristic.
+ * A waker of many should wake a different task than the one last awakened
+ * at a frequency roughly N times higher than one of its wakees.  In order
+ * to determine whether we should let the load spread vs consolidating to
+ * shared cache, we look for a minimum 'flip' frequency of llc_size in one
+ * partner, and a factor of llc_size higher frequency in the other.  With
+ * both conditions met, we can be relatively sure that the relationship is
+ * non-monogamous, with partner count exceeding socket size.  Waker/wakee
+ * being client/server, worker/dispatcher, interrupt source or whatever is
+ * irrelevant: we spread when apparent partner count exceeds socket size.
+ */
  static int wake_wide(struct task_struct *p)
  {
+       unsigned int master = current->wakee_flips;
+       unsigned int slave = p->wakee_flips;
         int factor = this_cpu_read(sd_llc_size);
  
-       /*
-        * Yeah, it's the switching-frequency, could means many wakee or
-        * rapidly switch, use factor here will just help to automatically
-        * adjust the loose-degree, so bigger node will lead to more pull.
-        */
-       if (p->wakee_flips > factor) {
-               /*
-                * wakee is somewhat hot, it needs certain amount of cpu
-                * resource, so if waker is far more hot, prefer to leave
-                * it alone.
-                */
-               if (current->wakee_flips > (factor * p->wakee_flips))
-                       return 1;
-       }
-
-       return 0;
+       if (master < slave)
+               swap(master, slave);
+       if (slave < factor || master < slave * factor)
+               return 0;
+       return 1;
  }
  
  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
@@ -4757,13 +4760,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
         unsigned long weight;
         int balanced;
  
-       /*
-        * If we wake multiple tasks be careful to not bounce
-        * ourselves around too much.
-        */
-       if (wake_wide(p))
-               return 0;
-
         idx       = sd->wake_idx;
         this_cpu  = smp_processor_id();
         prev_cpu  = task_cpu(p);
@@ -5017,17 +5013,17 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
  {
         struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
         int cpu = smp_processor_id();
-       int new_cpu = cpu;
+       int new_cpu = prev_cpu;
         int want_affine = 0;
         int sync = wake_flags & WF_SYNC;
  
         if (sd_flag & SD_BALANCE_WAKE)
-               want_affine = cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+               want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
  
         rcu_read_lock();
         for_each_domain(cpu, tmp) {
                 if (!(tmp->flags & SD_LOAD_BALANCE))
-                       continue;
+                       break;
  
                 /*
                  * If both cpu and prev_cpu are part of this domain,
@@ -5041,17 +5037,21 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
  
                 if (tmp->flags & sd_flag)
                         sd = tmp;
+               else if (!want_affine)
+                       break;
         }
  
-       if (affine_sd && cpu != prev_cpu && wake_affine(affine_sd, p, sync))
-               prev_cpu = cpu;
-
-       if (sd_flag & SD_BALANCE_WAKE) {
-               new_cpu = select_idle_sibling(p, prev_cpu);
-               goto unlock;
+       if (affine_sd) {
+               sd = NULL; /* Prefer wake_affine over balance flags */
+               if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+                       new_cpu = cpu;
         }
  
-       while (sd) {
+       if (!sd) {
+               if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+                       new_cpu = select_idle_sibling(p, new_cpu);
+
+       } else while (sd) {
                 struct sched_group *group;
                 int weight;
  
@@ -5085,7 +5085,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                 }
                 /* while loop will break here if sd == NULL */
         }
-unlock:
         rcu_read_unlock();
  
         return new_cpu;
author	Mike Galbraith <umgwanakikbuti@gmail.com>
	Tue, 14 Jul 2015 15:39:50 +0000 (17:39 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Mon, 3 Aug 2015 10:21:23 +0000 (12:21 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history