sched: Fix capacity calculations for SMT4

author Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

Tue, 8 Jun 2010 04:57:02 +0000 (14:57 +1000)

committer Ingo Molnar <mingo@elte.hu>

Wed, 9 Jun 2010 08:34:54 +0000 (10:34 +0200)
author Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Tue, 8 Jun 2010 04:57:02 +0000 (14:57 +1000)
committer Ingo Molnar <mingo@elte.hu>
Wed, 9 Jun 2010 08:34:54 +0000 (10:34 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index a3e5b1cd04381f44a6d56301baf4f3a7eecb2cd9..c731296e5e93a9666b30d029b4d78726fa6674a7 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -857,7 +857,7 @@ struct sched_group {
          * CPU power of this group, SCHED_LOAD_SCALE being max power for a
          * single CPU.
          */
-       unsigned int cpu_power;
+       unsigned int cpu_power, cpu_power_orig;
  
         /*
          * The CPUs this group covers.
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index 6ee2e0af665b70171b88f6e8b1e1dace0337f052..b9b3462483b75255cfd844c8e3d4d34b9436b254 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
         unsigned long power = SCHED_LOAD_SCALE;
         struct sched_group *sdg = sd->groups;
  
-       if (sched_feat(ARCH_POWER))
-               power *= arch_scale_freq_power(sd, cpu);
-       else
-               power *= default_scale_freq_power(sd, cpu);
-
-       power >>= SCHED_LOAD_SHIFT;
-
         if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
                 if (sched_feat(ARCH_POWER))
                         power *= arch_scale_smt_power(sd, cpu);
@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                 power >>= SCHED_LOAD_SHIFT;
         }
  
+       sdg->cpu_power_orig = power;
+
+       if (sched_feat(ARCH_POWER))
+               power *= arch_scale_freq_power(sd, cpu);
+       else
+               power *= default_scale_freq_power(sd, cpu);
+
+       power >>= SCHED_LOAD_SHIFT;
+
         power *= scale_rt_power(cpu);
         power >>= SCHED_LOAD_SHIFT;
  
@@ -2333,6 +2335,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
         sdg->cpu_power = power;
  }
  
+/*
+ * Try and fix up capacity for tiny siblings, this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough. Returns the capacity to
+ * use for @group: 1 if a sibling is still mostly intact, else 0.
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+       /*
+        * Only siblings can have significantly less than SCHED_LOAD_SCALE
+        */
+       if (sd->level != SD_LV_SIBLING)
+               return 0;
+
+       /*
+        * If ~90% (29/32) of cpu_power_orig is still there, we're good.
+        */
+       if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+               return 1;
+
+       return 0;
+}
+
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
   * @sd: The sched_domain whose statistics are to be updated.
@@ -2426,6 +2453,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
  
         sgs->group_capacity =
                 DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+       if (!sgs->group_capacity)
+               sgs->group_capacity = fix_small_capacity(sd, group);
  }
  
  /**
@@ -2724,8 +2753,9 @@ ret:
   * find_busiest_queue - find the busiest runqueue among the cpus in group.
   */
  static struct rq *
-find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-                  unsigned long imbalance, const struct cpumask *cpus)
+find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
+                  enum cpu_idle_type idle, unsigned long imbalance,
+                  const struct cpumask *cpus)
  {
         struct rq *busiest = NULL, *rq;
         unsigned long max_load = 0;
@@ -2736,6 +2766,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
                 unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
                 unsigned long wl;
  
+               if (!capacity)
+                       capacity = fix_small_capacity(sd, group);
+
                 if (!cpumask_test_cpu(i, cpus))
                         continue;
  
@@ -2852,7 +2885,7 @@ redo:
                 goto out_balanced;
         }
  
-       busiest = find_busiest_queue(group, idle, imbalance, cpus);
+       busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
         if (!busiest) {
                 schedstat_inc(sd, lb_nobusyq[idle]);
                 goto out_balanced;
author	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
	Tue, 8 Jun 2010 04:57:02 +0000 (14:57 +1000)
committer	Ingo Molnar <mingo@elte.hu>
	Wed, 9 Jun 2010 08:34:54 +0000 (10:34 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history