Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/ppwaskie/net
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 468bdd44c1baeb914cfc93037b86691b839ccc46..82ad284f823b6a60e81b1ebef44bae94ca73db6e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1096,7 +1096,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
         *
         * sched_move_task() holds both and thus holding either pins the cgroup,
-        * see set_task_rq().
+        * see task_group().
         *
         * Furthermore, all task_rq users should acquire both locks, see
         * task_rq_lock().
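
Note: the "acquire both locks" rule above is what task_rq_lock() in this file implements; a simplified sketch of that pattern (sparse annotations omitted, details approximate):

    static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
    {
            struct rq *rq;

            for (;;) {
                    /* Serialize against wakeups first ... */
                    raw_spin_lock_irqsave(&p->pi_lock, *flags);
                    rq = task_rq(p);
                    /* ... then against the runqueue the task sits on. */
                    raw_spin_lock(&rq->lock);
                    if (likely(rq == task_rq(p)))
                            return rq;
                    /* The task migrated under us; drop both locks and retry. */
                    raw_spin_unlock(&rq->lock);
                    raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
            }
    }
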
@@ -1910,12 +1910,12 @@ static inline void
 prepare_task_switch(struct rq *rq, struct task_struct *prev,
                    struct task_struct *next)
 {
+       trace_sched_switch(prev, next);
        sched_info_switch(prev, next);
        perf_event_task_sched_out(prev, next);
        fire_sched_out_preempt_notifiers(prev, next);
        prepare_lock_switch(rq, next);
        prepare_arch_switch(next);
-       trace_sched_switch(prev, next);
 }
 
 /**
@@ -4340,9 +4340,7 @@ recheck:
         */
        if (unlikely(policy == p->policy && (!rt_policy(policy) ||
                        param->sched_priority == p->rt_priority))) {
-
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                return 0;
        }
 
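
The consolidation above depends on task_rq_unlock() releasing exactly what the removed pair released; roughly (a sketch, with the __releases() annotations dropped):

    static inline void
    task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
    {
            /* Drop the locks in the reverse order task_rq_lock() took them. */
            raw_spin_unlock(&rq->lock);                      /* == __task_rq_unlock(rq) */
            raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
    }
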
@@ -6024,6 +6022,11 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCES set (Last Level Cache Domain), as this
  * allows us to avoid some pointer chasing in select_idle_sibling().
  *
+ * Iterate domains and sched_groups downward, assigning each CPU a
+ * select_idle_sibling() hw buddy.  Cross-wiring hw makes bouncing
+ * due to random perturbation self-canceling, i.e. sw buddies pull
+ * their counterpart to their CPU's hw counterpart.
+ *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
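
cpus_share_cache(), mentioned above, is the consumer of this per-domain ID; it is essentially a comparison of the two cached IDs (simplified sketch):

    bool cpus_share_cache(int this_cpu, int that_cpu)
    {
            /* Same LLC id means both CPUs sit under the same cache domain. */
            return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
    }
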
@@ -6037,8 +6040,40 @@ static void update_top_cache_domain(int cpu)
        int id = cpu;
 
        sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-       if (sd)
+       if (sd) {
+               struct sched_domain *tmp = sd;
+               struct sched_group *sg, *prev;
+               bool right;
+
+               /*
+                * Traverse to first CPU in group, and count hops
+                * to cpu from there, switching direction on each
+                * hop, never ever pointing the last CPU rightward.
+                */
+               do {
+                       id = cpumask_first(sched_domain_span(tmp));
+                       prev = sg = tmp->groups;
+                       right = true;
+
+                       while (cpumask_first(sched_group_cpus(sg)) != id)
+                               sg = sg->next;
+
+                       while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
+                               prev = sg;
+                               sg = sg->next;
+                               right = !right;
+                       }
+
+                       /* A CPU went down, never point back to domain start. */
+                       if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
+                               right = false;
+
+                       sg = right ? sg->next : prev;
+                       tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
+               } while ((tmp = tmp->child));
+
                id = cpumask_first(sched_domain_span(sd));
+       }
 
        rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
        per_cpu(sd_llc_id, cpu) = id;
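
The idle_buddy values computed above are meant to be consumed on the wakeup fast path. The matching fair.c change is not part of this file, so the following is only a sketch of how a consumer might walk the hierarchy; select_idle_buddy() is a hypothetical name, while sd_llc, sd->child and idle_cpu() are existing symbols:

    static int select_idle_buddy(struct task_struct *p, int target)
    {
            struct sched_domain *sd;

            /* Walk from the LLC domain down, asking each level's buddy. */
            for (sd = rcu_dereference(per_cpu(sd_llc, target)); sd; sd = sd->child) {
                    if (idle_cpu(sd->idle_buddy))
                            return sd->idle_buddy;
            }

            /* No idle buddy found; fall back to the original target. */
            return target;
    }
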
@@ -7097,34 +7132,66 @@ match2:
        mutex_unlock(&sched_domains_mutex);
 }
 
+static int num_cpus_frozen;    /* used to mark begin/end of suspend/resume */
+
 /*
  * Update cpusets according to cpu_active mask.  If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
  * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore them to their original state upon resume anyway.
  */
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                             void *hcpu)
 {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
+       case CPU_ONLINE_FROZEN:
+       case CPU_DOWN_FAILED_FROZEN:
+
+               /*
+                * num_cpus_frozen tracks how many CPUs are involved in the
+                * suspend/resume sequence. As long as this is not the last online
+                * operation in the resume sequence, just build a single sched
+                * operation in the resume sequence, just build a single sched
+                * domain, ignoring cpusets.
+                */
+               num_cpus_frozen--;
+               if (likely(num_cpus_frozen)) {
+                       partition_sched_domains(1, NULL, NULL);
+                       break;
+               }
+
+               /*
+                * This is the last CPU online operation. So fall through and
+                * restore the original sched domains by considering the
+                * cpuset configurations.
+                */
+
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
-               cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               cpuset_update_active_cpus(true);
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
 }
 
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
        case CPU_DOWN_PREPARE:
-               cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               cpuset_update_active_cpus(false);
+               break;
+       case CPU_DOWN_PREPARE_FROZEN:
+               num_cpus_frozen++;
+               partition_sched_domains(1, NULL, NULL);
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
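
When cpusets are compiled out, the comment at the top of this hunk says cpuset_update_active_cpus() degenerates into a wrapper around partition_sched_domains(); presumably the !CONFIG_CPUSETS stub in include/linux/cpuset.h looks like this once it grows the new bool argument (a sketch; the argument is unused there):

    #ifndef CONFIG_CPUSETS
    static inline void cpuset_update_active_cpus(bool cpu_online)
    {
            /* No cpusets: fall back to a single partition spanning all active CPUs. */
            partition_sched_domains(1, NULL, NULL);
    }
    #endif
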
@@ -7589,6 +7656,7 @@ void sched_destroy_group(struct task_group *tg)
  */
 void sched_move_task(struct task_struct *tsk)
 {
+       struct task_group *tg;
        int on_rq, running;
        unsigned long flags;
        struct rq *rq;
@@ -7603,6 +7671,12 @@ void sched_move_task(struct task_struct *tsk)
        if (unlikely(running))
                tsk->sched_class->put_prev_task(rq, tsk);
 
+       tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+                               lockdep_is_held(&tsk->sighand->siglock)),
+                         struct task_group, css);
+       tg = autogroup_task_group(tsk, tg);
+       tsk->sched_task_group = tg;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_move_group)
                tsk->sched_class->task_move_group(tsk, on_rq);
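
Caching the group in tsk->sched_task_group above only pays off if the readers use it. The task_group() accessor lives in the scheduler headers rather than in this file, so treat the following as an assumption about its intended shape after this change:

    /* Assumed reader side: return the pointer cached by sched_move_task(). */
    static inline struct task_group *task_group(struct task_struct *p)
    {
            return p->sched_task_group;
    }
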