Merge remote-tracking branch 'kvm/linux-next'

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d0c4209bb836a8ccc13688bf8eb0a21e48892f03..1d0bc4fe266d4bf63d4f100a4d6d82048787cf8b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -689,6 +689,23 @@ static inline bool got_nohz_idle_kick(void)
 #ifdef CONFIG_NO_HZ_FULL
 bool sched_can_stop_tick(void)
 {
+       /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
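
The checks added in the hunk above are only exercised for the task currently running on a nohz_full CPU. As a rough user-space sketch only (not part of this patch; the CPU number and FIFO priority below are arbitrary assumptions), a task that lets sched_can_stop_tick() return true could be set up like this:

/*
 * Hypothetical user-space sketch, not part of this patch: pin a busy
 * SCHED_FIFO task to a CPU listed in nohz_full= so that the policy check
 * added above lets the tick stop. CPU number and priority are assumptions.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	cpu_set_t set;
	struct sched_param sp;

	CPU_ZERO(&set);
	CPU_SET(3, &set);			/* assume CPU 3 is in nohz_full= */
	if (sched_setaffinity(0, sizeof(set), &set))
		perror("sched_setaffinity");

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 10;			/* arbitrary FIFO priority */
	if (sched_setscheduler(0, SCHED_FIFO, &sp))
		perror("sched_setscheduler");

	for (;;)				/* sole runnable task on the CPU */
		;
}
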
@@ -2833,7 +2850,7 @@ asmlinkage __visible void __sched schedule_user(void)
         * we find a better solution.
         *
         * NB: There are buggy callers of this function.  Ideally we
-        * should warn if prev_state != IN_USER, but that will trigger
+        * should warn if prev_state != CONTEXT_USER, but that will trigger
         * too frequently to make sense yet.
         */
        enum ctx_state prev_state = exception_enter();
@@ -3049,6 +3066,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
+               if (rt_prio(oldprio))
+                       p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
 
@@ -5333,36 +5352,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
-       unsigned long flags;
-       long cpu = (long)hcpu;
-       struct dl_bw *dl_b;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
-               set_cpu_active(cpu, false);
-
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
-
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-                       rcu_read_unlock_sched();
-
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
        }
-
-       return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5443,17 +5439,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5937,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6248,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -7013,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                 */
 
        case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
@@ -7025,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action) {
+       unsigned long flags;
+       long cpu = (long)hcpu;
+       struct dl_bw *dl_b;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
+               /* explicitly allow suspend */
+               if (!(action & CPU_TASKS_FROZEN)) {
+                       bool overflow;
+                       int cpus;
+
+                       rcu_read_lock_sched();
+                       dl_b = dl_bw_of(cpu);
+
+                       raw_spin_lock_irqsave(&dl_b->lock, flags);
+                       cpus = dl_bw_cpus(cpu);
+                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
+                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+                       rcu_read_unlock_sched();
+
+                       if (overflow)
+                               return notifier_from_errno(-EBUSY);
+               }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
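
The deadline-bandwidth check moved here from sched_cpu_inactive() above still refuses CPU_DOWN_PREPARE when the remaining CPUs cannot hold the admitted SCHED_DEADLINE bandwidth; the -EBUSY propagates to the hot-unplug write in sysfs. A hypothetical illustration from user space (not part of this patch; the CPU number in the path is an assumption):

/*
 * Hypothetical illustration, not part of this patch: with enough
 * SCHED_DEADLINE bandwidth admitted, the __dl_overflow() check above
 * makes the offline attempt below fail with EBUSY.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/devices/system/cpu/cpu3/online", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) < 0 && errno == EBUSY)
		printf("offline refused: remaining CPUs cannot fit the reserved deadline bandwidth\n");
	close(fd);
	return 0;
}
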
@@ -7171,8 +7177,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7212,7 +7218,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7811,7 +7817,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7912,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;