Merge remote-tracking branch 'cgroup/for-next'
author    Stephen Rothwell <sfr@canb.auug.org.au>    Thu, 9 Apr 2015 07:35:35 +0000 (17:35 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>    Thu, 9 Apr 2015 07:35:35 +0000 (17:35 +1000)
include/linux/sched.h
init/main.c
kernel/sched/core.c

diff --combined include/linux/sched.h
index 3f3308824fa41b473ac77e2927cfdec626bd8c0b,ca365d79480c9ee14a46054fe138b92a8c7d3fb4..f74d4cc3a3e54f72026449a8f70ff287dc807248
@@@ -176,14 -176,6 +176,14 @@@ extern void get_iowait_load(unsigned lo
  extern void calc_global_load(unsigned long ticks);
  extern void update_cpu_load_nohz(void);
  
 +/* Notifier for when a task gets migrated to a new CPU */
 +struct task_migration_notifier {
 +      struct task_struct *task;
 +      int from_cpu;
 +      int to_cpu;
 +};
 +extern void register_task_migration_notifier(struct notifier_block *n);
 +
  extern unsigned long get_parent_ip(unsigned long addr);
  
  extern void dump_cpu_task(int cpu);
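[Editor's illustration] A minimal sketch of how a consumer might hook the notifier API declared in this hunk. Only struct task_migration_notifier and register_task_migration_notifier() come from the diff; the demo_* names are hypothetical, and the callback just follows the standard notifier_block convention (the payload pointer is the struct passed to atomic_notifier_call_chain() in kernel/sched/core.c below).

#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical consumer of the task-migration notifier declared above. */
static int demo_migration_cb(struct notifier_block *nb,
                             unsigned long action, void *data)
{
        struct task_migration_notifier *tmn = data;

        pr_debug("task %d: CPU %d -> CPU %d\n",
                 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
        return NOTIFY_OK;
}

static struct notifier_block demo_migration_nb = {
        .notifier_call = demo_migration_cb,
};

static void demo_register(void)
{
        register_task_migration_notifier(&demo_migration_nb);
}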
@@@ -337,6 -329,8 +337,8 @@@ extern asmlinkage void schedule_tail(st
  extern void init_idle(struct task_struct *idle, int cpu);
  extern void init_idle_bootup_task(struct task_struct *idle);
  
+ extern cpumask_var_t cpu_isolated_map;
  extern int runqueue_is_locked(int cpu);
  
  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
@@@ -1123,28 -1117,15 +1125,28 @@@ struct load_weight 
  };
  
  struct sched_avg {
 +      u64 last_runnable_update;
 +      s64 decay_count;
 +      /*
 +       * utilization_avg_contrib describes the amount of time that a
 +       * sched_entity is running on a CPU. It is based on running_avg_sum
 +       * and is scaled in the range [0..SCHED_LOAD_SCALE].
 +       * load_avg_contrib describes the amount of time that a sched_entity
 +       * is runnable on a rq. It is based on both runnable_avg_sum and the
 +       * weight of the task.
 +       */
 +      unsigned long load_avg_contrib, utilization_avg_contrib;
        /*
         * These sums represent an infinite geometric series and so are bound
         * above by 1024/(1-y).  Thus we only need a u32 to store them for all
         * choices of y < 1-2^(-32)*1024.
 +       * running_avg_sum reflects the time that the sched_entity is
 +       * effectively running on the CPU.
 +       * runnable_avg_sum represents the amount of time a sched_entity is on
 +       * a runqueue which includes the running time that is monitored by
 +       * running_avg_sum.
         */
 -      u32 runnable_avg_sum, runnable_avg_period;
 -      u64 last_runnable_update;
 -      s64 decay_count;
 -      unsigned long load_avg_contrib;
 +      u32 runnable_avg_sum, avg_period, running_avg_sum;
  };
  
  #ifdef CONFIG_SCHEDSTATS
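[Editor's illustration] A hedged, stand-alone sketch of the scaling the new comment describes. The approx_* helpers are hypothetical and ignore the decay machinery; they only show that utilization_avg_contrib is the running/period ratio scaled to SCHED_LOAD_SCALE, while load_avg_contrib is the runnable/period ratio scaled by the entity's weight. (SCHED_LOAD_SCALE itself lives in the scheduler's private kernel/sched/sched.h.)

#include <linux/math64.h>
#include <linux/sched.h>

/* Hypothetical helper: running time as a fraction of the tracked period,
 * scaled to [0 .. SCHED_LOAD_SCALE]. */
static inline unsigned long approx_utilization_contrib(const struct sched_avg *sa)
{
        return div_u64((u64)sa->running_avg_sum * SCHED_LOAD_SCALE,
                       sa->avg_period + 1);
}

/* Hypothetical helper: runnable time as a fraction of the tracked period,
 * scaled by the entity's load weight. */
static inline unsigned long approx_load_contrib(const struct sched_avg *sa,
                                                unsigned long weight)
{
        return div_u64((u64)sa->runnable_avg_sum * weight,
                       sa->avg_period + 1);
}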
@@@ -1646,11 -1627,11 +1648,11 @@@ struct task_struct 
  
        /*
         * numa_faults_locality tracks if faults recorded during the last
 -       * scan window were remote/local. The task scan period is adapted
 -       * based on the locality of the faults with different weights
 -       * depending on whether they were shared or private faults
 +       * scan window were remote/local or failed to migrate. The task scan
 +       * period is adapted based on the locality of the faults with different
 +       * weights depending on whether they were shared or private faults
         */
 -      unsigned long numa_faults_locality[2];
 +      unsigned long numa_faults_locality[3];
  
        unsigned long numa_pages_migrated;
  #endif /* CONFIG_NUMA_BALANCING */
  #define TNF_NO_GROUP  0x02
  #define TNF_SHARED    0x04
  #define TNF_FAULT_LOCAL       0x08
 +#define TNF_MIGRATE_FAIL 0x10
  
  #ifdef CONFIG_NUMA_BALANCING
  extern void task_numa_fault(int last_node, int node, int pages, int flags);
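[Editor's illustration] The widened numa_faults_locality[] array and the new TNF_MIGRATE_FAIL bit pair up in the NUMA fault-accounting path. The sketch below is a hedged illustration of that pairing under CONFIG_NUMA_BALANCING; record_locality() is a hypothetical helper, not the function the scheduler actually uses. Slots 0/1 keep the existing remote/local counts and slot 2 accumulates pages whose migration failed.

#include <linux/sched.h>

/* Hypothetical accounting helper; indices and TNF_* flags as in the hunk above. */
static void record_locality(struct task_struct *p, int pages, int flags)
{
        int local = !!(flags & TNF_FAULT_LOCAL);

        /* slots 0 (remote) and 1 (local) are the pre-existing counters */
        p->numa_faults_locality[local] += pages;

        /* the new third slot counts pages that failed to migrate */
        if (flags & TNF_MIGRATE_FAIL)
                p->numa_faults_locality[2] += pages;
}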
diff --combined init/main.c
index 9d353ce1e65d31d941acd0182c6f5bfe683e7999,4a6974e678396d73189f4629537475bc9f3aee42..54565bf57bebfc789e9798f290c0156f538f4848
@@@ -143,7 -143,7 +143,7 @@@ EXPORT_SYMBOL_GPL(static_key_initialize
   * rely on the BIOS and skip the reset operation.
   *
   * This is useful if kernel is booting in an unreliable environment.
 - * For ex. kdump situaiton where previous kernel has crashed, BIOS has been
 + * For ex. kdump situation where previous kernel has crashed, BIOS has been
   * skipped and devices will be in unknown state.
   */
  unsigned int reset_devices;
@@@ -654,8 -654,8 +654,8 @@@ asmlinkage __visible void __init start_
        page_writeback_init();
        proc_root_init();
        nsfs_init();
-       cgroup_init();
        cpuset_init();
+       cgroup_init();
        taskstats_init_early();
        delayacct_init();
  
diff --combined kernel/sched/core.c
index 1d0bc4fe266d4bf63d4f100a4d6d82048787cf8b,b578bb23410b1f76ebf549e5014d92e8c36d898f..f9123a82cbb614eb26cab55c0f58540a5a3eb24b
@@@ -306,6 -306,9 +306,9 @@@ __read_mostly int scheduler_running
   */
  int sysctl_sched_rt_runtime = 950000;
  
+ /* cpus with isolated domains */
+ cpumask_var_t cpu_isolated_map;
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
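[Editor's illustration] Making cpu_isolated_map a non-static symbol declared in include/linux/sched.h (see the header hunk above) lets other scheduler code test CPU isolation directly. A hedged one-liner of the kind of check this enables; the helper name is hypothetical.

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Hypothetical helper: was this CPU carved out with the isolcpus= option? */
static inline bool demo_cpu_is_isolated(int cpu)
{
        return cpumask_test_cpu(cpu, cpu_isolated_map);
}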
@@@ -689,23 -692,6 +692,23 @@@ static inline bool got_nohz_idle_kick(v
  #ifdef CONFIG_NO_HZ_FULL
  bool sched_can_stop_tick(void)
  {
 +      /*
 +       * FIFO realtime policy runs the highest priority task. Other runnable
 +       * tasks are of a lower priority. The scheduler tick does nothing.
 +       */
 +      if (current->policy == SCHED_FIFO)
 +              return true;
 +
 +      /*
 +       * Round-robin realtime tasks time slice with other tasks at the same
 +       * realtime priority. Is this task the only one at this priority?
 +       */
 +      if (current->policy == SCHED_RR) {
 +              struct sched_rt_entity *rt_se = &current->rt;
 +
 +              return rt_se->run_list.prev == rt_se->run_list.next;
 +      }
 +
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
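[Editor's illustration] The SCHED_RR branch above relies on a circular-list property: an entry whose prev and next pointers are equal is the only entry queued at its priority. A hedged, self-contained illustration of the idiom; the demo_* names are hypothetical, and the scheduler itself performs the check directly on rt_se->run_list.

#include <linux/list.h>

struct demo_entity {
        struct list_head run_list;      /* linked into a per-priority queue */
};

/* True when the entity is the sole member of its queue: with exactly one
 * entry on a circular doubly linked list, both of its pointers lead back
 * to the list head, so prev == next. */
static bool demo_sole_member(struct demo_entity *se)
{
        return se->run_list.prev == se->run_list.next;
}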
@@@ -1013,13 -999,6 +1016,13 @@@ void check_preempt_curr(struct rq *rq, 
                rq_clock_skip_update(rq, true);
  }
  
 +static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
 +
 +void register_task_migration_notifier(struct notifier_block *n)
 +{
 +      atomic_notifier_chain_register(&task_migration_notifier, n);
 +}
 +
  #ifdef CONFIG_SMP
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
        trace_sched_migrate_task(p, new_cpu);
  
        if (task_cpu(p) != new_cpu) {
 +              struct task_migration_notifier tmn;
 +
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
 +
 +              tmn.task = p;
 +              tmn.from_cpu = task_cpu(p);
 +              tmn.to_cpu = new_cpu;
 +
 +              atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
  
        __set_task_cpu(p, new_cpu);
@@@ -2850,7 -2821,7 +2853,7 @@@ asmlinkage __visible void __sched sched
         * we find a better solution.
         *
         * NB: There are buggy callers of this function.  Ideally we
 -       * should warn if prev_state != IN_USER, but that will trigger
 +       * should warn if prev_state != CONTEXT_USER, but that will trigger
         * too frequently to make sense yet.
         */
        enum ctx_state prev_state = exception_enter();
@@@ -3066,8 -3037,6 +3069,8 @@@ void rt_mutex_setprio(struct task_struc
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
 +              if (rt_prio(oldprio))
 +                      p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
  
@@@ -5352,13 -5321,36 +5355,13 @@@ static int sched_cpu_active(struct noti
  static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
  {
 -      unsigned long flags;
 -      long cpu = (long)hcpu;
 -      struct dl_bw *dl_b;
 -
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
 -              set_cpu_active(cpu, false);
 -
 -              /* explicitly allow suspend */
 -              if (!(action & CPU_TASKS_FROZEN)) {
 -                      bool overflow;
 -                      int cpus;
 -
 -                      rcu_read_lock_sched();
 -                      dl_b = dl_bw_of(cpu);
 -
 -                      raw_spin_lock_irqsave(&dl_b->lock, flags);
 -                      cpus = dl_bw_cpus(cpu);
 -                      overflow = __dl_overflow(dl_b, cpus, 0, 0);
 -                      raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 -
 -                      rcu_read_unlock_sched();
 -
 -                      if (overflow)
 -                              return notifier_from_errno(-EBUSY);
 -              }
 +              set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
 +      default:
 +              return NOTIFY_DONE;
        }
 -
 -      return NOTIFY_DONE;
  }
  
  static int __init migration_init(void)
@@@ -5439,6 -5431,17 +5442,6 @@@ static int sched_domain_debug_one(struc
                        break;
                }
  
 -              /*
 -               * Even though we initialize ->capacity to something semi-sane,
 -               * we leave capacity_orig unset. This allows us to detect if
 -               * domain iteration is still funny without causing /0 traps.
 -               */
 -              if (!group->sgc->capacity_orig) {
 -                      printk(KERN_CONT "\n");
 -                      printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 -                      break;
 -              }
 -
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@@ -5811,9 -5814,6 +5814,6 @@@ cpu_attach_domain(struct sched_domain *
        update_top_cache_domain(cpu);
  }
  
- /* cpus with isolated domains */
- static cpumask_var_t cpu_isolated_map;
  /* Setup the mask of cpus configured for isolated domains */
  static int __init isolated_cpu_setup(char *str)
  {
@@@ -5922,6 -5922,7 +5922,6 @@@ build_overlap_sched_groups(struct sched
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 -              sg->sgc->capacity_orig = sg->sgc->capacity;
  
                /*
                 * Make sure the first group of this domain contains the
@@@ -6232,7 -6233,6 +6232,7 @@@ sd_init(struct sched_domain_topology_le
         */
  
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
 +              sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
  
@@@ -6998,6 -6998,7 +6998,6 @@@ static int cpuset_cpu_active(struct not
                 */
  
        case CPU_ONLINE:
 -      case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
  static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
  {
 -      switch (action) {
 +      unsigned long flags;
 +      long cpu = (long)hcpu;
 +      struct dl_bw *dl_b;
 +
 +      switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
 +              /* explicitly allow suspend */
 +              if (!(action & CPU_TASKS_FROZEN)) {
 +                      bool overflow;
 +                      int cpus;
 +
 +                      rcu_read_lock_sched();
 +                      dl_b = dl_bw_of(cpu);
 +
 +                      raw_spin_lock_irqsave(&dl_b->lock, flags);
 +                      cpus = dl_bw_cpus(cpu);
 +                      overflow = __dl_overflow(dl_b, cpus, 0, 0);
 +                      raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 +
 +                      rcu_read_unlock_sched();
 +
 +                      if (overflow)
 +                              return notifier_from_errno(-EBUSY);
 +              }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
@@@ -7177,8 -7156,8 +7177,8 @@@ void __init sched_init(void
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
 -              init_rt_rq(&rq->rt, rq);
 -              init_dl_rq(&rq->dl, rq);
 +              init_rt_rq(&rq->rt);
 +              init_dl_rq(&rq->dl);
  #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
  #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
 -              rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 +              rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@@ -7817,7 -7796,7 +7817,7 @@@ static int sched_rt_global_constraints(
  }
  #endif /* CONFIG_RT_GROUP_SCHED */
  
 -static int sched_dl_global_constraints(void)
 +static int sched_dl_global_validate(void)
  {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@@ -7918,11 -7897,11 +7918,11 @@@ int sched_rt_handler(struct ctl_table *
                if (ret)
                        goto undo;
  
 -              ret = sched_rt_global_constraints();
 +              ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
  
 -              ret = sched_dl_global_constraints();
 +              ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;