Merge remote-tracking branch 'cgroup/for-next'
author    Stephen Rothwell <sfr@canb.auug.org.au>    Thu, 9 Apr 2015 07:35:35 +0000 (17:35 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>    Thu, 9 Apr 2015 07:35:35 +0000 (17:35 +1000)
include/linux/sched.h
init/main.c
kernel/sched/core.c

diff --combined include/linux/sched.h
index 3f3308824fa41b473ac77e2927cfdec626bd8c0b,ca365d79480c9ee14a46054fe138b92a8c7d3fb4..f74d4cc3a3e54f72026449a8f70ff287dc807248
@@@ -176,14 -176,6 +176,14 @@@ extern void get_iowait_load(unsigned lo
  extern void calc_global_load(unsigned long ticks);
  extern void update_cpu_load_nohz(void);
  
 +/* Notifier for when a task gets migrated to a new CPU */
 +struct task_migration_notifier {
 +      struct task_struct *task;
 +      int from_cpu;
 +      int to_cpu;
 +};
 +extern void register_task_migration_notifier(struct notifier_block *n);
 +
  extern unsigned long get_parent_ip(unsigned long addr);
  
  extern void dump_cpu_task(int cpu);
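[Editor's illustration] A minimal sketch of how a consumer might hook the notifier API declared in this hunk. Only struct task_migration_notifier and register_task_migration_notifier() come from the diff; the demo_* names are hypothetical, and the callback just follows the standard notifier_block convention (the payload pointer is the struct passed to atomic_notifier_call_chain() in kernel/sched/core.c below).

#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical consumer of the task-migration notifier declared above. */
static int demo_migration_cb(struct notifier_block *nb,
                             unsigned long action, void *data)
{
        struct task_migration_notifier *tmn = data;

        pr_debug("task %d: CPU %d -> CPU %d\n",
                 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
        return NOTIFY_OK;
}

static struct notifier_block demo_migration_nb = {
        .notifier_call = demo_migration_cb,
};

static void demo_register(void)
{
        register_task_migration_notifier(&demo_migration_nb);
}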
@@@ -337,6 -329,8 +337,8 @@@ extern asmlinkage void schedule_tail(st
  extern void init_idle(struct task_struct *idle, int cpu);
  extern void init_idle_bootup_task(struct task_struct *idle);
  
+ extern cpumask_var_t cpu_isolated_map;
  extern int runqueue_is_locked(int cpu);
  
  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
@@@ -1123,28 -1117,15 +1125,28 @@@ struct load_weight 
  };
  
  struct sched_avg {
 +      u64 last_runnable_update;
 +      s64 decay_count;
 +      /*
 +       * utilization_avg_contrib describes the amount of time that a
 +       * sched_entity is running on a CPU. It is based on running_avg_sum
 +       * and is scaled in the range [0..SCHED_LOAD_SCALE].
 +       * load_avg_contrib describes the amount of time that a sched_entity
 +       * is runnable on a rq. It is based on both runnable_avg_sum and the
 +       * weight of the task.
 +       */
 +      unsigned long load_avg_contrib, utilization_avg_contrib;
        /*
         * These sums represent an infinite geometric series and so are bound
         * above by 1024/(1-y).  Thus we only need a u32 to store them for all
         * choices of y < 1-2^(-32)*1024.
 +       * running_avg_sum reflects the time that the sched_entity is
 +       * effectively running on the CPU.
 +       * runnable_avg_sum represents the amount of time a sched_entity is on
 +       * a runqueue which includes the running time that is monitored by
 +       * running_avg_sum.
         */
 -      u32 runnable_avg_sum, runnable_avg_period;
 -      u64 last_runnable_update;
 -      s64 decay_count;
 -      unsigned long load_avg_contrib;
 +      u32 runnable_avg_sum, avg_period, running_avg_sum;
  };
  
  #ifdef CONFIG_SCHEDSTATS
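[Editor's illustration] A hedged, stand-alone sketch of the scaling the new comment describes. The approx_* helpers are hypothetical and ignore the decay machinery; they only show that utilization_avg_contrib is the running/period ratio scaled to SCHED_LOAD_SCALE, while load_avg_contrib is the runnable/period ratio scaled by the entity's weight. (SCHED_LOAD_SCALE itself lives in the scheduler's private kernel/sched/sched.h.)

#include <linux/math64.h>
#include <linux/sched.h>

/* Hypothetical helper: running time as a fraction of the tracked period,
 * scaled to [0 .. SCHED_LOAD_SCALE]. */
static inline unsigned long approx_utilization_contrib(const struct sched_avg *sa)
{
        return div_u64((u64)sa->running_avg_sum * SCHED_LOAD_SCALE,
                       sa->avg_period + 1);
}

/* Hypothetical helper: runnable time as a fraction of the tracked period,
 * scaled by the entity's load weight. */
static inline unsigned long approx_load_contrib(const struct sched_avg *sa,
                                                unsigned long weight)
{
        return div_u64((u64)sa->runnable_avg_sum * weight,
                       sa->avg_period + 1);
}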
@@@ -1646,11 -1627,11 +1648,11 @@@ struct task_struct 
  
        /*
         * numa_faults_locality tracks if faults recorded during the last
 -       * scan window were remote/local. The task scan period is adapted
 -       * based on the locality of the faults with different weights
 -       * depending on whether they were shared or private faults
 +       * scan window were remote/local or failed to migrate. The task scan
 +       * period is adapted based on the locality of the faults with different
 +       * weights depending on whether they were shared or private faults
         */
 -      unsigned long numa_faults_locality[2];
 +      unsigned long numa_faults_locality[3];
  
        unsigned long numa_pages_migrated;
  #endif /* CONFIG_NUMA_BALANCING */
  #define TNF_NO_GROUP  0x02
  #define TNF_SHARED    0x04
  #define TNF_FAULT_LOCAL       0x08
 +#define TNF_MIGRATE_FAIL 0x10
  
  #ifdef CONFIG_NUMA_BALANCING
  extern void task_numa_fault(int last_node, int node, int pages, int flags);
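[Editor's illustration] The widened numa_faults_locality[] array and the new TNF_MIGRATE_FAIL bit pair up in the NUMA fault-accounting path. The sketch below is a hedged illustration of that pairing under CONFIG_NUMA_BALANCING; record_locality() is a hypothetical helper, not the function the scheduler actually uses. Slots 0/1 keep the existing remote/local counts and slot 2 accumulates pages whose migration failed.

#include <linux/sched.h>

/* Hypothetical accounting helper; indices and TNF_* flags as in the hunk above. */
static void record_locality(struct task_struct *p, int pages, int flags)
{
        int local = !!(flags & TNF_FAULT_LOCAL);

        /* slots 0 (remote) and 1 (local) are the pre-existing counters */
        p->numa_faults_locality[local] += pages;

        /* the new third slot counts pages that failed to migrate */
        if (flags & TNF_MIGRATE_FAIL)
                p->numa_faults_locality[2] += pages;
}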
diff --combined init/main.c
index 9d353ce1e65d31d941acd0182c6f5bfe683e7999,4a6974e678396d73189f4629537475bc9f3aee42..54565bf57bebfc789e9798f290c0156f538f4848
@@@ -143,7 -143,7 +143,7 @@@ EXPORT_SYMBOL_GPL(static_key_initialize
   * rely on the BIOS and skip the reset operation.
   *
   * This is useful if kernel is booting in an unreliable environment.
 - * For ex. kdump situaiton where previous kernel has crashed, BIOS has been
 + * For ex. kdump situation where previous kernel has crashed, BIOS has been
   * skipped and devices will be in unknown state.
   */
  unsigned int reset_devices;
@@@ -654,8 -654,8 +654,8 @@@ asmlinkage __visible void __init start_
        page_writeback_init();
        proc_root_init();
        nsfs_init();
-       cgroup_init();
        cpuset_init();
+       cgroup_init();
        taskstats_init_early();
        delayacct_init();
  
diff --combined kernel/sched/core.c
index 1d0bc4fe266d4bf63d4f100a4d6d82048787cf8b,b578bb23410b1f76ebf549e5014d92e8c36d898f..f9123a82cbb614eb26cab55c0f58540a5a3eb24b
@@@ -306,6 -306,9 +306,9 @@@ __read_mostly int scheduler_running
   */
  int sysctl_sched_rt_runtime = 950000;
  
+ /* cpus with isolated domains */
+ cpumask_var_t cpu_isolated_map;
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
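[Editor's illustration] Making cpu_isolated_map a non-static symbol declared in include/linux/sched.h (see the header hunk above) lets other scheduler code test CPU isolation directly. A hedged one-liner of the kind of check this enables; the helper name is hypothetical.

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Hypothetical helper: was this CPU carved out with the isolcpus= option? */
static inline bool demo_cpu_is_isolated(int cpu)
{
        return cpumask_test_cpu(cpu, cpu_isolated_map);
}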
@@@ -689,23 -692,6 +692,23 @@@ static inline bool got_nohz_idle_kick(v
  #ifdef CONFIG_NO_HZ_FULL
  bool sched_can_stop_tick(void)
  {
 +      /*
 +       * FIFO realtime policy runs the highest priority task. Other runnable
 +       * tasks are of a lower priority. The scheduler tick does nothing.
 +       */
 +      if (current->policy == SCHED_FIFO)
 +              return true;
 +
 +      /*
 +       * Round-robin realtime tasks time slice with other tasks at the same
 +       * realtime priority. Is this task the only one at this priority?
 +       */
 +      if (current->policy == SCHED_RR) {
 +              struct sched_rt_entity *rt_se = &current->rt;
 +
 +              return rt_se->run_list.prev == rt_se->run_list.next;
 +      }
 +
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
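[Editor's illustration] The SCHED_RR branch above relies on a circular-list property: an entry whose prev and next pointers are equal is the only entry queued at its priority. A hedged, self-contained illustration of the idiom; the demo_* names are hypothetical, and the scheduler itself performs the check directly on rt_se->run_list.

#include <linux/list.h>

struct demo_entity {
        struct list_head run_list;      /* linked into a per-priority queue */
};

/* True when the entity is the sole member of its queue: with exactly one
 * entry on a circular doubly linked list, both of its pointers lead back
 * to the list head, so prev == next. */
static bool demo_sole_member(struct demo_entity *se)
{
        return se->run_list.prev == se->run_list.next;
}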
@@@ -1013,13 -999,6 +1016,13 @@@ void check_preempt_curr(struct rq *rq, 
                rq_clock_skip_update(rq, true);
  }
  
 +static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
 +
 +void register_task_migration_notifier(struct notifier_block *n)
 +{
 +      atomic_notifier_chain_register(&task_migration_notifier, n);
 +}
 +
  #ifdef CONFIG_SMP
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
        trace_sched_migrate_task(p, new_cpu);
  
        if (task_cpu(p) != new_cpu) {
 +              struct task_migration_notifier tmn;
 +
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
 +
 +              tmn.task = p;
 +              tmn.from_cpu = task_cpu(p);
 +              tmn.to_cpu = new_cpu;
 +
 +              atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
  
        __set_task_cpu(p, new_cpu);
@@@ -2850,7 -2821,7 +2853,7 @@@ asmlinkage __visible void __sched sched
         * we find a better solution.
         *
         * NB: There are buggy callers of this function.  Ideally we
 -       * should warn if prev_state != IN_USER, but that will trigger
 +       * should warn if prev_state != CONTEXT_USER, but that will trigger
         * too frequently to make sense yet.
         */
        enum ctx_state prev_state = exception_enter();
@@@ -3066,8 -3037,6 +3069,8 @@@ void rt_mutex_setprio(struct task_struc
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
 +              if (rt_prio(oldprio))
 +                      p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
  
@@@ -5352,13 -5321,36 +5355,13 @@@ static int sched_cpu_active(struct noti
  static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
  {
 -      unsigned long flags;
 -      long cpu = (long)hcpu;
 -      struct dl_bw *dl_b;
 -
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
 -              set_cpu_active(cpu, false);
 -
 -              /* explicitly allow suspend */
 -              if (!(action & CPU_TASKS_FROZEN)) {
 -                      bool overflow;
 -                      int cpus;
 -
 -                      rcu_read_lock_sched();
 -                      dl_b = dl_bw_of(cpu);
 -
 -                      raw_spin_lock_irqsave(&dl_b->lock, flags);
 -                      cpus = dl_bw_cpus(cpu);
 -                      overflow = __dl_overflow(dl_b, cpus, 0, 0);
 -                      raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 -
 -                      rcu_read_unlock_sched();
 -
 -                      if (overflow)
 -                              return notifier_from_errno(-EBUSY);
 -              }
 +              set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
 +      default:
 +              return NOTIFY_DONE;
        }
 -
 -      return NOTIFY_DONE;
  }
  
  static int __init migration_init(void)
@@@ -5439,6 -5431,17 +5442,6 @@@ static int sched_domain_debug_one(struc
                        break;
                }
  
 -              /*
 -               * Even though we initialize ->capacity to something semi-sane,
 -               * we leave capacity_orig unset. This allows us to detect if
 -               * domain iteration is still funny without causing /0 traps.
 -               */
 -              if (!group->sgc->capacity_orig) {
 -                      printk(KERN_CONT "\n");
 -                      printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 -                      break;
 -              }
 -
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@@ -5811,9 -5814,6 +5814,6 @@@ cpu_attach_domain(struct sched_domain *
        update_top_cache_domain(cpu);
  }
  
- /* cpus with isolated domains */
- static cpumask_var_t cpu_isolated_map;
  /* Setup the mask of cpus configured for isolated domains */
  static int __init isolated_cpu_setup(char *str)
  {
@@@ -5922,6 -5922,7 +5922,6 @@@ build_overlap_sched_groups(struct sched
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 -              sg->sgc->capacity_orig = sg->sgc->capacity;
  
                /*
                 * Make sure the first group of this domain contains the
@@@ -6232,7 -6233,6 +6232,7 @@@ sd_init(struct sched_domain_topology_le
         */
  
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
 +              sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
  
@@@ -6998,6 -6998,7 +6998,6 @@@ static int cpuset_cpu_active(struct not
                 */
  
        case CPU_ONLINE:
 -      case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
  static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
  {
 -      switch (action) {
 +      unsigned long flags;
 +      long cpu = (long)hcpu;
 +      struct dl_bw *dl_b;
 +
 +      switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
 +              /* explicitly allow suspend */
 +              if (!(action & CPU_TASKS_FROZEN)) {
 +                      bool overflow;
 +                      int cpus;
 +
 +                      rcu_read_lock_sched();
 +                      dl_b = dl_bw_of(cpu);
 +
 +                      raw_spin_lock_irqsave(&dl_b->lock, flags);
 +                      cpus = dl_bw_cpus(cpu);
 +                      overflow = __dl_overflow(dl_b, cpus, 0, 0);
 +                      raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 +
 +                      rcu_read_unlock_sched();
 +
 +                      if (overflow)
 +                              return notifier_from_errno(-EBUSY);
 +              }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
@@@ -7177,8 -7156,8 +7177,8 @@@ void __init sched_init(void
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
 -              init_rt_rq(&rq->rt, rq);
 -              init_dl_rq(&rq->dl, rq);
 +              init_rt_rq(&rq->rt);
 +              init_dl_rq(&rq->dl);
  #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
  #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
 -              rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 +              rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@@ -7817,7 -7796,7 +7817,7 @@@ static int sched_rt_global_constraints(
  }
  #endif /* CONFIG_RT_GROUP_SCHED */
  
 -static int sched_dl_global_constraints(void)
 +static int sched_dl_global_validate(void)
  {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@@ -7918,11 -7897,11 +7918,11 @@@ int sched_rt_handler(struct ctl_table *
                if (ret)
                        goto undo;
  
 -              ret = sched_rt_global_constraints();
 +              ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
  
 -              ret = sched_dl_global_constraints();
 +              ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;