Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm

[karo-tx-linux.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 0efd2eefb027bdb269f2a63629fba4bc2ef8ded3..5ac63c9a995a3570e0ad73a20b28c23cb972a963 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,6 +933,8 @@ static int effective_prio(struct task_struct *p)
  /**
   * task_curr - is this task currently executing on a CPU?
   * @p: the task in question.
+ *
+ * Return: 1 if the task is currently executing. 0 otherwise.
   */
  inline int task_curr(const struct task_struct *p)
  {
@@ -1467,7 +1469,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
   * the simpler "current->state = TASK_RUNNING" to mark yourself
   * runnable without the overhead of this.
   *
- * Returns %true if @p was woken up, %false if it was already running
+ * Return: %true if @p was woken up, %false if it was already running
   * or @state didn't match @p's state.
   */
  static int
@@ -1476,7 +1478,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         unsigned long flags;
         int cpu, success = 0;
  
-       smp_wmb();
+       /*
+        * If we are going to wake up a thread waiting for CONDITION, we
+        * need to ensure that the CONDITION=1 store done by the caller
+        * cannot be reordered with the p->state check below. This pairs
+        * with the mb() in set_current_state() that the waiting thread does.
+        */
+       smp_mb__before_spinlock();
         raw_spin_lock_irqsave(&p->pi_lock, flags);
         if (!(p->state & state))
                 goto out;
@@ -1562,8 +1570,9 @@ out:
   * @p: The process to be woken up.
   *
   * Attempt to wake up the nominated process and move it to the set of runnable
- * processes.  Returns 1 if the process was woken up, 0 if it was already
- * running.
+ * processes.
+ *
+ * Return: 1 if the process was woken up, 0 if it was already running.
   *
   * It may be assumed that this function implies a write memory barrier before
   * changing the task state if and only if any tasks are woken up.
@@ -2176,6 +2185,8 @@ void scheduler_tick(void)
   * This makes sure that uptime, CFS vruntime, load
   * balancing, etc... continue to move forward, even
   * with a very low granularity.
+ *
+ * Return: Maximum deferment in nanoseconds.
   */
  u64 scheduler_tick_max_deferment(void)
  {
@@ -2379,6 +2390,12 @@ need_resched:
         if (sched_feat(HRTICK))
                 hrtick_clear(rq);
  
+       /*
+        * Make sure that signal_pending_state()->signal_pending() below
+        * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+        * done by the caller to avoid the race with signal_wake_up().
+        */
+       smp_mb__before_spinlock();
         raw_spin_lock_irq(&rq->lock);
  
         switch_count = &prev->nivcsw;
@@ -2495,13 +2512,11 @@ void __sched schedule_preempt_disabled(void)
   */
  asmlinkage void __sched notrace preempt_schedule(void)
  {
-       struct thread_info *ti = current_thread_info();
-
         /*
          * If there is a non-zero preempt_count or interrupts are disabled,
          * we do not want to preempt the current task. Just return..
          */
-       if (likely(ti->preempt_count || irqs_disabled()))
+       if (likely(!preemptible()))
                 return;
  
         do {
@@ -2645,7 +2660,7 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
         if (unlikely(!q))
                 return;
  
-       if (unlikely(!nr_exclusive))
+       if (unlikely(nr_exclusive != 1))
                 wake_flags = 0;
  
         spin_lock_irqsave(&q->lock, flags);
@@ -2781,8 +2796,8 @@ EXPORT_SYMBOL(wait_for_completion);
   * specified timeout to expire. The timeout is in jiffies. It is not
   * interruptible.
   *
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
   */
  unsigned long __sched
  wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -2814,8 +2829,8 @@ EXPORT_SYMBOL(wait_for_completion_io);
   * specified timeout to expire. The timeout is in jiffies. It is not
   * interruptible. The caller is accounted as waiting for IO.
   *
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
   */
  unsigned long __sched
  wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@ -2831,7 +2846,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
   * This waits for completion of a specific task to be signaled. It is
   * interruptible.
   *
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
   */
  int __sched wait_for_completion_interruptible(struct completion *x)
  {
@@ -2850,8 +2865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
   * This waits for either a completion of a specific task to be signaled or for a
   * specified timeout to expire. It is interruptible. The timeout is in jiffies.
   *
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
   */
  long __sched
  wait_for_completion_interruptible_timeout(struct completion *x,
@@ -2868,7 +2883,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
   * This waits to be signaled for completion of a specific task. It can be
   * interrupted by a kill signal.
   *
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
   */
  int __sched wait_for_completion_killable(struct completion *x)
  {
@@ -2888,8 +2903,8 @@ EXPORT_SYMBOL(wait_for_completion_killable);
   * signaled or for a specified timeout to expire. It can be
   * interrupted by a kill signal. The timeout is in jiffies.
   *
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
   */
  long __sched
  wait_for_completion_killable_timeout(struct completion *x,
@@ -2903,7 +2918,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
   *     try_wait_for_completion - try to decrement a completion without blocking
   *     @x:     completion structure
   *
- *     Returns: 0 if a decrement cannot be done without blocking
+ *     Return: 0 if a decrement cannot be done without blocking
   *              1 if a decrement succeeded.
   *
   *     If a completion is being used as a counting completion,
@@ -2930,7 +2945,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
   *     completion_done - Test to see if a completion has any waiters
   *     @x:     completion structure
   *
- *     Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *     Return: 0 if there are waiters (wait_for_completion() in progress)
   *              1 if there are no waiters.
   *
   */
@@ -3167,7 +3182,7 @@ SYSCALL_DEFINE1(nice, int, increment)
   * task_prio - return the priority value of a given task.
   * @p: the task in question.
   *
- * This is the priority value as seen by users in /proc.
+ * Return: The priority value as seen by users in /proc.
   * RT tasks are offset by -200. Normal tasks are centered
   * around 0, value goes from -16 to +15.
   */
@@ -3179,6 +3194,8 @@ int task_prio(const struct task_struct *p)
  /**
   * task_nice - return the nice value of a given task.
   * @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
   */
  int task_nice(const struct task_struct *p)
  {
@@ -3189,6 +3206,8 @@ EXPORT_SYMBOL(task_nice);
  /**
   * idle_cpu - is a given cpu idle currently?
   * @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
   */
  int idle_cpu(int cpu)
  {
@@ -3211,6 +3230,8 @@ int idle_cpu(int cpu)
  /**
   * idle_task - return the idle task for a given cpu.
   * @cpu: the processor in question.
+ *
+ * Return: The idle task for the cpu @cpu.
   */
  struct task_struct *idle_task(int cpu)
  {
@@ -3220,6 +3241,8 @@ struct task_struct *idle_task(int cpu)
  /**
   * find_process_by_pid - find a process with a matching PID value.
   * @pid: the pid in question.
+ *
+ * Return: The task of @pid, if found. %NULL otherwise.
   */
  static struct task_struct *find_process_by_pid(pid_t pid)
  {
@@ -3417,6 +3440,8 @@ recheck:
   * @policy: new policy.
   * @param: structure containing the new RT priority.
   *
+ * Return: 0 on success. An error code otherwise.
+ *
   * NOTE that the task may be already dead.
   */
  int sched_setscheduler(struct task_struct *p, int policy,
@@ -3436,6 +3461,8 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
   * current context has permission.  For example, this is needed in
   * stop_machine(): we create temporary high priority worker threads,
   * but our caller might not have that capability.
+ *
+ * Return: 0 on success. An error code otherwise.
   */
  int sched_setscheduler_nocheck(struct task_struct *p, int policy,
                                const struct sched_param *param)
@@ -3470,6 +3497,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
   * @pid: the pid in question.
   * @policy: new policy.
   * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
   */
  SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
                 struct sched_param __user *, param)
@@ -3485,6 +3514,8 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
   * sys_sched_setparam - set/change the RT priority of a thread
   * @pid: the pid in question.
   * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
   */
  SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
  {
@@ -3494,6 +3525,9 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
  /**
   * sys_sched_getscheduler - get the policy (scheduling class) of a thread
   * @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
   */
  SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
  {
@@ -3520,6 +3554,9 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
   * sys_sched_getparam - get the RT priority of a thread
   * @pid: the pid in question.
   * @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
   */
  SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
  {
@@ -3644,6 +3681,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
   * @pid: pid of the process
   * @len: length in bytes of the bitmask pointed to by user_mask_ptr
   * @user_mask_ptr: user-space pointer to the new cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
   */
  SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
                 unsigned long __user *, user_mask_ptr)
@@ -3695,6 +3734,8 @@ out_unlock:
   * @pid: pid of the process
   * @len: length in bytes of the bitmask pointed to by user_mask_ptr
   * @user_mask_ptr: user-space pointer to hold the current cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
   */
  SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
                 unsigned long __user *, user_mask_ptr)
@@ -3729,6 +3770,8 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
   *
   * This function yields the current CPU to other tasks. If there are no
   * other threads running on this CPU then this function will return.
+ *
+ * Return: 0.
   */
  SYSCALL_DEFINE0(sched_yield)
  {
@@ -3854,7 +3897,7 @@ EXPORT_SYMBOL(yield);
   * It's the caller's job to ensure that the target task struct
   * can't go away on us before we can do any checks.
   *
- * Returns:
+ * Return:
   *     true (>0) if we indeed boosted the target task.
   *     false (0) if we failed to boost the target.
   *     -ESRCH if there's no task to yield to.
@@ -3957,8 +4000,9 @@ long __sched io_schedule_timeout(long timeout)
   * sys_sched_get_priority_max - return maximum RT priority.
   * @policy: scheduling class.
   *
- * this syscall returns the maximum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
   */
  SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
  {
@@ -3982,8 +4026,9 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
   * sys_sched_get_priority_min - return minimum RT priority.
   * @policy: scheduling class.
   *
- * this syscall returns the minimum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the minimum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
   */
  SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
  {
@@ -4009,6 +4054,9 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
   *
   * this syscall writes the default timeslice value of a given process
   * into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
   */
  SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
                 struct timespec __user *, interval)
@@ -4118,7 +4166,7 @@ void show_state_filter(unsigned long state_filter)
                 debug_show_all_locks();
  }
  
-void __cpuinit init_idle_bootup_task(struct task_struct *idle)
+void init_idle_bootup_task(struct task_struct *idle)
  {
         idle->sched_class = &idle_sched_class;
  }
@@ -4131,7 +4179,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle)
   * NOTE: this function does not set the idle thread's NEED_RESCHED
   * flag, to make booting more robust.
   */
-void __cpuinit init_idle(struct task_struct *idle, int cpu)
+void init_idle(struct task_struct *idle, int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
@@ -4615,7 +4663,7 @@ static void set_rq_offline(struct rq *rq)
   * migration_call - callback that gets triggered when a CPU is added.
   * Here we can start up the necessary migration thread for the new CPU.
   */
-static int __cpuinit
+static int
  migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  {
         int cpu = (long)hcpu;
@@ -4669,12 +4717,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
   * happens before everything else.  This has to be lower priority than
   * the notifier in the perf_event subsystem, though.
   */
-static struct notifier_block __cpuinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
         .notifier_call = migration_call,
         .priority = CPU_PRI_MIGRATION,
  };
  
-static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+static int sched_cpu_active(struct notifier_block *nfb,
                                       unsigned long action, void *hcpu)
  {
         switch (action & ~CPU_TASKS_FROZEN) {
@@ -4687,7 +4735,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
         }
  }
  
-static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+static int sched_cpu_inactive(struct notifier_block *nfb,
                                         unsigned long action, void *hcpu)
  {
         switch (action & ~CPU_TASKS_FROZEN) {
@@ -4899,7 +4947,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
                                 SD_BALANCE_FORK |
                                 SD_BALANCE_EXEC |
                                 SD_SHARE_CPUPOWER |
-                               SD_SHARE_PKG_RESOURCES);
+                               SD_SHARE_PKG_RESOURCES |
+                               SD_PREFER_SIBLING);
                 if (nr_node_ids == 1)
                         pflags &= ~SD_SERIALIZE;
         }
@@ -5068,18 +5117,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
   * two cpus are in the same cache domain, see cpus_share_cache().
   */
  DEFINE_PER_CPU(struct sched_domain *, sd_llc);
+DEFINE_PER_CPU(int, sd_llc_size);
  DEFINE_PER_CPU(int, sd_llc_id);
  
  static void update_top_cache_domain(int cpu)
  {
         struct sched_domain *sd;
         int id = cpu;
+       int size = 1;
  
         sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-       if (sd)
+       if (sd) {
                 id = cpumask_first(sched_domain_span(sd));
+               size = cpumask_weight(sched_domain_span(sd));
+       }
  
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
+       per_cpu(sd_llc_size, cpu) = size;
         per_cpu(sd_llc_id, cpu) = id;
  }
  
@@ -5103,6 +5157,13 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
                         tmp->parent = parent->parent;
                         if (parent->parent)
                                 parent->parent->child = tmp;
+                       /*
+                        * Transfer SD_PREFER_SIBLING down in case of a
+                        * degenerate parent; the spans match for this
+                        * so the property transfers.
+                        */
+                       if (parent->flags & SD_PREFER_SIBLING)
+                               tmp->flags |= SD_PREFER_SIBLING;
                         destroy_sched_domain(parent, cpu);
                 } else
                         tmp = tmp->parent;
@@ -6169,8 +6230,9 @@ match1:
                 ;
         }
  
+       n = ndoms_cur;
         if (doms_new == NULL) {
-               ndoms_cur = 0;
+               n = 0;
                 doms_new = &fallback_doms;
                 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
                 WARN_ON_ONCE(dattr_new);
@@ -6178,7 +6240,7 @@ match1:
  
         /* Build new domains */
         for (i = 0; i < ndoms_new; i++) {
-               for (j = 0; j < ndoms_cur && !new_topology; j++) {
+               for (j = 0; j < n && !new_topology; j++) {
                         if (cpumask_equal(doms_new[i], doms_cur[j])
                             && dattrs_equal(dattr_new, i, dattr_cur, j))
                                 goto match2;
@@ -6617,6 +6679,8 @@ void normalize_rt_tasks(void)
   * @cpu: the processor in question.
   *
   * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
   */
  struct task_struct *curr_task(int cpu)
  {
@@ -6748,7 +6812,7 @@ void sched_move_task(struct task_struct *tsk)
         if (unlikely(running))
                 tsk->sched_class->put_prev_task(rq, tsk);
  
-       tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
+       tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id,
                                 lockdep_is_held(&tsk->sighand->siglock)),
                           struct task_group, css);
         tg = autogroup_task_group(tsk, tg);
@@ -7070,23 +7134,22 @@ int sched_rt_handler(struct ctl_table *table, int write,
  
  #ifdef CONFIG_CGROUP_SCHED
  
-/* return corresponding task_group object of a cgroup */
-static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
  {
-       return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
-                           struct task_group, css);
+       return css ? container_of(css, struct task_group, css) : NULL;
  }
  
-static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
  {
-       struct task_group *tg, *parent;
+       struct task_group *parent = css_tg(parent_css);
+       struct task_group *tg;
  
-       if (!cgrp->parent) {
+       if (!parent) {
                 /* This is early initialization for the top cgroup */
                 return &root_task_group.css;
         }
  
-       parent = cgroup_tg(cgrp->parent);
         tg = sched_create_group(parent);
         if (IS_ERR(tg))
                 return ERR_PTR(-ENOMEM);
@@ -7094,41 +7157,38 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
         return &tg->css;
  }
  
-static int cpu_cgroup_css_online(struct cgroup *cgrp)
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
  {
-       struct task_group *tg = cgroup_tg(cgrp);
-       struct task_group *parent;
+       struct task_group *tg = css_tg(css);
+       struct task_group *parent = css_tg(css_parent(css));
  
-       if (!cgrp->parent)
-               return 0;
-
-       parent = cgroup_tg(cgrp->parent);
-       sched_online_group(tg, parent);
+       if (parent)
+               sched_online_group(tg, parent);
         return 0;
  }
  
-static void cpu_cgroup_css_free(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
  {
-       struct task_group *tg = cgroup_tg(cgrp);
+       struct task_group *tg = css_tg(css);
  
         sched_destroy_group(tg);
  }
  
-static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
  {
-       struct task_group *tg = cgroup_tg(cgrp);
+       struct task_group *tg = css_tg(css);
  
         sched_offline_group(tg);
  }
  
-static int cpu_cgroup_can_attach(struct cgroup *cgrp,
+static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
                                  struct cgroup_taskset *tset)
  {
         struct task_struct *task;
  
-       cgroup_taskset_for_each(task, cgrp, tset) {
+       cgroup_taskset_for_each(task, css, tset) {
  #ifdef CONFIG_RT_GROUP_SCHED
-               if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
+               if (!sched_rt_can_attach(css_tg(css), task))
                         return -EINVAL;
  #else
                 /* We don't support RT-tasks being in separate groups */
@@ -7139,18 +7199,18 @@ static int cpu_cgroup_can_attach(struct cgroup *cgrp,
         return 0;
  }
  
-static void cpu_cgroup_attach(struct cgroup *cgrp,
+static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
                               struct cgroup_taskset *tset)
  {
         struct task_struct *task;
  
-       cgroup_taskset_for_each(task, cgrp, tset)
+       cgroup_taskset_for_each(task, css, tset)
                 sched_move_task(task);
  }
  
-static void
-cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
-               struct task_struct *task)
+static void cpu_cgroup_exit(struct cgroup_subsys_state *css,
+                           struct cgroup_subsys_state *old_css,
+                           struct task_struct *task)
  {
         /*
          * cgroup_exit() is called in the copy_process() failure path.
@@ -7164,15 +7224,16 @@ cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
-                               u64 shareval)
+static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
+                               struct cftype *cftype, u64 shareval)
  {
-       return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
+       return sched_group_set_shares(css_tg(css), scale_load(shareval));
  }
  
-static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
+                              struct cftype *cft)
  {
-       struct task_group *tg = cgroup_tg(cgrp);
+       struct task_group *tg = css_tg(css);
  
         return (u64) scale_load_down(tg->shares);
  }
@@ -7294,26 +7355,28 @@ long tg_get_cfs_period(struct task_group *tg)
         return cfs_period_us;
  }
  
-static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
+                                 struct cftype *cft)
  {
-       return tg_get_cfs_quota(cgroup_tg(cgrp));
+       return tg_get_cfs_quota(css_tg(css));
  }
  
-static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
-                               s64 cfs_quota_us)
+static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
+                                  struct cftype *cftype, s64 cfs_quota_us)
  {
-       return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+       return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
  }
  
-static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css,
+                                  struct cftype *cft)
  {
-       return tg_get_cfs_period(cgroup_tg(cgrp));
+       return tg_get_cfs_period(css_tg(css));
  }
  
-static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
-                               u64 cfs_period_us)
+static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
+                                   struct cftype *cftype, u64 cfs_period_us)
  {
-       return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+       return tg_set_cfs_period(css_tg(css), cfs_period_us);
  }
  
  struct cfs_schedulable_data {
@@ -7394,10 +7457,10 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
         return ret;
  }
  
-static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
+static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
                 struct cgroup_map_cb *cb)
  {
-       struct task_group *tg = cgroup_tg(cgrp);
+       struct task_group *tg = css_tg(css);
         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
  
         cb->fill(cb, "nr_periods", cfs_b->nr_periods);
@@ -7410,26 +7473,28 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
  #ifdef CONFIG_RT_GROUP_SCHED
-static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
-                               s64 val)
+static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
+                               struct cftype *cft, s64 val)
  {
-       return sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+       return sched_group_set_rt_runtime(css_tg(css), val);
  }
  
-static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft)
+static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css,
+                              struct cftype *cft)
  {
-       return sched_group_rt_runtime(cgroup_tg(cgrp));
+       return sched_group_rt_runtime(css_tg(css));
  }
  
-static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-               u64 rt_period_us)
+static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css,
+                                   struct cftype *cftype, u64 rt_period_us)
  {
-       return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
+       return sched_group_set_rt_period(css_tg(css), rt_period_us);
  }
  
-static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
+                                  struct cftype *cft)
  {
-       return sched_group_rt_period(cgroup_tg(cgrp));
+       return sched_group_rt_period(css_tg(css));
  }
  #endif /* CONFIG_RT_GROUP_SCHED */