Merge git://git.infradead.org/users/eparis/audit

[karo-tx-linux.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index c4689fe92864fc9fa5fbe55004bdb939eee41263..178a8d909f14a3dcdcbc0ce255572975c8b3b221 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -51,6 +51,7 @@ struct sched_param {
  #include <linux/cred.h>
  #include <linux/llist.h>
  #include <linux/uidgid.h>
+#include <linux/gfp.h>
  
  #include <asm/processor.h>
  
@@ -98,7 +99,6 @@ extern int nr_threads;
  DECLARE_PER_CPU(unsigned long, process_counts);
  extern int nr_processes(void);
  extern unsigned long nr_running(void);
-extern unsigned long nr_uninterruptible(void);
  extern unsigned long nr_iowait(void);
  extern unsigned long nr_iowait_cpu(int cpu);
  extern unsigned long this_cpu_load(void);
@@ -127,18 +127,6 @@ extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
  extern void proc_sched_set_task(struct task_struct *p);
  extern void
  print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
-#else
-static inline void
-proc_sched_show_task(struct task_struct *p, struct seq_file *m)
-{
-}
-static inline void proc_sched_set_task(struct task_struct *p)
-{
-}
-static inline void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
-{
-}
  #endif
  
  /*
@@ -163,9 +151,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
  #define TASK_DEAD              64
  #define TASK_WAKEKILL          128
  #define TASK_WAKING            256
-#define TASK_STATE_MAX         512
+#define TASK_PARKED            512
+#define TASK_STATE_MAX         1024
  
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
  
  extern char ___assert_task_state[1 - 2*!!(
                 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -242,7 +231,7 @@ extern void init_idle_bootup_task(struct task_struct *idle);
  
  extern int runqueue_is_locked(int cpu);
  
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
  extern void nohz_balance_enter_idle(int cpu);
  extern void set_cpu_sd_state_idle(void);
  extern int get_nohz_timer_target(void);
@@ -304,19 +293,6 @@ static inline void lockup_detector_init(void)
  }
  #endif
  
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                        void __user *buffer,
-                                        size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
  /* Attach to any functions which should be ignored in wchan output. */
  #define __sched                __attribute__((__section__(".sched.text")))
  
@@ -333,30 +309,10 @@ extern signed long schedule_timeout_killable(signed long timeout);
  extern signed long schedule_timeout_uninterruptible(signed long timeout);
  asmlinkage void schedule(void);
  extern void schedule_preempt_disabled(void);
-extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
  
  struct nsproxy;
  struct user_namespace;
  
-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN       (5)
-#define DEFAULT_MAX_MAP_COUNT  (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
-#include <linux/aio.h>
-
  #ifdef CONFIG_MMU
  extern void arch_pick_mmap_layout(struct mm_struct *mm);
  extern unsigned long
@@ -376,11 +332,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
  extern void set_dumpable(struct mm_struct *mm, int value);
  extern int get_dumpable(struct mm_struct *mm);
  
-/* get/set_dumpable() values */
-#define SUID_DUMPABLE_DISABLED 0
-#define SUID_DUMPABLE_ENABLED  1
-#define SUID_DUMPABLE_SAFE     2
-
  /* mm flags */
  /* dumpable bits */
  #define MMF_DUMPABLE      0  /* core dump is permitted */
@@ -561,7 +512,8 @@ struct signal_struct {
         unsigned int            has_child_subreaper:1;
  
         /* POSIX.1b Interval Timers */
-       struct list_head posix_timers;
+       int                     posix_timer_id;
+       struct list_head        posix_timers;
  
         /* ITIMER_REAL timer for the process */
         struct hrtimer real_timer;
@@ -605,7 +557,7 @@ struct signal_struct {
         cputime_t utime, stime, cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
         struct cputime prev_cputime;
  #endif
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -673,6 +625,7 @@ struct signal_struct {
  #define SIGNAL_STOP_STOPPED    0x00000001 /* job control stop in effect */
  #define SIGNAL_STOP_CONTINUED  0x00000002 /* SIGCONT since WCONTINUED reap */
  #define SIGNAL_GROUP_EXIT      0x00000004 /* group exit in progress */
+#define SIGNAL_GROUP_COREDUMP  0x00000008 /* coredump in progress */
  /*
   * Pending notifications to parent.
   */
@@ -803,31 +756,6 @@ enum cpu_idle_type {
         CPU_MAX_IDLE_TYPES
  };
  
-/*
- * Increase resolution of nice-level calculations for 64-bit architectures.
- * The extra resolution improves shares distribution and load balancing of
- * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
- * hierarchies, especially on larger systems. This is not a user-visible change
- * and does not change the user-interface for setting shares/weights.
- *
- * We increase resolution only if we have enough bits to allow this increased
- * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
- * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
- * increased costs.
- */
-#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load  */
-# define SCHED_LOAD_RESOLUTION 10
-# define scale_load(w)         ((w) << SCHED_LOAD_RESOLUTION)
-# define scale_load_down(w)    ((w) >> SCHED_LOAD_RESOLUTION)
-#else
-# define SCHED_LOAD_RESOLUTION 0
-# define scale_load(w)         (w)
-# define scale_load_down(w)    (w)
-#endif
-
-#define SCHED_LOAD_SHIFT       (10 + SCHED_LOAD_RESOLUTION)
-#define SCHED_LOAD_SCALE       (1L << SCHED_LOAD_SHIFT)
-
  /*
   * Increase resolution of cpu_power calculations
   */
@@ -853,62 +781,6 @@ enum cpu_idle_type {
  
  extern int __weak arch_sd_sibiling_asym_packing(void);
  
-struct sched_group_power {
-       atomic_t ref;
-       /*
-        * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-        * single CPU.
-        */
-       unsigned int power, power_orig;
-       unsigned long next_update;
-       /*
-        * Number of busy cpus in this group.
-        */
-       atomic_t nr_busy_cpus;
-
-       unsigned long cpumask[0]; /* iteration mask */
-};
-
-struct sched_group {
-       struct sched_group *next;       /* Must be a circular list */
-       atomic_t ref;
-
-       unsigned int group_weight;
-       struct sched_group_power *sgp;
-
-       /*
-        * The CPUs this group covers.
-        *
-        * NOTE: this field is variable length. (Allocated dynamically
-        * by attaching extra space to the end of the structure,
-        * depending on how many CPUs the kernel has booted up with)
-        */
-       unsigned long cpumask[0];
-};
-
-static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
-{
-       return to_cpumask(sg->cpumask);
-}
-
-/*
- * cpumask masking which cpus in the group are allowed to iterate up the domain
- * tree.
- */
-static inline struct cpumask *sched_group_mask(struct sched_group *sg)
-{
-       return to_cpumask(sg->sgp->cpumask);
-}
-
-/**
- * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
- * @group: The group whose first cpu is to be returned.
- */
-static inline unsigned int group_first_cpu(struct sched_group *group)
-{
-       return cpumask_first(sched_group_cpus(group));
-}
-
  struct sched_domain_attr {
         int relax_domain_level;
  };
@@ -919,6 +791,8 @@ struct sched_domain_attr {
  
  extern int sched_domain_level_max;
  
+struct sched_group;
+
  struct sched_domain {
         /* These fields must be setup */
         struct sched_domain *parent;    /* top domain must be null terminated */
@@ -935,6 +809,8 @@ struct sched_domain {
         unsigned int wake_idx;
         unsigned int forkexec_idx;
         unsigned int smt_gain;
+
+       int nohz_idle;                  /* NOHZ IDLE status */
         int flags;                      /* See SD_* */
         int level;
  
@@ -1007,18 +883,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
  cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
  void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
  
-/* Test a flag in parent sched domain */
-static inline int test_sd_parent(struct sched_domain *sd, int flag)
-{
-       if (sd->parent && (sd->parent->flags & flag))
-               return 1;
-
-       return 0;
-}
-
-unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
-unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
-
  bool cpus_share_cache(int this_cpu, int that_cpu);
  
  #else /* CONFIG_SMP */
@@ -1053,72 +917,6 @@ struct mempolicy;
  struct pipe_inode_info;
  struct uts_namespace;
  
-struct rq;
-struct sched_domain;
-
-/*
- * wake flags
- */
-#define WF_SYNC                0x01            /* waker goes to sleep after wakup */
-#define WF_FORK                0x02            /* child wakeup after fork */
-#define WF_MIGRATED    0x04            /* internal use, task got migrated */
-
-#define ENQUEUE_WAKEUP         1
-#define ENQUEUE_HEAD           2
-#ifdef CONFIG_SMP
-#define ENQUEUE_WAKING         4       /* sched_class::task_waking was called */
-#else
-#define ENQUEUE_WAKING         0
-#endif
-
-#define DEQUEUE_SLEEP          1
-
-struct sched_class {
-       const struct sched_class *next;
-
-       void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
-       void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
-       void (*yield_task) (struct rq *rq);
-       bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
-
-       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
-
-       struct task_struct * (*pick_next_task) (struct rq *rq);
-       void (*put_prev_task) (struct rq *rq, struct task_struct *p);
-
-#ifdef CONFIG_SMP
-       int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
-       void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
-
-       void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-       void (*post_schedule) (struct rq *this_rq);
-       void (*task_waking) (struct task_struct *task);
-       void (*task_woken) (struct rq *this_rq, struct task_struct *task);
-
-       void (*set_cpus_allowed)(struct task_struct *p,
-                                const struct cpumask *newmask);
-
-       void (*rq_online)(struct rq *rq);
-       void (*rq_offline)(struct rq *rq);
-#endif
-
-       void (*set_curr_task) (struct rq *rq);
-       void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
-       void (*task_fork) (struct task_struct *p);
-
-       void (*switched_from) (struct rq *this_rq, struct task_struct *task);
-       void (*switched_to) (struct rq *this_rq, struct task_struct *task);
-       void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
-                            int oldprio);
-
-       unsigned int (*get_rr_interval) (struct rq *rq,
-                                        struct task_struct *task);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       void (*task_move_group) (struct task_struct *p, int on_rq);
-#endif
-};
-
  struct load_weight {
         unsigned long weight, inv_weight;
  };
@@ -1195,6 +993,7 @@ struct sched_entity {
         /* rq "owned" by this entity/group: */
         struct cfs_rq           *my_q;
  #endif
+
  /*
   * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
   * removed when useful for applications beyond shares distribution (e.g.
@@ -1209,6 +1008,7 @@ struct sched_entity {
  struct sched_rt_entity {
         struct list_head run_list;
         unsigned long timeout;
+       unsigned long watchdog_stamp;
         unsigned int time_slice;
  
         struct sched_rt_entity *back;
@@ -1221,11 +1021,6 @@ struct sched_rt_entity {
  #endif
  };
  
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE           (100 * HZ / 1000)
  
  struct rcu_node;
  
@@ -1313,8 +1108,10 @@ struct task_struct {
         int exit_code, exit_signal;
         int pdeath_signal;  /*  The signal sent when the parent dies  */
         unsigned int jobctl;    /* JOBCTL_*, siglock protected */
-       /* ??? */
+
+       /* Used for emulating ABI behavior of previous Linux versions */
         unsigned int personality;
+
         unsigned did_exec:1;
         unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                  * execve */
@@ -1366,8 +1163,17 @@ struct task_struct {
  
         cputime_t utime, stime, utimescaled, stimescaled;
         cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
         struct cputime prev_cputime;
+#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+       seqlock_t vtime_seqlock;
+       unsigned long long vtime_snap;
+       enum {
+               VTIME_SLEEPING = 0,
+               VTIME_USER,
+               VTIME_SYS,
+       } vtime_snap_whence;
  #endif
         unsigned long nvcsw, nivcsw; /* context switch counts */
         struct timespec start_time;             /* monotonic time */
@@ -1606,6 +1412,10 @@ struct task_struct {
  #ifdef CONFIG_UPROBES
         struct uprobe_task *utask;
  #endif
+#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
+       unsigned int    sequential_io;
+       unsigned int    sequential_io_avg;
+#endif
  };
  
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -1623,37 +1433,6 @@ static inline void set_numabalancing_state(bool enabled)
  }
  #endif
  
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space.  This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO       100
-#define MAX_RT_PRIO            MAX_USER_RT_PRIO
-
-#define MAX_PRIO               (MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO           (MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
-       if (unlikely(prio < MAX_RT_PRIO))
-               return 1;
-       return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
-       return rt_prio(p->prio);
-}
-
  static inline struct pid *task_pid(struct task_struct *task)
  {
         return task->pids[PIDTYPE_PID].pid;
@@ -1793,6 +1572,37 @@ static inline void put_task_struct(struct task_struct *t)
                 __put_task_struct(t);
  }
  
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN /* accessors are only declared here, not defined */
+extern void task_cputime(struct task_struct *t,
+                        cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+                               cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN: inline accessors copy the fields of @t */
+static inline void task_cputime(struct task_struct *t,
+                               cputime_t *utime, cputime_t *stime)
+{
+       if (utime) /* a NULL output pointer is skipped */
+               *utime = t->utime;
+       if (stime) /* likewise optional */
+               *stime = t->stime;
+}
+
+static inline void task_cputime_scaled(struct task_struct *t,
+                                      cputime_t *utimescaled,
+                                      cputime_t *stimescaled)
+{
+       if (utimescaled) /* a NULL output pointer is skipped */
+               *utimescaled = t->utimescaled;
+       if (stimescaled) /* likewise optional */
+               *stimescaled = t->stimescaled;
+}
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+       return t->gtime; /* plain read of the field, no adjustment */
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
  extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
  extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
  
@@ -1816,13 +1626,14 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
  #define PF_FROZEN      0x00010000      /* frozen for system suspend */
  #define PF_FSTRANS     0x00020000      /* inside a filesystem transaction */
  #define PF_KSWAPD      0x00040000      /* I am kswapd */
+#define PF_MEMALLOC_NOIO 0x00080000    /* Allocating memory without IO involved */
  #define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
  #define PF_KTHREAD     0x00200000      /* I am a kernel thread */
  #define PF_RANDOMIZE   0x00400000      /* randomize virtual address space */
  #define PF_SWAPWRITE   0x00800000      /* Allowed to write to swap */
  #define PF_SPREAD_PAGE 0x01000000      /* Spread page cache over cpuset */
  #define PF_SPREAD_SLAB 0x02000000      /* Spread some slab caches over cpuset */
-#define PF_THREAD_BOUND        0x04000000      /* Thread bound to specific cpu */
+#define PF_NO_SETAFFINITY 0x04000000   /* Userland is not allowed to meddle with cpus_allowed */
  #define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
  #define PF_MEMPOLICY   0x10000000      /* Non-default NUMA mempolicy */
  #define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
@@ -1853,6 +1664,26 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
  #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
  #define used_math() tsk_used_math(current)
  
+/* Return @flags with __GFP_IO cleared if PF_MEMALLOC_NOIO is set in current->flags, else @flags unchanged */
+static inline gfp_t memalloc_noio_flags(gfp_t flags)
+{
+       if (unlikely(current->flags & PF_MEMALLOC_NOIO))
+               flags &= ~__GFP_IO;
+       return flags;
+}
+
+static inline unsigned int memalloc_noio_save(void) /* set PF_MEMALLOC_NOIO on current, return its old state */
+{
+       unsigned int flags = current->flags & PF_MEMALLOC_NOIO; /* only this one bit is captured */
+       current->flags |= PF_MEMALLOC_NOIO;
+       return flags; /* either 0 or PF_MEMALLOC_NOIO */
+}
+
+static inline void memalloc_noio_restore(unsigned int flags) /* @flags: presumably the value memalloc_noio_save() returned */
+{
+       current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; /* clear the bit, then OR @flags back in */
+}
+
  /*
   * task->jobctl flags
   */
@@ -1936,13 +1767,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
  }
  #endif
  
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
  void calc_load_enter_idle(void);
  void calc_load_exit_idle(void);
  #else
  static inline void calc_load_enter_idle(void) { }
  static inline void calc_load_exit_idle(void) { }
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
  
  #ifndef CONFIG_CPUMASK_OFFSTACK
  static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
@@ -2028,64 +1859,20 @@ extern void idle_task_exit(void);
  static inline void idle_task_exit(void) {}
  #endif
  
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-extern void wake_up_idle_cpu(int cpu);
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+extern void wake_up_nohz_cpu(int cpu);
  #else
-static inline void wake_up_idle_cpu(int cpu) { }
+static inline void wake_up_nohz_cpu(int cpu) { }
  #endif
  
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
-       SCHED_TUNABLESCALING_NONE,
-       SCHED_TUNABLESCALING_LOG,
-       SCHED_TUNABLESCALING_LINEAR,
-       SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-               void __user *buffer, size_t *length,
-               loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-       return sysctl_timer_migration;
-}
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(void);
+extern u64 scheduler_tick_max_deferment(void);
  #else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-       return 1;
-}
+static inline bool sched_can_stop_tick(void) { return false; }
  #endif
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-int sched_rt_handler(struct ctl_table *table, int write,
-               void __user *buffer, size_t *lenp,
-               loff_t *ppos);
  
  #ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-
  extern void sched_autogroup_create_attach(struct task_struct *p);
  extern void sched_autogroup_detach(struct task_struct *p);
  extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2101,30 +1888,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
  static inline void sched_autogroup_exit(struct signal_struct *sig) { }
  #endif
  
-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-       return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-       return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p)         do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-       return false;
-}
-#endif
-
  extern bool yield_to(struct task_struct *p, bool preempt);
  extern void set_user_nice(struct task_struct *p, long nice);
  extern int task_prio(const struct task_struct *p);
@@ -2260,7 +2023,6 @@ extern struct sigqueue *sigqueue_alloc(void);
  extern void sigqueue_free(struct sigqueue *);
  extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
  extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
-extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
  
  static inline void restore_saved_sigmask(void)
  {
@@ -2306,6 +2068,17 @@ static inline int sas_ss_flags(unsigned long sp)
                 : on_sig_stack(sp) ? SS_ONSTACK : 0);
  }
  
+static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) /* pick the stack pointer to use for @ksig */
+{
+       if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) /* unbraced: body is the one return chosen below */
+#ifdef CONFIG_STACK_GROWSUP
+               return current->sas_ss_sp; /* returns sas_ss_sp itself */
+#else /* !CONFIG_STACK_GROWSUP */
+               return current->sas_ss_sp + current->sas_ss_size; /* presumably the high end of the alternate stack */
+#endif /* CONFIG_STACK_GROWSUP */
+       return sp; /* SA_ONSTACK not set, or sas_ss_flags(sp) is non-zero: keep @sp */
+}
+
  /*
   * Routines for handling mm_structs
   */
@@ -2486,27 +2259,18 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
   *
   * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
   * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
- * perform exec.  This is useful for cases where the threadgroup needs to
- * stay stable across blockable operations.
+ * change ->group_leader/pid.  This is useful for cases where the threadgroup
+ * needs to stay stable across blockable operations.
   *
   * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
   * synchronization.  While held, no new task will be added to threadgroup
   * and no existing live task will have its PF_EXITING set.
   *
- * During exec, a task goes and puts its thread group through unusual
- * changes.  After de-threading, exclusive access is assumed to resources
- * which are usually shared by tasks in the same group - e.g. sighand may
- * be replaced with a new one.  Also, the exec'ing task takes over group
- * leader role including its pid.  Exclude these changes while locked by
- * grabbing cred_guard_mutex which is used to synchronize exec path.
+ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
+ * sub-thread becomes a new leader.
   */
  static inline void threadgroup_lock(struct task_struct *tsk)
  {
-       /*
-        * exec uses exit for de-threading nesting group_rwsem inside
-        * cred_guard_mutex. Grab cred_guard_mutex first.
-        */
-       mutex_lock(&tsk->signal->cred_guard_mutex);
         down_write(&tsk->signal->group_rwsem);
  }
  
@@ -2519,7 +2283,6 @@ static inline void threadgroup_lock(struct task_struct *tsk)
  static inline void threadgroup_unlock(struct task_struct *tsk)
  {
         up_write(&tsk->signal->group_rwsem);
-       mutex_unlock(&tsk->signal->cred_guard_mutex);
  }
  #else
  static inline void threadgroup_change_begin(struct task_struct *tsk) {}
@@ -2695,6 +2458,47 @@ static inline int spin_needbreak(spinlock_t *lock)
  #endif
  }
  
+/*
+ * Idle thread helpers for the need_resched polling state: test, set and
+ * clear. Three variants: one keeps the bit as TS_POLLING in
+ * thread_info.status, one uses TIF_POLLING_NRFLAG in thread_info.flags,
+ * and, if neither macro is defined, no-op stubs that report "not polling".
+ */
+#ifdef TS_POLLING
+static inline int tsk_is_polling(struct task_struct *p)
+{
+       return task_thread_info(p)->status & TS_POLLING; /* non-zero when the bit is set */
+}
+static inline void current_set_polling(void)
+{
+       current_thread_info()->status |= TS_POLLING;
+}
+
+static inline void current_clr_polling(void)
+{
+       current_thread_info()->status &= ~TS_POLLING;
+       smp_mb__after_clear_bit(); /* barrier issued after the plain (non-atomic) clear above */
+}
+#elif defined(TIF_POLLING_NRFLAG) /* no TS_POLLING: use the thread flag accessors */
+static inline int tsk_is_polling(struct task_struct *p)
+{
+       return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+}
+static inline void current_set_polling(void)
+{
+       set_thread_flag(TIF_POLLING_NRFLAG);
+}
+
+static inline void current_clr_polling(void)
+{
+       clear_thread_flag(TIF_POLLING_NRFLAG); /* no explicit barrier in this variant */
+}
+#else /* neither TS_POLLING nor TIF_POLLING_NRFLAG is defined */
+static inline int tsk_is_polling(struct task_struct *p) { return 0; }
+static inline void current_set_polling(void) { }
+static inline void current_clr_polling(void) { }
+#endif /* TS_POLLING */
+
  /*
   * Thread group CPU time accounting.
   */
@@ -2754,28 +2558,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
  
-extern void normalize_rt_tasks(void);
-
  #ifdef CONFIG_CGROUP_SCHED
-
  extern struct task_group root_task_group;
-
-extern struct task_group *sched_create_group(struct task_group *parent);
-extern void sched_destroy_group(struct task_group *tg);
-extern void sched_move_task(struct task_struct *tsk);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
-extern unsigned long sched_group_shares(struct task_group *tg);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-extern int sched_group_set_rt_runtime(struct task_group *tg,
-                                     long rt_runtime_us);
-extern long sched_group_rt_runtime(struct task_group *tg);
-extern int sched_group_set_rt_period(struct task_group *tg,
-                                     long rt_period_us);
-extern long sched_group_rt_period(struct task_group *tg);
-extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
-#endif
  #endif /* CONFIG_CGROUP_SCHED */
  
  extern int task_can_switch_user(struct user_struct *up,