X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=include%2Flinux%2Fsched.h;h=8d6b7aa7f3acbe09cfbab6879089ca4edf6c4121;hb=dcc2dc45f7cf267d37843059ff75b70260596c69;hp=4a28deb5f210a103d486fa12c509405f92f4ca60;hpb=e72e58faa709d554f95431dbafd3374db5ed3fbb;p=karo-tx-linux.git diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210..8d6b7aa7f3ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,193 +5,63 @@ #include - -struct sched_param { - int sched_priority; -}; - -#include /* for HZ */ - #include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include -#include #include #include #include -#include -#include +#include #include -#include -#include #include -#include #include #include -#include -#include #include -#include #include #include #include #include #include -#include -#include #include +#include #include #include -#include - -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +#include -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations: */ +struct audit_context; +struct autogroup; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; +struct cfs_rq; struct filename; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<>= FSHIFT; - -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - -extern void calc_global_load(unsigned long ticks); - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - -extern void dump_cpu_task(int cpu); - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif +struct task_struct; +struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -223,9 +93,6 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - /* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) @@ -309,122 +176,11 @@ extern char ___assert_task_state[1 - 2*!!( /* Task command name length */ #define TASK_COMM_LEN 16 -#include - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - -struct task_struct; - -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - -extern void sched_init(void); -extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); - extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} - -extern void show_regs(struct pt_regs *); - -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - -extern void cpu_init (void); -extern void trap_init(void); -extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif - -extern void sched_show_task(struct task_struct *p); - -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) - -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; - -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); @@ -440,112 +196,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - -struct nsproxy; -struct user_namespace; - -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -563,14 +213,6 @@ struct prev_cputime { #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -592,2720 +234,1149 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; +#include -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } +#ifdef CONFIG_SCHED_INFO +struct sched_info { + /* cumulative counters */ + unsigned long pcount; /* # of times run on this cpu */ + unsigned long long run_delay; /* time spent waiting on a runqueue */ -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* timestamps */ + unsigned long long last_arrival,/* when we last ran on a cpu */ + last_queued; /* when we were last queued to run */ +}; +#endif /* CONFIG_SCHED_INFO */ /* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. + * Integer metrics need fixed point arithmetic, e.g., sched/fair + * has a few: load, load_avg, util_avg, freq, and capacity. * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + * We define a basic fixed point arithmetic range, and then formalize + * all these metrics based on that basic range. */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) + +#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK +extern void prefetch_stack(struct task_struct *t); +#else +static inline void prefetch_stack(struct task_struct *t) { } +#endif + +struct load_weight { + unsigned long weight; + u32 inv_weight; +}; /* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: + * The load_avg/util_avg accumulates an infinite geometric series + * (see __update_load_avg() in kernel/sched/fair.c). + * + * [load_avg definition] * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * load_avg = runnable% * scale_load_down(load) * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. - * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. + * where runnable% is the time ratio that a sched_entity is runnable. + * For cfs_rq, it is the aggregated load_avg of all runnable and + * blocked sched_entities. + * + * load_avg may also take frequency scaling into account: + * + * load_avg = runnable% * scale_load_down(load) * freq% + * + * where freq% is the CPU frequency normalized to the highest frequency. + * + * [util_avg definition] + * + * util_avg = running% * SCHED_CAPACITY_SCALE + * + * where running% is the time ratio that a sched_entity is running on + * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable + * and blocked sched_entities. + * + * util_avg may also factor frequency scaling and CPU capacity scaling: * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. + * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% + * + * where freq% is the same as above, and capacity% is the CPU capacity + * normalized to the greatest capacity (due to uarch differences, etc). + * + * N.B., the above ratios (runnable%, running%, freq%, and capacity%) + * themselves are in the range of [0, 1]. To do fixed point arithmetics, + * we therefore scale them to as large a range as necessary. This is for + * example reflected by util_avg's SCHED_CAPACITY_SCALE. + * + * [Overflow issue] + * + * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities + * with the highest load (=88761), always runnable on a single cfs_rq, + * and should not overflow as the number already hits PID_MAX_LIMIT. + * + * For all other cases (including 32-bit kernels), struct load_weight's + * weight will overflow first before we do, because: + * + * Max(load_avg) <= Max(load.weight) + * + * Then it is the load_weight's responsibility to consider overflow + * issues. */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; +struct sched_avg { + u64 last_update_time, load_sum; + u32 util_sum, period_contrib; + unsigned long load_avg, util_avg; }; -#include -struct autogroup; - -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; +#ifdef CONFIG_SCHEDSTATS +struct sched_statistics { + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; +}; +#endif - struct list_head cpu_timers[3]; +struct sched_entity { + struct load_weight load; /* for load-balancing */ + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; -#endif + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - struct pid *leader_pid; + u64 nr_migrations; -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; +#ifdef CONFIG_SCHEDSTATS + struct sched_statistics statistics; #endif - struct pid *tty_old_pgrp; +#ifdef CONFIG_FAIR_GROUP_SCHED + int depth; + struct sched_entity *parent; + /* rq on which this entity is (to be) queued: */ + struct cfs_rq *cfs_rq; + /* rq "owned" by this entity/group: */ + struct cfs_rq *my_q; +#endif - /* boolean value for session group leader */ - int leader; +#ifdef CONFIG_SMP + /* + * Per entity load average tracking. + * + * Put into separate cache line so it does not + * collide with read-mostly values above. + */ + struct sched_avg avg ____cacheline_aligned_in_smp; +#endif +}; - struct tty_struct *tty; /* NULL if no tty */ +struct sched_rt_entity { + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; + struct sched_rt_entity *back; +#ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity *parent; + /* rq on which this entity is (to be) queued: */ + struct rt_rq *rt_rq; + /* rq "owned" by this entity/group: */ + struct rt_rq *my_q; #endif +}; + +struct sched_dl_entity { + struct rb_node rb_node; + /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. + * Original scheduling parameters. Copied here from sched_attr + * during sched_setattr(), they will remain the same until + * the next sched_setattr(). */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; + u64 dl_runtime; /* maximum runtime for each instance */ + u64 dl_deadline; /* relative deadline of each instance */ + u64 dl_period; /* separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) + * Actual scheduling parameters. Initialized with the values above, + * they are continously updated during task execution. Note that + * the remaining runtime could be < 0 in case we are in overrun. */ - unsigned long long sum_sched_runtime; + s64 runtime; /* remaining runtime for this instance */ + u64 deadline; /* absolute deadline for this instance */ + unsigned int flags; /* specifying the scheduler behaviour */ /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. + * Some bool flags: + * + * @dl_throttled tells if we exhausted the runtime. If so, the + * task has to wait for a replenishment to be performed at the + * next firing of dl_timer. + * + * @dl_boosted tells if we are boosted due to DI. If so we are + * outside bandwidth enforcement mechanism (but only until we + * exit the critical section); + * + * @dl_yielded tells if task gave up the cpu before consuming + * all its available runtime during the last job. */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif + int dl_throttled, dl_boosted, dl_yielded; /* - * Thread is the potential origin of an oom condition; kill first on - * oom + * Bandwidth enforcement timer. Each -deadline task has its + * own bandwidth to be enforced, thus we need one timer per task. */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ + struct hrtimer dl_timer; }; -/* - * Bits in flags field of signal_struct. - */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) +union rcu_special { + struct { + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ +}; -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_sw_context, + perf_nr_task_contexts, +}; -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} +struct wake_q_node { + struct wake_q_node *next; +}; -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * For reasons of header soup (see current_thread_info()), this + * must be the first element of task_struct. + */ + struct thread_info thread_info; #endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ + void *stack; + atomic_t usage; + unsigned int flags; /* per process flags, defined below */ + unsigned int ptrace; + +#ifdef CONFIG_SMP + struct llist_node wake_entry; + int on_cpu; +#ifdef CONFIG_THREAD_INFO_IN_TASK + unsigned int cpu; /* current CPU */ #endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ + int wake_cpu; #endif + int on_rq; - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; + int prio, static_prio, normal_prio; + unsigned int rt_priority; + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; +#ifdef CONFIG_CGROUP_SCHED + struct task_group *sched_task_group; +#endif + struct sched_dl_entity dl; -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; +#ifdef CONFIG_PREEMPT_NOTIFIERS + /* list of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif -}; -extern int uids_sysfs_init(void); +#ifdef CONFIG_BLK_DEV_IO_TRACE + unsigned int btrace_seq; +#endif -extern struct user_struct *find_user(kuid_t); + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; -extern struct user_struct root_user; -#define INIT_USER (&root_user) +#ifdef CONFIG_PREEMPT_RCU + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; +#endif /* #ifdef CONFIG_TASKS_RCU */ +#ifdef CONFIG_SCHED_INFO + struct sched_info sched_info; +#endif -struct backing_dev_info; -struct reclaim_state; + struct list_head tasks; +#ifdef CONFIG_SMP + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; +#endif -#ifdef CONFIG_SCHED_INFO -struct sched_info { - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ + struct mm_struct *mm, *active_mm; - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ -}; -#endif /* CONFIG_SCHED_INFO */ + /* Per-thread vma caching: */ + struct vmacache vmacache; -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ +#if defined(SPLIT_RSS_COUNTING) + struct task_rss_stat rss_stat; +#endif +/* task state */ + int exit_state; + int exit_code, exit_signal; + int pdeath_signal; /* The signal sent when the parent dies */ + unsigned long jobctl; /* JOBCTL_*, siglock protected */ - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ + /* Used for emulating ABI behavior of previous Linux versions */ + unsigned int personality; - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. - */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ + /* scheduler bits, serialized by scheduler locks */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + unsigned :0; /* force alignment to the next boundary */ -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; + /* unserialized, strictly 'current' */ + unsigned in_execve:1; /* bit to tell LSMs we're in execve */ + unsigned in_iowait:1; +#if !defined(TIF_RESTORE_SIGMASK) + unsigned restore_sigmask:1; +#endif +#ifdef CONFIG_MEMCG + unsigned memcg_may_oom:1; +#ifndef CONFIG_SLOB + unsigned memcg_kmem_skip_account:1; #endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); +#endif +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; #endif -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES -}; + unsigned long atomic_flags; /* Flags needing atomic access. */ -/* - * Integer metrics need fixed point arithmetic, e.g., sched/fair - * has a few: load, load_avg, util_avg, freq, and capacity. - * - * We define a basic fixed point arithmetic range, and then formalize - * all these metrics based on that basic range. - */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) - -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. - * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; + struct restart_block restart_block; -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; + pid_t pid; + pid_t tgid; -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) +#ifdef CONFIG_CC_STACKPROTECTOR + /* Canary value for the -fstack-protector gcc feature */ + unsigned long stack_canary; +#endif + /* + * pointers to (original) parent process, youngest child, younger sibling, + * older sibling, respectively. (p->father can be replaced with + * p->real_parent->pid) + */ + struct task_struct __rcu *real_parent; /* real parent process */ + struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + /* + * children/sibling forms the list of my natural children + */ + struct list_head children; /* list of my children */ + struct list_head sibling; /* linkage in my parent's children list */ + struct task_struct *group_leader; /* threadgroup leader */ -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} + /* PID/PID hash table linkage. */ + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); + struct completion *vfork_done; /* for vfork() */ + int __user *set_child_tid; /* CLONE_CHILD_SETTID */ + int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} + u64 utime, stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + u64 utimescaled, stimescaled; #endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} + u64 gtime; + struct prev_cputime prev_cputime; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; + enum { + /* Task is sleeping or running in a CPU with VTIME inactive */ + VTIME_INACTIVE = 0, + /* Task runs in userspace in a CPU with VTIME active */ + VTIME_USER, + /* Task runs in kernelspace in a CPU with VTIME active */ + VTIME_SYS, + } vtime_snap_whence; #endif -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; #endif + unsigned long nvcsw, nivcsw; /* context switch counts */ + u64 start_time; /* monotonic time in nsec */ + u64 real_start_time; /* boot based time in nsec */ +/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ + unsigned long min_flt, maj_flt; -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. */ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ +#ifdef CONFIG_POSIX_TIMERS + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; +/* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ + const struct cred __rcu *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred __rcu *cred; /* effective (overridable) subjective task + * credentials (COW) */ + char comm[TASK_COMM_LEN]; /* executable name excluding path + - access with [gs]et_task_comm (which lock + it with task_lock()) + - initialized normally by setup_new_exec */ +/* file system info */ + struct nameidata *nameidata; +#ifdef CONFIG_SYSVIPC +/* ipc stuff */ + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; +#endif +#ifdef CONFIG_DETECT_HUNG_TASK +/* hung task detection */ + unsigned long last_switch_count; +#endif +/* filesystem information */ + struct fs_struct *fs; +/* open file information */ + struct files_struct *files; +/* namespaces */ + struct nsproxy *nsproxy; +/* signal handlers */ + struct signal_struct *signal; + struct sighand_struct *sighand; -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} + sigset_t blocked, real_blocked; + sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ + struct sigpending pending; -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned sas_ss_flags; -/* Allocate an array of sched domains, for partition_sched_domains(). */ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + struct callback_head *task_works; -bool cpus_share_cache(int this_cpu, int that_cpu); + struct audit_context *audit_context; +#ifdef CONFIG_AUDITSYSCALL + kuid_t loginuid; + unsigned int sessionid; +#endif + struct seccomp seccomp; -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); +/* Thread group tracking */ + u32 parent_exec_id; + u32 self_exec_id; +/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, + * mempolicy */ + spinlock_t alloc_lock; -#define SDTL_OVERLAP 0x01 + /* Protection of the PI data structures: */ + raw_spinlock_t pi_lock; -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; + struct wake_q_node wake_q; -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; +#ifdef CONFIG_RT_MUTEXES + /* PI waiters blocked on a rt_mutex held by this task */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling */ + struct rt_mutex_waiter *pi_blocked_on; #endif -}; -extern void set_sched_topology(struct sched_domain_topology_level *tl); -extern void wake_up_if_idle(int cpu); - -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) +#ifdef CONFIG_DEBUG_MUTEXES + /* mutex deadlock detection */ + struct mutex_waiter *blocked_on; +#endif +#ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; +#endif +#ifdef CONFIG_LOCKDEP +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; +#endif +#ifdef CONFIG_UBSAN + unsigned int in_ubsan; #endif -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} +/* journalling filesystem info */ + void *journal_info; -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} +/* stacked block device info */ + struct bio_list *bio_list; -#endif /* !CONFIG_SMP */ +#ifdef CONFIG_BLOCK +/* stack plugging */ + struct blk_plug *plug; +#endif +/* VM state */ + struct reclaim_state *reclaim_state; -struct io_context; /* See blkdev.h */ + struct backing_dev_info *backing_dev_info; + struct io_context *io_context; -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } + unsigned long ptrace_message; + siginfo_t *last_siginfo; /* For ptrace use. */ + struct task_io_accounting ioac; +#if defined(CONFIG_TASK_XACCT) + u64 acct_rss_mem1; /* accumulated rss usage */ + u64 acct_vm_mem1; /* accumulated virtual memory usage */ + u64 acct_timexpd; /* stime + utime since last update */ #endif - -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; - -struct load_weight { - unsigned long weight; - u32 inv_weight; -}; - -/* - * The load_avg/util_avg accumulates an infinite geometric series - * (see __update_load_avg() in kernel/sched/fair.c). - * - * [load_avg definition] - * - * load_avg = runnable% * scale_load_down(load) - * - * where runnable% is the time ratio that a sched_entity is runnable. - * For cfs_rq, it is the aggregated load_avg of all runnable and - * blocked sched_entities. - * - * load_avg may also take frequency scaling into account: - * - * load_avg = runnable% * scale_load_down(load) * freq% - * - * where freq% is the CPU frequency normalized to the highest frequency. - * - * [util_avg definition] - * - * util_avg = running% * SCHED_CAPACITY_SCALE - * - * where running% is the time ratio that a sched_entity is running on - * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable - * and blocked sched_entities. - * - * util_avg may also factor frequency scaling and CPU capacity scaling: - * - * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% - * - * where freq% is the same as above, and capacity% is the CPU capacity - * normalized to the greatest capacity (due to uarch differences, etc). - * - * N.B., the above ratios (runnable%, running%, freq%, and capacity%) - * themselves are in the range of [0, 1]. To do fixed point arithmetics, - * we therefore scale them to as large a range as necessary. This is for - * example reflected by util_avg's SCHED_CAPACITY_SCALE. - * - * [Overflow issue] - * - * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities - * with the highest load (=88761), always runnable on a single cfs_rq, - * and should not overflow as the number already hits PID_MAX_LIMIT. - * - * For all other cases (including 32-bit kernels), struct load_weight's - * weight will overflow first before we do, because: - * - * Max(load_avg) <= Max(load.weight) - * - * Then it is the load_weight's responsibility to consider overflow - * issues. - */ -struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; -}; - -#ifdef CONFIG_SCHEDSTATS -struct sched_statistics { - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; -}; +#ifdef CONFIG_CPUSETS + nodemask_t mems_allowed; /* Protected by alloc_lock */ + seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif - -struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; - - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; - - u64 nr_migrations; - -#ifdef CONFIG_SCHEDSTATS - struct sched_statistics statistics; +#ifdef CONFIG_CGROUPS + /* Control Group info protected by css_set_lock */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock */ + struct list_head cg_list; #endif - -#ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; - /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; - /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; +#ifdef CONFIG_INTEL_RDT_A + int closid; #endif - -#ifdef CONFIG_SMP - /* - * Per entity load average tracking. - * - * Put into separate cache line so it does not - * collide with read-mostly values above. - */ - struct sched_avg avg ____cacheline_aligned_in_smp; +#ifdef CONFIG_FUTEX + struct robust_list_head __user *robust_list; +#ifdef CONFIG_COMPAT + struct compat_robust_list_head __user *compat_robust_list; #endif -}; - -struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; -#ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; - /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; - /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif -}; - -struct sched_dl_entity { - struct rb_node rb_node; - - /* - * Original scheduling parameters. Copied here from sched_attr - * during sched_setattr(), they will remain the same until - * the next sched_setattr(). - */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ - - /* - * Actual scheduling parameters. Initialized with the values above, - * they are continously updated during task execution. Note that - * the remaining runtime could be < 0 in case we are in overrun. - */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ - - /* - * Some bool flags: - * - * @dl_throttled tells if we exhausted the runtime. If so, the - * task has to wait for a replenishment to be performed at the - * next firing of dl_timer. - * - * @dl_boosted tells if we are boosted due to DI. If so we are - * outside bandwidth enforcement mechanism (but only until we - * exit the critical section); - * - * @dl_yielded tells if task gave up the cpu before consuming - * all its available runtime during the last job. - */ - int dl_throttled, dl_boosted, dl_yielded; - - /* - * Bandwidth enforcement timer. Each -deadline task has its - * own bandwidth to be enforced, thus we need one timer per task. - */ - struct hrtimer dl_timer; -}; - -union rcu_special { - struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ - } b; /* Bits. */ - u32 s; /* Set of bits. */ -}; -struct rcu_node; - -enum perf_event_task_context { - perf_invalid_context = -1, - perf_hw_context = 0, - perf_sw_context, - perf_nr_task_contexts, -}; - -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; -}; - -struct task_struct { -#ifdef CONFIG_THREAD_INFO_IN_TASK - /* - * For reasons of header soup (see current_thread_info()), this - * must be the first element of task_struct. - */ - struct thread_info thread_info; +#ifdef CONFIG_PERF_EVENTS + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; - -#ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; -#ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ +#ifdef CONFIG_DEBUG_PREEMPT + unsigned long preempt_disable_ip; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; - - int wake_cpu; +#ifdef CONFIG_NUMA + struct mempolicy *mempolicy; /* Protected by alloc_lock */ + short il_next; + short pref_node_fork; #endif - int on_rq; - - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; -#ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; -#endif - struct sched_dl_entity dl; - -#ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; -#endif - -#ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; -#endif - - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; - -#ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; -#endif /* #ifdef CONFIG_TASKS_RCU */ - -#ifdef CONFIG_SCHED_INFO - struct sched_info sched_info; -#endif - - struct list_head tasks; -#ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; -#endif - - struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; -#endif -#ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; -#ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; -#endif -#endif -#ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; -#endif - - unsigned long atomic_flags; /* Flags needing atomic access. */ - - struct restart_block restart_block; - - pid_t pid; - pid_t tgid; - -#ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; -#endif - /* - * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with - * p->real_parent->pid) - */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ - /* - * children/sibling forms the list of my natural children - */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ - - /* - * ptraced is the list of tasks this task is using ptrace on. - * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. - */ - struct list_head ptraced; - struct list_head ptrace_entry; - - /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; - - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - - u64 utime, stime; -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; -#endif - u64 gtime; - struct prev_cputime prev_cputime; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; - enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ - VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ - VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ - VTIME_SYS, - } vtime_snap_whence; -#endif - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; - -#ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; -#ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; -#endif -#ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; -#ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; -#endif - struct seccomp seccomp; - -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; - - /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; - - struct wake_q_node wake_q; - -#ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; -#endif - -#ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; -#endif -#ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; -#endif -#ifdef CONFIG_UBSAN - unsigned int in_ubsan; -#endif - -/* journalling filesystem info */ - void *journal_info; - -/* stacked block device info */ - struct bio_list *bio_list; - -#ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; -#endif - -/* VM state */ - struct reclaim_state *reclaim_state; - - struct backing_dev_info *backing_dev_info; - - struct io_context *io_context; - - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ -#endif -#ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; -#endif -#ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; -#endif -#ifdef CONFIG_INTEL_RDT_A - int closid; -#endif -#ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; -#ifdef CONFIG_COMPAT - struct compat_robust_list_head __user *compat_robust_list; -#endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; -#endif -#ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; -#endif -#ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; -#endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; -#endif -#ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; - - /* - * numa_faults is an array split into four regions: - * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer - * in this precise order. - * - * faults_memory: Exponential decaying average of faults on a per-node - * basis. Scheduling placement decisions are made based on these - * counts. The values remain static for the duration of a PTE scan. - * faults_cpu: Track the nodes the process was running on when a NUMA - * hinting fault was incurred. - * faults_memory_buffer and faults_cpu_buffer: Record faults per node - * during the current scan window. When the scan completes, the counts - * in faults_memory and faults_cpu decay and these values are copied. - */ - unsigned long *numa_faults; - unsigned long total_numa_faults; - - /* - * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local or failed to migrate. The task scan - * period is adapted based on the locality of the faults with different - * weights depending on whether they were shared or private faults - */ - unsigned long numa_faults_locality[3]; - - unsigned long numa_pages_migrated; -#endif /* CONFIG_NUMA_BALANCING */ - -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH - struct tlbflush_unmap_batch tlb_ubc; -#endif - - struct rcu_head rcu; - - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; - - struct page_frag task_frag; - -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; -#endif -#ifdef CONFIG_FAULT_INJECTION - int make_it_fail; -#endif - /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause - */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ - -#ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; -#endif - /* - * time slack values; these are used to round up poll() and - * select() etc timeout values. These are in nanoseconds. - */ - u64 timer_slack_ns; - u64 default_timer_slack_ns; - -#ifdef CONFIG_KASAN - unsigned int kasan_depth; -#endif -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; - /* - * Number of functions that haven't been traced - * because of depth overrun. - */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; -#endif -#ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; -#endif /* CONFIG_TRACING */ -#ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; -#endif -#ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; - - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; -#endif -#ifdef CONFIG_UPROBES - struct uprobe_task *utask; -#endif -#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; -#endif -#ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; -#endif - int pagefault_disabled; -#ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; -#endif -#ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; -#endif -#ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; -#endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! - */ -}; - -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; - - /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. - * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. - */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif - -static inline struct pid *task_pid(struct task_struct *task) -{ - return task->pids[PIDTYPE_PID].pid; -} - -static inline struct pid *task_tgid(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_PID].pid; -} - -/* - * Without tasklist or rcu lock it is not safe to dereference - * the result of task_pgrp/task_session even if task == current, - * we can race with another thread doing sys_setsid/sys_setpgid. - */ -static inline struct pid *task_pgrp(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_PGID].pid; -} - -static inline struct pid *task_session(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_SID].pid; -} - -struct pid_namespace; - -/* - * the helpers to get the task's different pids as they are seen - * from various namespaces - * - * task_xid_nr() : global id, i.e. the id seen from the init namespace; - * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of - * current. - * task_xid_nr_ns() : id seen from the ns specified; - * - * set_task_vxid() : assigns a virtual id to a task; - * - * see also pid_nr() etc in include/linux/pid.h - */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); - -static inline pid_t task_pid_nr(struct task_struct *tsk) -{ - return tsk->pid; -} - -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); -} - -static inline pid_t task_pid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); -} - - -static inline pid_t task_tgid_nr(struct task_struct *tsk) -{ - return tsk->tgid; -} - -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_tgid(tsk)); -} - - -static inline int pid_alive(const struct task_struct *p); -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); -} - -static inline pid_t task_pgrp_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); -} - - -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); -} - -static inline pid_t task_session_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); -} - -/* obsolete, do not use */ -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return task_pgrp_nr_ns(tsk, &init_pid_ns); -} - -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - -/** - * is_global_init - check if a task structure is init. Since init - * is free to have sub-threads we need to check tgid. - * @tsk: Task structure to be checked. - * - * Check if a task structure is the first user space task the kernel created. - * - * Return: 1 if the task structure is init. 0 otherwise. - */ -static inline int is_global_init(struct task_struct *tsk) -{ - return task_tgid_nr(tsk) == 1; -} - -extern struct pid *cad_pid; - -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - -/* - * Per process flags - */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ - -/* - * Only the _current_ task can read/write to tsk->flags, but other - * tasks can access tsk->flags in readonly mode for example - * with tsk_used_math (like during threaded core dumping). - * There is however an exception to this rule during ptrace - * or during fork: the ptracer task is allowed to write to the - * child->flags of its traced child (same goes for fork, the parent - * can write to the child->flags), because we're guaranteed the - * child is not running and in turn not changing child->flags - * at the same time the parent does it. - */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) -#define conditional_stopped_child_used_math(condition, child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) -#define copy_to_stopped_child_used_math(child) \ - do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) -/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) - -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. - */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} - -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} - -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} - -/* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ - - -#define TASK_PFA_TEST(name, func) \ - static inline bool task_##func(struct task_struct *p) \ - { return test_bit(PFA_##name, &p->atomic_flags); } -#define TASK_PFA_SET(name, func) \ - static inline void task_set_##func(struct task_struct *p) \ - { set_bit(PFA_##name, &p->atomic_flags); } -#define TASK_PFA_CLEAR(name, func) \ - static inline void task_clear_##func(struct task_struct *p) \ - { clear_bit(PFA_##name, &p->atomic_flags); } - -TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) -TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) - -TASK_PFA_TEST(SPREAD_PAGE, spread_page) -TASK_PFA_SET(SPREAD_PAGE, spread_page) -TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) - -TASK_PFA_TEST(SPREAD_SLAB, spread_slab) -TASK_PFA_SET(SPREAD_SLAB, spread_slab) -TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) - -TASK_PFA_TEST(LMK_WAITING, lmk_waiting) -TASK_PFA_SET(LMK_WAITING, lmk_waiting) - -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) -{ - task->flags &= ~flags; - task->flags |= orig_flags & flags; -} - -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); -#ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); -#else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) -{ -} -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) -{ - if (!cpumask_test_cpu(0, new_mask)) - return -EINVAL; - return 0; -} -#endif - -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - -#ifndef cpu_relax_yield -#define cpu_relax_yield() cpu_relax() -#endif - -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} +#ifdef CONFIG_NUMA_BALANCING + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + u64 node_stamp; /* migration stamp */ + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; -static inline void sched_clock_tick(void) -{ -} + struct list_head numa_entry; + struct numa_group *numa_group; -static inline void clear_sched_clock_stable(void) -{ -} + /* + * numa_faults is an array split into four regions: + * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer + * in this precise order. + * + * faults_memory: Exponential decaying average of faults on a per-node + * basis. Scheduling placement decisions are made based on these + * counts. The values remain static for the duration of a PTE scan. + * faults_cpu: Track the nodes the process was running on when a NUMA + * hinting fault was incurred. + * faults_memory_buffer and faults_cpu_buffer: Record faults per node + * during the current scan window. When the scan completes, the counts + * in faults_memory and faults_cpu decay and these values are copied. + */ + unsigned long *numa_faults; + unsigned long total_numa_faults; -static inline void sched_clock_idle_sleep_event(void) -{ -} + /* + * numa_faults_locality tracks if faults recorded during the last + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults + */ + unsigned long numa_faults_locality[3]; -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} + unsigned long numa_pages_migrated; +#endif /* CONFIG_NUMA_BALANCING */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} + struct tlbflush_unmap_batch tlb_ubc; -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); + struct rcu_head rcu; -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); + /* + * cache last used pipe for splice + */ + struct pipe_inode_info *splice_pipe; -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! # - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} + struct page_frag task_frag; -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} +#ifdef CONFIG_FAULT_INJECTION + int make_it_fail; #endif + /* + * when (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for some dirty throttling pause + */ + int nr_dirtied; + int nr_dirtied_pause; + unsigned long dirty_paused_when; /* start of a write-and-pause period */ -extern unsigned long long -task_sched_runtime(struct task_struct *task); - -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} +#ifdef CONFIG_LATENCYTOP + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif + /* + * time slack values; these are used to round up poll() and + * select() etc timeout values. These are in nanoseconds. + */ + u64 timer_slack_ns; + u64 default_timer_slack_ns; -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} +#ifdef CONFIG_KASAN + unsigned int kasan_depth; #endif - -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* Index of current stored address in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* time stamp for last schedule */ + unsigned long long ftrace_timestamp; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; + /* Pause for the tracing */ + atomic_t tracing_graph_pause; #endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); +#ifdef CONFIG_TRACING + /* state flags for use by tracers */ + unsigned long trace; + /* bitmask and counter of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ +#ifdef CONFIG_KCOV + /* Coverage collection mode enabled for this task (0 if disabled). */ + enum kcov_mode kcov_mode; + /* Size of the kcov_area. */ + unsigned kcov_size; + /* Buffer for coverage collection. */ + void *kcov_area; + /* kcov desciptor wired with this task or NULL. */ + struct kcov *kcov; #endif +#ifdef CONFIG_MEMCG + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); + /* number of pages to reclaim on returning to userland */ + unsigned int memcg_nr_pages_over_high; #endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#ifdef CONFIG_UPROBES + struct uprobe_task *utask; #endif - -extern int yield_to(struct task_struct *p, bool preempt); -extern void set_user_nice(struct task_struct *p, long nice); -extern int task_prio(const struct task_struct *p); -/** - * task_nice - return the nice value of a given task. - * @p: the task in question. - * - * Return: The nice value [ -20 ... 0 ... 19 ]. - */ -static inline int task_nice(const struct task_struct *p) -{ - return PRIO_TO_NICE((p)->static_prio); -} -extern int can_nice(const struct task_struct *p, const int nice); -extern int task_curr(const struct task_struct *p); -extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); -extern struct task_struct *idle_task(int cpu); -/** - * is_idle_task - is the specified task an idle task? - * @p: the task in question. - * - * Return: 1 if @p is an idle task. 0 otherwise. - */ -static inline bool is_idle_task(const struct task_struct *p) -{ - return !!(p->flags & PF_IDLE); -} -extern struct task_struct *curr_task(int cpu); -extern void ia64_set_curr_task(int cpu, struct task_struct *p); - -void yield(void); - -union thread_union { -#ifndef CONFIG_THREAD_INFO_IN_TASK - struct thread_info thread_info; +#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif - unsigned long stack[THREAD_SIZE/sizeof(long)]; -}; - -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) -{ - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); -} +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP + unsigned long task_state_change; #endif - -extern union thread_union init_thread_union; -extern struct task_struct init_task; - -extern struct mm_struct init_mm; - -extern struct pid_namespace init_pid_ns; - -/* - * find a task by one of its numerical ids - * - * find_task_by_pid_ns(): - * finds a task by its pid in the specified namespace - * find_task_by_vpid(): - * finds a task by its virtual pid - * - * see also find_vpid() etc in include/linux/pid.h - */ - -extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); - -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - -#include - -extern void xtime_update(unsigned long ticks); - -extern int wake_up_state(struct task_struct *tsk, unsigned int state); -extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk); -#ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); -#else - static inline void kick_process(struct task_struct *tsk) { } + int pagefault_disabled; +#ifdef CONFIG_MMU + struct task_struct *oom_reaper_list; #endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} - -extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK +#ifdef CONFIG_VMAP_STACK + struct vm_struct *stack_vm_area; +#endif +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* A live task holds one reference. */ + atomic_t stack_refcount; +#endif +/* CPU-specific state of this task */ + struct thread_struct thread; /* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. - */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. + * Do not put anything below here! */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) +}; + +static inline struct pid *task_pid(struct task_struct *task) { - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); + return task->pids[PIDTYPE_PID].pid; } -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) +static inline struct pid *task_tgid(struct task_struct *task) { - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + return task->group_leader->pids[PIDTYPE_PID].pid; } -static inline void clear_restore_sigmask(void) + +/* + * Without tasklist or rcu lock it is not safe to dereference + * the result of task_pgrp/task_session even if task == current, + * we can race with another thread doing sys_setsid/sys_setpgid. + */ +static inline struct pid *task_pgrp(struct task_struct *task) { - current->restore_sigmask = false; + return task->group_leader->pids[PIDTYPE_PGID].pid; } -static inline bool test_restore_sigmask(void) + +static inline struct pid *task_session(struct task_struct *task) { - return current->restore_sigmask; + return task->group_leader->pids[PIDTYPE_SID].pid; } -static inline bool test_and_clear_restore_sigmask(void) + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of + * current. + * task_xid_nr_ns() : id seen from the ns specified; + * + * set_task_vxid() : assigns a virtual id to a task; + * + * see also pid_nr() etc in include/linux/pid.h + */ +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, + struct pid_namespace *ns); + +static inline pid_t task_pid_nr(struct task_struct *tsk) { - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; + return tsk->pid; } -#endif -static inline void restore_saved_sigmask(void) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) { - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); + return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } -static inline sigset_t *sigmask_to_save(void) +static inline pid_t task_pid_vnr(struct task_struct *tsk) { - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; + return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); } -static inline int kill_cad_pid(int sig, int priv) + +static inline pid_t task_tgid_nr(struct task_struct *tsk) { - return kill_pid(cad_pid, sig, priv); + return tsk->tgid; } -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) +pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) +static inline pid_t task_tgid_vnr(struct task_struct *tsk) { - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif + return pid_vnr(task_tgid(tsk)); } -static inline int sas_ss_flags(unsigned long sp) + +static inline int pid_alive(const struct task_struct *p); +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { - if (!current->sas_ss_size) - return SS_DISABLE; + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); - return on_sig_stack(sp) ? SS_ONSTACK : 0; + return pid; } -static inline void sas_ss_reset(struct task_struct *p) +static inline pid_t task_ppid_nr(const struct task_struct *tsk) { - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; + return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) { - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) { - atomic_inc(&mm->mm_count); + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); } -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) + +static inline pid_t task_session_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) { - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); + return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } -static inline void mmdrop_async_fn(struct work_struct *work) +static inline pid_t task_session_vnr(struct task_struct *tsk) { - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); + return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -static inline void mmdrop_async(struct mm_struct *mm) +/* obsolete, do not use */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) { - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } + return task_pgrp_nr_ns(tsk, &init_pid_ns); } /** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. * - * Use mmput() to release the reference acquired by mmget(). + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. + * Return: 1 if the process is alive. 0 otherwise. */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) +static inline int pid_alive(const struct task_struct *p) { - return atomic_inc_not_zero(&mm->mm_users); + return p->pids[PIDTYPE_PID].pid != NULL; } -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. +/** + * is_global_init - check if a task structure is init. Since init + * is free to have sub-threads we need to check tgid. + * @tsk: Task structure to be checked. + * + * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); - -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); -#else -static inline void exit_thread(struct task_struct *tsk) +static inline int is_global_init(struct task_struct *tsk) { + return task_tgid_nr(tsk) == 1; } -#endif - -extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); -extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); - -extern void do_group_exit(int); +extern struct pid *cad_pid; -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern void free_task(struct task_struct *tsk); +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) -extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} -extern char *get_task_comm(char *to, struct task_struct *tsk); +extern void __put_task_struct(struct task_struct *t); -#ifdef CONFIG_SMP -void scheduler_ipi(void); -extern unsigned long wait_task_inactive(struct task_struct *, long match_state); -#else -static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline void put_task_struct(struct task_struct *t) { - return 1; + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); } -#endif - -#define tasklist_empty() \ - list_empty(&init_task.tasks) -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); /* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. + * Per process flags */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* dumped core */ +#define PF_SIGNALED 0x00000400 /* killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} +/* + * Only the _current_ task can read/write to tsk->flags, but other + * tasks can access tsk->flags in readonly mode for example + * with tsk_used_math (like during threaded core dumping). + * There is however an exception to this rule during ptrace + * or during fork: the ptracer task is allowed to write to the + * child->flags of its traced child (same goes for fork, the parent + * can write to the child->flags), because we're guaranteed the + * child is not running and in turn not changing child->flags + * at the same time the parent does it. + */ +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) +#define conditional_stopped_child_used_math(condition, child) \ + do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) +#define conditional_used_math(condition) \ + conditional_stopped_child_used_math(condition, current) +#define copy_to_stopped_child_used_math(child) \ + do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) +/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} +/* Per-process atomic flags. */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} +#define TASK_PFA_TEST(name, func) \ + static inline bool task_##func(struct task_struct *p) \ + { return test_bit(PFA_##name, &p->atomic_flags); } +#define TASK_PFA_SET(name, func) \ + static inline void task_set_##func(struct task_struct *p) \ + { set_bit(PFA_##name, &p->atomic_flags); } +#define TASK_PFA_CLEAR(name, func) \ + static inline void task_clear_##func(struct task_struct *p) \ + { clear_bit(PFA_##name, &p->atomic_flags); } -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} +TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) +TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} +TASK_PFA_TEST(SPREAD_PAGE, spread_page) +TASK_PFA_SET(SPREAD_PAGE, spread_page) +TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) +TASK_PFA_TEST(SPREAD_SLAB, spread_slab) +TASK_PFA_SET(SPREAD_SLAB, spread_slab) +TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} +TASK_PFA_TEST(LMK_WAITING, lmk_waiting) +TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void task_unlock(struct task_struct *p) +static inline void tsk_restore_flags(struct task_struct *task, + unsigned long orig_flags, unsigned long flags) { - spin_unlock(&p->alloc_lock); + task->flags &= ~flags; + task->flags |= orig_flags & flags; } -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, + const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, + const struct cpumask *cs_cpus_allowed); +#ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) +extern int set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask); +#else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) { - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; } - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) +static inline int set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask) { - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); + if (!cpumask_test_cpu(0, new_mask)) + return -EINVAL; + return 0; } +#endif + +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif +extern int yield_to(struct task_struct *p, bool preempt); +extern void set_user_nice(struct task_struct *p, long nice); +extern int task_prio(const struct task_struct *p); /** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes + * task_nice - return the nice value of a given task. + * @p: the task in question. * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. + * Return: The nice value [ -20 ... 0 ... 19 ]. */ -static inline void threadgroup_change_begin(struct task_struct *tsk) +static inline int task_nice(const struct task_struct *p) { - might_sleep(); - cgroup_threadgroup_change_begin(tsk); + return PRIO_TO_NICE((p)->static_prio); } - +extern int can_nice(const struct task_struct *p, const int nice); +extern int task_curr(const struct task_struct *p); +extern int idle_cpu(int cpu); +extern int sched_setscheduler(struct task_struct *, int, + const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, + const struct sched_param *); +extern int sched_setattr(struct task_struct *, + const struct sched_attr *); +extern struct task_struct *idle_task(int cpu); /** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes + * is_idle_task - is the specified task an idle task? + * @p: the task in question. * - * See threadgroup_change_begin(). + * Return: 1 if @p is an idle task. 0 otherwise. */ -static inline void threadgroup_change_end(struct task_struct *tsk) +static inline bool is_idle_task(const struct task_struct *p) { - cgroup_threadgroup_change_end(tsk); + return !!(p->flags & PF_IDLE); } +extern struct task_struct *curr_task(int cpu); +extern void ia64_set_curr_task(int cpu, struct task_struct *p); -#ifdef CONFIG_THREAD_INFO_IN_TASK +void yield(void); + +union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK + struct thread_info thread_info; +#endif + unsigned long stack[THREAD_SIZE/sizeof(long)]; +}; +#ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) { return &task->thread_info; } - -/* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - #elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) +#endif -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} +extern struct pid_namespace init_pid_ns; /* - * Return the address of the last usable long on the stack. + * find a task by one of its numerical ids * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. + * find_task_by_pid_ns(): + * finds a task by its pid in the specified namespace + * find_task_by_vpid(): + * finds a task by its virtual pid * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. + * see also find_vpid() etc in include/linux/pid.h */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? - task_stack_page(tsk) : NULL; -} +extern struct task_struct *find_task_by_vpid(pid_t nr); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, + struct pid_namespace *ns); -extern void put_task_stack(struct task_struct *tsk); +extern int wake_up_state(struct task_struct *tsk, unsigned int state); +extern int wake_up_process(struct task_struct *tsk); +extern void wake_up_new_task(struct task_struct *tsk); +#ifdef CONFIG_SMP + extern void kick_process(struct task_struct *tsk); #else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} + static inline void kick_process(struct task_struct *tsk) { } #endif -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) { - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); + __set_task_comm(tsk, from, false); } +extern char *get_task_comm(char *to, struct task_struct *tsk); -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) +#ifdef CONFIG_SMP +void scheduler_ipi(void); +extern unsigned long wait_task_inactive(struct task_struct *, long match_state); +#else +static inline void scheduler_ipi(void) { } +static inline unsigned long wait_task_inactive(struct task_struct *p, + long match_state) { - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif + return 1; } #endif -extern void set_task_stack_end_magic(struct task_struct *tsk); /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available @@ -3350,37 +1421,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -3422,15 +1462,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, @@ -3445,113 +1476,11 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } -/* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ @@ -3601,100 +1530,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif