sched/debug: Make schedstats a runtime tunable that is disabled by default

author Mel Gorman <mgorman@techsingularity.net>

Fri, 5 Feb 2016 09:08:36 +0000 (09:08 +0000)

committer Ingo Molnar <mingo@kernel.org>

Tue, 9 Feb 2016 10:54:23 +0000 (11:54 +0100)
author Mel Gorman <mgorman@techsingularity.net>
Fri, 5 Feb 2016 09:08:36 +0000 (09:08 +0000)
committer Ingo Molnar <mingo@kernel.org>
Tue, 9 Feb 2016 10:54:23 +0000 (11:54 +0100)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 551ecf09c8dd820be865ebbbc22fa6b5f608dd98..ed47b609530b8578d081d0785216d73a8fc579ca 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3528,6 +3528,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         sched_debug     [KNL] Enables verbose scheduler debug messages.
  
+       schedstats=     [KNL,X86] Enable or disable scheduler statistics.
+                       Allowed values are enable and disable. This feature
+                       incurs a small amount of overhead in the scheduler
+                       but is useful for debugging and performance tuning.
+
         skew_tick=      [KNL] Offset the periodic timer tick per cpu to mitigate
                         xtime_lock contention on larger systems, and/or RCU lock
                         contention on all systems with CONFIG_MAXSMP set.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt

index a93b414672a71ac6fa9bac1e848215804bde139c..87119dc9bc649b9a9ee0aadde427b6bb0f3762cf 100644 (file)
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -760,6 +760,14 @@ rtsig-nr shows the number of RT signals currently queued.
  
  ==============================================================
  
+sched_schedstats:
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+==============================================================
+
  sg-big-buff:
  
  This file shows the size of the generic SCSI (sg) buffer.
diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h

index e23121f9d82a042a9b10907fe40e468e5711f99f..59ccab297ae061ba03494ccede8eae2d7a5236a1 100644 (file)
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -37,6 +37,9 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
  
  void clear_all_latency_tracing(struct task_struct *p);
  
+extern int sysctl_latencytop(struct ctl_table *table, int write,
+                       void __user *buffer, size_t *lenp, loff_t *ppos);
+
  #else
  
  static inline void
diff --git a/include/linux/sched.h b/include/linux/sched.h

index a10494a94cc30f24d65ab866771833883c6f886d..a292c4b7e94cdeb9ca79d1f446c6c7cd41501d42 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,6 +920,10 @@ static inline int sched_info_on(void)
  #endif
  }
  
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
  enum cpu_idle_type {
         CPU_IDLE,
         CPU_NOT_IDLE,
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h

index c9e4731cf10b8e97956b160c503e447490991931..4f080ab4f2cd1199f3f5aee15e7b06fdebb61333 100644 (file)
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -95,4 +95,8 @@ extern int sysctl_numa_balancing(struct ctl_table *table, int write,
                                  void __user *buffer, size_t *lenp,
                                  loff_t *ppos);
  
+extern int sysctl_schedstats(struct ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp,
+                                loff_t *ppos);
+
  #endif /* _SCHED_SYSCTL_H */
diff --git a/kernel/latencytop.c b/kernel/latencytop.c

index a02812743a7e63378a79cc768255f807a7fd469b..b5c30d9f46c5084acddbd34e533f91e9c98e8300 100644 (file)
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -47,12 +47,12 @@
   * of times)
   */
  
-#include <linux/latencytop.h>
  #include <linux/kallsyms.h>
  #include <linux/seq_file.h>
  #include <linux/notifier.h>
  #include <linux/spinlock.h>
  #include <linux/proc_fs.h>
+#include <linux/latencytop.h>
  #include <linux/export.h>
  #include <linux/sched.h>
  #include <linux/list.h>
@@ -289,4 +289,16 @@ static int __init init_lstats_procfs(void)
         proc_create("latency_stats", 0644, NULL, &lstats_fops);
         return 0;
  }
+
+int sysctl_latencytop(struct ctl_table *table, int write,
+                       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int err;
+
+       err = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (latencytop_enabled)
+               force_schedstat_enabled();
+
+       return err;
+}
  device_initcall(init_lstats_procfs);
diff --git a/kernel/profile.c b/kernel/profile.c

index 99513e1160e518d322f6d0ce0f346e6da9fcbbf0..51369697466e36ba52af710863376d4847d43e36 100644 (file)
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -59,6 +59,7 @@ int profile_setup(char *str)
  
         if (!strncmp(str, sleepstr, strlen(sleepstr))) {
  #ifdef CONFIG_SCHEDSTATS
+               force_schedstat_enabled();
                 prof_on = SLEEP_PROFILING;
                 if (str[strlen(sleepstr)] == ',')
                         str += strlen(sleepstr) + 1;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 24fcdbf28b18a6dc23a32f651e3fc6f6918d90f6..7e548bde67ee255e0d1bfdb018f1b7af912f4b29 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2093,7 +2093,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  
         ttwu_queue(p, cpu);
  stat:
-       ttwu_stat(p, cpu, wake_flags);
+       if (schedstat_enabled())
+               ttwu_stat(p, cpu, wake_flags);
  out:
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
  
@@ -2141,7 +2142,8 @@ static void try_to_wake_up_local(struct task_struct *p)
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  
         ttwu_do_wakeup(rq, p, 0);
-       ttwu_stat(p, smp_processor_id(), 0);
+       if (schedstat_enabled())
+               ttwu_stat(p, smp_processor_id(), 0);
  out:
         raw_spin_unlock(&p->pi_lock);
  }
@@ -2210,6 +2212,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
  #endif
  
  #ifdef CONFIG_SCHEDSTATS
+       /* Even if schedstat is disabled, there should not be garbage */
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  
@@ -2281,6 +2284,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
  #endif
  #endif
  
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+
+#ifdef CONFIG_SCHEDSTATS
+/* Flip the sched_schedstats static key so that it matches @enabled. */
+static void set_schedstats(bool enabled)
+{
+       if (!enabled) {
+               static_branch_disable(&sched_schedstats);
+               return;
+       }
+
+       static_branch_enable(&sched_schedstats);
+}
+
+/*
+ * Turn schedstats on if they are currently off and log a hint on how to
+ * switch them off again. Does nothing when schedstats are already enabled.
+ */
+void force_schedstat_enabled(void)
+{
+       if (schedstat_enabled())
+               return;
+
+       pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+       static_branch_enable(&sched_schedstats);
+}
+
+/*
+ * Parse the "schedstats=" boot parameter. Only the exact strings "enable"
+ * and "disable" are accepted; anything else, including a missing value, is
+ * reported with a warning. Returns 1 when the value was recognised and 0
+ * otherwise.
+ *
+ * NOTE(review): this flips a static key from a __setup handler; confirm
+ * that static keys are usable this early in boot.
+ */
+static int __init setup_schedstats(char *str)
+{
+       int handled = 0;
+
+       if (str && !strcmp(str, "enable")) {
+               set_schedstats(true);
+               handled = 1;
+       } else if (str && !strcmp(str, "disable")) {
+               set_schedstats(false);
+               handled = 1;
+       }
+
+       if (!handled)
+               pr_warn("Unable to parse schedstats=\n");
+
+       return handled;
+}
+__setup("schedstats=", setup_schedstats);
+
+#ifdef CONFIG_PROC_SYSCTL
+/*
+ * sysctl handler for kernel.sched_schedstats.
+ *
+ * The tunable is backed by a static key rather than an integer, so the
+ * request is run against a copy of @table whose data pointer refers to a
+ * local int holding the key's current state. Writes require CAP_SYS_ADMIN
+ * (-EPERM otherwise). Returns the proc_dointvec_minmax() result.
+ */
+int sysctl_schedstats(struct ctl_table *table, int write,
+                        void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int enabled = static_branch_likely(&sched_schedstats);
+       struct ctl_table shadow;
+       int ret;
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       shadow = *table;
+       shadow.data = &enabled;
+
+       ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
+       if (ret < 0)
+               return ret;
+
+       /* Reads only report the state; a parsed write updates the key. */
+       if (write)
+               set_schedstats(enabled);
+
+       return ret;
+}
+#endif
+#endif
+
  /*
   * fork()/clone()-time setup:
   */
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c

index 641511771ae6a696271f77532ac9e40e28175749..7cfa87bd8b89681ce6862e9934c020bf076c69ae 100644 (file)
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -75,16 +75,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
         PN(se->vruntime);
         PN(se->sum_exec_runtime);
  #ifdef CONFIG_SCHEDSTATS
-       PN(se->statistics.wait_start);
-       PN(se->statistics.sleep_start);
-       PN(se->statistics.block_start);
-       PN(se->statistics.sleep_max);
-       PN(se->statistics.block_max);
-       PN(se->statistics.exec_max);
-       PN(se->statistics.slice_max);
-       PN(se->statistics.wait_max);
-       PN(se->statistics.wait_sum);
-       P(se->statistics.wait_count);
+       if (schedstat_enabled()) {
+               PN(se->statistics.wait_start);
+               PN(se->statistics.sleep_start);
+               PN(se->statistics.block_start);
+               PN(se->statistics.sleep_max);
+               PN(se->statistics.block_max);
+               PN(se->statistics.exec_max);
+               PN(se->statistics.slice_max);
+               PN(se->statistics.wait_max);
+               PN(se->statistics.wait_sum);
+               P(se->statistics.wait_count);
+       }
  #endif
         P(se->load.weight);
  #ifdef CONFIG_SMP
@@ -122,10 +124,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
                 (long long)(p->nvcsw + p->nivcsw),
                 p->prio);
  #ifdef CONFIG_SCHEDSTATS
-       SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-               SPLIT_NS(p->se.statistics.wait_sum),
-               SPLIT_NS(p->se.sum_exec_runtime),
-               SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+       if (schedstat_enabled()) {
+               SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+                       SPLIT_NS(p->se.statistics.wait_sum),
+                       SPLIT_NS(p->se.sum_exec_runtime),
+                       SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+       }
  #else
         SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
                 0LL, 0L,
@@ -313,17 +317,18 @@ do {                                                                      \
  #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
  #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
  
-       P(yld_count);
-
-       P(sched_count);
-       P(sched_goidle);
  #ifdef CONFIG_SMP
         P64(avg_idle);
         P64(max_idle_balance_cost);
  #endif
  
-       P(ttwu_count);
-       P(ttwu_local);
+       if (schedstat_enabled()) {
+               P(yld_count);
+               P(sched_count);
+               P(sched_goidle);
+               P(ttwu_count);
+               P(ttwu_local);
+       }
  
  #undef P
  #undef P64
@@ -569,38 +574,39 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
         nr_switches = p->nvcsw + p->nivcsw;
  
  #ifdef CONFIG_SCHEDSTATS
-       PN(se.statistics.sum_sleep_runtime);
-       PN(se.statistics.wait_start);
-       PN(se.statistics.sleep_start);
-       PN(se.statistics.block_start);
-       PN(se.statistics.sleep_max);
-       PN(se.statistics.block_max);
-       PN(se.statistics.exec_max);
-       PN(se.statistics.slice_max);
-       PN(se.statistics.wait_max);
-       PN(se.statistics.wait_sum);
-       P(se.statistics.wait_count);
-       PN(se.statistics.iowait_sum);
-       P(se.statistics.iowait_count);
         P(se.nr_migrations);
-       P(se.statistics.nr_migrations_cold);
-       P(se.statistics.nr_failed_migrations_affine);
-       P(se.statistics.nr_failed_migrations_running);
-       P(se.statistics.nr_failed_migrations_hot);
-       P(se.statistics.nr_forced_migrations);
-       P(se.statistics.nr_wakeups);
-       P(se.statistics.nr_wakeups_sync);
-       P(se.statistics.nr_wakeups_migrate);
-       P(se.statistics.nr_wakeups_local);
-       P(se.statistics.nr_wakeups_remote);
-       P(se.statistics.nr_wakeups_affine);
-       P(se.statistics.nr_wakeups_affine_attempts);
-       P(se.statistics.nr_wakeups_passive);
-       P(se.statistics.nr_wakeups_idle);
  
-       {
+       if (schedstat_enabled()) {
                 u64 avg_atom, avg_per_cpu;
  
+               PN(se.statistics.sum_sleep_runtime);
+               PN(se.statistics.wait_start);
+               PN(se.statistics.sleep_start);
+               PN(se.statistics.block_start);
+               PN(se.statistics.sleep_max);
+               PN(se.statistics.block_max);
+               PN(se.statistics.exec_max);
+               PN(se.statistics.slice_max);
+               PN(se.statistics.wait_max);
+               PN(se.statistics.wait_sum);
+               P(se.statistics.wait_count);
+               PN(se.statistics.iowait_sum);
+               P(se.statistics.iowait_count);
+               P(se.statistics.nr_migrations_cold);
+               P(se.statistics.nr_failed_migrations_affine);
+               P(se.statistics.nr_failed_migrations_running);
+               P(se.statistics.nr_failed_migrations_hot);
+               P(se.statistics.nr_forced_migrations);
+               P(se.statistics.nr_wakeups);
+               P(se.statistics.nr_wakeups_sync);
+               P(se.statistics.nr_wakeups_migrate);
+               P(se.statistics.nr_wakeups_local);
+               P(se.statistics.nr_wakeups_remote);
+               P(se.statistics.nr_wakeups_affine);
+               P(se.statistics.nr_wakeups_affine_attempts);
+               P(se.statistics.nr_wakeups_passive);
+               P(se.statistics.nr_wakeups_idle);
+
                 avg_atom = p->se.sum_exec_runtime;
                 if (nr_switches)
                         avg_atom = div64_ul(avg_atom, nr_switches);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 56b7d4b839476b6ed1692e786abe9ed6cda64a5f..51a45502d8a60de7af29a490db507ed1fa6ee792 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -20,8 +20,8 @@
   *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
   */
  
-#include <linux/latencytop.h>
  #include <linux/sched.h>
+#include <linux/latencytop.h>
  #include <linux/cpumask.h>
  #include <linux/cpuidle.h>
  #include <linux/slab.h>
@@ -755,7 +755,9 @@ static void
  update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         struct task_struct *p;
-       u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+       u64 delta;
+
+       delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
  
         if (entity_is_task(se)) {
                 p = task_of(se);
@@ -776,22 +778,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
         se->statistics.wait_sum += delta;
         se->statistics.wait_start = 0;
  }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-#endif
  
  /*
   * Task is being enqueued - update stats:
   */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         /*
          * Are we enqueueing a waiting task? (for current tasks
@@ -802,7 +794,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  }
  
  static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
         /*
          * Mark the end of the wait period if dequeueing a
@@ -810,7 +802,40 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
          */
         if (se != cfs_rq->curr)
                 update_stats_wait_end(cfs_rq, se);
+
+       if (flags & DEQUEUE_SLEEP) {
+               if (entity_is_task(se)) {
+                       struct task_struct *tsk = task_of(se);
+
+                       if (tsk->state & TASK_INTERRUPTIBLE)
+                               se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
+                       if (tsk->state & TASK_UNINTERRUPTIBLE)
+                               se->statistics.block_start = rq_clock(rq_of(cfs_rq));
+               }
+       }
+
+}
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
  }
+#endif
  
  /*
   * We are picking a new current task - update its stats:
@@ -3102,6 +3127,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
  
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
  
+/*
+ * Warn once when schedstats are disabled while one of the sched_stat_*
+ * tracepoints checked below is active. Nothing is enabled from here; the
+ * message only tells the administrator how to turn schedstats on. Without
+ * CONFIG_SCHEDSTATS the body is compiled out.
+ */
+static inline void check_schedstat_required(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+       if (schedstat_enabled())
+               return;
+
+       /* Only warn: schedstats are not force-enabled from this path */
+       if (trace_sched_stat_wait_enabled()    ||
+                       trace_sched_stat_sleep_enabled()   ||
+                       trace_sched_stat_iowait_enabled()  ||
+                       trace_sched_stat_blocked_enabled() ||
+                       trace_sched_stat_runtime_enabled())  {
+               /* The boot parameter accepts "enable", not "enabled" */
+               pr_warn_once("Scheduler tracepoints stat_wait, stat_sleep, "
+                            "stat_iowait, stat_blocked and stat_runtime "
+                            "require the kernel parameter schedstats=enable "
+                            "or kernel.sched_schedstats=1\n");
+       }
+#endif
+}
+
  static void
  enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
@@ -3122,11 +3167,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  
         if (flags & ENQUEUE_WAKEUP) {
                 place_entity(cfs_rq, se, 0);
-               enqueue_sleeper(cfs_rq, se);
+               if (schedstat_enabled())
+                       enqueue_sleeper(cfs_rq, se);
         }
  
-       update_stats_enqueue(cfs_rq, se);
-       check_spread(cfs_rq, se);
+       check_schedstat_required();
+       if (schedstat_enabled()) {
+               update_stats_enqueue(cfs_rq, se);
+               check_spread(cfs_rq, se);
+       }
         if (se != cfs_rq->curr)
                 __enqueue_entity(cfs_rq, se);
         se->on_rq = 1;
@@ -3193,19 +3242,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         update_curr(cfs_rq);
         dequeue_entity_load_avg(cfs_rq, se);
  
-       update_stats_dequeue(cfs_rq, se);
-       if (flags & DEQUEUE_SLEEP) {
-#ifdef CONFIG_SCHEDSTATS
-               if (entity_is_task(se)) {
-                       struct task_struct *tsk = task_of(se);
-
-                       if (tsk->state & TASK_INTERRUPTIBLE)
-                               se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-                       if (tsk->state & TASK_UNINTERRUPTIBLE)
-                               se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-               }
-#endif
-       }
+       if (schedstat_enabled())
+               update_stats_dequeue(cfs_rq, se, flags);
  
         clear_buddies(cfs_rq, se);
  
@@ -3279,7 +3317,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                  * a CPU. So account for the time it spent waiting on the
                  * runqueue.
                  */
-               update_stats_wait_end(cfs_rq, se);
+               if (schedstat_enabled())
+                       update_stats_wait_end(cfs_rq, se);
                 __dequeue_entity(cfs_rq, se);
                 update_load_avg(se, 1);
         }
@@ -3292,7 +3331,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
          * least twice that of our own weight (i.e. dont track it
          * when there are only lesser-weight tasks around):
          */
-       if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+       if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
                 se->statistics.slice_max = max(se->statistics.slice_max,
                         se->sum_exec_runtime - se->prev_sum_exec_runtime);
         }
@@ -3375,9 +3414,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
         /* throttle cfs_rqs exceeding runtime */
         check_cfs_rq_runtime(cfs_rq);
  
-       check_spread(cfs_rq, prev);
+       if (schedstat_enabled()) {
+               check_spread(cfs_rq, prev);
+               if (prev->on_rq)
+                       update_stats_wait_start(cfs_rq, prev);
+       }
+
         if (prev->on_rq) {
-               update_stats_wait_start(cfs_rq, prev);
                 /* Put 'current' back into the tree. */
                 __enqueue_entity(cfs_rq, prev);
                 /* in !on_rq case, update occurred at dequeue */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 10f16374df7f3a3f0f3dc0eb281559cacd223311..1d583870e1a61ac21e48a4d034400041dca16758 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1022,6 +1022,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
  #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
  
  extern struct static_key_false sched_numa_balancing;
+extern struct static_key_false sched_schedstats;
  
  static inline u64 global_rt_period(void)
  {
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h

index b0fbc7632de5f9b13d8ccd2c42d73560c347669a..70b3b6a20fb0e362f4c816fe0f069c7c8576c7f4 100644 (file)
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,9 +29,10 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
         if (rq)
                 rq->rq_sched_info.run_delay += delta;
  }
-# define schedstat_inc(rq, field)      do { (rq)->field++; } while (0)
-# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
-# define schedstat_set(var, val)       do { var = (val); } while (0)
+# define schedstat_enabled()           static_branch_unlikely(&sched_schedstats)
+# define schedstat_inc(rq, field)      do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
+# define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
+# define schedstat_set(var, val)       do { if (schedstat_enabled()) { var = (val); } } while (0)
  #else /* !CONFIG_SCHEDSTATS */
  static inline void
  rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -42,6 +43,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
  static inline void
  rq_sched_info_depart(struct rq *rq, unsigned long long delta)
  {}
+# define schedstat_enabled()           0
  # define schedstat_inc(rq, field)      do { } while (0)
  # define schedstat_add(rq, field, amt) do { } while (0)
  # define schedstat_set(var, val)       do { } while (0)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 97715fd9e790ade5d7cd7731107de2dafb8272e3..f5102fabef7f525f6c79d66756336c262e465caa 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -350,6 +350,17 @@ static struct ctl_table kern_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec,
         },
+#ifdef CONFIG_SCHEDSTATS
+       {
+               .procname       = "sched_schedstats",
+               .data           = NULL,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = sysctl_schedstats,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif /* CONFIG_SCHEDSTATS */
  #endif /* CONFIG_SMP */
  #ifdef CONFIG_NUMA_BALANCING
         {
@@ -505,7 +516,7 @@ static struct ctl_table kern_table[] = {
                 .data           = &latencytop_enabled,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = sysctl_latencytop,
         },
  #endif
  #ifdef CONFIG_BLK_DEV_INITRD
author	Mel Gorman <mgorman@techsingularity.net>
	Fri, 5 Feb 2016 09:08:36 +0000 (09:08 +0000)
committer	Ingo Molnar <mingo@kernel.org>
	Tue, 9 Feb 2016 10:54:23 +0000 (11:54 +0100)
Documentation/kernel-parameters.txt		patch \| blob \| history
Documentation/sysctl/kernel.txt		patch \| blob \| history
include/linux/latencytop.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/sched/sysctl.h		patch \| blob \| history
kernel/latencytop.c		patch \| blob \| history
kernel/profile.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/sched/debug.c		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history
kernel/sched/stats.h		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history