From 1ac9bc6943edf7d181b4b1cc734981350d4f6bae Mon Sep 17 00:00:00 2001
From: Arun Sharma
Date: Wed, 21 Dec 2011 16:15:40 -0800
Subject: [PATCH] sched/tracing: Add a new tracepoint for sleeptime

If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

This will be useful for sleep time profiling.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma
Acked-by: Peter Zijlstra
Cc: Steven Rostedt
Cc: Mathieu Desnoyers
Cc: Arnaldo Carvalho de Melo
Cc: Andrew Vagin
Cc: Frederic Weisbecker
Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar
---
/*
 * Reviewer notes (commentary only; not part of the change):
 *
 * - trace_get_sleeptime() is a read-and-clear helper: under CONFIG_SCHEDSTATS
 *   it returns block_start if non-zero, else sleep_start, and zeroes both.
 *   Without CONFIG_SCHEDSTATS it returns 0, so the event reports sleeptime=0.
 * - The event is emitted from finish_task_switch() for "current" with
 *   rq->clock as "now", which is why the reported delay also covers the
 *   time spent on the runqueue.
 * - NOTE(review): the fair.c hunks drop the resets of sleep_start and
 *   block_start from enqueue_sleeper(), leaving the helper above as the only
 *   reset visible in this patch. Tracepoint assign blocks presumably run only
 *   while the event is enabled - confirm that the schedstats touched here
 *   (sum_sleep_runtime, sleep_max, block_max) stay correct, and that the
 *   first sample is not computed from a stale timestamp, when the event is
 *   disabled or enabled late.
 * - NOTE(review): rq->clock is read after finish_lock_switch(); confirm the
 *   value is still valid to use at that point.
 */
 include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |  1 +
 kernel/sched/fair.c          |  2 --
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e33ed1bfa11..6ba596b07a7 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+	u64 block, sleep;
+
+	block = tsk->se.statistics.block_start;
+	sleep = tsk->se.statistics.sleep_start;
+	tsk->se.statistics.block_start = 0;	/* read-and-clear both */
+	tsk->se.statistics.sleep_start = 0;
+
+	return block ? block : sleep ? sleep : 0;	/* block_start wins */
+#else
+	return 0;
+#endif /* CONFIG_SCHEDSTATS */
+}
+#endif /* CREATE_TRACE_POINTS */
+
+/*
+ * Tracepoint for accounting sleeptime: time in ns from the start of the
+ * task's last block (I/O wait) or sleep until @now; 0 if none recorded.
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+	TP_PROTO(struct task_struct *tsk, u64 now),
+
+	TP_ARGS(tsk, now),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	u64,	sleeptime		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->sleeptime	= trace_get_sleeptime(tsk);
+		__entry->sleeptime	= __entry->sleeptime ?
+				now - __entry->sleeptime : 0;
+	)
+	TP_perf_assign(
+		__perf_count(__entry->sleeptime);
+	),
+
+	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->sleeptime)
+);
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8ffe523dfa8..4dbfd04a214 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1937,6 +1937,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
+	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bdf18836f74..8e42de9105f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
-		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
-		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
-- 
2.39.5