sched: Do not account irq time to current task

author Venkatesh Pallipadi <venki@google.com>

Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)

committer Greg Kroah-Hartman <gregkh@suse.de>

Thu, 17 Feb 2011 23:37:26 +0000 (15:37 -0800)
author Venkatesh Pallipadi <venki@google.com>
Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)
committer Greg Kroah-Hartman <gregkh@suse.de>
Thu, 17 Feb 2011 23:37:26 +0000 (15:37 -0800)
diff --git a/kernel/sched.c b/kernel/sched.c

index fe708277597354cc65a62d93cd72badbfaf4a026..5761f098a4bf7991a33f015765636f5862972c1b 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -525,6 +525,7 @@ struct rq {
         struct mm_struct *prev_mm;
  
         u64 clock;
+       u64 clock_task;
  
         atomic_t nr_iowait;
  
@@ -620,9 +621,17 @@ static inline int cpu_of(struct rq *rq)
  #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
  #define raw_rq()               (&__raw_get_cpu_var(runqueues))
  
+static u64 irq_time_cpu(int cpu);
+
  inline void update_rq_clock(struct rq *rq)
  {
+       int cpu = cpu_of(rq);
+       u64 irq_time;
+
         rq->clock = sched_clock_cpu(cpu_of(rq));
+       irq_time = irq_time_cpu(cpu);
+       if (rq->clock - irq_time > rq->clock_task)
+               rq->clock_task = rq->clock - irq_time;
  }
  
  /*
@@ -1809,6 +1818,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_vtime.
+ */
  static DEFINE_PER_CPU(u64, cpu_hardirq_time);
  static DEFINE_PER_CPU(u64, cpu_softirq_time);
  
@@ -1825,6 +1846,14 @@ void disable_sched_clock_irqtime(void)
         sched_clock_irqtime = 0;
  }
  
+static u64 irq_time_cpu(int cpu)
+{
+       if (!sched_clock_irqtime)
+               return 0;
+
+       return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
  void account_system_vtime(struct task_struct *curr)
  {
         unsigned long flags;
@@ -1854,6 +1883,13 @@ void account_system_vtime(struct task_struct *curr)
         local_irq_restore(flags);
  }
  
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+       return 0;
+}
+
  #endif
  
  #include "sched_stats.h"
@@ -3295,7 +3331,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
          * 2) too many balance attempts have failed.
          */
  
-       tsk_cache_hot = task_hot(p, rq->clock, sd);
+       tsk_cache_hot = task_hot(p, rq->clock_task, sd);
         if (!tsk_cache_hot ||
                 sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
@@ -5083,7 +5119,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
  
         if (task_current(rq, p)) {
                 update_rq_clock(rq);
-               ns = rq->clock - p->se.exec_start;
+               ns = rq->clock_task - p->se.exec_start;
                 if ((s64)ns < 0)
                         ns = 0;
         }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index e9dba101d18b72277eed4bb338953739e5d9a56f..c07663eee1566db3204de312f538f35dceddffde 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -496,7 +496,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
  static void update_curr(struct cfs_rq *cfs_rq)
  {
         struct sched_entity *curr = cfs_rq->curr;
-       u64 now = rq_of(cfs_rq)->clock;
+       u64 now = rq_of(cfs_rq)->clock_task;
         unsigned long delta_exec;
  
         if (unlikely(!curr))
@@ -579,7 +579,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
         /*
          * We are starting a new run period:
          */
-       se->exec_start = rq_of(cfs_rq)->clock;
+       se->exec_start = rq_of(cfs_rq)->clock_task;
  }
  
  /**************************************************
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 3782369ed6f577d364e7bfdd3b1e07c65fb0ee30..9d9a7b1a05109ec372d8aeb32de4acce91f2d29e 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -603,7 +603,7 @@ static void update_curr_rt(struct rq *rq)
         if (!task_has_rt_policy(curr))
                 return;
  
-       delta_exec = rq->clock - curr->se.exec_start;
+       delta_exec = rq->clock_task - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
  
@@ -612,7 +612,7 @@ static void update_curr_rt(struct rq *rq)
         curr->se.sum_exec_runtime += delta_exec;
         account_group_exec_runtime(curr, delta_exec);
  
-       curr->se.exec_start = rq->clock;
+       curr->se.exec_start = rq->clock_task;
         cpuacct_charge(curr, delta_exec);
  
         sched_rt_avg_update(rq, delta_exec);
@@ -1069,7 +1069,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
         } while (rt_rq);
  
         p = rt_task_of(rt_se);
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         return p;
  }
@@ -1735,7 +1735,7 @@ static void set_curr_task_rt(struct rq *rq)
  {
         struct task_struct *p = rq->curr;
  
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         /* The running task is never eligible for pushing */
         dequeue_pushable_task(rq, p);
author	Venkatesh Pallipadi <venki@google.com>
	Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)
committer	Greg Kroah-Hartman <gregkh@suse.de>
	Thu, 17 Feb 2011 23:37:26 +0000 (15:37 -0800)
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history