sched: Do not account irq time to current task

author Venkatesh Pallipadi <venki@google.com>

Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)

committer AK <andi@firstfloor.org>

Thu, 31 Mar 2011 18:58:01 +0000 (11:58 -0700)
author Venkatesh Pallipadi <venki@google.com>
Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)
committer AK <andi@firstfloor.org>
Thu, 31 Mar 2011 18:58:01 +0000 (11:58 -0700)
diff --git a/kernel/sched.c b/kernel/sched.c

index 8efa15c99f559f794707d5d42766c94d7a432404..c8796ca43b6e9cfa852c159cbeb60b00d1d63733 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -491,6 +491,7 @@ struct rq {
         struct mm_struct *prev_mm;
  
         u64 clock;
+       u64 clock_task;
  
         atomic_t nr_iowait;
  
@@ -641,10 +642,18 @@ static inline struct task_group *task_group(struct task_struct *p)
  
  #endif /* CONFIG_CGROUP_SCHED */
  
+static u64 irq_time_cpu(int cpu);
+
  inline void update_rq_clock(struct rq *rq)
  {
+       int cpu = cpu_of(rq);
+       u64 irq_time;
+
         if (!rq->skip_clock_update)
                 rq->clock = sched_clock_cpu(cpu_of(rq));
+       irq_time = irq_time_cpu(cpu);
+       if (rq->clock - irq_time > rq->clock_task)
+               rq->clock_task = rq->clock - irq_time;
  }
  
  /*
@@ -1821,6 +1830,18 @@ static const struct sched_class rt_sched_class;
  
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_vtime.
+ */
  static DEFINE_PER_CPU(u64, cpu_hardirq_time);
  static DEFINE_PER_CPU(u64, cpu_softirq_time);
  
@@ -1837,6 +1858,14 @@ void disable_sched_clock_irqtime(void)
         sched_clock_irqtime = 0;
  }
  
+static u64 irq_time_cpu(int cpu)
+{
+       if (!sched_clock_irqtime)
+               return 0;
+
+       return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
  void account_system_vtime(struct task_struct *curr)
  {
         unsigned long flags;
@@ -1866,6 +1895,13 @@ void account_system_vtime(struct task_struct *curr)
         local_irq_restore(flags);
  }
  
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+       return 0;
+}
+
  #endif
  
  #include "sched_stats.h"
@@ -3263,7 +3299,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
  
         if (task_current(rq, p)) {
                 update_rq_clock(rq);
-               ns = rq->clock - p->se.exec_start;
+               ns = rq->clock_task - p->se.exec_start;
                 if ((s64)ns < 0)
                         ns = 0;
         }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index 91508ba76214f83395a8b84e8f69f46b886930e8..32112033b7bcbfc4f63e9c486eff63184d5da2a5 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
  static void update_curr(struct cfs_rq *cfs_rq)
  {
         struct sched_entity *curr = cfs_rq->curr;
-       u64 now = rq_of(cfs_rq)->clock;
+       u64 now = rq_of(cfs_rq)->clock_task;
         unsigned long delta_exec;
  
         if (unlikely(!curr))
@@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
         /*
          * We are starting a new run period:
          */
-       se->exec_start = rq_of(cfs_rq)->clock;
+       se->exec_start = rq_of(cfs_rq)->clock_task;
  }
  
  /**************************************************
@@ -1803,7 +1803,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
          * 2) too many balance attempts have failed.
          */
  
-       tsk_cache_hot = task_hot(p, rq->clock, sd);
+       tsk_cache_hot = task_hot(p, rq->clock_task, sd);
         if (!tsk_cache_hot ||
                 sd->nr_balance_failed > sd->cache_nice_tries) {
  #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 25bd3a3ca0cced7e8b2a7e3cb0f402f6cd5fcfee..a851cc0796b31c8e0c65837111e743bebb67e2cc 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq)
         if (!task_has_rt_policy(curr))
                 return;
  
-       delta_exec = rq->clock - curr->se.exec_start;
+       delta_exec = rq->clock_task - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
  
@@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq)
         curr->se.sum_exec_runtime += delta_exec;
         account_group_exec_runtime(curr, delta_exec);
  
-       curr->se.exec_start = rq->clock;
+       curr->se.exec_start = rq->clock_task;
         cpuacct_charge(curr, delta_exec);
  
         sched_rt_avg_update(rq, delta_exec);
@@ -1075,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
         } while (rt_rq);
  
         p = rt_task_of(rt_se);
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         return p;
  }
@@ -1716,7 +1716,7 @@ static void set_curr_task_rt(struct rq *rq)
  {
         struct task_struct *p = rq->curr;
  
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         /* The running task is never eligible for pushing */
         dequeue_pushable_task(rq, p);
author	Venkatesh Pallipadi <venki@google.com>
	Thu, 10 Feb 2011 09:23:27 +0000 (10:23 +0100)
committer	AK <andi@firstfloor.org>
	Thu, 31 Mar 2011 18:58:01 +0000 (11:58 -0700)
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history