sched: rt time limit

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)

committer Ingo Molnar <mingo@elte.hu>

Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 43e0339d65fce894de1cabe5f80e349513bbb7da..d5ea144df83633ab6da6692811c886917a710d85 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1490,6 +1490,8 @@ extern unsigned int sysctl_sched_child_runs_first;
  extern unsigned int sysctl_sched_features;
  extern unsigned int sysctl_sched_migration_cost;
  extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_rt_period;
+extern unsigned int sysctl_sched_rt_ratio;
  #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
  extern unsigned int sysctl_sched_min_bal_int_shares;
  extern unsigned int sysctl_sched_max_bal_int_shares;
diff --git a/kernel/sched.c b/kernel/sched.c

index 17f93d3eda9187c24d480be18c0bbe5e611265c1..e9a7beee9b790f2cddede096edd84ef663953047 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -342,13 +342,14 @@ struct cfs_rq {
  /* Real-Time classes' related field in a runqueue: */
  struct rt_rq {
         struct rt_prio_array active;
-       int rt_load_balance_idx;
-       struct list_head *rt_load_balance_head, *rt_load_balance_curr;
         unsigned long rt_nr_running;
+#ifdef CONFIG_SMP
         unsigned long rt_nr_migratory;
-       /* highest queued rt task prio */
-       int highest_prio;
+       int highest_prio; /* highest queued rt task prio */
         int overloaded;
+#endif
+       u64 rt_time;
+       u64 rt_throttled;
  };
  
  #ifdef CONFIG_SMP
@@ -415,6 +416,7 @@ struct rq {
         struct list_head leaf_cfs_rq_list;
  #endif
         struct rt_rq rt;
+       u64 rt_period_expire;
  
         /*
          * This is part of a global counter where only the total sum
@@ -600,6 +602,21 @@ const_debug unsigned int sysctl_sched_features =
   */
  const_debug unsigned int sysctl_sched_nr_migrate = 32;
  
+/*
+ * period, in ms, over which we measure -rt task cpu usage.
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_rt_period = 1000;
+
+#define SCHED_RT_FRAC_SHIFT    16
+#define SCHED_RT_FRAC          (1UL << SCHED_RT_FRAC_SHIFT)
+
+/*
+ * ratio of time -rt tasks may consume, in units of 1/SCHED_RT_FRAC.
+ * default: 100%
+ */
+const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC;
+
  /*
   * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
   * clock constructed from sched_clock():
@@ -3674,8 +3691,8 @@ void scheduler_tick(void)
                 rq->clock = next_tick;
         rq->tick_timestamp = rq->clock;
         update_cpu_load(rq);
-       if (curr != rq->idle) /* FIXME: needed? */
-               curr->sched_class->task_tick(rq, curr, 0);
+       curr->sched_class->task_tick(rq, curr, 0);
+       update_sched_rt_period(rq);
         spin_unlock(&rq->lock);
  
  #ifdef CONFIG_SMP
@@ -7041,6 +7058,29 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
  }
  
+static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+       struct rt_prio_array *array;
+       int i;
+
+       array = &rt_rq->active;
+       for (i = 0; i < MAX_RT_PRIO; i++) {
+               INIT_LIST_HEAD(array->queue + i);
+               __clear_bit(i, array->bitmap);
+       }
+       /* delimiter for bitsearch: */
+       __set_bit(MAX_RT_PRIO, array->bitmap);
+
+#ifdef CONFIG_SMP
+       rt_rq->rt_nr_migratory = 0;
+       rt_rq->highest_prio = MAX_RT_PRIO;
+       rt_rq->overloaded = 0;
+#endif
+
+       rt_rq->rt_time = 0;
+       rt_rq->rt_throttled = 0;
+}
+
  void __init sched_init(void)
  {
         int highest_cpu = 0;
@@ -7051,7 +7091,6 @@ void __init sched_init(void)
  #endif
  
         for_each_possible_cpu(i) {
-               struct rt_prio_array *array;
                 struct rq *rq;
  
                 rq = cpu_rq(i);
@@ -7083,6 +7122,8 @@ void __init sched_init(void)
                 }
                 init_task_group.shares = init_task_group_load;
  #endif
+               init_rt_rq(&rq->rt, rq);
+               rq->rt_period_expire = 0;
  
                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                         rq->cpu_load[j] = 0;
@@ -7095,22 +7136,11 @@ void __init sched_init(void)
                 rq->cpu = i;
                 rq->migration_thread = NULL;
                 INIT_LIST_HEAD(&rq->migration_queue);
-               rq->rt.highest_prio = MAX_RT_PRIO;
-               rq->rt.overloaded = 0;
                 rq_attach_root(rq, &def_root_domain);
  #endif
                 init_rq_hrtick(rq);
-
                 atomic_set(&rq->nr_iowait, 0);
-
-               array = &rq->rt.active;
-               for (j = 0; j < MAX_RT_PRIO; j++) {
-                       INIT_LIST_HEAD(array->queue + j);
-                       __clear_bit(j, array->bitmap);
-               }
                 highest_cpu = i;
-               /* delimiter for bitsearch: */
-               __set_bit(MAX_RT_PRIO, array->bitmap);
         }
  
         set_load_weight(&init_task);
@@ -7282,7 +7312,7 @@ void set_curr_task(int cpu, struct task_struct *p)
  #ifdef CONFIG_SMP
  /*
   * distribute shares of all task groups among their schedulable entities,
- * to reflect load distrbution across cpus.
+ * to reflect load distribution across cpus.
   */
  static int rebalance_shares(struct sched_domain *sd, int this_cpu)
  {
@@ -7349,7 +7379,7 @@ static int rebalance_shares(struct sched_domain *sd, int this_cpu)
   * sysctl_sched_max_bal_int_shares represents the maximum interval between
   * consecutive calls to rebalance_shares() in the same sched domain.
   *
- * These settings allows for the appropriate tradeoff between accuracy of
+ * These settings allow for the appropriate trade-off between accuracy of
   * fairness and the associated overhead.
   *
   */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 83fbbcb8019e733f97d6465229cae192549d85aa..fd10d965aa063f7e2bd1767dd234f1f5e89bd59b 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -45,6 +45,50 @@ static void update_rt_migration(struct rq *rq)
  }
  #endif /* CONFIG_SMP */
  
+static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq)
+{
+       u64 period, ratio;
+
+       if (sysctl_sched_rt_ratio == SCHED_RT_FRAC)
+               return 0;
+
+       if (rt_rq->rt_throttled)
+               return 1;
+
+       period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+       ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+       if (rt_rq->rt_time > ratio) {
+               rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time;
+               return 1;
+       }
+
+       return 0;
+}
+
+static void update_sched_rt_period(struct rq *rq)
+{
+       while (rq->clock > rq->rt_period_expire) {
+               u64 period, ratio;
+
+               period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+               ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+               rq->rt.rt_time -= min(rq->rt.rt_time, ratio);
+               rq->rt_period_expire += period;
+       }
+
+       /*
+        * Throttle expired: clear it; resched unless the ratio is still exceeded.
+        * (XXX: use hrtick when available)
+        */
+       if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) {
+               rq->rt.rt_throttled = 0;
+               if (!sched_rt_ratio_exceeded(rq, &rq->rt))
+                       resched_task(rq->curr);
+       }
+}
+
  /*
   * Update the current task's runtime statistics. Skip current tasks that
   * are not in our scheduling class.
@@ -66,6 +110,11 @@ static void update_curr_rt(struct rq *rq)
         curr->se.sum_exec_runtime += delta_exec;
         curr->se.exec_start = rq->clock;
         cpuacct_charge(curr, delta_exec);
+
+       rq->rt.rt_time += delta_exec;
+       update_sched_rt_period(rq);
+       if (sched_rt_ratio_exceeded(rq, &rq->rt))
+               resched_task(curr);
  }
  
  static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
@@ -208,8 +257,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
         struct rt_prio_array *array = &rq->rt.active;
         struct task_struct *next;
         struct list_head *queue;
+       struct rt_rq *rt_rq = &rq->rt;
         int idx;
  
+       if (sched_rt_ratio_exceeded(rq, rt_rq))
+               return NULL;
+
         idx = sched_find_first_bit(array->bitmap);
         if (idx >= MAX_RT_PRIO)
                 return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 96f31c1bc4f0ad3223b92d64a6bfd9a6f22c1eb0..3afbd25f43eb84da2278216e06004a6c560d6d10 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -306,7 +306,23 @@ static struct ctl_table kern_table[] = {
                 .procname       = "sched_nr_migrate",
                 .data           = &sysctl_sched_nr_migrate,
                 .maxlen         = sizeof(unsigned int),
-               .mode           = 644,
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_rt_period_ms",
+               .data           = &sysctl_sched_rt_period,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_rt_ratio",
+               .data           = &sysctl_sched_rt_ratio,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
  #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 25 Jan 2008 20:08:29 +0000 (21:08 +0100)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history