sched: Return unused runtime on group dequeue

author Paul Turner <pjt@google.com>

Thu, 21 Jul 2011 16:43:41 +0000 (09:43 -0700)

committer Ingo Molnar <mingo@elte.hu>

Sun, 14 Aug 2011 10:03:54 +0000 (12:03 +0200)
author Paul Turner <pjt@google.com>
Thu, 21 Jul 2011 16:43:41 +0000 (09:43 -0700)
committer Ingo Molnar <mingo@elte.hu>
Sun, 14 Aug 2011 10:03:54 +0000 (12:03 +0200)
diff --git a/kernel/sched.c b/kernel/sched.c

index 35c91859f8a666d53c7465e24453a34389ea4cb5..6baade0d76491341481141bce4dd3a60654851ca 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -259,7 +259,7 @@ struct cfs_bandwidth {
         u64 runtime_expires;
  
         int idle, timer_active;
-       struct hrtimer period_timer;
+       struct hrtimer period_timer, slack_timer;
         struct list_head throttled_cfs_rq;
  
         /* statistics */
@@ -421,6 +421,16 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
  
  static inline u64 default_cfs_period(void);
  static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
+
+/*
+ * hrtimer callback for cfs_bandwidth::slack_timer: hands the owning
+ * cfs_bandwidth to do_sched_cfs_slack_timer().
+ */
+static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+{
+       struct cfs_bandwidth *cfs_b;
+
+       /* the timer is embedded in the bandwidth structure; recover its owner */
+       cfs_b = container_of(timer, struct cfs_bandwidth, slack_timer);
+       do_sched_cfs_slack_timer(cfs_b);
+
+       /* one-shot: this callback never re-arms itself */
+       return HRTIMER_NORESTART;
+}
  
  static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
  {
@@ -453,6 +463,8 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
         INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
         hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         cfs_b->period_timer.function = sched_cfs_period_timer;
+       hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       cfs_b->slack_timer.function = sched_cfs_slack_timer;
  }
  
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -488,6 +500,7 @@ static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  {
         hrtimer_cancel(&cfs_b->period_timer);
+       hrtimer_cancel(&cfs_b->slack_timer);
  }
  #else
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index d201f28c1de701b7d0c4651851a83aa4f8bad3b1..1ca2cd44d64afed367d643251d9539f5a28b3e0b 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1052,6 +1052,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 __clear_buddies_skip(se);
  }
  
+static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
@@ -1090,6 +1092,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         if (!(flags & DEQUEUE_SLEEP))
                 se->vruntime -= cfs_rq->min_vruntime;
  
+       /* return excess runtime on last dequeue */
+       return_cfs_rq_runtime(cfs_rq);
+
         update_min_vruntime(cfs_rq);
         update_cfs_shares(cfs_rq);
  }
@@ -1674,6 +1679,108 @@ out_unlock:
         return idle;
  }
  
+/* a cfs_rq won't donate quota below this amount (nanoseconds) */
+static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC;
+/* minimum remaining period time to redistribute slack quota (nanoseconds) */
+static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC;
+/*
+ * how long we wait to gather additional slack before distributing
+ * (nanoseconds); used as the slack timer's delay
+ */
+static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
+
+/*
+ * Are we near the end of the current quota period?
+ *
+ * Returns 1 if the period timer's callback is currently running, or if the
+ * period timer is due to expire in less than @min_expire nanoseconds
+ * (including the case where its expiry already lies in the past);
+ * returns 0 otherwise.
+ */
+static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
+{
+       struct hrtimer *refresh_timer = &cfs_b->period_timer;
+       s64 remaining;
+
+       /* if the call-back is running a quota refresh is already occurring */
+       if (hrtimer_callback_running(refresh_timer))
+               return 1;
+
+       /*
+        * Is a quota refresh about to occur?  The time remaining is a signed
+        * quantity that goes negative once the expiry has passed, so it must
+        * be compared as s64: stored in a u64 it would wrap to a huge value
+        * and an overdue refresh would be reported as "not near".
+        */
+       remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer));
+       if (remaining < (s64)min_expire)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Arm the slack timer to fire after cfs_bandwidth_slack_period, unless a
+ * quota refresh is due within the slack period plus
+ * min_bandwidth_expiration, in which case nothing is started.
+ */
+static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+       u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration;
+
+       /* only worth gathering slack when no quota refresh is imminent */
+       if (!runtime_refresh_within(cfs_b, min_left))
+               start_bandwidth_timer(&cfs_b->slack_timer,
+                               ns_to_ktime(cfs_bandwidth_slack_period));
+}
+
+/*
+ * Hand this cfs_rq's local runtime in excess of min_cfs_rq_runtime back to
+ * its task group's bandwidth pool.
+ *
+ * we know any runtime found here is valid as update_curr() precedes return
+ */
+static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+       struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+       /* amount above the floor that this cfs_rq always keeps for itself */
+       s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime;
+
+       /* nothing above the floor: nothing to give back */
+       if (slack_runtime <= 0)
+               return;
+
+       raw_spin_lock(&cfs_b->lock);
+       /*
+        * Only credit the pool when bandwidth is actually limited and the
+        * local runtime carries the same expiry as the pool's runtime.
+        */
+       if (cfs_b->quota != RUNTIME_INF &&
+           cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+               cfs_b->runtime += slack_runtime;
+
+               /* we are under rq->lock, defer unthrottling using a timer */
+               if (cfs_b->runtime > sched_cfs_bandwidth_slice() &&
+                   !list_empty(&cfs_b->throttled_cfs_rq))
+                       start_cfs_slack_bandwidth(cfs_b);
+       }
+       raw_spin_unlock(&cfs_b->lock);
+
+       /* even if it's not valid for return we don't want to try again */
+       cfs_rq->runtime_remaining -= slack_runtime;
+}
+
+/*
+ * Return a cfs_rq's unused runtime to its group's bandwidth pool once the
+ * cfs_rq has no runnable entities left (i.e. on the last dequeue).
+ *
+ * Does nothing when bandwidth control is not enabled for this cfs_rq, or
+ * while it still has runnable entities that may consume the local runtime.
+ */
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+       /* keep the runtime while anything is still queued to use it */
+       if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
+               return;
+
+       __return_cfs_rq_runtime(cfs_rq);
+}
+
+/*
+ * Slack timer work: if the pool holds more than one bandwidth slice of
+ * runtime, take it all and hand it to distribute_cfs_runtime().
+ *
+ * This is done with a timer (instead of inline with bandwidth return) since
+ * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs.
+ */
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
+{
+       u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
+       u64 expires;
+
+       /* confirm we're still not at a refresh boundary */
+       if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
+               return;
+
+       raw_spin_lock(&cfs_b->lock);
+       /* claim the whole pool, but only if it exceeds a single slice */
+       if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
+               runtime = cfs_b->runtime;
+               cfs_b->runtime = 0;
+       }
+       /* remember which period this runtime belongs to */
+       expires = cfs_b->runtime_expires;
+       raw_spin_unlock(&cfs_b->lock);
+
+       /* nothing was claimed above */
+       if (!runtime)
+               return;
+
+       /*
+        * Called without cfs_b->lock held.
+        * NOTE(review): the return value is presumably the runtime left
+        * undistributed -- confirm against distribute_cfs_runtime().
+        */
+       runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+
+       raw_spin_lock(&cfs_b->lock);
+       /* only store the result back if the pool's expiry is unchanged */
+       if (expires == cfs_b->runtime_expires)
+               cfs_b->runtime = runtime;
+       raw_spin_unlock(&cfs_b->lock);
+}
+
  /*
   * When a group wakes up we want to make sure that its quota is not already
   * expired/exceeded, otherwise it may be allowed to steal additional ticks of
@@ -1715,6 +1822,7 @@ static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
                                      unsigned long delta_exec) {}
  static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
+static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
  
  static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
  {
author	Paul Turner <pjt@google.com>
	Thu, 21 Jul 2011 16:43:41 +0000 (09:43 -0700)
committer	Ingo Molnar <mingo@elte.hu>
	Sun, 14 Aug 2011 10:03:54 +0000 (12:03 +0200)
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history