__sched_fork(clone_flags, p);
/*
- * We mark the process as running here. This guarantees that
+ * We mark the process as NEW here. This guarantees that
* nobody will actually run it, and a signal or other external
* event cannot wake it up and insert it on the runqueue either.
*/
- p->state = TASK_RUNNING;
+ p->state = TASK_NEW;
/*
* Make sure we do not leak PI boosting priority to the child.
p->sched_class = &fair_sched_class;
}
+ init_entity_runnable_average(&p->se);
+
/*
* The child is not yet in the pid-hash so no cgroup attach races,
* and the cgroup is pinned to this child due to cgroup_fork()
struct rq_flags rf;
struct rq *rq;
- /* Initialize new task's runnable average */
- init_entity_runnable_average(&p->se);
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+ p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
{
struct task_struct *task;
struct cgroup_subsys_state *css;
+ int ret = 0;
cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
if (task->sched_class != &fair_sched_class)
return -EINVAL;
#endif
+ /*
+ * Serialize against wake_up_new_task() such that if its
+ * running, we're sure to observe its full state.
+ */
+ raw_spin_lock_irq(&task->pi_lock);
+ /*
+ * Avoid calling sched_move_task() before wake_up_new_task()
+ * has happened. This would lead to problems with PELT, due to
+ * move wanting to detach+attach while we're not attached yet.
+ */
+ if (task->state == TASK_NEW)
+ ret = -EINVAL;
+ raw_spin_unlock_irq(&task->pi_lock);
+
+ if (ret)
+ break;
}
- return 0;
+ return ret;
}
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
+static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
+static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
/*
* With new tasks being created, their initial util_avgs are extrapolated
* based on the cfs_rq's current util_avg:
struct cfs_rq *cfs_rq = cfs_rq_of(se);
struct sched_avg *sa = &se->avg;
long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+ u64 now = cfs_rq_clock_task(cfs_rq);
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
}
sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
}
+
+ if (entity_is_task(se)) {
+ struct task_struct *p = task_of(se);
+ if (p->sched_class != &fair_sched_class) {
+ /*
+ * For !fair tasks do:
+ *
+ update_cfs_rq_load_avg(now, cfs_rq, false);
+ attach_entity_load_avg(cfs_rq, se);
+ switched_from_fair(rq, p);
+ *
+ * such that the next switched_to_fair() has the
+ * expected state.
+ */
+ se->avg.last_update_time = now;
+ return;
+ }
+ }
+
+ update_cfs_rq_load_avg(now, cfs_rq, false);
+ attach_entity_load_avg(cfs_rq, se);
}
-#else
+#else /* !CONFIG_SMP */
void init_entity_runnable_average(struct sched_entity *se)
{
}
void post_init_entity_util_avg(struct sched_entity *se)
{
}
-#endif
+#endif /* CONFIG_SMP */
/*
* Update the current task's runtime statistics.
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
#endif /* CONFIG_FAIR_GROUP_SCHED */
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
-
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
/*
* If we got migrated (either between CPUs or between cgroups) we'll
* have aged the average right before clearing @last_update_time.
+ *
+ * Or we're fresh through post_init_entity_util_avg().
*/
if (se->avg.last_update_time) {
__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
u64 last_update_time;
/*
- * Newly created task or never used group entity should not be removed
- * from its (source) cfs_rq
+ * tasks cannot exit without having gone through wake_up_new_task() ->
+ * post_init_entity_util_avg() which will have added things to the
+ * cfs_rq, so we can remove unconditionally.
+ *
+ * Similarly for groups, they will have passed through
+ * post_init_entity_util_avg() before unregister_sched_fair_group()
+ * calls this.
*/
- if (se->avg.last_update_time == 0)
- return;
last_update_time = cfs_rq_last_update_time(cfs_rq);