From 72d1410db9681024c583e44ee2e581370387cf9f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 29 Jan 2016 23:53:50 +0100
Subject: [PATCH] cpufreq: Add a mechanism for registering utilization update
 callbacks

Introduce a mechanism by which parts of the cpufreq subsystem
("setpolicy" drivers or the core) can register callbacks to be
executed from cpufreq_update_util() which is invoked by the
scheduler's update_load_avg() on CPU utilization changes.

This allows the "setpolicy" drivers to dispense with their timers and
do all of the computations they need and frequency/voltage adjustments
in the update_load_avg() code path, among other things.

The scheduler changes were suggested by Peter Zijlstra.

Signed-off-by: Rafael J. Wysocki
Acked-by: Viresh Kumar
Tested-by: Gautham R. Shenoy
---
 drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++++++++++++++++++++++++
 include/linux/cpufreq.h   |  7 +++++++
 include/linux/sched.h     |  2 ++
 kernel/sched/fair.c       | 29 +++++++++++++++++++++++++-
 4 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 34b17447e0d1..29755fcbf200 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
+
+static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
+
+/**
+ * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * @cpu: The CPU to set the pointer for.
+ * @data: New pointer value.
+ *
+ * Set and publish the update_util_data pointer for the given CPU. That pointer
+ * points to a struct update_util_data object containing a callback function
+ * to call from cpufreq_update_util(). That function will be called from an RCU
+ * read-side critical section, so it must not sleep.
+ *
+ * Callers must use RCU callbacks to free any memory that might be accessed
+ * via the old update_util_data pointer or invoke synchronize_rcu() right after
+ * this function to avoid use-after-free.
+ */
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+{
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+
+/**
+ * cpufreq_update_util - Take a note about CPU utilization changes.
+ * @time: Current time.
+ * @util: Current utilization.
+ * @max: Utilization ceiling.
+ *
+ * This function is called by the scheduler on every invocation of
+ * update_load_avg() on the CPU whose utilization is being updated.
+ */
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
+{
+	struct update_util_data *data;
+
+	rcu_read_lock();
+
+	data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data));
+	if (data && data->func)
+		data->func(data, time, util, max);
+
+	rcu_read_unlock();
+}
+
 DEFINE_MUTEX(cpufreq_governor_lock);
 
 /* Flag to suspend/resume CPUFreq governors */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d0bf555b6bbf..11aa93134493 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -322,6 +322,13 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 const char *cpufreq_get_current_driver(void);
 void *cpufreq_get_driver_data(void);
 
+struct update_util_data {
+	void (*func)(struct update_util_data *data,
+		     u64 time, unsigned long util, unsigned long max);
+};
+
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+
 static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
 		unsigned int min, unsigned int max)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a94cc3..c3e4b3e5b508 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3207,4 +3207,6 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
+
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 56b7d4b83947..db10fcf4d11e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2819,12 +2819,17 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 	return decayed || removed;
 }
 
+__weak void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
+{
+}
+
 /* Update task and its cfs_rq load average */
 static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int cpu = cpu_of(rq_of(cfs_rq));
+	struct rq *rq = rq_of(cfs_rq);
+	int cpu = cpu_of(rq);
 
 	/*
 	 * Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2841,28 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 
 	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
 		update_tg_load_avg(cfs_rq, 0);
+
+	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+		unsigned long max = rq->cpu_capacity_orig;
+
+		/*
+		 * There are a few boundary cases this might miss but it should
+		 * get called often enough that that should (hopefully) not be
+		 * a real problem -- added to that it only calls on the local
+		 * CPU, so if we enqueue remotely we'll lose an update, but
+		 * the next tick/schedule should update.
+		 *
+		 * It will not get called when we go idle, because the idle
+		 * thread is a different class (!fair), nor will the utilization
+		 * number include things like RT tasks.
+		 *
+		 * As is, the util number is not freq invariant (we'd have to
+		 * implement arch_scale_freq_capacity() for that).
+		 *
+		 * See cpu_util().
+		 */
+		cpufreq_update_util(now, min(cfs_rq->avg.util_avg, max), max);
+	}
 }
 
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
-- 
2.39.5
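
Editor's note: for reference, below is a minimal sketch of how a "setpolicy"
driver or governor might plug into this mechanism. It is illustrative only and
not part of the patch; my_gov_data, my_gov_update(), my_gov_start() and
my_gov_stop() are hypothetical names. It follows the rules from the kerneldoc
above: the callback runs in an RCU read-side critical section and must not
sleep, and teardown clears the per-CPU pointer and synchronizes RCU before the
callback's data may be freed.

/* Hypothetical governor state embedding the update_util_data hook. */
struct my_gov_data {
	struct update_util_data update_util;
	/* governor-private fields would go here */
};

static DEFINE_PER_CPU(struct my_gov_data, my_gov_data);

/* Invoked from update_load_avg() via cpufreq_update_util(); must not sleep. */
static void my_gov_update(struct update_util_data *data, u64 time,
			  unsigned long util, unsigned long max)
{
	struct my_gov_data *g = container_of(data, struct my_gov_data,
					     update_util);

	/* Evaluate util/max here and kick off a frequency change for this CPU. */
}

static void my_gov_start(int cpu)
{
	struct my_gov_data *g = &per_cpu(my_gov_data, cpu);

	g->update_util.func = my_gov_update;
	cpufreq_set_update_util_data(cpu, &g->update_util);
}

static void my_gov_stop(int cpu)
{
	/* Unpublish the pointer, then wait for in-flight callbacks to finish. */
	cpufreq_set_update_util_data(cpu, NULL);
	synchronize_rcu();
}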