/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright	(C) 2001 Russell King
 *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *		(c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>

#include "cpufreq_governor.h"

DEFINE_MUTEX(dbs_data_mutex);
EXPORT_SYMBOL_GPL(dbs_data_mutex);

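/*
 * With per-policy governors each policy exposes its own set of tunables in
 * sysfs, so pick the per-policy attribute group; otherwise use the
 * system-wide one.
 */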
static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov)
{
	return have_governor_per_policy() ?
	       gov->attr_group_gov_pol : gov->attr_group_gov_sys;
}

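/*
 * Compute the load since the last sample for every CPU in the policy and
 * pass the maximum of those loads to the governor-specific frequency
 * selection logic (gov_check_cpu).
 */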
void dbs_check_cpu(struct cpufreq_policy *policy, int cpu)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct dbs_data *dbs_data = policy->governor_data;
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
	unsigned int sampling_rate;
	unsigned int max_load = 0;
	unsigned int ignore_nice;
	unsigned int j;

	if (gov->governor == GOV_ONDEMAND) {
		struct od_cpu_dbs_info_s *od_dbs_info =
				gov->get_cpu_dbs_info_s(cpu);

		/*
		 * Sometimes, the ondemand governor uses an additional
		 * multiplier to give long delays. So apply this multiplier to
		 * the 'sampling_rate', so as to keep the wake-up-from-idle
		 * detection logic a bit conservative.
		 */
		sampling_rate = od_tuners->sampling_rate;
		sampling_rate *= od_dbs_info->rate_mult;

		ignore_nice = od_tuners->ignore_nice_load;
	} else {
		sampling_rate = cs_tuners->sampling_rate;
		ignore_nice = cs_tuners->ignore_nice_load;
	}

	/* Get Absolute Load */
	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info *j_cdbs;
		u64 cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load;
		int io_busy = 0;

		j_cdbs = gov->get_cpu_cdbs(j);

		/*
		 * For the purpose of ondemand, waiting for disk IO is
		 * an indication that you're performance critical, and
		 * not that the system is actually idle. So do not add
		 * the iowait time to the cpu idle time.
		 */
		if (gov->governor == GOV_ONDEMAND)
			io_busy = od_tuners->io_is_busy;
		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

		wall_time = (unsigned int)
			(cur_wall_time - j_cdbs->prev_cpu_wall);
		j_cdbs->prev_cpu_wall = cur_wall_time;

		if (cur_idle_time < j_cdbs->prev_cpu_idle)
			cur_idle_time = j_cdbs->prev_cpu_idle;

		idle_time = (unsigned int)
			(cur_idle_time - j_cdbs->prev_cpu_idle);
		j_cdbs->prev_cpu_idle = cur_idle_time;

		if (ignore_nice) {
			u64 cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
				   j_cdbs->prev_cpu_nice;
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies on a 32-bit system.
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_cdbs->prev_cpu_nice =
				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		/*
		 * If the CPU had gone completely idle, and a task just woke up
		 * on this CPU now, it would be unfair to calculate 'load' the
		 * usual way for this elapsed time-window, because it will show
		 * near-zero load, irrespective of how CPU intensive that task
		 * actually is. This is undesirable for latency-sensitive bursty
		 * workloads.
		 *
		 * To avoid this, we reuse the 'load' from the previous
		 * time-window and give this task a chance to start with a
		 * reasonably high CPU frequency. (However, we shouldn't over-do
		 * this copy, lest we get stuck at a high load (high frequency)
		 * for too long, even when the current system load has actually
		 * dropped down. So we perform the copy only once, upon the
		 * first wake-up from idle.)
		 *
		 * Detecting this situation is easy: the governor's utilization
		 * update handler would not have run during CPU-idle periods.
		 * Hence, an unusually large 'wall_time' (as compared to the
		 * sampling rate) indicates this scenario.
		 *
		 * prev_load can be zero in two cases and we must recalculate it
		 * for both cases:
		 * - during long idle intervals
		 * - explicitly set to zero
		 */
		if (unlikely(wall_time > (2 * sampling_rate) &&
			     j_cdbs->prev_load)) {
			load = j_cdbs->prev_load;

			/*
			 * Perform a destructive copy, to ensure that we copy
			 * the previous load only once, upon the first wake-up
			 * from idle.
			 */
			j_cdbs->prev_load = 0;
		} else {
			load = 100 * (wall_time - idle_time) / wall_time;
			j_cdbs->prev_load = load;
		}

		if (load > max_load)
			max_load = load;
	}

	gov->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

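/*
 * Set the sample delay and hook the governor's utilization update handler
 * into the scheduler for every CPU in the policy, which (re)starts load
 * sampling for the policy.
 */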
void gov_set_update_util(struct cpu_common_dbs_info *shared,
			 unsigned int delay_us)
{
	struct cpufreq_policy *policy = shared->policy;
	struct dbs_governor *gov = dbs_governor_of(policy);
	int cpu;

	gov_update_sample_delay(shared, delay_us);
	shared->last_sample_time = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

		cpufreq_set_update_util_data(cpu, &cdbs->update_util);
	}
}
EXPORT_SYMBOL_GPL(gov_set_update_util);

static inline void gov_clear_update_util(struct cpufreq_policy *policy)
{
	int i;

	for_each_cpu(i, policy->cpus)
		cpufreq_set_update_util_data(i, NULL);

	synchronize_rcu();
}

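/*
 * Stop sampling for the policy: unhook the utilization update handlers and
 * make sure that no work item queued by them is still pending or running.
 */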
static void gov_cancel_work(struct cpu_common_dbs_info *shared)
{
	/* Tell dbs_update_util_handler() to skip queuing up work items. */
	atomic_inc(&shared->skip_work);
	/*
	 * If dbs_update_util_handler() is already running, it may not notice
	 * the incremented skip_work, so wait for it to complete to prevent its
	 * work item from being queued up after the cancel_work_sync() below.
	 */
	gov_clear_update_util(shared->policy);
	irq_work_sync(&shared->irq_work);
	cancel_work_sync(&shared->work);
	atomic_set(&shared->skip_work, 0);
}

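/*
 * Process one governor sample in process context: evaluate the load, let the
 * governor pick a frequency, and store the delay until the next sample.
 */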
static void dbs_work_handler(struct work_struct *work)
{
	struct cpu_common_dbs_info *shared = container_of(work,
				struct cpu_common_dbs_info, work);
	struct cpufreq_policy *policy;
	struct dbs_governor *gov;
	unsigned int delay;

	policy = shared->policy;
	gov = dbs_governor_of(policy);

	/*
	 * Make sure cpufreq_governor_limits() isn't evaluating load or the
	 * ondemand governor isn't updating the sampling rate in parallel.
	 */
	mutex_lock(&shared->timer_mutex);
	delay = gov->gov_dbs_timer(policy);
	shared->sample_delay_ns = jiffies_to_nsecs(delay);
	mutex_unlock(&shared->timer_mutex);

	/*
	 * If the atomic operation below is reordered with respect to the
	 * sample delay modification, the utilization update handler may end
	 * up using a stale sample delay value.
	 */
	smp_mb__before_atomic();
	atomic_dec(&shared->skip_work);
}

static void dbs_irq_work(struct irq_work *irq_work)
{
	struct cpu_common_dbs_info *shared;

	shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work);
	schedule_work(&shared->work);
}

static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared)
{
#ifdef CONFIG_SMP
	irq_work_queue_on(&shared->irq_work, smp_processor_id());
#else
	irq_work_queue(&shared->irq_work);
#endif
}

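/*
 * Scheduler callback invoked on utilization updates.  skip_work serves both
 * as a "work already queued or governor stopping" flag and as a guard
 * against concurrent sampling; the work item is bounced through irq_work
 * because this callback runs in scheduler paths where work items cannot be
 * queued directly.
 */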
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
				    unsigned long util, unsigned long max)
{
	struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info,
						 update_util);
	struct cpu_common_dbs_info *shared = cdbs->shared;

	/*
	 * The work may not be allowed to be queued up right now.
	 * Possible reasons:
	 * - Work has already been queued up or is in progress.
	 * - The governor is being stopped.
	 * - It is too early (too little time from the previous sample).
	 */
	if (atomic_inc_return(&shared->skip_work) == 1) {
		u64 delta_ns;

		delta_ns = time - shared->last_sample_time;
		if ((s64)delta_ns >= shared->sample_delay_ns) {
			shared->last_sample_time = time;
			gov_queue_irq_work(shared);
			return;
		}
	}
	atomic_dec(&shared->skip_work);
}

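/*
 * Store the sampling rate in the governor-specific tuners structure
 * (conservative and ondemand keep it in different types).
 */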
static void set_sampling_rate(struct dbs_data *dbs_data,
			      struct dbs_governor *gov,
			      unsigned int sampling_rate)
{
	if (gov->governor == GOV_CONSERVATIVE) {
		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;

		cs_tuners->sampling_rate = sampling_rate;
	} else {
		struct od_dbs_tuners *od_tuners = dbs_data->tuners;

		od_tuners->sampling_rate = sampling_rate;
	}
}

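/*
 * Allocate the dbs info shared by all CPUs of a policy and initialize the
 * synchronization and work machinery embedded in it.
 */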
static int alloc_common_dbs_info(struct cpufreq_policy *policy,
				 struct dbs_governor *gov)
{
	struct cpu_common_dbs_info *shared;
	int j;

	/* Allocate memory for the common information for policy->cpus */
	shared = kzalloc(sizeof(*shared), GFP_KERNEL);
	if (!shared)
		return -ENOMEM;

	/* Set shared for all CPUs, online+offline */
	for_each_cpu(j, policy->related_cpus)
		gov->get_cpu_cdbs(j)->shared = shared;

	mutex_init(&shared->timer_mutex);
	atomic_set(&shared->skip_work, 0);
	init_irq_work(&shared->irq_work, dbs_irq_work);
	INIT_WORK(&shared->work, dbs_work_handler);
	return 0;
}

static void free_common_dbs_info(struct cpufreq_policy *policy,
				 struct dbs_governor *gov)
{
	struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
	struct cpu_common_dbs_info *shared = cdbs->shared;
	int j;

	mutex_destroy(&shared->timer_mutex);

	for_each_cpu(j, policy->cpus)
		gov->get_cpu_cdbs(j)->shared = NULL;

	kfree(shared);
}

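/*
 * POLICY_INIT: allocate (or reuse, when tunables are system-wide) the
 * governor's dbs_data, derive the sampling rate from the hardware transition
 * latency and create the sysfs group for the tunables.
 */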
static int cpufreq_governor_init(struct cpufreq_policy *policy)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct dbs_data *dbs_data = gov->gdbs_data;
	unsigned int latency;
	int ret;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	if (dbs_data) {
		if (WARN_ON(have_governor_per_policy()))
			return -EINVAL;

		ret = alloc_common_dbs_info(policy, gov);
		if (ret)
			return ret;

		dbs_data->usage_count++;
		policy->governor_data = dbs_data;
		return 0;
	}

	dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
	if (!dbs_data)
		return -ENOMEM;

	ret = alloc_common_dbs_info(policy, gov);
	if (ret)
		goto free_dbs_data;

	dbs_data->usage_count = 1;

	ret = gov->init(dbs_data, !policy->governor->initialized);
	if (ret)
		goto free_common_dbs_info;

	/* policy latency is in ns. Convert it to us first */
	latency = policy->cpuinfo.transition_latency / 1000;
	if (latency == 0)
		latency = 1;

	/* Bring kernel and HW constraints together */
	dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
					  MIN_LATENCY_MULTIPLIER * latency);
	set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate,
					     latency * LATENCY_MULTIPLIER));

	if (!have_governor_per_policy())
		gov->gdbs_data = dbs_data;

	policy->governor_data = dbs_data;

	ret = sysfs_create_group(get_governor_parent_kobj(policy),
				 get_sysfs_attr(gov));
	if (ret)
		goto reset_gdbs_data;

	return 0;

reset_gdbs_data:
	policy->governor_data = NULL;

	if (!have_governor_per_policy())
		gov->gdbs_data = NULL;
	gov->exit(dbs_data, !policy->governor->initialized);
free_common_dbs_info:
	free_common_dbs_info(policy, gov);
free_dbs_data:
	kfree(dbs_data);
	return ret;
}

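/*
 * POLICY_EXIT: drop one reference to the dbs_data and, when the last policy
 * using it goes away, remove the sysfs group and free it.
 */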
static int cpufreq_governor_exit(struct cpufreq_policy *policy)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct dbs_data *dbs_data = policy->governor_data;
	struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);

	/* State should be equivalent to INIT */
	if (!cdbs->shared || cdbs->shared->policy)
		return -EBUSY;

	if (!--dbs_data->usage_count) {
		sysfs_remove_group(get_governor_parent_kobj(policy),
				   get_sysfs_attr(gov));

		policy->governor_data = NULL;

		if (!have_governor_per_policy())
			gov->gdbs_data = NULL;

		gov->exit(dbs_data, policy->governor->initialized == 1);
		kfree(dbs_data);
	} else {
		policy->governor_data = NULL;
	}

	free_common_dbs_info(policy, gov);
	return 0;
}

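/*
 * GOV_START: snapshot the initial idle/wall/nice statistics for every CPU in
 * the policy, do the governor-specific setup and start load sampling.
 */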
static int cpufreq_governor_start(struct cpufreq_policy *policy)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct dbs_data *dbs_data = policy->governor_data;
	unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
	struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);
	struct cpu_common_dbs_info *shared = cdbs->shared;
	int io_busy = 0;

	if (!policy->cur)
		return -EINVAL;

	/* State should be equivalent to INIT */
	if (!shared || shared->policy)
		return -EBUSY;

	if (gov->governor == GOV_CONSERVATIVE) {
		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;

		sampling_rate = cs_tuners->sampling_rate;
		ignore_nice = cs_tuners->ignore_nice_load;
	} else {
		struct od_dbs_tuners *od_tuners = dbs_data->tuners;

		sampling_rate = od_tuners->sampling_rate;
		ignore_nice = od_tuners->ignore_nice_load;
		io_busy = od_tuners->io_is_busy;
	}

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);
		unsigned int prev_load;

		j_cdbs->prev_cpu_idle =
			get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);

		prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
					   j_cdbs->prev_cpu_idle);
		j_cdbs->prev_load = 100 * prev_load /
				    (unsigned int)j_cdbs->prev_cpu_wall;

		if (ignore_nice)
			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];

		j_cdbs->update_util.func = dbs_update_util_handler;
	}
	shared->policy = policy;

	if (gov->governor == GOV_CONSERVATIVE) {
		struct cs_cpu_dbs_info_s *cs_dbs_info =
			gov->get_cpu_dbs_info_s(cpu);

		cs_dbs_info->down_skip = 0;
		cs_dbs_info->requested_freq = policy->cur;
	} else {
		struct od_ops *od_ops = gov->gov_ops;
		struct od_cpu_dbs_info_s *od_dbs_info =
			gov->get_cpu_dbs_info_s(cpu);

		od_dbs_info->rate_mult = 1;
		od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
		od_ops->powersave_bias_init_cpu(cpu);
	}

	gov_set_update_util(shared, sampling_rate);
	return 0;
}

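/*
 * GOV_STOP: stop load sampling and detach the policy from the shared dbs
 * info, so the governor can be started again or exited.
 */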
static int cpufreq_governor_stop(struct cpufreq_policy *policy)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
	struct cpu_common_dbs_info *shared = cdbs->shared;

	/* State should be equivalent to START */
	if (!shared || !shared->policy)
		return -EBUSY;

	gov_cancel_work(shared);
	shared->policy = NULL;

	return 0;
}

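/*
 * GOV_LIMITS: clamp the current frequency into the new [min, max] range and
 * re-evaluate the load, under the timer mutex to keep out dbs_work_handler().
 */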
static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
	struct dbs_governor *gov = dbs_governor_of(policy);
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

	/* State should be equivalent to START */
	if (!cdbs->shared || !cdbs->shared->policy)
		return -EBUSY;

	mutex_lock(&cdbs->shared->timer_mutex);
	if (policy->max < cdbs->shared->policy->cur)
		__cpufreq_driver_target(cdbs->shared->policy, policy->max,
					CPUFREQ_RELATION_H);
	else if (policy->min > cdbs->shared->policy->cur)
		__cpufreq_driver_target(cdbs->shared->policy, policy->min,
					CPUFREQ_RELATION_L);
	dbs_check_cpu(policy, cpu);
	mutex_unlock(&cdbs->shared->timer_mutex);

	return 0;
}

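/*
 * Common governor entry point: dispatch cpufreq core events to the handlers
 * above under dbs_data_mutex, which serializes governor state changes.
 */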
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
	int ret = -EINVAL;

	/* Lock governor to block concurrent initialization of governor */
	mutex_lock(&dbs_data_mutex);

	if (event == CPUFREQ_GOV_POLICY_INIT) {
		ret = cpufreq_governor_init(policy);
	} else if (policy->governor_data) {
		switch (event) {
		case CPUFREQ_GOV_POLICY_EXIT:
			ret = cpufreq_governor_exit(policy);
			break;
		case CPUFREQ_GOV_START:
			ret = cpufreq_governor_start(policy);
			break;
		case CPUFREQ_GOV_STOP:
			ret = cpufreq_governor_stop(policy);
			break;
		case CPUFREQ_GOV_LIMITS:
			ret = cpufreq_governor_limits(policy);
			break;
		}
	}

	mutex_unlock(&dbs_data_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);