/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright    (C) 2001 Russell King
 *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *              (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>

#include "cpufreq_governor.h"

DEFINE_MUTEX(dbs_data_mutex);
EXPORT_SYMBOL_GPL(dbs_data_mutex);

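/*
 * Return the per-policy sysfs attribute group when the governor runs one
 * instance per policy, otherwise the system-wide group.
 */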
static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov)
{
        return have_governor_per_policy() ?
                gov->attr_group_gov_pol : gov->attr_group_gov_sys;
}

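/*
 * dbs_check_cpu - compute the load of every CPU sharing the policy since the
 * previous sample, take the maximum, and hand it to the governor-specific
 * gov_check_cpu() callback, which decides whether to change the frequency.
 */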
void dbs_check_cpu(struct cpufreq_policy *policy)
{
        int cpu = policy->cpu;
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        unsigned int sampling_rate;
        unsigned int max_load = 0;
        unsigned int ignore_nice;
        unsigned int j;

        if (gov->governor == GOV_ONDEMAND) {
                struct od_cpu_dbs_info_s *od_dbs_info =
                                gov->get_cpu_dbs_info_s(cpu);

                /*
                 * Sometimes, the ondemand governor uses an additional
                 * multiplier to give long delays. So apply this multiplier to
                 * the 'sampling_rate', so as to keep the wake-up-from-idle
                 * detection logic a bit conservative.
                 */
                sampling_rate = od_tuners->sampling_rate;
                sampling_rate *= od_dbs_info->rate_mult;

                ignore_nice = od_tuners->ignore_nice_load;
        } else {
                sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice_load;
        }

        /* Get Absolute Load */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load;
                int io_busy = 0;

                j_cdbs = gov->get_cpu_cdbs(j);

                /*
                 * For the purpose of ondemand, waiting for disk IO is
                 * an indication that you're performance critical, and
                 * not that the system is actually idle. So do not add
                 * the iowait time to the cpu idle time.
                 */
                if (gov->governor == GOV_ONDEMAND)
                        io_busy = od_tuners->io_is_busy;
                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

                wall_time = (unsigned int)
                        (cur_wall_time - j_cdbs->prev_cpu_wall);
                j_cdbs->prev_cpu_wall = cur_wall_time;

                if (cur_idle_time < j_cdbs->prev_cpu_idle)
                        cur_idle_time = j_cdbs->prev_cpu_idle;

                idle_time = (unsigned int)
                        (cur_idle_time - j_cdbs->prev_cpu_idle);
                j_cdbs->prev_cpu_idle = cur_idle_time;

                if (ignore_nice) {
                        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(j);
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                         cdbs->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies on 32-bit systems.
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        cdbs->prev_cpu_nice =
                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                /*
                 * If the CPU had gone completely idle, and a task just woke up
                 * on this CPU now, it would be unfair to calculate 'load' the
                 * usual way for this elapsed time-window, because it will show
                 * near-zero load, irrespective of how CPU intensive that task
                 * actually is. This is undesirable for latency-sensitive bursty
                 * workloads.
                 *
                 * To avoid this, we reuse the 'load' from the previous
                 * time-window and give this task a chance to start with a
                 * reasonably high CPU frequency. (However, we shouldn't over-do
                 * this copy, lest we get stuck at a high load (high frequency)
                 * for too long, even when the current system load has actually
                 * dropped down. So we perform the copy only once, upon the
                 * first wake-up from idle.)
                 *
                 * Detecting this situation is easy: the governor's utilization
                 * update handler would not have run during CPU-idle periods.
                 * Hence, an unusually large 'wall_time' (as compared to the
                 * sampling rate) indicates this scenario.
                 *
                 * prev_load can be zero in two cases and we must recalculate it
                 * for both cases:
                 * - during long idle intervals
                 * - explicitly set to zero
                 */
                if (unlikely(wall_time > (2 * sampling_rate) &&
                             j_cdbs->prev_load)) {
                        load = j_cdbs->prev_load;

                        /*
                         * Perform a destructive copy, to ensure that we copy
                         * the previous load only once, upon the first wake-up
                         * from idle.
                         */
                        j_cdbs->prev_load = 0;
                } else {
                        load = 100 * (wall_time - idle_time) / wall_time;
                        j_cdbs->prev_load = load;
                }

                if (load > max_load)
                        max_load = load;
        }

        gov->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

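/*
 * gov_set_update_util - program the sampling delay and register the
 * governor's utilization update callback on every CPU in the policy.
 */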
void gov_set_update_util(struct policy_dbs_info *policy_dbs,
                         unsigned int delay_us)
{
        struct cpufreq_policy *policy = policy_dbs->policy;
        struct dbs_governor *gov = dbs_governor_of(policy);
        int cpu;

        gov_update_sample_delay(policy_dbs, delay_us);
        policy_dbs->last_sample_time = 0;

        for_each_cpu(cpu, policy->cpus) {
                struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

                cpufreq_set_update_util_data(cpu, &cdbs->update_util);
        }
}
EXPORT_SYMBOL_GPL(gov_set_update_util);

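/*
 * gov_clear_update_util - detach the utilization update hooks from all CPUs
 * in the policy and wait (via synchronize_rcu()) for any handlers that may
 * still be running to complete.
 */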
static inline void gov_clear_update_util(struct cpufreq_policy *policy)
{
        int i;

        for_each_cpu(i, policy->cpus)
                cpufreq_set_update_util_data(i, NULL);

        synchronize_rcu();
}

static void gov_cancel_work(struct policy_dbs_info *policy_dbs)
{
        /* Tell dbs_update_util_handler() to skip queuing up work items. */
        atomic_inc(&policy_dbs->work_count);
        /*
         * If dbs_update_util_handler() is already running, it may not notice
         * the incremented work_count, so wait for it to complete to prevent its
         * work item from being queued up after the cancel_work_sync() below.
         */
        gov_clear_update_util(policy_dbs->policy);
        irq_work_sync(&policy_dbs->irq_work);
        cancel_work_sync(&policy_dbs->work);
        atomic_set(&policy_dbs->work_count, 0);
}

static void dbs_work_handler(struct work_struct *work)
{
        struct policy_dbs_info *policy_dbs;
        struct cpufreq_policy *policy;
        struct dbs_governor *gov;
        unsigned int delay;

        policy_dbs = container_of(work, struct policy_dbs_info, work);
        policy = policy_dbs->policy;
        gov = dbs_governor_of(policy);

        /*
         * Make sure cpufreq_governor_limits() isn't evaluating load or the
         * ondemand governor isn't updating the sampling rate in parallel.
         */
        mutex_lock(&policy_dbs->timer_mutex);
        delay = gov->gov_dbs_timer(policy);
        policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay);
        mutex_unlock(&policy_dbs->timer_mutex);

        /*
         * If the atomic operation below is reordered with respect to the
         * sample delay modification, the utilization update handler may end
         * up using a stale sample delay value.
         */
        smp_mb__before_atomic();
        atomic_dec(&policy_dbs->work_count);
}

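/*
 * dbs_irq_work - irq_work handler; it runs from interrupt context, so it only
 * schedules the work item above to do the actual evaluation in process
 * context.
 */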
static void dbs_irq_work(struct irq_work *irq_work)
{
        struct policy_dbs_info *policy_dbs;

        policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
        schedule_work(&policy_dbs->work);
}

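/* Queue the irq_work on the local CPU. */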
static inline void gov_queue_irq_work(struct policy_dbs_info *policy_dbs)
{
#ifdef CONFIG_SMP
        irq_work_queue_on(&policy_dbs->irq_work, smp_processor_id());
#else
        irq_work_queue(&policy_dbs->irq_work);
#endif
}

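/*
 * dbs_update_util_handler - callback invoked on every CPU utilization update.
 * It runs in a context where it cannot sleep, which is why the frequency
 * re-evaluation is deferred through the irq_work and workqueue above.
 */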
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
                                    unsigned long util, unsigned long max)
{
        struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
        struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;

        /*
         * The work may not be allowed to be queued up right now.
         * Possible reasons:
         * - Work has already been queued up or is in progress.
         * - The governor is being stopped.
         * - It is too early (too little time from the previous sample).
         */
        if (atomic_inc_return(&policy_dbs->work_count) == 1) {
                u64 delta_ns;

                delta_ns = time - policy_dbs->last_sample_time;
                if ((s64)delta_ns >= policy_dbs->sample_delay_ns) {
                        policy_dbs->last_sample_time = time;
                        gov_queue_irq_work(policy_dbs);
                        return;
                }
        }
        atomic_dec(&policy_dbs->work_count);
}

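/* Store the new sampling rate in the tuners of whichever governor is in use. */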
static void set_sampling_rate(struct dbs_data *dbs_data,
                              struct dbs_governor *gov,
                              unsigned int sampling_rate)
{
        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
                cs_tuners->sampling_rate = sampling_rate;
        } else {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;
                od_tuners->sampling_rate = sampling_rate;
        }
}

static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
                                                     struct dbs_governor *gov)
{
        struct policy_dbs_info *policy_dbs;
        int j;

        /* Allocate memory for the common information for policy->cpus */
        policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL);
        if (!policy_dbs)
                return NULL;

        mutex_init(&policy_dbs->timer_mutex);
        atomic_set(&policy_dbs->work_count, 0);
        init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
        INIT_WORK(&policy_dbs->work, dbs_work_handler);

        /* Set policy_dbs for all CPUs, online+offline */
        for_each_cpu(j, policy->related_cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);

                j_cdbs->policy_dbs = policy_dbs;
                j_cdbs->update_util.func = dbs_update_util_handler;
        }
        return policy_dbs;
}

static void free_policy_dbs_info(struct cpufreq_policy *policy,
                                 struct dbs_governor *gov)
{
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
        struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
        int j;

        mutex_destroy(&policy_dbs->timer_mutex);

        for_each_cpu(j, policy->related_cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);

                j_cdbs->policy_dbs = NULL;
                j_cdbs->update_util.func = NULL;
        }
        kfree(policy_dbs);
}

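/*
 * The governor event handlers below (INIT/EXIT/START/STOP/LIMITS) are only
 * called from cpufreq_governor_dbs(), always under dbs_data_mutex.
 */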
static int cpufreq_governor_init(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct dbs_data *dbs_data = gov->gdbs_data;
        struct policy_dbs_info *policy_dbs;
        unsigned int latency;
        int ret;

        /* State should be equivalent to EXIT */
        if (policy->governor_data)
                return -EBUSY;

        policy_dbs = alloc_policy_dbs_info(policy, gov);
        if (!policy_dbs)
                return -ENOMEM;

        if (dbs_data) {
                if (WARN_ON(have_governor_per_policy())) {
                        ret = -EINVAL;
                        goto free_policy_dbs_info;
                }
                dbs_data->usage_count++;
                policy_dbs->dbs_data = dbs_data;
                policy->governor_data = policy_dbs;
                return 0;
        }

        dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
        if (!dbs_data) {
                ret = -ENOMEM;
                goto free_policy_dbs_info;
        }

        dbs_data->usage_count = 1;

        ret = gov->init(dbs_data, !policy->governor->initialized);
        if (ret)
                goto free_policy_dbs_info;

        /* policy latency is in ns. Convert it to us first */
        latency = policy->cpuinfo.transition_latency / 1000;
        if (latency == 0)
                latency = 1;

        /* Bring kernel and HW constraints together */
        dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
                                          MIN_LATENCY_MULTIPLIER * latency);
        set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate,
                                        latency * LATENCY_MULTIPLIER));

        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;

        policy_dbs->dbs_data = dbs_data;
        policy->governor_data = policy_dbs;

        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(gov));
        if (ret)
                goto reset_gdbs_data;

        return 0;

reset_gdbs_data:
        policy->governor_data = NULL;

        if (!have_governor_per_policy())
                gov->gdbs_data = NULL;
        gov->exit(dbs_data, !policy->governor->initialized);
        kfree(dbs_data);

free_policy_dbs_info:
        free_policy_dbs_info(policy, gov);
        return ret;
}

static int cpufreq_governor_exit(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;

        /* State should be equivalent to INIT */
        if (policy_dbs->policy)
                return -EBUSY;

        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(gov));

                policy->governor_data = NULL;

                if (!have_governor_per_policy())
                        gov->gdbs_data = NULL;

                gov->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
        } else {
                policy->governor_data = NULL;
        }

        free_policy_dbs_info(policy, gov);
        return 0;
}

static int cpufreq_governor_start(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
        int io_busy = 0;

        if (!policy->cur)
                return -EINVAL;

        /* State should be equivalent to INIT */
        if (policy_dbs->policy)
                return -EBUSY;

        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;

                sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice_load;
        } else {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;

                sampling_rate = od_tuners->sampling_rate;
                ignore_nice = od_tuners->ignore_nice_load;
                io_busy = od_tuners->io_is_busy;
        }

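        /*
         * Seed each CPU's idle/wall counters and initial load estimate so
         * that the first sample taken after START has a valid baseline.
         */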
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);
                unsigned int prev_load;

                j_cdbs->prev_cpu_idle =
                        get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);

                prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
                                            j_cdbs->prev_cpu_idle);
                j_cdbs->prev_load = 100 * prev_load /
                                    (unsigned int)j_cdbs->prev_cpu_wall;

                if (ignore_nice)
                        j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
        }
        policy_dbs->policy = policy;

        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_cpu_dbs_info_s *cs_dbs_info =
                        gov->get_cpu_dbs_info_s(cpu);

                cs_dbs_info->down_skip = 0;
                cs_dbs_info->requested_freq = policy->cur;
        } else {
                struct od_ops *od_ops = gov->gov_ops;
                struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu);

                od_dbs_info->rate_mult = 1;
                od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
                od_ops->powersave_bias_init_cpu(cpu);
        }

        gov_set_update_util(policy_dbs, sampling_rate);
        return 0;
}

static int cpufreq_governor_stop(struct cpufreq_policy *policy)
{
        struct policy_dbs_info *policy_dbs = policy->governor_data;

        /* State should be equivalent to START */
        if (!policy_dbs->policy)
                return -EBUSY;

        gov_cancel_work(policy_dbs);
        policy_dbs->policy = NULL;

        return 0;
}

static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
        struct policy_dbs_info *policy_dbs = policy->governor_data;

        /* State should be equivalent to START */
        if (!policy_dbs->policy)
                return -EBUSY;

        mutex_lock(&policy_dbs->timer_mutex);
        if (policy->max < policy->cur)
                __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > policy->cur)
                __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
        dbs_check_cpu(policy);
        mutex_unlock(&policy_dbs->timer_mutex);

        return 0;
}

int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
        int ret = -EINVAL;

        /* Lock governor to block concurrent initialization of governor */
        mutex_lock(&dbs_data_mutex);

        if (event == CPUFREQ_GOV_POLICY_INIT) {
                ret = cpufreq_governor_init(policy);
        } else if (policy->governor_data) {
                switch (event) {
                case CPUFREQ_GOV_POLICY_EXIT:
                        ret = cpufreq_governor_exit(policy);
                        break;
                case CPUFREQ_GOV_START:
                        ret = cpufreq_governor_start(policy);
                        break;
                case CPUFREQ_GOV_STOP:
                        ret = cpufreq_governor_stop(policy);
                        break;
                case CPUFREQ_GOV_LIMITS:
                        ret = cpufreq_governor_limits(policy);
                        break;
                }
        }

        mutex_unlock(&dbs_data_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);