/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright    (C) 2001 Russell King
 *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *              (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>

#include "cpufreq_governor.h"

DEFINE_MUTEX(dbs_data_mutex);
EXPORT_SYMBOL_GPL(dbs_data_mutex);

static struct attribute_group *get_sysfs_attr(struct dbs_governor *gov)
{
        return have_governor_per_policy() ?
                gov->attr_group_gov_pol : gov->attr_group_gov_sys;
}

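/*
 * dbs_check_cpu - evaluate the load on all CPUs of a policy.
 *
 * Compute the busy percentage of every CPU in the policy over the window
 * elapsed since the previous sample and pass the maximum of those values to
 * the governor's ->gov_check_cpu() callback, which selects the next frequency.
 */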
void dbs_check_cpu(struct cpufreq_policy *policy, int cpu)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);
        struct dbs_data *dbs_data = policy->governor_data;
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        unsigned int sampling_rate;
        unsigned int max_load = 0;
        unsigned int ignore_nice;
        unsigned int j;

        if (gov->governor == GOV_ONDEMAND) {
                struct od_cpu_dbs_info_s *od_dbs_info =
                                gov->get_cpu_dbs_info_s(cpu);

                /*
                 * Sometimes, the ondemand governor uses an additional
                 * multiplier to give long delays. So apply this multiplier to
                 * the 'sampling_rate', so as to keep the wake-up-from-idle
                 * detection logic a bit conservative.
                 */
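                /*
                 * For example, a base sampling_rate of 10000 us with a
                 * rate_mult of 4 yields an effective 40000 us window below.
                 */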
                sampling_rate = od_tuners->sampling_rate;
                sampling_rate *= od_dbs_info->rate_mult;

                ignore_nice = od_tuners->ignore_nice_load;
        } else {
                sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice_load;
        }

        /* Get Absolute Load */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load;
                int io_busy = 0;

                j_cdbs = gov->get_cpu_cdbs(j);

                /*
                 * For the purpose of ondemand, waiting for disk IO is
                 * an indication that you're performance critical, and
                 * not that the system is actually idle. So do not add
                 * the iowait time to the cpu idle time.
                 */
                if (gov->governor == GOV_ONDEMAND)
                        io_busy = od_tuners->io_is_busy;
                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

                wall_time = (unsigned int)
                        (cur_wall_time - j_cdbs->prev_cpu_wall);
                j_cdbs->prev_cpu_wall = cur_wall_time;

                if (cur_idle_time < j_cdbs->prev_cpu_idle)
                        cur_idle_time = j_cdbs->prev_cpu_idle;

                idle_time = (unsigned int)
                        (cur_idle_time - j_cdbs->prev_cpu_idle);
                j_cdbs->prev_cpu_idle = cur_idle_time;

                if (ignore_nice) {
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                         cdbs->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies on 32-bit systems.
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        cdbs->prev_cpu_nice =
                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                /*
                 * If the CPU had gone completely idle, and a task just woke up
                 * on this CPU now, it would be unfair to calculate 'load' the
                 * usual way for this elapsed time-window, because it will show
                 * near-zero load, irrespective of how CPU intensive that task
                 * actually is. This is undesirable for latency-sensitive bursty
                 * workloads.
                 *
                 * To avoid this, we reuse the 'load' from the previous
                 * time-window and give this task a chance to start with a
                 * reasonably high CPU frequency. (However, we shouldn't over-do
                 * this copy, lest we get stuck at a high load (high frequency)
                 * for too long, even when the current system load has actually
                 * dropped down. So we perform the copy only once, upon the
                 * first wake-up from idle.)
                 *
                 * Detecting this situation is easy: the governor's utilization
                 * update handler would not have run during CPU-idle periods.
                 * Hence, an unusually large 'wall_time' (as compared to the
                 * sampling rate) indicates this scenario.
                 *
                 * prev_load can be zero in two cases and we must recalculate it
                 * for both cases:
                 * - during long idle intervals
                 * - explicitly set to zero
                 */
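                /*
                 * For example, with a 10000 us sampling rate, a measured
                 * wall_time of 50000 us exceeds 2 * sampling_rate, so the CPU
                 * must have slept through several sampling periods; the saved
                 * prev_load is then used once instead of the near-zero load
                 * measured over that window.
                 */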
                if (unlikely(wall_time > (2 * sampling_rate) &&
                             j_cdbs->prev_load)) {
                        load = j_cdbs->prev_load;

                        /*
                         * Perform a destructive copy, to ensure that we copy
                         * the previous load only once, upon the first wake-up
                         * from idle.
                         */
                        j_cdbs->prev_load = 0;
                } else {
                        load = 100 * (wall_time - idle_time) / wall_time;
                        j_cdbs->prev_load = load;
                }

                if (load > max_load)
                        max_load = load;
        }

        gov->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

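/*
 * gov_set_update_util - hook the governor into the utilization update path.
 *
 * Program the sample delay for the policy and register each CPU's
 * dbs_update_util_handler() (via its cpu_dbs_info) with
 * cpufreq_set_update_util_data(), so the scheduler starts invoking it.
 */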
void gov_set_update_util(struct cpu_common_dbs_info *shared,
                         unsigned int delay_us)
{
        struct cpufreq_policy *policy = shared->policy;
        struct dbs_governor *gov = dbs_governor_of(policy);
        int cpu;

        gov_update_sample_delay(shared, delay_us);
        shared->last_sample_time = 0;

        for_each_cpu(cpu, policy->cpus) {
                struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

                cpufreq_set_update_util_data(cpu, &cdbs->update_util);
        }
}
EXPORT_SYMBOL_GPL(gov_set_update_util);

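/*
 * Detach the utilization update hooks from all CPUs of the policy;
 * synchronize_rcu() then waits so that no handler is still running by the
 * time this returns.
 */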
static inline void gov_clear_update_util(struct cpufreq_policy *policy)
{
        int i;

        for_each_cpu(i, policy->cpus)
                cpufreq_set_update_util_data(i, NULL);

        synchronize_rcu();
}

static void gov_cancel_work(struct cpu_common_dbs_info *shared)
{
        /* Tell dbs_update_util_handler() to skip queuing up work items. */
        atomic_inc(&shared->skip_work);
        /*
         * If dbs_update_util_handler() is already running, it may not notice
         * the incremented skip_work, so wait for it to complete to prevent its
         * work item from being queued up after the cancel_work_sync() below.
         */
        gov_clear_update_util(shared->policy);
        irq_work_sync(&shared->irq_work);
        cancel_work_sync(&shared->work);
        atomic_set(&shared->skip_work, 0);
}

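/*
 * dbs_work_handler - process one sample in process context.
 *
 * Run the governor's ->gov_dbs_timer() callback under timer_mutex, turn the
 * delay it returns (in jiffies) into the next sample_delay_ns, and then drop
 * skip_work so that dbs_update_util_handler() may queue new work again.
 */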
static void dbs_work_handler(struct work_struct *work)
{
        struct cpu_common_dbs_info *shared = container_of(work, struct
                                        cpu_common_dbs_info, work);
        struct cpufreq_policy *policy;
        struct dbs_governor *gov;
        unsigned int delay;

        policy = shared->policy;
        gov = dbs_governor_of(policy);

        /*
         * Make sure cpufreq_governor_limits() isn't evaluating load or the
         * ondemand governor isn't updating the sampling rate in parallel.
         */
        mutex_lock(&shared->timer_mutex);
        delay = gov->gov_dbs_timer(policy);
        shared->sample_delay_ns = jiffies_to_nsecs(delay);
        mutex_unlock(&shared->timer_mutex);

        /*
         * If the atomic operation below is reordered with respect to the
         * sample delay modification, the utilization update handler may end
         * up using a stale sample delay value.
         */
        smp_mb__before_atomic();
        atomic_dec(&shared->skip_work);
}

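/* Runs in interrupt context: hand the actual sampling off to a workqueue. */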
static void dbs_irq_work(struct irq_work *irq_work)
{
        struct cpu_common_dbs_info *shared;

        shared = container_of(irq_work, struct cpu_common_dbs_info, irq_work);
        schedule_work(&shared->work);
}

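/*
 * Queue the irq_work on the local CPU; irq_work_queue_on() is only available
 * on SMP builds, hence the fallback for uniprocessor kernels.
 */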
static inline void gov_queue_irq_work(struct cpu_common_dbs_info *shared)
{
#ifdef CONFIG_SMP
        irq_work_queue_on(&shared->irq_work, smp_processor_id());
#else
        irq_work_queue(&shared->irq_work);
#endif
}

static void dbs_update_util_handler(struct update_util_data *data, u64 time,
                                    unsigned long util, unsigned long max)
{
        struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
        struct cpu_common_dbs_info *shared = cdbs->shared;

        /*
         * The work may not be allowed to be queued up right now.
         * Possible reasons:
         * - Work has already been queued up or is in progress.
         * - The governor is being stopped.
         * - It is too early (too little time from the previous sample).
         */
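        /*
         * skip_work doubles as a "work pending" counter: only the 0 -> 1
         * transition (atomic_inc_return() returning 1) may queue new work,
         * while gov_cancel_work() keeps it elevated for as long as the
         * governor is being stopped.
         */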
        if (atomic_inc_return(&shared->skip_work) == 1) {
                u64 delta_ns;

                delta_ns = time - shared->last_sample_time;
                if ((s64)delta_ns >= shared->sample_delay_ns) {
                        shared->last_sample_time = time;
                        gov_queue_irq_work(shared);
                        return;
                }
        }
        atomic_dec(&shared->skip_work);
}

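/*
 * Store the new sampling rate in the governor-specific tuners structure;
 * conservative and ondemand keep it in different tuner types.
 */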
static void set_sampling_rate(struct dbs_data *dbs_data,
                              struct dbs_governor *gov,
                              unsigned int sampling_rate)
{
        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
                cs_tuners->sampling_rate = sampling_rate;
        } else {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;
                od_tuners->sampling_rate = sampling_rate;
        }
}

static int alloc_common_dbs_info(struct cpufreq_policy *policy,
                                 struct dbs_governor *gov)
{
        struct cpu_common_dbs_info *shared;
        int j;

        /* Allocate memory for the common information for policy->cpus */
        shared = kzalloc(sizeof(*shared), GFP_KERNEL);
        if (!shared)
                return -ENOMEM;

        /* Set shared for all CPUs, online+offline */
        for_each_cpu(j, policy->related_cpus)
                gov->get_cpu_cdbs(j)->shared = shared;

        mutex_init(&shared->timer_mutex);
        atomic_set(&shared->skip_work, 0);
        init_irq_work(&shared->irq_work, dbs_irq_work);
        INIT_WORK(&shared->work, dbs_work_handler);
        return 0;
}

static void free_common_dbs_info(struct cpufreq_policy *policy,
                                 struct dbs_governor *gov)
{
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
        struct cpu_common_dbs_info *shared = cdbs->shared;
        int j;

        mutex_destroy(&shared->timer_mutex);

        for_each_cpu(j, policy->cpus)
                gov->get_cpu_cdbs(j)->shared = NULL;

        kfree(shared);
}

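/*
 * cpufreq_governor_init - handle CPUFREQ_GOV_POLICY_INIT.
 *
 * When tunables are shared across policies and a dbs_data already exists,
 * reuse it and bump its usage count.  Otherwise allocate a fresh dbs_data,
 * run the governor-specific ->init() callback, derive the default sampling
 * rate from the hardware transition latency and create the sysfs group.
 */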
static int cpufreq_governor_init(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct dbs_data *dbs_data = gov->gdbs_data;
        unsigned int latency;
        int ret;

        /* State should be equivalent to EXIT */
        if (policy->governor_data)
                return -EBUSY;

        if (dbs_data) {
                if (WARN_ON(have_governor_per_policy()))
                        return -EINVAL;

                ret = alloc_common_dbs_info(policy, gov);
                if (ret)
                        return ret;

                dbs_data->usage_count++;
                policy->governor_data = dbs_data;
                return 0;
        }

        dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
        if (!dbs_data)
                return -ENOMEM;

        ret = alloc_common_dbs_info(policy, gov);
        if (ret)
                goto free_dbs_data;

        dbs_data->usage_count = 1;

        ret = gov->init(dbs_data, !policy->governor->initialized);
        if (ret)
                goto free_common_dbs_info;

        /* policy latency is in ns. Convert it to us first */
        latency = policy->cpuinfo.transition_latency / 1000;
        if (latency == 0)
                latency = 1;

        /* Bring kernel and HW constraints together */
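        /*
         * For example, a transition_latency of 100000 ns gives latency =
         * 100 us, so the default sampling rate below becomes
         * 100 * LATENCY_MULTIPLIER us, bounded from below by min_sampling_rate.
         */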
        dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
                                          MIN_LATENCY_MULTIPLIER * latency);
        set_sampling_rate(dbs_data, gov, max(dbs_data->min_sampling_rate,
                                        latency * LATENCY_MULTIPLIER));

        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;

        policy->governor_data = dbs_data;

        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(gov));
        if (ret)
                goto reset_gdbs_data;

        return 0;

reset_gdbs_data:
        policy->governor_data = NULL;

        if (!have_governor_per_policy())
                gov->gdbs_data = NULL;
        gov->exit(dbs_data, !policy->governor->initialized);
free_common_dbs_info:
        free_common_dbs_info(policy, gov);
free_dbs_data:
        kfree(dbs_data);
        return ret;
}

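/*
 * cpufreq_governor_exit - handle CPUFREQ_GOV_POLICY_EXIT.
 *
 * Drop one reference to the dbs_data; when the last user goes away, remove
 * the sysfs group, run the governor-specific ->exit() callback and free the
 * tunables.  The per-policy common data is freed in either case.
 */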
static int cpufreq_governor_exit(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct dbs_data *dbs_data = policy->governor_data;
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);

        /* State should be equivalent to INIT */
        if (!cdbs->shared || cdbs->shared->policy)
                return -EBUSY;

        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(gov));

                policy->governor_data = NULL;

                if (!have_governor_per_policy())
                        gov->gdbs_data = NULL;

                gov->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
        } else {
                policy->governor_data = NULL;
        }

        free_common_dbs_info(policy, gov);
        return 0;
}

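/*
 * cpufreq_governor_start - handle CPUFREQ_GOV_START.
 *
 * Seed the per-CPU idle/wall-time statistics, reset the governor-specific
 * per-CPU state and finally install the utilization update hooks with the
 * configured sampling rate.
 */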
static int cpufreq_governor_start(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct dbs_data *dbs_data = policy->governor_data;
        unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);
        struct cpu_common_dbs_info *shared = cdbs->shared;
        int io_busy = 0;

        if (!policy->cur)
                return -EINVAL;

        /* State should be equivalent to INIT */
        if (!shared || shared->policy)
                return -EBUSY;

        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;

                sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice_load;
        } else {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;

                sampling_rate = od_tuners->sampling_rate;
                ignore_nice = od_tuners->ignore_nice_load;
                io_busy = od_tuners->io_is_busy;
        }

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);
                unsigned int prev_load;

                j_cdbs->prev_cpu_idle =
                        get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);

                prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
                                            j_cdbs->prev_cpu_idle);
                j_cdbs->prev_load = 100 * prev_load /
                                    (unsigned int)j_cdbs->prev_cpu_wall;

                if (ignore_nice)
                        j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];

                j_cdbs->update_util.func = dbs_update_util_handler;
        }
        shared->policy = policy;

        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_cpu_dbs_info_s *cs_dbs_info =
                        gov->get_cpu_dbs_info_s(cpu);

                cs_dbs_info->down_skip = 0;
                cs_dbs_info->requested_freq = policy->cur;
        } else {
                struct od_ops *od_ops = gov->gov_ops;
                struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu);

                od_dbs_info->rate_mult = 1;
                od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
                od_ops->powersave_bias_init_cpu(cpu);
        }

        gov_set_update_util(shared, sampling_rate);
        return 0;
}

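/*
 * cpufreq_governor_stop - handle CPUFREQ_GOV_STOP.
 *
 * Detach the utilization update hooks, cancel any pending work and mark the
 * shared data as no longer bound to a policy.
 */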
static int cpufreq_governor_stop(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
        struct cpu_common_dbs_info *shared = cdbs->shared;

        /* State should be equivalent to START */
        if (!shared || !shared->policy)
                return -EBUSY;

        gov_cancel_work(shared);
        shared->policy = NULL;

        return 0;
}

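/*
 * cpufreq_governor_limits - handle CPUFREQ_GOV_LIMITS.
 *
 * Clamp the current frequency to the new policy->min/policy->max bounds and
 * re-evaluate the load, all under timer_mutex so it cannot race with
 * dbs_work_handler().
 */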
static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

        /* State should be equivalent to START */
        if (!cdbs->shared || !cdbs->shared->policy)
                return -EBUSY;

        mutex_lock(&cdbs->shared->timer_mutex);
        if (policy->max < cdbs->shared->policy->cur)
                __cpufreq_driver_target(cdbs->shared->policy, policy->max,
                                        CPUFREQ_RELATION_H);
        else if (policy->min > cdbs->shared->policy->cur)
                __cpufreq_driver_target(cdbs->shared->policy, policy->min,
                                        CPUFREQ_RELATION_L);
        dbs_check_cpu(policy, cpu);
        mutex_unlock(&cdbs->shared->timer_mutex);

        return 0;
}

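/*
 * cpufreq_governor_dbs - common governor event dispatcher.
 *
 * Entry point shared by ondemand and conservative: dispatch the cpufreq
 * core's governor events to the init/exit/start/stop/limits helpers above,
 * serialized by dbs_data_mutex.
 */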
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
        int ret = -EINVAL;

        /* Lock governor to block concurrent initialization of governor */
        mutex_lock(&dbs_data_mutex);

        if (event == CPUFREQ_GOV_POLICY_INIT) {
                ret = cpufreq_governor_init(policy);
        } else if (policy->governor_data) {
                switch (event) {
                case CPUFREQ_GOV_POLICY_EXIT:
                        ret = cpufreq_governor_exit(policy);
                        break;
                case CPUFREQ_GOV_START:
                        ret = cpufreq_governor_start(policy);
                        break;
                case CPUFREQ_GOV_STOP:
                        ret = cpufreq_governor_stop(policy);
                        break;
                case CPUFREQ_GOV_LIMITS:
                        ret = cpufreq_governor_limits(policy);
                        break;
                }
        }

        mutex_unlock(&dbs_data_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);