/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright    (C) 2001 Russell King
 *              (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *              (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *              (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *              (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>

#include "cpufreq_governor.h"

DEFINE_MUTEX(dbs_data_mutex);
EXPORT_SYMBOL_GPL(dbs_data_mutex);

/* Common sysfs tunables */
/**
 * store_sampling_rate - update the sampling rate, taking effect immediately if needed.
 *
 * If the new rate is smaller than the old one, simply updating
 * dbs_data->sampling_rate may not be enough by itself.  For example, if the
 * original sampling_rate was 1 second and the user requests a new rate of
 * 10 ms (because they need an immediate reaction from the ondemand governor
 * but are not sure whether a higher frequency will be required), the
 * governor may change the sampling rate too late; up to 1 second later.
 * Thus, when reducing the sampling rate, we need to make the new value
 * effective immediately.
 *
 * This must be called with dbs_data->mutex held, otherwise traversing
 * policy_dbs_list isn't safe.
 */
ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
                            size_t count)
{
        struct policy_dbs_info *policy_dbs;
        unsigned int rate;
        int ret;

        ret = sscanf(buf, "%u", &rate);
        if (ret != 1)
                return -EINVAL;

        dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate);

        /*
         * We are operating under dbs_data->mutex and so the list and its
         * entries can't be freed concurrently.
         */
        list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
                mutex_lock(&policy_dbs->timer_mutex);
                /*
                 * On 32-bit architectures this may race with the
                 * sample_delay_ns read in dbs_update_util_handler(), but that
                 * really doesn't matter.  If the read returns a value that's
                 * too big, the sample will be skipped, but the next invocation
                 * of dbs_update_util_handler() (when the update has been
                 * completed) will take a sample.
                 *
                 * If this runs in parallel with dbs_work_handler(), we may end
                 * up overwriting the sample_delay_ns value that it has just
                 * written, but it will be corrected next time a sample is
                 * taken, so it shouldn't be significant.
                 */
                gov_update_sample_delay(policy_dbs, 0);
                mutex_unlock(&policy_dbs->timer_mutex);
        }

        return count;
}
EXPORT_SYMBOL_GPL(store_sampling_rate);
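
/*
 * Usage sketch (illustrative, not part of the kernel sources): with a
 * governor that uses these common tunables and system-wide (non-per-policy)
 * attributes, the sampling rate is typically changed from user space via
 * sysfs, e.g.:
 *
 *   echo 10000 > /sys/devices/system/cpu/cpufreq/ondemand/sampling_rate
 *
 * which ends up in store_sampling_rate() above.  The value is in
 * microseconds; the exact sysfs location depends on
 * have_governor_per_policy().
 */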

static inline struct dbs_data *to_dbs_data(struct kobject *kobj)
{
        return container_of(kobj, struct dbs_data, kobj);
}

static inline struct governor_attr *to_gov_attr(struct attribute *attr)
{
        return container_of(attr, struct governor_attr, attr);
}

static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
                             char *buf)
{
        struct dbs_data *dbs_data = to_dbs_data(kobj);
        struct governor_attr *gattr = to_gov_attr(attr);
        int ret = -EIO;

        if (gattr->show)
                ret = gattr->show(dbs_data, buf);

        return ret;
}

static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
                              const char *buf, size_t count)
{
        struct dbs_data *dbs_data = to_dbs_data(kobj);
        struct governor_attr *gattr = to_gov_attr(attr);
        int ret = -EIO;

        mutex_lock(&dbs_data->mutex);

        if (gattr->store)
                ret = gattr->store(dbs_data, buf, count);

        mutex_unlock(&dbs_data->mutex);

        return ret;
}

/*
 * Sysfs Ops for accessing governor attributes.
 *
 * All show/store invocations for governor-specific sysfs attributes go
 * through the show/store callbacks below, which in turn call the
 * attribute-specific callback.
 */
static const struct sysfs_ops governor_sysfs_ops = {
        .show   = governor_show,
        .store  = governor_store,
};

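/*
 * dbs_update - compute the current load for a policy.
 *
 * For each CPU sharing @policy, measure how much of the wall time elapsed
 * since the previous sample was spent non-idle and return the highest such
 * load (in percent).  The governor-specific code uses this value to pick
 * the next frequency.
 */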
unsigned int dbs_update(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        unsigned int ignore_nice = dbs_data->ignore_nice_load;
        unsigned int max_load = 0;
        unsigned int sampling_rate, j;

        /*
         * Sometimes governors may use an additional multiplier to increase
         * sample delays temporarily.  Apply that multiplier to sampling_rate
         * so as to keep the wake-up-from-idle detection logic a bit
         * conservative.
         */
        sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult;

        /* Get Absolute Load */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load;
                int io_busy = 0;

                j_cdbs = gov->get_cpu_cdbs(j);

                /*
                 * For the purpose of ondemand, waiting for disk IO is
                 * an indication that you're performance critical, and
                 * not that the system is actually idle. So do not add
                 * the iowait time to the cpu idle time.
                 */
                if (gov->governor == GOV_ONDEMAND)
                        io_busy = od_tuners->io_is_busy;
                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

                wall_time = cur_wall_time - j_cdbs->prev_cpu_wall;
                j_cdbs->prev_cpu_wall = cur_wall_time;

                if (cur_idle_time <= j_cdbs->prev_cpu_idle) {
                        idle_time = 0;
                } else {
                        idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
                        j_cdbs->prev_cpu_idle = cur_idle_time;
                }

                if (ignore_nice) {
                        u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];

                        idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice);
                        j_cdbs->prev_cpu_nice = cur_nice;
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                /*
                 * If the CPU had gone completely idle, and a task just woke up
                 * on this CPU now, it would be unfair to calculate 'load' the
                 * usual way for this elapsed time-window, because it will show
                 * near-zero load, irrespective of how CPU intensive that task
                 * actually is. This is undesirable for latency-sensitive bursty
                 * workloads.
                 *
                 * To avoid this, we reuse the 'load' from the previous
                 * time-window and give this task a chance to start with a
                 * reasonably high CPU frequency. (However, we shouldn't over-do
                 * this copy, lest we get stuck at a high load (high frequency)
                 * for too long, even when the current system load has actually
                 * dropped down. So we perform the copy only once, upon the
                 * first wake-up from idle.)
                 *
                 * Detecting this situation is easy: the governor's utilization
                 * update handler would not have run during CPU-idle periods.
                 * Hence, an unusually large 'wall_time' (as compared to the
                 * sampling rate) indicates this scenario.
                 *
                 * prev_load can be zero in two cases and we must recalculate it
                 * for both cases:
                 * - during long idle intervals
                 * - explicitly set to zero
                 */
                if (unlikely(wall_time > (2 * sampling_rate) &&
                             j_cdbs->prev_load)) {
                        load = j_cdbs->prev_load;

                        /*
                         * Perform a destructive copy, to ensure that we copy
                         * the previous load only once, upon the first wake-up
                         * from idle.
                         */
                        j_cdbs->prev_load = 0;
                } else {
                        load = 100 * (wall_time - idle_time) / wall_time;
                        j_cdbs->prev_load = load;
                }

                if (load > max_load)
                        max_load = load;
        }
        return max_load;
}
EXPORT_SYMBOL_GPL(dbs_update);

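/*
 * gov_set_update_util - start receiving utilization updates for a policy.
 *
 * Set the initial sample delay to @delay_us and register the governor's
 * utilization update handler (via each CPU's update_util hook) for all CPUs
 * covered by the policy, so that scheduler utilization updates start driving
 * the governor.
 */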
void gov_set_update_util(struct policy_dbs_info *policy_dbs,
                         unsigned int delay_us)
{
        struct cpufreq_policy *policy = policy_dbs->policy;
        struct dbs_governor *gov = dbs_governor_of(policy);
        int cpu;

        gov_update_sample_delay(policy_dbs, delay_us);
        policy_dbs->last_sample_time = 0;

        for_each_cpu(cpu, policy->cpus) {
                struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(cpu);

                cpufreq_set_update_util_data(cpu, &cdbs->update_util);
        }
}
EXPORT_SYMBOL_GPL(gov_set_update_util);

static inline void gov_clear_update_util(struct cpufreq_policy *policy)
{
        int i;

        for_each_cpu(i, policy->cpus)
                cpufreq_set_update_util_data(i, NULL);

        synchronize_rcu();
}

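/*
 * gov_cancel_work - stop all governor activity for a policy.
 *
 * Unregister the utilization update hooks, wait for any pending irq_work and
 * work item to finish, and reset the work bookkeeping so that the governor
 * can be started again cleanly.
 */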
static void gov_cancel_work(struct cpufreq_policy *policy)
{
        struct policy_dbs_info *policy_dbs = policy->governor_data;

        gov_clear_update_util(policy_dbs->policy);
        irq_work_sync(&policy_dbs->irq_work);
        cancel_work_sync(&policy_dbs->work);
        atomic_set(&policy_dbs->work_count, 0);
        policy_dbs->work_in_progress = false;
}

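/*
 * dbs_work_handler - process-context part of a governor sample.
 *
 * Runs from the workqueue (queued via dbs_irq_work()).  It calls the
 * governor-specific gov_dbs_timer() callback to evaluate the load and adjust
 * the frequency, derives the next sample delay from its return value, and
 * then allows the utilization update handler to queue further work.
 */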
static void dbs_work_handler(struct work_struct *work)
{
        struct policy_dbs_info *policy_dbs;
        struct cpufreq_policy *policy;
        struct dbs_governor *gov;
        unsigned int delay;

        policy_dbs = container_of(work, struct policy_dbs_info, work);
        policy = policy_dbs->policy;
        gov = dbs_governor_of(policy);

        /*
         * Make sure cpufreq_governor_limits() isn't evaluating load or the
         * ondemand governor isn't updating the sampling rate in parallel.
         */
        mutex_lock(&policy_dbs->timer_mutex);
        delay = gov->gov_dbs_timer(policy);
        policy_dbs->sample_delay_ns = jiffies_to_nsecs(delay);
        mutex_unlock(&policy_dbs->timer_mutex);

        /* Allow the utilization update handler to queue up more work. */
        atomic_set(&policy_dbs->work_count, 0);
        /*
         * If the update below is reordered with respect to the sample delay
         * modification, the utilization update handler may end up using a stale
         * sample delay value.
         */
        smp_wmb();
        policy_dbs->work_in_progress = false;
}

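/* Bounce from (hard) interrupt context to process context via the workqueue. */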
static void dbs_irq_work(struct irq_work *irq_work)
{
        struct policy_dbs_info *policy_dbs;

        policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
        schedule_work(&policy_dbs->work);
}

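/*
 * dbs_update_util_handler - scheduler utilization update callback.
 *
 * Called from scheduler paths, so it must not sleep.  If no work is in
 * flight and at least sample_delay_ns has passed since the last sample,
 * queue an irq_work that will in turn schedule dbs_work_handler().  For
 * shared policies, work_count ensures that only one CPU queues the work.
 */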
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
                                    unsigned long util, unsigned long max)
{
        struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
        struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
        u64 delta_ns;

        /*
         * The work may not be allowed to be queued up right now.
         * Possible reasons:
         * - Work has already been queued up or is in progress.
         * - It is too early (too little time from the previous sample).
         */
        if (policy_dbs->work_in_progress)
                return;

        /*
         * If the reads below are reordered before the check above, the value
         * of sample_delay_ns used in the computation may be stale.
         */
        smp_rmb();
        delta_ns = time - policy_dbs->last_sample_time;
        if ((s64)delta_ns < policy_dbs->sample_delay_ns)
                return;

        /*
         * If the policy is not shared, the irq_work may be queued up right away
         * at this point.  Otherwise, we need to ensure that only one of the
         * CPUs sharing the policy will do that.
         */
        if (policy_dbs->is_shared &&
            !atomic_add_unless(&policy_dbs->work_count, 1, 1))
                return;

        policy_dbs->last_sample_time = time;
        policy_dbs->work_in_progress = true;
        irq_work_queue(&policy_dbs->irq_work);
}

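/*
 * alloc_policy_dbs_info - allocate and initialize per-policy governor data.
 *
 * Allocate the policy_dbs_info shared by all CPUs of @policy, initialize its
 * locking and (irq_)work machinery, and point every related CPU's
 * cpu_dbs_info at it.  Undone by free_policy_dbs_info().
 */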
static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
                                                     struct dbs_governor *gov)
{
        struct policy_dbs_info *policy_dbs;
        int j;

        /* Allocate memory for the common information for policy->cpus */
        policy_dbs = kzalloc(sizeof(*policy_dbs), GFP_KERNEL);
        if (!policy_dbs)
                return NULL;

        policy_dbs->policy = policy;
        mutex_init(&policy_dbs->timer_mutex);
        atomic_set(&policy_dbs->work_count, 0);
        init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
        INIT_WORK(&policy_dbs->work, dbs_work_handler);

        /* Set policy_dbs for all CPUs, online+offline */
        for_each_cpu(j, policy->related_cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);

                j_cdbs->policy_dbs = policy_dbs;
                j_cdbs->update_util.func = dbs_update_util_handler;
        }
        return policy_dbs;
}

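/* Counterpart of alloc_policy_dbs_info(): detach the CPUs and free the data. */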
static void free_policy_dbs_info(struct cpufreq_policy *policy,
                                 struct dbs_governor *gov)
{
        struct cpu_dbs_info *cdbs = gov->get_cpu_cdbs(policy->cpu);
        struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
        int j;

        mutex_destroy(&policy_dbs->timer_mutex);

        for_each_cpu(j, policy->related_cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);

                j_cdbs->policy_dbs = NULL;
                j_cdbs->update_util.func = NULL;
        }
        kfree(policy_dbs);
}

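/*
 * cpufreq_governor_init - handle CPUFREQ_GOV_POLICY_INIT for a policy.
 *
 * Allocate the per-policy data and attach it to the governor tunables
 * (dbs_data).  If the tunables are shared system-wide and already exist,
 * just take a reference; otherwise allocate them, call the governor's init()
 * callback, derive the sampling rate limits from the transition latency and
 * create the sysfs attribute group.
 */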
static int cpufreq_governor_init(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct dbs_data *dbs_data = gov->gdbs_data;
        struct policy_dbs_info *policy_dbs;
        unsigned int latency;
        int ret;

        /* State should be equivalent to EXIT */
        if (policy->governor_data)
                return -EBUSY;

        policy_dbs = alloc_policy_dbs_info(policy, gov);
        if (!policy_dbs)
                return -ENOMEM;

        if (dbs_data) {
                if (WARN_ON(have_governor_per_policy())) {
                        ret = -EINVAL;
                        goto free_policy_dbs_info;
                }
                policy_dbs->dbs_data = dbs_data;
                policy->governor_data = policy_dbs;

                mutex_lock(&dbs_data->mutex);
                dbs_data->usage_count++;
                list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
                mutex_unlock(&dbs_data->mutex);

                return 0;
        }

        dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
        if (!dbs_data) {
                ret = -ENOMEM;
                goto free_policy_dbs_info;
        }

        INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
        mutex_init(&dbs_data->mutex);

        ret = gov->init(dbs_data, !policy->governor->initialized);
        if (ret)
                goto free_policy_dbs_info;

        /* policy latency is in ns. Convert it to us first */
        latency = policy->cpuinfo.transition_latency / 1000;
        if (latency == 0)
                latency = 1;

        /* Bring kernel and HW constraints together */
        dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
                                          MIN_LATENCY_MULTIPLIER * latency);
        dbs_data->sampling_rate = max(dbs_data->min_sampling_rate,
                                      LATENCY_MULTIPLIER * latency);

        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;

        policy->governor_data = policy_dbs;

        policy_dbs->dbs_data = dbs_data;
        dbs_data->usage_count = 1;
        list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);

        gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
        ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
                                   get_governor_parent_kobj(policy),
                                   "%s", gov->gov.name);
        if (!ret)
                return 0;

        /* Failure, so roll back. */
        pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret);

        policy->governor_data = NULL;

        if (!have_governor_per_policy())
                gov->gdbs_data = NULL;
        gov->exit(dbs_data, !policy->governor->initialized);
        kfree(dbs_data);

free_policy_dbs_info:
        free_policy_dbs_info(policy, gov);
        return ret;
}

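/*
 * cpufreq_governor_exit - handle CPUFREQ_GOV_POLICY_EXIT for a policy.
 *
 * Drop the policy's reference to the tunables; when the last user goes away,
 * release the sysfs kobject, call the governor's exit() callback and free
 * the tunables.  The per-policy data is freed in all cases.
 */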
static int cpufreq_governor_exit(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        int count;

        mutex_lock(&dbs_data->mutex);
        list_del(&policy_dbs->list);
        count = --dbs_data->usage_count;
        mutex_unlock(&dbs_data->mutex);

        if (!count) {
                kobject_put(&dbs_data->kobj);

                policy->governor_data = NULL;

                if (!have_governor_per_policy())
                        gov->gdbs_data = NULL;

                gov->exit(dbs_data, policy->governor->initialized == 1);
                mutex_destroy(&dbs_data->mutex);
                kfree(dbs_data);
        } else {
                policy->governor_data = NULL;
        }

        free_policy_dbs_info(policy, gov);
        return 0;
}

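/*
 * cpufreq_governor_start - handle CPUFREQ_GOV_START for a policy.
 *
 * Take an initial snapshot of each CPU's wall/idle (and optionally nice)
 * time so that the first sample has a sane baseline, reset the
 * governor-specific per-CPU state, and start the utilization updates with
 * the configured sampling rate.
 */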
static int cpufreq_governor_start(struct cpufreq_policy *policy)
{
        struct dbs_governor *gov = dbs_governor_of(policy);
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
        int io_busy = 0;

        if (!policy->cur)
                return -EINVAL;

        policy_dbs->is_shared = policy_is_shared(policy);
        policy_dbs->rate_mult = 1;

        sampling_rate = dbs_data->sampling_rate;
        ignore_nice = dbs_data->ignore_nice_load;

        if (gov->governor == GOV_ONDEMAND) {
                struct od_dbs_tuners *od_tuners = dbs_data->tuners;

                io_busy = od_tuners->io_is_busy;
        }

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs = gov->get_cpu_cdbs(j);
                unsigned int prev_load;

                j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);

                prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle;
                j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall;

                if (ignore_nice)
                        j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
        }

        if (gov->governor == GOV_CONSERVATIVE) {
                struct cs_cpu_dbs_info_s *cs_dbs_info =
                        gov->get_cpu_dbs_info_s(cpu);

                cs_dbs_info->down_skip = 0;
                cs_dbs_info->requested_freq = policy->cur;
        } else {
                struct od_ops *od_ops = gov->gov_ops;
                struct od_cpu_dbs_info_s *od_dbs_info = gov->get_cpu_dbs_info_s(cpu);

                od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
                od_ops->powersave_bias_init_cpu(cpu);
        }

        gov_set_update_util(policy_dbs, sampling_rate);
        return 0;
}

static int cpufreq_governor_stop(struct cpufreq_policy *policy)
{
        gov_cancel_work(policy);

        return 0;
}

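/*
 * cpufreq_governor_limits - handle CPUFREQ_GOV_LIMITS for a policy.
 *
 * Clamp the current frequency into the new [min, max] range and reset the
 * sample delay so that the next utilization update re-evaluates the load
 * immediately.
 */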
static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
        struct policy_dbs_info *policy_dbs = policy->governor_data;

        mutex_lock(&policy_dbs->timer_mutex);

        if (policy->max < policy->cur)
                __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > policy->cur)
                __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);

        gov_update_sample_delay(policy_dbs, 0);

        mutex_unlock(&policy_dbs->timer_mutex);

        return 0;
}

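/*
 * cpufreq_governor_dbs - common governor entry point for the cpufreq core.
 *
 * Dispatch the governor event to the init/exit/start/stop/limits handlers
 * above, under dbs_data_mutex to serialize concurrent governor operations.
 */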
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
        int ret = -EINVAL;

        /* Lock governor to block concurrent initialization of governor */
        mutex_lock(&dbs_data_mutex);

        if (event == CPUFREQ_GOV_POLICY_INIT) {
                ret = cpufreq_governor_init(policy);
        } else if (policy->governor_data) {
                switch (event) {
                case CPUFREQ_GOV_POLICY_EXIT:
                        ret = cpufreq_governor_exit(policy);
                        break;
                case CPUFREQ_GOV_START:
                        ret = cpufreq_governor_start(policy);
                        break;
                case CPUFREQ_GOV_STOP:
                        ret = cpufreq_governor_stop(policy);
                        break;
                case CPUFREQ_GOV_LIMITS:
                        ret = cpufreq_governor_limits(policy);
                        break;
                }
        }

        mutex_unlock(&dbs_data_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);