2 * cpufreq_snb.c: Native P state management for Intel processors
4 * (C) Copyright 2012 Intel Corporation
5 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
27 #include <linux/debugfs.h>
28 #include <trace/events/power.h>
30 #include <asm/div64.h>
32 #include <asm/cpu_device_id.h>
/* Number of entries in the samples[] array kept for each CPU. */
34 #define SAMPLE_COUNT 3
/*
 * Fixed-point helpers: a value is stored shifted left by FRAC_BITS.
 * int_tofp() widens its argument to 64 bits before shifting so the shift
 * itself cannot overflow a 32-bit int; fp_toint() shifts the fractional
 * bits back out.
 */
37 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
38 #define fp_toint(X) ((X) >> FRAC_BITS)
40 static inline int32_t mul_fp(int32_t x, int32_t y)
42 return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
45 static inline int32_t div_fp(int32_t x, int32_t y)
47 return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
/* Timer whose handler takes a sample and adjusts the P-state. */
84 struct timer_list timer;
/* Tuning parameters (gains, setpoint, deadband, sample rate) for this CPU. */
86 struct pstate_adjust_policy *pstate_policy;
/* Current, minimum, maximum and turbo P-state values. */
87 struct pstate_data pstate;
/* Idle-time reading from the previous sample; 0 means no baseline yet. */
95 u64 prev_idle_time_us;
/* Most recent samples, used as a ring indexed by sample_ptr. */
99 struct sample samples[SAMPLE_COUNT];
/*
 * Per-CPU driver state, indexed by CPU number. The array is zeroed in
 * intel_pstate_init(); each entry is allocated by intel_pstate_init_cpu()
 * and set back to NULL by intel_pstate_cpu_exit().
 */
102 static struct cpudata **all_cpu_data;
103 struct pstate_adjust_policy {
/*
 * Default controller tuning. The same object is referenced by the CPU id
 * table and by the debugfs tunables in pid_files[].
 */
112 static struct pstate_adjust_policy default_policy = {
/* Sampling period in milliseconds. */
113 .sample_rate_ms = 10,
/* Global performance limits shared by every CPU handled by this driver. */
129 static struct perf_limits limits = {
/* Fixed-point 1.0: the maximum P-state is scaled by this factor. */
132 .max_perf = int_tofp(1),
137 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
138 int deadband, int integral) {
139 pid->setpoint = setpoint;
140 pid->deadband = deadband;
141 pid->integral = int_tofp(integral);
142 pid->last_err = setpoint - busy;
145 static inline void pid_p_gain_set(struct _pid *pid, int percent)
147 pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
150 static inline void pid_i_gain_set(struct _pid *pid, int percent)
152 pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
155 static inline void pid_d_gain_set(struct _pid *pid, int percent)
158 pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
/*
 * pid_calc - run one step of the PID controller.
 * @pid:  controller state (setpoint, deadband, gains, integral, last_err)
 * @busy: current measurement compared against pid->setpoint
 *
 * Returns the sum of the proportional, integral and derivative terms,
 * converted from fixed point back to an integer.
 */
161 static signed int pid_calc(struct _pid *pid, int busy)
163 signed int err, result;
164 int32_t pterm, dterm, fp_error;
165 int32_t integral_limit;
/* err is a plain integer; fp_error is the same error in fixed point. */
167 err = pid->setpoint - busy;
168 fp_error = int_tofp(err);
/* Errors no larger than the deadband are handled separately. */
170 if (abs(err) <= pid->deadband)
/* Proportional term: gain times the fixed-point error. */
173 pterm = mul_fp(pid->p_gain, fp_error);
/* Accumulate the error for the integral term. */
175 pid->integral += fp_error;
177 /* clamp the accumulated integral to +/-30 (fixed point) */
178 integral_limit = int_tofp(30);
179 if (pid->integral > integral_limit)
180 pid->integral = integral_limit;
181 if (pid->integral < -integral_limit)
182 pid->integral = -integral_limit;
/*
 * NOTE(review): the derivative input is the plain-integer error delta,
 * while pterm and the integral use the fixed-point error -- confirm that
 * this difference in scaling is intended.
 */
184 dterm = mul_fp(pid->d_gain, (err - pid->last_err));
187 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
189 return (signed int)fp_toint(result);
/*
 * Reload the gains of the busy PID controller (cpu->pid) from the CPU's
 * policy. The setpoint and deadband from the same policy are then passed
 * on -- presumably to pid_reset(), as in intel_pstate_idle_pid_reset().
 */
192 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
194 pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct);
195 pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct);
196 pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct);
199 cpu->pstate_policy->setpoint,
201 cpu->pstate_policy->deadband,
/*
 * Reload the gains of the idle PID controller (cpu->idle_pid) and reset
 * its state. The idle controller uses the same gain percentages and
 * deadband as the busy controller.
 */
205 static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu)
207 pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct);
208 pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct);
209 pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct);
211 pid_reset(&cpu->idle_pid,
214 cpu->pstate_policy->deadband,
/*
 * Reset the busy PID controller of every online CPU that has driver state
 * allocated. Only the busy controller is reset here; the idle controller
 * is left untouched.
 */
218 static inline void intel_pstate_reset_all_pid(void)
221 for_each_online_cpu(cpu) {
/* Skip CPUs this driver has not initialised. */
222 if (all_cpu_data[cpu])
223 intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
227 /************************** debugfs begin ************************/
/*
 * debugfs write hook for a tunable. After the write, the PID controllers
 * of all online CPUs are reset so the new tuning takes effect.
 * presumably @data points at the tunable and @val is stored through it --
 * TODO confirm.
 */
228 static int pid_param_set(void *data, u64 val)
231 intel_pstate_reset_all_pid();
/* debugfs read hook paired with pid_param_set() in fops_pid_param. */
234 static int pid_param_get(void *data, u64 *val)
/* File operations for the tunables; values are printed as "%llu\n". */
239 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
240 pid_param_set, "%llu\n");
/*
 * Table of debugfs tunables: file name and the default_policy field it
 * exposes. intel_pstate_debug_expose_params() walks the table until it
 * finds an entry whose name is NULL.
 */
247 static struct pid_param pid_files[] = {
248 {"sample_rate_ms", &default_policy.sample_rate_ms},
249 {"d_gain_pct", &default_policy.d_gain_pct},
250 {"i_gain_pct", &default_policy.i_gain_pct},
251 {"deadband", &default_policy.deadband},
252 {"setpoint", &default_policy.setpoint},
253 {"p_gain_pct", &default_policy.p_gain_pct},
/* debugfs directory holding the tunables; removed in intel_pstate_exit(). */
257 static struct dentry *debugfs_parent;
/*
 * Create the "pstate_snb" debugfs directory and one file, mode 0660, for
 * each entry of pid_files[].
 */
258 static void intel_pstate_debug_expose_params(void)
262 debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
/* Directory creation failed or debugfs is unavailable. */
263 if (IS_ERR_OR_NULL(debugfs_parent))
/* The table ends at the first entry with a NULL name. */
265 while (pid_files[i].name) {
266 debugfs_create_file(pid_files[i].name, 0660,
267 debugfs_parent, pid_files[i].value,
273 /************************** debugfs end ************************/
275 /************************** sysfs begin ************************/
/*
 * show_one(file_name, object) - generate show_<file_name>(), a sysfs read
 * handler that prints limits.<object> as an unsigned decimal followed by a
 * newline.
 */
276 #define show_one(file_name, object) \
277 static ssize_t show_##file_name \
278 (struct kobject *kobj, struct attribute *attr, char *buf) \
280 return sprintf(buf, "%u\n", limits.object); \
/*
 * sysfs write handler for no_turbo: parse an unsigned integer from @buf
 * and store it in limits.no_turbo, clamped to the range 0..1.
 */
283 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
284 const char *buf, size_t count)
288 ret = sscanf(buf, "%u", &input);
291 limits.no_turbo = clamp_t(int, input, 0 , 1);
/*
 * sysfs write handler for max_perf_pct: parse an unsigned integer from
 * @buf, clamp it to 0..100 and update both the percentage and its
 * fixed-point fraction.
 */
296 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
297 const char *buf, size_t count)
301 ret = sscanf(buf, "%u", &input);
305 limits.max_perf_pct = clamp_t(int, input, 0 , 100);
/* Keep the fixed-point fraction (pct / 100) in sync with the percentage. */
306 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
/*
 * sysfs write handler for min_perf_pct: parse an unsigned integer from
 * @buf, clamp it to 0..100 and update both the percentage and its
 * fixed-point fraction.
 */
310 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
311 const char *buf, size_t count)
315 ret = sscanf(buf, "%u", &input);
318 limits.min_perf_pct = clamp_t(int, input, 0 , 100);
/* Keep the fixed-point fraction (pct / 100) in sync with the percentage. */
319 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
/* Generate show_no_turbo(), show_max_perf_pct() and show_min_perf_pct(). */
324 show_one(no_turbo, no_turbo);
325 show_one(max_perf_pct, max_perf_pct);
326 show_one(min_perf_pct, min_perf_pct);
/*
 * presumably binds each show_/store_ pair to a read-write attribute object
 * of the same name -- TODO confirm against the macro's definition.
 */
328 define_one_global_rw(no_turbo);
329 define_one_global_rw(max_perf_pct);
330 define_one_global_rw(min_perf_pct);
/* Attributes published through intel_pstate_attr_group. */
332 static struct attribute *intel_pstate_attributes[] = {
/* Group registered and removed as a unit via sysfs_{create,remove}_group(). */
339 static struct attribute_group intel_pstate_attr_group = {
340 .attrs = intel_pstate_attributes,
/* The "intel_pstate" kobject that carries the attribute group. */
342 static struct kobject *intel_pstate_kobject;
/*
 * Create the "intel_pstate" kobject under the CPU subsystem's root device
 * and attach the attribute group to it.
 */
344 static void intel_pstate_sysfs_expose_params(void)
348 intel_pstate_kobject = kobject_create_and_add("intel_pstate",
349 &cpu_subsys.dev_root->kobj);
/* Failure to create the kobject is treated as fatal. */
350 BUG_ON(!intel_pstate_kobject);
351 rc = sysfs_create_group(intel_pstate_kobject,
352 &intel_pstate_attr_group);
356 /************************** sysfs end ************************/
/*
 * Return the minimum P-state: bits 47:40 of 'value'.
 * presumably 'value' holds a model-specific register read -- TODO confirm.
 */
358 static int intel_pstate_min_pstate(void)
362 return (value >> 40) & 0xFF;
/*
 * Return the maximum (non-turbo) P-state: bits 15:8 of 'value'.
 * presumably 'value' holds a model-specific register read -- TODO confirm.
 */
365 static int intel_pstate_max_pstate(void)
369 return (value >> 8) & 0xFF;
/*
 * Read MSR 0x1AD and take its low 8 bits as the turbo P-state candidate.
 * The non-turbo maximum is fetched alongside it in 'nont'.
 */
372 static int intel_pstate_turbo_pstate(void)
376 rdmsrl(0x1AD, value);
377 nont = intel_pstate_max_pstate();
378 ret = ((value) & 255);
/*
 * intel_pstate_get_min_max - compute the P-state range currently allowed.
 * @cpu: per-CPU state supplying min/max/turbo P-states
 * @min: out: lowest allowed P-state
 * @max: out: highest allowed P-state
 *
 * The ceiling is scaled by limits.max_perf and the floor is derived from
 * that scaled ceiling via limits.min_perf; both are clamped to the
 * hardware range.
 */
384 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
/* Start from the turbo P-state as the ceiling. */
386 int max_perf = cpu->pstate.turbo_pstate;
389 max_perf = cpu->pstate.max_pstate;
/* Scale the ceiling by the fixed-point max_perf fraction. */
391 max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
392 *max = clamp_t(int, max_perf,
393 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
/* The floor is a fraction of the scaled (unclamped) ceiling. */
395 min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
396 *min = clamp_t(int, min_perf,
397 cpu->pstate.min_pstate, max_perf);
/*
 * intel_pstate_set_pstate - request a P-state for @cpu.
 * @cpu:    per-CPU state
 * @pstate: requested P-state; clamped to the range returned by
 *          intel_pstate_get_min_max() before use
 *
 * Records the new value in cpu->pstate.current_pstate and writes it,
 * shifted left by 8, to MSR_IA32_PERF_CTL.
 */
400 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
402 int max_perf, min_perf;
404 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
406 pstate = clamp_t(int, pstate, min_perf, max_perf);
/* Compare against the P-state that is already in effect. */
408 if (pstate == cpu->pstate.current_pstate)
/* The traced frequency is the P-state multiplied by 100000. */
412 trace_cpu_frequency(pstate * 100000, cpu->cpu);
414 cpu->pstate.current_pstate = pstate;
415 wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
419 static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
422 target = cpu->pstate.current_pstate + steps;
424 intel_pstate_set_pstate(cpu, target);
427 static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
430 target = cpu->pstate.current_pstate - steps;
431 intel_pstate_set_pstate(cpu, target);
434 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
436 sprintf(cpu->name, "Intel 2nd generation core");
438 cpu->pstate.min_pstate = intel_pstate_min_pstate();
439 cpu->pstate.max_pstate = intel_pstate_max_pstate();
440 cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate();
443 * goto max pstate so we don't slow up boot if we are built-in if we are
444 * a module we will take care of it during normal operation
446 intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
/*
 * intel_pstate_calc_busy - derive the busy figures for one sample.
 * @cpu:    per-CPU state (supplies the turbo P-state)
 * @sample: sample whose duration, idle time and APERF/MPERF deltas are
 *          already filled in; the derived fields are written back into it
 *
 * NOTE(review): sample->duration_us and sample->mperf are used as divisors
 * without a visible zero check -- confirm callers guarantee non-zero values.
 */
449 static inline void intel_pstate_calc_busy(struct cpudata *cpu,
450 struct sample *sample)
/* Percentage of the sample window that was not idle. */
453 sample->pstate_pct_busy = 100 - div64_u64(
454 sample->idletime_us * 100,
455 sample->duration_us);
/* APERF delta as a percentage of the MPERF delta. */
456 core_pct = div64_u64(sample->aperf * 100, sample->mperf);
457 sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000;
/* Busy percentage weighted by core_pct. */
459 sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct),
/*
 * intel_pstate_sample - take one utilisation sample for @cpu.
 *
 * Reads the CPU idle time and the APERF/MPERF counters. Unless this is the
 * first call, it stores the deltas since the previous call in the next
 * slot of cpu->samples[] and computes the busy figures for that slot. The
 * current readings are always saved as the baseline for the next call.
 */
463 static inline void intel_pstate_sample(struct cpudata *cpu)
470 idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL);
472 rdmsrl(MSR_IA32_APERF, aperf);
473 rdmsrl(MSR_IA32_MPERF, mperf);
474 /* On the first call there is no baseline yet (prev_idle_time_us is
475 * still 0), so no sample is recorded; only the baseline is set. */
476 if (cpu->prev_idle_time_us > 0) {
/* Advance the ring index, wrapping at SAMPLE_COUNT. */
477 cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
478 cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample;
479 cpu->samples[cpu->sample_ptr].end_time = now;
480 cpu->samples[cpu->sample_ptr].duration_us =
481 ktime_us_delta(now, cpu->prev_sample);
482 cpu->samples[cpu->sample_ptr].idletime_us =
483 idle_time_us - cpu->prev_idle_time_us;
/* Store the raw counters, then turn them into deltas. */
485 cpu->samples[cpu->sample_ptr].aperf = aperf;
486 cpu->samples[cpu->sample_ptr].mperf = mperf;
487 cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
488 cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
490 intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
/* Remember the current readings as the next baseline. */
493 cpu->prev_sample = now;
494 cpu->prev_idle_time_us = idle_time_us;
495 cpu->prev_aperf = aperf;
496 cpu->prev_mperf = mperf;
499 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
501 int sample_time, delay;
503 sample_time = cpu->pstate_policy->sample_rate_ms;
504 delay = msecs_to_jiffies(sample_time);
505 delay -= jiffies % delay;
506 mod_timer_pinned(&cpu->timer, jiffies + delay);
/*
 * Switch @cpu to idle mode. Called from the timer handler after the
 * P-state has been forced back to the maximum.
 */
509 static inline void intel_pstate_idle_mode(struct cpudata *cpu)
/*
 * Switch @cpu back to normal mode. Called by the idle adjustment path once
 * the current P-state has reached the minimum.
 */
514 static inline void intel_pstate_normal_mode(struct cpudata *cpu)
519 static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
522 int32_t core_busy, turbo_pstate, current_pstate;
524 core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
525 turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
526 current_pstate = int_tofp(cpu->pstate.current_pstate);
527 busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));
529 return fp_toint(busy_scaled);
/*
 * Feed the scaled busy figure into a PID controller and move the P-state
 * up or down by 'steps' depending on the controller output.
 */
532 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
540 busy_scaled = intel_pstate_get_scaled_busy(cpu);
542 ctl = pid_calc(pid, busy_scaled);
546 intel_pstate_pstate_increase(cpu, steps);
548 intel_pstate_pstate_decrease(cpu, steps);
/*
 * Idle-mode counterpart of intel_pstate_adjust_busy_pstate(): the idle PID
 * controller is fed (100 - scaled busy) and the P-state is stepped
 * accordingly. Once the minimum P-state is reached the CPU is switched
 * back to normal mode.
 */
551 static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu)
558 pid = &cpu->idle_pid;
560 busy_scaled = intel_pstate_get_scaled_busy(cpu);
/* The controller input is the complement of the busy figure. */
562 ctl = pid_calc(pid, 100 - busy_scaled);
566 intel_pstate_pstate_decrease(cpu, steps);
568 intel_pstate_pstate_increase(cpu, steps);
570 if (cpu->pstate.current_pstate == cpu->pstate.min_pstate)
571 intel_pstate_normal_mode(cpu);
/*
 * intel_pstate_timer_func - periodic timer handler.
 * @__data: the struct cpudata pointer for this CPU, cast to unsigned long
 *
 * Takes a new sample, runs the busy or idle adjustment, and re-arms the
 * timer for the next period.
 */
574 static void intel_pstate_timer_func(unsigned long __data)
576 struct cpudata *cpu = (struct cpudata *) __data;
578 intel_pstate_sample(cpu);
581 intel_pstate_adjust_busy_pstate(cpu);
583 intel_pstate_adjust_idle_pstate(cpu);
/*
 * Optional workaround, compiled in only when XPERF_FIX is defined: on
 * every fifth consecutive tick spent at the minimum P-state, force the
 * maximum P-state and switch to idle mode.
 */
585 #if defined(XPERF_FIX)
586 if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
587 cpu->min_pstate_count++;
588 if (!(cpu->min_pstate_count % 5)) {
589 intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
590 intel_pstate_idle_mode(cpu);
593 cpu->min_pstate_count = 0;
595 intel_pstate_set_sample_time(cpu);
/*
 * ICPU(model, policy) - build an x86_cpu_id entry for an Intel family 6
 * CPU of the given model, with the address of @policy as driver data.
 */
598 #define ICPU(model, policy) \
599 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy }
/* CPU models handled by this driver; both use default_policy. */
601 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
602 ICPU(0x2a, default_policy),
603 ICPU(0x2d, default_policy),
606 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
/*
 * intel_pstate_init_cpu - allocate and start per-CPU state.
 * @cpunum: CPU number; also the index into all_cpu_data[]
 *
 * Allocates the cpudata, reads the P-state limits, attaches the policy
 * from the matching CPU id entry, resets both PID controllers, takes a
 * baseline sample and starts the sampling timer on @cpunum.
 */
608 static int intel_pstate_init_cpu(unsigned int cpunum)
611 const struct x86_cpu_id *id;
614 id = x86_match_cpu(intel_pstate_cpu_ids);
618 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
619 if (!all_cpu_data[cpunum])
622 cpu = all_cpu_data[cpunum];
624 intel_pstate_get_cpu_pstates(cpu);
/* The id table stores the policy pointer as driver_data. */
628 (struct pstate_adjust_policy *)id->driver_data;
629 init_timer_deferrable(&cpu->timer);
630 cpu->timer.function = intel_pstate_timer_func;
/* First expiry is HZ/100 jiffies from now. */
633 cpu->timer.expires = jiffies + HZ/100;
634 intel_pstate_busy_pid_reset(cpu);
635 intel_pstate_idle_pid_reset(cpu);
/* The first sample only establishes the baseline readings. */
636 intel_pstate_sample(cpu);
637 intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
639 add_timer_on(&cpu->timer, cpunum);
641 pr_info("Intel pstate controlling: cpu %d\n", cpunum);
/*
 * cpufreq ->get callback: looks up the per-CPU state for @cpu_num and its
 * most recent sample.
 * presumably the sample's freq field is what gets returned -- TODO confirm.
 */
646 static unsigned int intel_pstate_get(unsigned int cpu_num)
648 struct sample *sample;
651 cpu = all_cpu_data[cpu_num];
654 sample = &cpu->samples[cpu->sample_ptr];
/*
 * cpufreq ->setpolicy callback: translate the policy's min/max frequencies
 * into the global percentage limits and their fixed-point fractions. The
 * performance policy overrides both limits with 100%.
 */
658 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
663 cpu = all_cpu_data[policy->cpu];
665 intel_pstate_get_min_max(cpu, &min, &max);
/* Minimum limit as a percentage of the maximum frequency, clamped to 0..100. */
667 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
668 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
669 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
/* Maximum limit, computed the same way. */
671 limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq;
672 limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100);
673 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
/* The performance policy pins both limits at 100% (fixed-point 1.0). */
675 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
676 limits.min_perf_pct = 100;
677 limits.min_perf = int_tofp(1);
678 limits.max_perf_pct = 100;
679 limits.max_perf = int_tofp(1);
/*
 * cpufreq ->verify callback: constrain the policy's frequency range to the
 * hardware limits, then check that the policy is either powersave or
 * performance.
 */
686 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
688 cpufreq_verify_within_limits(policy,
689 policy->cpuinfo.min_freq,
690 policy->cpuinfo.max_freq);
/* Any policy other than powersave or performance is singled out. */
692 if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
693 (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
699 static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy)
701 int cpu = policy->cpu;
703 del_timer(&all_cpu_data[cpu]->timer);
704 kfree(all_cpu_data[cpu]);
705 all_cpu_data[cpu] = NULL;
/*
 * cpufreq ->init callback: set up the per-CPU state for policy->cpu and
 * fill in the initial policy and the cpuinfo limits.
 */
709 static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy)
711 int rc, min_pstate, max_pstate;
714 rc = intel_pstate_init_cpu(policy->cpu);
718 cpu = all_cpu_data[policy->cpu];
/*
 * With turbo allowed and both limits at 100% the policy is reported as
 * performance; powersave is the other policy assigned here.
 */
720 if (!limits.no_turbo &&
721 limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
722 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
724 policy->policy = CPUFREQ_POLICY_POWERSAVE;
/* Frequencies are reported as the P-state multiplied by 100000. */
726 intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
727 policy->min = min_pstate * 100000;
728 policy->max = max_pstate * 100000;
730 /* cpuinfo and default policy values */
731 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
732 policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
733 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
/* Mark this CPU in the policy's CPU mask. */
734 cpumask_set_cpu(policy->cpu, policy->cpus);
/*
 * cpufreq driver description. The driver supplies ->setpolicy, taking
 * whole policies through intel_pstate_set_policy().
 */
739 static struct cpufreq_driver intel_pstate_driver = {
740 .flags = CPUFREQ_CONST_LOOPS,
741 .verify = intel_pstate_verify_policy,
742 .setpolicy = intel_pstate_set_policy,
743 .get = intel_pstate_get,
744 .init = intel_pstate_cpu_init,
745 .exit = intel_pstate_cpu_exit,
746 .name = "intel_pstate",
747 .owner = THIS_MODULE,
/*
 * Module exit: remove the sysfs group and debugfs tree, unregister the
 * cpufreq driver, then stop the timer and free the state of every online
 * CPU that still has one.
 */
750 static void intel_pstate_exit(void)
754 sysfs_remove_group(intel_pstate_kobject,
755 &intel_pstate_attr_group);
756 debugfs_remove_recursive(debugfs_parent);
758 cpufreq_unregister_driver(&intel_pstate_driver);
764 for_each_online_cpu(cpu) {
765 if (all_cpu_data[cpu]) {
/* Wait for a running handler before freeing the data it uses. */
766 del_timer_sync(&all_cpu_data[cpu]->timer);
767 kfree(all_cpu_data[cpu]);
774 module_exit(intel_pstate_exit);
/* presumably set by the "intel_pstate" early parameter to skip loading -- TODO confirm. */
776 static int __initdata no_load;
/*
 * Driver init: check that the CPU is in the supported id table, allocate
 * and zero the per-CPU pointer array, register the cpufreq driver and
 * expose the debugfs and sysfs tunables.
 */
778 static int __init intel_pstate_init(void)
781 const struct x86_cpu_id *id;
786 id = x86_match_cpu(intel_pstate_cpu_ids);
790 pr_info("Intel P-state driver initializing.\n");
/*
 * NOTE(review): the array holds num_possible_cpus() entries but is indexed
 * by CPU number elsewhere -- confirm CPU numbers cannot exceed that count.
 */
792 all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus());
795 memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus());
797 rc = cpufreq_register_driver(&intel_pstate_driver);
801 intel_pstate_debug_expose_params();
802 intel_pstate_sysfs_expose_params();
808 device_initcall(intel_pstate_init);
/*
 * Handler for the "intel_pstate" early parameter. The only value tested
 * for is "disable".
 */
810 static int __init intel_pstate_setup(char *str)
815 if (!strcmp(str, "disable"))
819 early_param("intel_pstate", intel_pstate_setup);
821 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
822 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
823 MODULE_LICENSE("GPL");