From 7de32556dfc62b9e1203730cc26b71292da8a244 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Mar 2017 00:57:39 +0100 Subject: [PATCH] cpufreq: intel_pstate: One set of global limits in active mode In the active mode intel_pstate currently uses two sets of global limits, each associated with one of the possible scaling_governor settings in that mode: "powersave" or "performance". The driver switches over from one of those sets to the other depending on the scaling_governor setting for the CPU whose per-policy cpufreq interface in sysfs was last used to change parameters exposed in there. That obviously leads to no end of issues when the scaling_governor settings differ between CPUs. The most recent issue was introduced by commit a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) that eliminated the reinitialization of "performance" limits in intel_pstate_set_policy() preventing the max limit from being set to anything below 100, among other things. Namely, an undesirable side effect of commit a240c4aa5d0f is that now, after setting scaling_governor to "performance" in the active mode, the per-policy limits for the CPU in question go to the highest level and stay there even when it is switched back to "powersave" later. As it turns out, some distributions set scaling_governor to "performance" temporarily for all CPUs to speed up system initialization, so that change causes them to misbehave later. To fix that, get rid of the performance/powersave global limits split and use just one set of global limits for everything. From the user's perspective, after this modification, when scaling_governor is switched from "performance" to "powersave" or the other way around on one CPU, the limits settings (i.e. the global max/min_perf_pct and per-policy scaling_max/min_freq for any CPUs) will not change. 
Still, switching from "performance" to "powersave" or the other way around changes the way in which P-states are selected and in particular "performance" causes the driver to always request the highest P-state it is allowed to ask for for the given CPU. Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 142 +++++++++++---------------------- 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08e134ffba68..7b07803e7042 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,9 +364,7 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits; -static struct perf_limits powersave_limits; -static struct perf_limits *limits; +static struct perf_limits global; static void intel_pstate_init_limits(struct perf_limits *limits) { @@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits) limits->max_sysfs_pct = 100; } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - intel_pstate_init_limits(limits); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - limits->min_sysfs_pct = 100; -} - static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. 
*/ - if (!limits->turbo_disabled) + if (!global.turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; @@ -626,7 +616,7 @@ static inline void update_turbo_state(void) cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - limits->turbo_disabled = + global.turbo_disabled = (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } @@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; u64 value, cap; for_each_cpu(cpu, policy->cpus) { @@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); hw_min = HWP_LOWEST_PERF(cap); - if (limits->no_turbo) + if (global.no_turbo) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - min = fp_ext_toint(hw_max * perf_limits->min_perf); + max = fp_ext_toint(hw_max * perf_limits->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) - __releases(&intel_pstate_limits_lock) - __acquires(&intel_pstate_limits_lock) { - struct perf_limits *saved_limits = limits; int cpu; - mutex_unlock(&intel_pstate_limits_lock); - for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); - - mutex_lock(&intel_pstate_limits_lock); - - limits = saved_limits; } /************************** debugfs begin 
************************/ @@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void) static ssize_t show_##file_name \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", limits->object); \ + return sprintf(buf, "%u\n", global.object); \ } static ssize_t intel_pstate_show_status(char *buf); @@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj, } update_turbo_state(); - if (limits->turbo_disabled) - ret = sprintf(buf, "%u\n", limits->turbo_disabled); + if (global.turbo_disabled) + ret = sprintf(buf, "%u\n", global.turbo_disabled); else - ret = sprintf(buf, "%u\n", limits->no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); update_turbo_state(); - if (limits->turbo_disabled) { + if (global.turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); mutex_unlock(&intel_pstate_driver_lock); return -EPERM; } - limits->no_turbo = clamp_t(int, input, 0, 1); - - intel_pstate_update_policies(); + global.no_turbo = clamp_t(int, input, 0, 1); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); - limits->max_perf_pct = max(limits->min_perf_pct, - limits->max_perf_pct); - limits->max_perf = percent_ext_fp(limits->max_perf_pct); - - intel_pstate_update_policies(); + global.max_sysfs_pct = clamp_t(int, input, 0 , 100); + global.max_perf_pct = 
min(global.max_policy_pct, global.max_sysfs_pct); + global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); + global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); + global.max_perf = percent_ext_fp(global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->min_perf_pct = min(limits->max_perf_pct, - limits->min_perf_pct); - limits->min_perf = percent_ext_fp(limits->min_perf_pct); - - intel_pstate_update_policies(); + global.min_sysfs_pct = clamp_t(int, input, 0 , 100); + global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); + global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); + global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); + global.min_perf = percent_ext_fp(global.min_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int 
max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; - if (limits->no_turbo || limits->turbo_disabled) + if (global.no_turbo || global.turbo_disabled) max_perf = cpu->pstate.max_pstate; if (per_cpu_limits) @@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = limits->no_turbo || limits->turbo_disabled ? + target = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = NULL; + struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("set performance\n"); - if (!perf_limits) { - limits = &performance_limits; - perf_limits = limits; - } - } else { - pr_debug("set powersave\n"); - if (!perf_limits) { - limits = &powersave_limits; - perf_limits = limits; - } - - } - intel_pstate_update_perf_limits(policy, perf_limits); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits; - - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - perf_limits = &performance_limits; - else - perf_limits = &powersave_limits; update_turbo_state(); - policy->cpuinfo.max_freq = perf_limits->turbo_disabled || - perf_limits->no_turbo 
? + policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - perf_limits->max_sysfs_pct / 100; + global.max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - perf_limits->min_sysfs_pct / 100; + global.min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } @@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; @@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, update_turbo_state(); - max_freq = limits->no_turbo || limits->turbo_disabled ? + max_freq = global.no_turbo || global.turbo_disabled ? 
cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->cpuinfo.max_freq = max_freq; if (policy->max > max_freq) @@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void) { int ret; - intel_pstate_init_limits(&powersave_limits); - intel_pstate_set_performance_limits(&performance_limits); - if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && - intel_pstate_driver == &intel_pstate) - limits = &performance_limits; - else - limits = &powersave_limits; + intel_pstate_init_limits(&global); ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { -- 2.39.2