git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/cpufreq/intel_pstate.c
cpufreq: intel_pstate: Fix policy data management in passive mode
[karo-tx-linux.git] / drivers / cpufreq / intel_pstate.c
index 6e68b556305a681cc9788dfcd14efce83d0c15e3..283491f742d3d78659696bd58c48fc5a3bd7a370 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/sched/cpufreq.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
@@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y)
        return div64_u64(x << EXT_FRAC_BITS, y);
 }
 
+static inline int32_t percent_ext_fp(int percent)
+{
+       return div_ext_fp(percent, 100);
+}
+
 /**
  * struct sample -     Store performance sample
  * @core_avg_perf:     Ratio of APERF/MPERF which is the actual average
@@ -353,42 +358,24 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
+static bool driver_registered __read_mostly;
+
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits performance_limits = {
-       .no_turbo = 0,
-       .turbo_disabled = 0,
-       .max_perf_pct = 100,
-       .max_perf = int_ext_tofp(1),
-       .min_perf_pct = 100,
-       .min_perf = int_ext_tofp(1),
-       .max_policy_pct = 100,
-       .max_sysfs_pct = 100,
-       .min_policy_pct = 0,
-       .min_sysfs_pct = 0,
-};
-
-static struct perf_limits powersave_limits = {
-       .no_turbo = 0,
-       .turbo_disabled = 0,
-       .max_perf_pct = 100,
-       .max_perf = int_ext_tofp(1),
-       .min_perf_pct = 0,
-       .min_perf = 0,
-       .max_policy_pct = 100,
-       .max_sysfs_pct = 100,
-       .min_policy_pct = 0,
-       .min_sysfs_pct = 0,
-};
+static struct perf_limits global;
 
-#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
-static struct perf_limits *limits = &performance_limits;
-#else
-static struct perf_limits *limits = &powersave_limits;
-#endif
+static void intel_pstate_init_limits(struct perf_limits *limits)
+{
+       memset(limits, 0, sizeof(*limits));
+       limits->max_perf_pct = 100;
+       limits->max_perf = int_ext_tofp(1);
+       limits->max_policy_pct = 100;
+       limits->max_sysfs_pct = 100;
+}
 
+static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
 
 #ifdef CONFIG_ACPI
@@ -510,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
         * correct max turbo frequency based on the turbo state.
         * Also need to convert to MHz as _PSS freq is in MHz.
         */
-       if (!limits->turbo_disabled)
+       if (!global.turbo_disabled)
                cpu->acpi_perf_data.states[0].core_frequency =
                                        policy->cpuinfo.max_freq / 1000;
        cpu->valid_pss_table = true;
@@ -533,7 +520,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
        acpi_processor_unregister_performance(policy->cpu);
 }
-
 #else
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -630,7 +616,7 @@ static inline void update_turbo_state(void)
 
        cpu = all_cpu_data[0];
        rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
-       limits->turbo_disabled =
+       global.turbo_disabled =
                (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
@@ -854,12 +840,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 
 static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
-       int min, hw_min, max, hw_max, cpu, range, adj_range;
-       struct perf_limits *perf_limits = limits;
+       int min, hw_min, max, hw_max, cpu;
+       struct perf_limits *perf_limits = &global;
        u64 value, cap;
 
        for_each_cpu(cpu, policy->cpus) {
-               int max_perf_pct, min_perf_pct;
                struct cpudata *cpu_data = all_cpu_data[cpu];
                s16 epp;
 
@@ -868,26 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 
                rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
                hw_min = HWP_LOWEST_PERF(cap);
-               hw_max = HWP_HIGHEST_PERF(cap);
-               range = hw_max - hw_min;
+               if (global.no_turbo)
+                       hw_max = HWP_GUARANTEED_PERF(cap);
+               else
+                       hw_max = HWP_HIGHEST_PERF(cap);
 
-               max_perf_pct = perf_limits->max_perf_pct;
-               min_perf_pct = perf_limits->min_perf_pct;
+               max = fp_ext_toint(hw_max * perf_limits->max_perf);
+               if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+                       min = max;
+               else
+                       min = fp_ext_toint(hw_max * perf_limits->min_perf);
 
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
-               adj_range = min_perf_pct * range / 100;
-               min = hw_min + adj_range;
+
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);
 
-               adj_range = max_perf_pct * range / 100;
-               max = hw_min + adj_range;
-               if (limits->no_turbo) {
-                       hw_max = HWP_GUARANTEED_PERF(cap);
-                       if (hw_max < max)
-                               max = hw_max;
-               }
-
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
 
@@ -991,6 +972,7 @@ static void intel_pstate_update_policies(void)
 static int pid_param_set(void *data, u64 val)
 {
        *(u32 *)data = val;
+       pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
        intel_pstate_reset_all_pid();
        return 0;
 }
@@ -1002,37 +984,59 @@ static int pid_param_get(void *data, u64 *val)
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
 
+static struct dentry *debugfs_parent;
+
 struct pid_param {
        char *name;
        void *value;
+       struct dentry *dentry;
 };
 
 static struct pid_param pid_files[] = {
-       {"sample_rate_ms", &pid_params.sample_rate_ms},
-       {"d_gain_pct", &pid_params.d_gain_pct},
-       {"i_gain_pct", &pid_params.i_gain_pct},
-       {"deadband", &pid_params.deadband},
-       {"setpoint", &pid_params.setpoint},
-       {"p_gain_pct", &pid_params.p_gain_pct},
-       {NULL, NULL}
+       {"sample_rate_ms", &pid_params.sample_rate_ms},
+       {"d_gain_pct", &pid_params.d_gain_pct},
+       {"i_gain_pct", &pid_params.i_gain_pct},
+       {"deadband", &pid_params.deadband},
+       {"setpoint", &pid_params.setpoint},
+       {"p_gain_pct", &pid_params.p_gain_pct},
+       {NULL, NULL}
 };
 
-static void __init intel_pstate_debug_expose_params(void)
+static void intel_pstate_debug_expose_params(void)
 {
-       struct dentry *debugfs_parent;
-       int i = 0;
+       int i;
 
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
-       while (pid_files[i].name) {
-               debugfs_create_file(pid_files[i].name, 0660,
-                                   debugfs_parent, pid_files[i].value,
-                                   &fops_pid_param);
-               i++;
+
+       for (i = 0; pid_files[i].name; i++) {
+               struct dentry *dentry;
+
+               dentry = debugfs_create_file(pid_files[i].name, 0660,
+                                            debugfs_parent, pid_files[i].value,
+                                            &fops_pid_param);
+               if (!IS_ERR(dentry))
+                       pid_files[i].dentry = dentry;
        }
 }
 
+static void intel_pstate_debug_hide_params(void)
+{
+       int i;
+
+       if (IS_ERR_OR_NULL(debugfs_parent))
+               return;
+
+       for (i = 0; pid_files[i].name; i++) {
+               debugfs_remove(pid_files[i].dentry);
+               pid_files[i].dentry = NULL;
+       }
+
+       debugfs_remove(debugfs_parent);
+       debugfs_parent = NULL;
+}
+
 /************************** debugfs end ************************/
 
 /************************** sysfs begin ************************/
@@ -1040,9 +1044,37 @@ static void __init intel_pstate_debug_expose_params(void)
        static ssize_t show_##file_name                                 \
        (struct kobject *kobj, struct attribute *attr, char *buf)       \
        {                                                               \
-               return sprintf(buf, "%u\n", limits->object);            \
+               return sprintf(buf, "%u\n", global.object);             \
        }
 
+static ssize_t intel_pstate_show_status(char *buf);
+static int intel_pstate_update_status(const char *buf, size_t size);
+
+static ssize_t show_status(struct kobject *kobj,
+                          struct attribute *attr, char *buf)
+{
+       ssize_t ret;
+
+       mutex_lock(&intel_pstate_driver_lock);
+       ret = intel_pstate_show_status(buf);
+       mutex_unlock(&intel_pstate_driver_lock);
+
+       return ret;
+}
+
+static ssize_t store_status(struct kobject *a, struct attribute *b,
+                           const char *buf, size_t count)
+{
+       char *p = memchr(buf, '\n', count);
+       int ret;
+
+       mutex_lock(&intel_pstate_driver_lock);
+       ret = intel_pstate_update_status(buf, p ? p - buf : count);
+       mutex_unlock(&intel_pstate_driver_lock);
+
+       return ret < 0 ? ret : count;
+}
+
 static ssize_t show_turbo_pct(struct kobject *kobj,
                                struct attribute *attr, char *buf)
 {
@@ -1050,12 +1082,22 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
        int total, no_turbo, turbo_pct;
        uint32_t turbo_fp;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        cpu = all_cpu_data[0];
 
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
        turbo_fp = div_fp(no_turbo, total);
        turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
+
+       mutex_unlock(&intel_pstate_driver_lock);
+
        return sprintf(buf, "%u\n", turbo_pct);
 }
 
@@ -1065,8 +1107,18 @@ static ssize_t show_num_pstates(struct kobject *kobj,
        struct cpudata *cpu;
        int total;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        cpu = all_cpu_data[0];
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
+
+       mutex_unlock(&intel_pstate_driver_lock);
+
        return sprintf(buf, "%u\n", total);
 }
 
@@ -1075,11 +1127,20 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 {
        ssize_t ret;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        update_turbo_state();
-       if (limits->turbo_disabled)
-               ret = sprintf(buf, "%u\n", limits->turbo_disabled);
+       if (global.turbo_disabled)
+               ret = sprintf(buf, "%u\n", global.turbo_disabled);
        else
-               ret = sprintf(buf, "%u\n", limits->no_turbo);
+               ret = sprintf(buf, "%u\n", global.no_turbo);
+
+       mutex_unlock(&intel_pstate_driver_lock);
 
        return ret;
 }
@@ -1094,21 +1155,31 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
        if (ret != 1)
                return -EINVAL;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        mutex_lock(&intel_pstate_limits_lock);
 
        update_turbo_state();
-       if (limits->turbo_disabled) {
+       if (global.turbo_disabled) {
                pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                mutex_unlock(&intel_pstate_limits_lock);
+               mutex_unlock(&intel_pstate_driver_lock);
                return -EPERM;
        }
 
-       limits->no_turbo = clamp_t(int, input, 0, 1);
+       global.no_turbo = clamp_t(int, input, 0, 1);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
        intel_pstate_update_policies();
 
+       mutex_unlock(&intel_pstate_driver_lock);
+
        return count;
 }
 
@@ -1122,21 +1193,27 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
        if (ret != 1)
                return -EINVAL;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        mutex_lock(&intel_pstate_limits_lock);
 
-       limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf_pct = max(limits->min_perf_pct,
-                                  limits->max_perf_pct);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
+       global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
+       global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
+       global.max_perf = percent_ext_fp(global.max_perf_pct);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
        intel_pstate_update_policies();
 
+       mutex_unlock(&intel_pstate_driver_lock);
+
        return count;
 }
 
@@ -1150,27 +1227,34 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
        if (ret != 1)
                return -EINVAL;
 
+       mutex_lock(&intel_pstate_driver_lock);
+
+       if (!driver_registered) {
+               mutex_unlock(&intel_pstate_driver_lock);
+               return -EAGAIN;
+       }
+
        mutex_lock(&intel_pstate_limits_lock);
 
-       limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf_pct = min(limits->max_perf_pct,
-                                  limits->min_perf_pct);
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+       global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
+       global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
+       global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
+       global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
+       global.min_perf = percent_ext_fp(global.min_perf_pct);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
        intel_pstate_update_policies();
 
+       mutex_unlock(&intel_pstate_driver_lock);
+
        return count;
 }
 
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
+define_one_global_rw(status);
 define_one_global_rw(no_turbo);
 define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
@@ -1178,6 +1262,7 @@ define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
 
 static struct attribute *intel_pstate_attributes[] = {
+       &status.attr,
        &no_turbo.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
@@ -1280,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
        u32 vid;
 
        val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                val |= (u64)1 << 32;
 
        vid_fp = cpudata->vid.min + mul_fp(
@@ -1359,48 +1444,71 @@ static int core_get_max_pstate_physical(void)
        return (value >> 8) & 0xFF;
 }
 
+static int core_get_tdp_ratio(u64 plat_info)
+{
+       /* Check how many TDP levels present */
+       if (plat_info & 0x600000000) {
+               u64 tdp_ctrl;
+               u64 tdp_ratio;
+               int tdp_msr;
+               int err;
+
+               /* Get the TDP level (0, 1, 2) to get ratios */
+               err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+               if (err)
+                       return err;
+
+               /* TDP MSR are continuous starting at 0x648 */
+               tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
+               err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+               if (err)
+                       return err;
+
+               /* For level 1 and 2, bits[23:16] contain the ratio */
+               if (tdp_ctrl & 0x03)
+                       tdp_ratio >>= 16;
+
+               tdp_ratio &= 0xff; /* ratios are only 8 bits long */
+               pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
+
+               return (int)tdp_ratio;
+       }
+
+       return -ENXIO;
+}
+
 static int core_get_max_pstate(void)
 {
        u64 tar;
        u64 plat_info;
        int max_pstate;
+       int tdp_ratio;
        int err;
 
        rdmsrl(MSR_PLATFORM_INFO, plat_info);
        max_pstate = (plat_info >> 8) & 0xFF;
 
+       tdp_ratio = core_get_tdp_ratio(plat_info);
+       if (tdp_ratio <= 0)
+               return max_pstate;
+
+       if (hwp_active) {
+               /* Turbo activation ratio is not used on HWP platforms */
+               return tdp_ratio;
+       }
+
        err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
        if (!err) {
+               int tar_levels;
+
                /* Do some sanity checking for safety */
-               if (plat_info & 0x600000000) {
-                       u64 tdp_ctrl;
-                       u64 tdp_ratio;
-                       int tdp_msr;
-
-                       err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
-                       if (err)
-                               goto skip_tar;
-
-                       tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
-                       err = rdmsrl_safe(tdp_msr, &tdp_ratio);
-                       if (err)
-                               goto skip_tar;
-
-                       /* For level 1 and 2, bits[23:16] contain the ratio */
-                       if (tdp_ctrl)
-                               tdp_ratio >>= 16;
-
-                       tdp_ratio &= 0xff; /* ratios are only 8 bits long */
-                       if (tdp_ratio - 1 == tar) {
-                               max_pstate = tar;
-                               pr_debug("max_pstate=TAC %x\n", max_pstate);
-                       } else {
-                               goto skip_tar;
-                       }
+               tar_levels = tar & 0xff;
+               if (tdp_ratio - 1 == tar_levels) {
+                       max_pstate = tar_levels;
+                       pr_debug("max_pstate=TAC %x\n", max_pstate);
                }
        }
 
-skip_tar:
        return max_pstate;
 }
 
@@ -1427,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
        u64 val;
 
        val = (u64)pstate << 8;
-       if (limits->no_turbo && !limits->turbo_disabled)
+       if (global.no_turbo && !global.turbo_disabled)
                val |= (u64)1 << 32;
 
        return val;
@@ -1553,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;
-       struct perf_limits *perf_limits = limits;
+       struct perf_limits *perf_limits = &global;
 
-       if (limits->no_turbo || limits->turbo_disabled)
+       if (global.no_turbo || global.turbo_disabled)
                max_perf = cpu->pstate.max_pstate;
 
        if (per_cpu_limits)
@@ -1690,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 
        sample->busy_scaled = busy_frac * 100;
 
-       target = limits->no_turbo || limits->turbo_disabled ?
+       target = global.no_turbo || global.turbo_disabled ?
                        cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
        target += target >> 2;
        target = mul_fp(target, busy_frac);
@@ -1754,13 +1862,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 
        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
        pstate = clamp_t(int, pstate, min_perf, max_perf);
-       trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
        return pstate;
 }
 
 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 {
-       pstate = intel_pstate_prepare_request(cpu, pstate);
        if (pstate == cpu->pstate.current_pstate)
                return;
 
@@ -1780,6 +1886,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
        update_turbo_state();
 
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+       trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
        intel_pstate_update_pstate(cpu, target_pstate);
 
        sample = &cpu->sample;
@@ -1947,53 +2055,37 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
        synchronize_sched();
 }
 
-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
-{
-       limits->no_turbo = 0;
-       limits->turbo_disabled = 0;
-       limits->max_perf_pct = 100;
-       limits->max_perf = int_ext_tofp(1);
-       limits->min_perf_pct = 100;
-       limits->min_perf = int_ext_tofp(1);
-       limits->max_policy_pct = 100;
-       limits->max_sysfs_pct = 100;
-       limits->min_policy_pct = 0;
-       limits->min_sysfs_pct = 0;
-}
-
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
                                            struct perf_limits *limits)
 {
+       int32_t max_policy_perf, min_policy_perf;
 
-       limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
-                                             policy->cpuinfo.max_freq);
-       limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+       max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+       max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
        if (policy->max == policy->min) {
-               limits->min_policy_pct = limits->max_policy_pct;
+               min_policy_perf = max_policy_perf;
        } else {
-               limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
-                                                     policy->cpuinfo.max_freq);
-               limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
-                                                0, 100);
+               min_policy_perf = div_ext_fp(policy->min,
+                                            policy->cpuinfo.max_freq);
+               min_policy_perf = clamp_t(int32_t, min_policy_perf,
+                                         0, max_policy_perf);
        }
 
-       /* Normalize user input to [min_policy_pct, max_policy_pct] */
-       limits->min_perf_pct = max(limits->min_policy_pct,
-                                  limits->min_sysfs_pct);
-       limits->min_perf_pct = min(limits->max_policy_pct,
-                                  limits->min_perf_pct);
-       limits->max_perf_pct = min(limits->max_policy_pct,
-                                  limits->max_sysfs_pct);
-       limits->max_perf_pct = max(limits->min_policy_pct,
-                                  limits->max_perf_pct);
-
-       /* Make sure min_perf_pct <= max_perf_pct */
-       limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
-
-       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
-       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       /* Normalize user input to [min_perf, max_perf] */
+       limits->min_perf = max(min_policy_perf,
+                              percent_ext_fp(limits->min_sysfs_pct));
+       limits->min_perf = min(limits->min_perf, max_policy_perf);
+       limits->max_perf = min(max_policy_perf,
+                              percent_ext_fp(limits->max_sysfs_pct));
+       limits->max_perf = max(min_policy_perf, limits->max_perf);
+
+       /* Make sure min_perf <= max_perf */
+       limits->min_perf = min(limits->min_perf, limits->max_perf);
+
        limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
        limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+       limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
+       limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
 
        pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
                 limits->max_perf_pct, limits->min_perf_pct);
@@ -2002,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
        struct cpudata *cpu;
-       struct perf_limits *perf_limits = NULL;
+       struct perf_limits *perf_limits = &global;
 
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;
@@ -2025,28 +2117,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
        mutex_lock(&intel_pstate_limits_lock);
 
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
-               if (!perf_limits) {
-                       limits = &performance_limits;
-                       perf_limits = limits;
-               }
-               if (policy->max >= policy->cpuinfo.max_freq &&
-                   !limits->no_turbo) {
-                       pr_debug("set performance\n");
-                       intel_pstate_set_performance_limits(perf_limits);
-                       goto out;
-               }
-       } else {
-               pr_debug("set powersave\n");
-               if (!perf_limits) {
-                       limits = &powersave_limits;
-                       perf_limits = limits;
-               }
-
-       }
-
        intel_pstate_update_perf_limits(policy, perf_limits);
- out:
+
        if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
                /*
                 * NOHZ_FULL CPUs need this as the governor callback may not
@@ -2067,6 +2139,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
+       struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+       update_turbo_state();
+       policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
+                                       cpu->pstate.max_freq :
+                                       cpu->pstate.turbo_freq;
+
        cpufreq_verify_within_cpu_limits(policy);
 
        if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
@@ -2078,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
                unsigned int max_freq, min_freq;
 
                max_freq = policy->cpuinfo.max_freq *
-                                               limits->max_sysfs_pct / 100;
+                                       global.max_sysfs_pct / 100;
                min_freq = policy->cpuinfo.max_freq *
-                                               limits->min_sysfs_pct / 100;
+                                       global.min_sysfs_pct / 100;
                cpufreq_verify_within_limits(policy, min_freq, max_freq);
        }
 
@@ -2123,13 +2202,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
        cpu = all_cpu_data[policy->cpu];
 
-       /*
-        * We need sane value in the cpu->perf_limits, so inherit from global
-        * perf_limits limits, which are seeded with values based on the
-        * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
-        */
        if (per_cpu_limits)
-               memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
+               intel_pstate_init_limits(cpu->perf_limits);
 
        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2137,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
        /* cpuinfo and default policy values */
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.turbo_disabled ?
                        cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
        policy->cpuinfo.max_freq *= cpu->pstate.scaling;
 
@@ -2157,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
                return ret;
 
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-       if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+       if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
                policy->policy = CPUFREQ_POLICY_PERFORMANCE;
        else
                policy->policy = CPUFREQ_POLICY_POWERSAVE;
@@ -2181,46 +2255,16 @@ static struct cpufreq_driver intel_pstate = {
 static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 {
        struct cpudata *cpu = all_cpu_data[policy->cpu];
-       struct perf_limits *perf_limits = limits;
 
        update_turbo_state();
-       policy->cpuinfo.max_freq = limits->turbo_disabled ?
+       policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
                        cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 
        cpufreq_verify_within_cpu_limits(policy);
 
-       if (per_cpu_limits)
-               perf_limits = cpu->perf_limits;
-
-       mutex_lock(&intel_pstate_limits_lock);
-
-       intel_pstate_update_perf_limits(policy, perf_limits);
-
-       mutex_unlock(&intel_pstate_limits_lock);
-
        return 0;
 }
 
-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
-                                              struct cpufreq_policy *policy,
-                                              unsigned int target_freq)
-{
-       unsigned int max_freq;
-
-       update_turbo_state();
-
-       max_freq = limits->no_turbo || limits->turbo_disabled ?
-                       cpu->pstate.max_freq : cpu->pstate.turbo_freq;
-       policy->cpuinfo.max_freq = max_freq;
-       if (policy->max > max_freq)
-               policy->max = max_freq;
-
-       if (target_freq > max_freq)
-               target_freq = max_freq;
-
-       return target_freq;
-}
-
 static int intel_cpufreq_target(struct cpufreq_policy *policy,
                                unsigned int target_freq,
                                unsigned int relation)
@@ -2229,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
        struct cpufreq_freqs freqs;
        int target_pstate;
 
+       update_turbo_state();
+
        freqs.old = policy->cur;
-       freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+       freqs.new = target_freq;
 
        cpufreq_freq_transition_begin(policy, &freqs);
        switch (relation) {
@@ -2250,6 +2296,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
                wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
                              pstate_funcs.get_val(cpu, target_pstate));
        }
+       freqs.new = target_pstate * cpu->pstate.scaling;
        cpufreq_freq_transition_end(policy, &freqs, false);
 
        return 0;
@@ -2261,10 +2308,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
        struct cpudata *cpu = all_cpu_data[policy->cpu];
        int target_pstate;
 
-       target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+       update_turbo_state();
+
        target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+       target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
        intel_pstate_update_pstate(cpu, target_pstate);
-       return target_freq;
+       return target_pstate * cpu->pstate.scaling;
 }
 
 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -2294,6 +2343,113 @@ static struct cpufreq_driver intel_cpufreq = {
 
 static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
 
+static void intel_pstate_driver_cleanup(void)
+{
+       unsigned int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               if (all_cpu_data[cpu]) {
+                       if (intel_pstate_driver == &intel_pstate)
+                               intel_pstate_clear_update_util_hook(cpu);
+
+                       kfree(all_cpu_data[cpu]);
+                       all_cpu_data[cpu] = NULL;
+               }
+       }
+       put_online_cpus();
+}
+
+static int intel_pstate_register_driver(void)
+{
+       int ret;
+
+       intel_pstate_init_limits(&global);
+
+       ret = cpufreq_register_driver(intel_pstate_driver);
+       if (ret) {
+               intel_pstate_driver_cleanup();
+               return ret;
+       }
+
+       mutex_lock(&intel_pstate_limits_lock);
+       driver_registered = true;
+       mutex_unlock(&intel_pstate_limits_lock);
+
+       if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+           pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+               intel_pstate_debug_expose_params();
+
+       return 0;
+}
+
+static int intel_pstate_unregister_driver(void)
+{
+       if (hwp_active)
+               return -EBUSY;
+
+       if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+           pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+               intel_pstate_debug_hide_params();
+
+       mutex_lock(&intel_pstate_limits_lock);
+       driver_registered = false;
+       mutex_unlock(&intel_pstate_limits_lock);
+
+       cpufreq_unregister_driver(intel_pstate_driver);
+       intel_pstate_driver_cleanup();
+
+       return 0;
+}
+
+static ssize_t intel_pstate_show_status(char *buf)
+{
+       if (!driver_registered)
+               return sprintf(buf, "off\n");
+
+       return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
+                                       "active" : "passive");
+}
+
+static int intel_pstate_update_status(const char *buf, size_t size)
+{
+       int ret;
+
+       if (size == 3 && !strncmp(buf, "off", size))
+               return driver_registered ?
+                       intel_pstate_unregister_driver() : -EINVAL;
+
+       if (size == 6 && !strncmp(buf, "active", size)) {
+               if (driver_registered) {
+                       if (intel_pstate_driver == &intel_pstate)
+                               return 0;
+
+                       ret = intel_pstate_unregister_driver();
+                       if (ret)
+                               return ret;
+               }
+
+               intel_pstate_driver = &intel_pstate;
+               return intel_pstate_register_driver();
+       }
+
+       if (size == 7 && !strncmp(buf, "passive", size)) {
+               if (driver_registered) {
+                       if (intel_pstate_driver != &intel_pstate)
+                               return 0;
+
+                       ret = intel_pstate_unregister_driver();
+                       if (ret)
+                               return ret;
+               }
+
+               intel_pstate_driver = &intel_cpufreq;
+               return intel_pstate_register_driver();
+       }
+
+       return -EINVAL;
+}
+
 static int no_load __initdata;
 static int no_hwp __initdata;
 static int hwp_only __initdata;
@@ -2481,9 +2637,9 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 
 static int __init intel_pstate_init(void)
 {
-       int cpu, rc = 0;
        const struct x86_cpu_id *id;
        struct cpu_defaults *cpu_def;
+       int rc = 0;
 
        if (no_load)
                return -ENODEV;
@@ -2515,45 +2671,29 @@ hwp_cpu_matched:
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;
 
+       if (!hwp_active && hwp_only)
+               return -ENOTSUPP;
+
        pr_info("Intel P-state driver initializing\n");
 
        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
                return -ENOMEM;
 
-       if (!hwp_active && hwp_only)
-               goto out;
-
        intel_pstate_request_control_from_smm();
 
-       rc = cpufreq_register_driver(intel_pstate_driver);
-       if (rc)
-               goto out;
-
-       if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-           pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
-               intel_pstate_debug_expose_params();
-
        intel_pstate_sysfs_expose_params();
 
+       mutex_lock(&intel_pstate_driver_lock);
+       rc = intel_pstate_register_driver();
+       mutex_unlock(&intel_pstate_driver_lock);
+       if (rc)
+               return rc;
+
        if (hwp_active)
                pr_info("HWP enabled\n");
 
-       return rc;
-out:
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               if (all_cpu_data[cpu]) {
-                       if (intel_pstate_driver == &intel_pstate)
-                               intel_pstate_clear_update_util_hook(cpu);
-
-                       kfree(all_cpu_data[cpu]);
-               }
-       }
-
-       put_online_cpus();
-       vfree(all_cpu_data);
-       return -ENODEV;
+       return 0;
 }
 device_initcall(intel_pstate_init);