2.2 Powersave
2.3 Userspace
2.4 Ondemand
+2.5 Conservative
3. The Governor Interface in the CPUfreq Core
The CPUfreq govenor "ondemand" sets the CPU depending on the
current usage. To do this the CPU must have the capability to
-switch the frequency very fast.
-
-
+switch the frequency very quickly. There are a number of sysfs file
+accessible parameters:
+
+sampling_rate: measured in uS (10^-6 seconds), this is how often you
+want the kernel to look at the CPU usage and to make decisions on
+what to do about the frequency. Typically this is set to values of
+around '10000' or more.
+
+show_sampling_rate_(min|max): the minimum and maximum sampling rates
+available that you may set 'sampling_rate' to.
+
+up_threshold: defines what the average CPU usaged between the samplings
+of 'sampling_rate' needs to be for the kernel to make a decision on
+whether it should increase the frequency. For example when it is set
+to its default value of '80' it means that between the checking
+intervals the CPU needs to be on average more than 80% in use to then
+decide that the CPU frequency needs to be increased.
+
+sampling_down_factor: this parameter controls the rate that the CPU
+makes a decision on when to decrease the frequency. When set to its
+default value of '5' it means that at 1/5 the sampling_rate the kernel
+makes a decision to lower the frequency. Five "lower rate" decisions
+have to be made in a row before the CPU frequency is actually lower.
+If set to '1' then the frequency decreases as quickly as it increases,
+if set to '2' it decreases at half the rate of the increase.
+
+ignore_nice_load: this parameter takes a value of '0' or '1', when set
+to '0' (its default) then all processes are counted towards towards the
+'cpu utilisation' value. When set to '1' then processes that are
+run with a 'nice' value will not count (and thus be ignored) in the
+overal usage calculation. This is useful if you are running a CPU
+intensive calculation on your laptop that you do not care how long it
+takes to complete as you can 'nice' it and prevent it from taking part
+in the deciding process of whether to increase your CPU frequency.
+
+
+2.5 Conservative
+----------------
+
+The CPUfreq governor "conservative", much like the "ondemand"
+governor, sets the CPU depending on the current usage. It differs in
+behaviour in that it gracefully increases and decreases the CPU speed
+rather than jumping to max speed the moment there is any load on the
+CPU. This behaviour more suitable in a battery powered environment.
+The governor is tweaked in the same manner as the "ondemand" governor
+through sysfs with the addition of:
+
+freq_step: this describes what percentage steps the cpu freq should be
+increased and decreased smoothly by. By default the cpu frequency will
+increase in 5% chunks of your maximum cpu frequency. You can change this
+value to anywhere between 0 and 100 where '0' will effectively lock your
+CPU at a speed regardless of its load whilst '100' will, in theory, make
+it behave identically to the "ondemand" governor.
+
+down_threshold: same as the 'up_threshold' found for the "ondemand"
+governor but for the opposite direction. For example when set to its
+default value of '20' it means that if the CPU usage needs to be below
+20% between samples to have the frequency decreased.
3. The Governor Interface in the CPUfreq Core
=============================================
*/
static int nforce2_set_fsb(unsigned int fsb)
{
- u32 pll, temp = 0;
+ u32 temp = 0;
unsigned int tfsb;
int diff;
+ int pll = 0;
if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
#define PFX "powernow-k8: "
#define BFX PFX "BIOS error: "
-#define VERSION "version 1.50.4"
+#define VERSION "version 1.60.0"
#include "powernow-k8.h"
/* serialize freq changes */
do {
wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
- if (i++ > 100) {
- printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
- return 1;
- }
+ if (i++ > 100) {
+ printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+ return 1;
+ }
} while (query_current_values_with_pending_wait(data));
if (savefid != data->currfid) {
/* Phase 2 - core frequency transition */
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
{
- u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid;
+ u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
: vcoreqfid - vcocurrfid;
while (vcofiddiff > 2) {
+ (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
+
if (reqfid > data->currfid) {
if (data->currfid > LO_FID_TABLE_TOP) {
- if (write_new_fid(data, data->currfid + 2)) {
+ if (write_new_fid(data, data->currfid + fid_interval)) {
return 1;
}
} else {
}
}
} else {
- if (write_new_fid(data, data->currfid - 2))
+ if (write_new_fid(data, data->currfid - fid_interval))
return 1;
}
set_cpus_allowed(current, cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
+ printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
goto out;
}
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
- ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) {
+ ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
goto out;
}
printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
return -ENODEV;
}
- if ((pst[j].fid > MAX_FID)
- || (pst[j].fid & 1)
- || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) {
+ if (pst[j].fid > MAX_FID) {
+ printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j);
+ return -ENODEV;
+ }
+ if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
/* Only first fid is allowed to be in "low" range */
- printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
+ printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
return -EINVAL;
}
if (pst[j].fid < lastfid)
lastfid = pst[j].fid;
}
if (lastfid & 1) {
- printk(KERN_ERR PFX "lastfid invalid\n");
+ printk(KERN_ERR BFX "lastfid invalid\n");
return -EINVAL;
}
if (lastfid > LO_FID_TABLE_TOP)
- printk(KERN_INFO PFX "first fid not from lo freq table\n");
+ printk(KERN_INFO BFX "first fid not from lo freq table\n");
return 0;
}
dprintk("table vers: 0x%x\n", psb->tableversion);
if (psb->tableversion != PSB_VERSION_1_4) {
- printk(KERN_INFO BFX "PSB table is not v1.4\n");
+ printk(KERN_ERR BFX "PSB table is not v1.4\n");
return -ENODEV;
}
* BIOS and Kernel Developer's Guide, which is available on
* www.amd.com
*/
- printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+ printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
return -ENODEV;
}
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
+ printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
goto err_out;
}
cpumask_t oldmask = CPU_MASK_ALL;
int rc, i;
+ if (!cpu_online(pol->cpu))
+ return -ENODEV;
+
if (!check_supported_cpu(pol->cpu))
return -ENODEV;
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
+ printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
goto err_out;
}
cpufreq_unregister_driver(&cpufreq_amd64_driver);
}
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com.");
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
MODULE_LICENSE("GPL");
late_initcall(powernowk8_init);
module_exit(powernowk8_exit);
-
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_F 0x00040000
+#define CPUID_XMOD_REV_G 0x00060000
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
* low fid table
* - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
* in the low fid table
- * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid
+ * - the parts can only step at <= 200 MHz intervals, odd fid values are
+ * supported in revision G and later revisions.
* - lowest frequency must be >= interprocessor hypertransport link speed
* (only applies to MP systems obviously)
*/
/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
-#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */
+#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */
#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
#define MIN_FREQ 800 /* Min and max freqs, per spec */
#define MAX_FREQ 5000
-#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
+#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */
#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
#define VID_OFF 0x3f
*/
static unsigned int speedstep_processor = 0;
+static u32 pmbase;
/*
* There are only two frequency states for each processor. Values
/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ * speedstep_find_register - read the PMBASE address
*
- * Tries to change the SpeedStep state.
+ * Returns: -ENODEV if no register could be found
*/
-static void speedstep_set_state (unsigned int state)
+static int speedstep_find_register (void)
{
- u32 pmbase;
- u8 pm2_blk;
- u8 value;
- unsigned long flags;
-
- if (!speedstep_chipset_dev || (state > 0x1))
- return;
+ if (!speedstep_chipset_dev)
+ return -ENODEV;
/* get PMBASE */
pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
if (!(pmbase & 0x01)) {
printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
- return;
+ return -ENODEV;
}
pmbase &= 0xFFFFFFFE;
if (!pmbase) {
printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
- return;
+ return -ENODEV;
}
+ dprintk("pmbase is 0x%x\n", pmbase);
+ return 0;
+}
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ * Tries to change the SpeedStep state.
+ */
+static void speedstep_set_state (unsigned int state)
+{
+ u8 pm2_blk;
+ u8 value;
+ unsigned long flags;
+
+ if (state > 0x1)
+ return;
+
/* Disable IRQs */
local_irq_save(flags);
cpus_allowed = current->cpus_allowed;
set_cpus_allowed(current, policy->cpus);
- /* detect low and high frequency */
+ /* detect low and high frequency and transition latency */
result = speedstep_get_freqs(speedstep_processor,
&speedstep_freqs[SPEEDSTEP_LOW].frequency,
&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+ &policy->cpuinfo.transition_latency,
&speedstep_set_state);
set_cpus_allowed(current, cpus_allowed);
if (result)
/* cpuinfo and default policy values */
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
policy->cur = speed;
result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
return -EINVAL;
}
+ if (speedstep_find_register())
+ return -ENODEV;
+
return cpufreq_register_driver(&speedstep_driver);
}
unsigned int speedstep_get_freqs(unsigned int processor,
unsigned int *low_speed,
unsigned int *high_speed,
+ unsigned int *transition_latency,
void (*set_state) (unsigned int state))
{
unsigned int prev_speed;
unsigned int ret = 0;
unsigned long flags;
+ struct timeval tv1, tv2;
if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
return -EINVAL;
return -EIO;
dprintk("previous speed is %u\n", prev_speed);
-
+
local_irq_save(flags);
/* switch to low state */
dprintk("low speed is %u\n", *low_speed);
+ /* start latency measurement */
+ if (transition_latency)
+ do_gettimeofday(&tv1);
+
/* switch to high state */
set_state(SPEEDSTEP_HIGH);
+
+ /* end latency measurement */
+ if (transition_latency)
+ do_gettimeofday(&tv2);
+
*high_speed = speedstep_get_processor_frequency(processor);
if (!*high_speed) {
ret = -EIO;
if (*high_speed != prev_speed)
set_state(SPEEDSTEP_LOW);
+ if (transition_latency) {
+ *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
+ tv2.tv_usec - tv1.tv_usec;
+ dprintk("transition latency is %u uSec\n", *transition_latency);
+
+ /* convert uSec to nSec and add 20% for safety reasons */
+ *transition_latency *= 1200;
+
+ /* check if the latency measurement is too high or too low
+ * and set it to a safe value (500uSec) in that case
+ */
+ if (*transition_latency > 10000000 || *transition_latency < 50000) {
+ printk (KERN_WARNING "speedstep: frequency transition measured seems out of "
+ "range (%u nSec), falling back to a safe one of %u nSec.\n",
+ *transition_latency, 500000);
+ *transition_latency = 500000;
+ }
+ }
+
out:
local_irq_restore(flags);
return (ret);
extern unsigned int speedstep_get_freqs(unsigned int processor,
unsigned int *low_speed,
unsigned int *high_speed,
+ unsigned int *transition_latency,
void (*set_state) (unsigned int state));
result = speedstep_get_freqs(speedstep_processor,
&speedstep_freqs[SPEEDSTEP_LOW].frequency,
&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+ NULL,
&speedstep_set_state);
if (result) {
#include <linux/string.h>
#include <asm/semaphore.h>
#include <linux/seq_file.h>
+#include <linux/cpufreq.h>
/*
* Get CPU information for use by the procfs.
seq_printf(m, "stepping\t: unknown\n");
if ( cpu_has(c, X86_FEATURE_TSC) ) {
+ unsigned int freq = cpufreq_quick_get(n);
+ if (!freq)
+ freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- cpu_khz / 1000, (cpu_khz % 1000));
+ freq / 1000, (freq % 1000));
}
/* Cache size */
#include <linux/initrd.h>
#include <linux/platform.h>
#include <linux/pm.h>
+#include <linux/cpufreq.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
char family[32], features[128], *cp, sep;
struct cpuinfo_ia64 *c = v;
unsigned long mask;
+ unsigned int proc_freq;
int i;
mask = c->features;
sprintf(cp, " 0x%lx", mask);
}
+ proc_freq = cpufreq_quick_get(cpunum);
+ if (!proc_freq)
+ proc_freq = c->proc_freq / 1000;
+
seq_printf(m,
"processor : %d\n"
"vendor : %s\n"
"BogoMIPS : %lu.%02lu\n",
cpunum, c->vendor, family, c->model, c->revision, c->archrev,
features, c->ppn, c->number,
- c->proc_freq / 1000000, c->proc_freq % 1000000,
+ proc_freq / 1000, proc_freq % 1000,
c->itc_freq / 1000000, c->itc_freq % 1000000,
lpj*HZ/500000, (lpj*HZ/5000) % 100);
#ifdef CONFIG_SMP
#include <linux/edd.h>
#include <linux/mmzone.h>
#include <linux/kexec.h>
+#include <linux/cpufreq.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
seq_printf(m, "stepping\t: unknown\n");
if (cpu_has(c,X86_FEATURE_TSC)) {
+ unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+ if (!freq)
+ freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- cpu_khz / 1000, (cpu_khz % 1000));
+ freq / 1000, (freq % 1000));
}
/* Cache size */
}
+/**
+ * cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur
+ * @cpu: CPU number
+ *
+ * This is the last known freq, without actually getting it from the driver.
+ * Return value will be same as what is shown in scaling_cur_freq in sysfs.
+ */
+unsigned int cpufreq_quick_get(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ unsigned int ret = 0;
+
+ if (policy) {
+ down(&policy->lock);
+ ret = policy->cur;
+ up(&policy->lock);
+ cpufreq_cpu_put(policy);
+ }
+
+ return (ret);
+}
+EXPORT_SYMBOL(cpufreq_quick_get);
+
+
/**
* cpufreq_get - get the current CPU frequency (in kHz)
* @cpu: CPU number
{
return kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
- ( !dbs_tuners_ins.ignore_nice ?
+ ( dbs_tuners_ins.ignore_nice ?
kstat_cpu(cpu).cpustat.nice :
0);
}
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
-show_one(ignore_nice, ignore_nice);
+show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
return count;
}
-static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
+static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
unsigned int input;
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
-define_one_rw(ignore_nice);
+define_one_rw(ignore_nice_load);
define_one_rw(freq_step);
static struct attribute * dbs_attributes[] = {
&sampling_down_factor.attr,
&up_threshold.attr,
&down_threshold.attr,
- &ignore_nice.attr,
+ &ignore_nice_load.attr,
&freq_step.attr,
NULL
};
{
return kstat_cpu(cpu).cpustat.idle +
kstat_cpu(cpu).cpustat.iowait +
- ( !dbs_tuners_ins.ignore_nice ?
+ ( dbs_tuners_ins.ignore_nice ?
kstat_cpu(cpu).cpustat.nice :
0);
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
-show_one(ignore_nice, ignore_nice);
+show_one(ignore_nice_load, ignore_nice);
static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
const char *buf, size_t count)
return count;
}
-static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
+static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
unsigned int input;
define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
-define_one_rw(ignore_nice);
+define_one_rw(ignore_nice_load);
static struct attribute * dbs_attributes[] = {
&sampling_rate_max.attr,
&sampling_rate.attr,
&sampling_down_factor.attr,
&up_threshold.attr,
- &ignore_nice.attr,
+ &ignore_nice_load.attr,
NULL
};
/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */
unsigned int cpufreq_get(unsigned int cpu);
+/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */
+#ifdef CONFIG_CPU_FREQ
+unsigned int cpufreq_quick_get(unsigned int cpu);
+#else
+static inline unsigned int cpufreq_quick_get(unsigned int cpu)
+{
+ return 0;
+}
+#endif
+
/*********************************************************************
* CPUFREQ DEFAULT GOVERNOR *