]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
x86: mce: Fix thermal throttling message storm
author Ingo Molnar <mingo@elte.hu>
Tue, 22 Sep 2009 13:50:24 +0000 (15:50 +0200)
committer Greg Kroah-Hartman <gregkh@suse.de>
Tue, 10 Nov 2009 00:23:07 +0000 (16:23 -0800)
commit b417c9fd8690637f0c91479435ab3e2bf450c038 upstream.

If a system switches back and forth between hot and cold mode,
the MCE code will print a stream of critical kernel messages.

Extend the throttling code to properly notice this, by
only printing the first hot + cold transition and omitting
the rest up to CHECK_INTERVAL (5 minutes).

This way we'll only get a single incident of:

 [  102.356584] CPU0: Temperature above threshold, cpu clock throttled (total events = 1)
 [  102.357000] Disabling lock debugging due to kernel taint
 [  102.369223] CPU0: Temperature/speed normal

every 5 minutes. The 'total events' count gives the number of cold/hot
transitions detected so far, should overheating occur again after 5 minutes:

[  402.357580] CPU0: Temperature above threshold, cpu clock throttled (total events = 24891)
[  402.358001] CPU0: Temperature/speed normal
[  450.704142] Machine check events logged

Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
arch/x86/kernel/cpu/mcheck/therm_throt.c

index 2951436ac0e30ac3d734b3319902d90223d113ba..600e7245de6d18db998f88269a202c8e1f3c8f95 100644 (file)
@@ -42,6 +42,7 @@ struct thermal_state {
 
        u64                     next_check;
        unsigned long           throttle_count;
+       unsigned long           last_throttle_count;
 };
 
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -120,11 +121,12 @@ static int therm_throt_process(bool is_throttled)
        if (is_throttled)
                state->throttle_count++;
 
-       if (!(was_throttled ^ is_throttled) &&
-                       time_before64(now, state->next_check))
+       if (time_before64(now, state->next_check) &&
+                       state->throttle_count != state->last_throttle_count)
                return 0;
 
        state->next_check = now + CHECK_INTERVAL;
+       state->last_throttle_count = state->throttle_count;
 
        /* if we just entered the thermal event */
        if (is_throttled) {