kernel/watchdog.c: avoid races between /proc handlers and CPU hotplug
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index a6ffa43f299301dd750e9be092975df0d5e83786..13fdda1a4c91b52e14076026304b4188b49a4ad7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
+#include <linux/kthread.h>
 
 /*
  * The run state of the lockup detectors is controlled by the content of the
@@ -56,8 +57,10 @@ int __read_mostly watchdog_thresh = 10;
 
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #else
 #define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -66,7 +69,26 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 #define for_each_watchdog_cpu(cpu) \
        for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
 
+/*
+ * The 'watchdog_running' variable is set to 1 when the watchdog threads
+ * are registered/started and is set to 0 when the watchdog threads are
+ * unregistered/stopped, so it is an indicator whether the threads exist.
+ */
 static int __read_mostly watchdog_running;
+/*
+ * If a subsystem has a need to deactivate the watchdog temporarily, it
+ * can use the suspend/resume interface to achieve this. The content of
+ * the 'watchdog_suspended' variable reflects this state. Existing threads
+ * are parked/unparked by the lockup_detector_{suspend|resume} functions
+ * (see comment blocks pertaining to those functions for further details).
+ *
+ * 'watchdog_suspended' also prevents threads from being registered/started
+ * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
+ * of 'watchdog_running' cannot change while the watchdog is deactivated
+ * temporarily (see related code in 'proc' handlers).
+ */
+static int __read_mostly watchdog_suspended;
+
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
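
[Annotation] The comment block above describes the new suspend/resume interface from the caller's side. A minimal sketch of the intended usage, assuming a hypothetical subsystem that must keep the NMI watchdog quiet for a while (the helper name is made up; lockup_detector_suspend() returns 0 on success, and only a successful call is paired with lockup_detector_resume()):

	if (!lockup_detector_suspend()) {
		/* watchdog threads are parked here */
		do_work_that_conflicts_with_the_nmi_watchdog();	/* hypothetical */
		lockup_detector_resume();
	}

Note that in this version the CPU hotplug lock is taken in lockup_detector_suspend() and only released in lockup_detector_resume(), so CPUs cannot come or go while the detectors are suspended.
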
@@ -90,8 +112,9 @@ static unsigned long soft_lockup_nmi_warn;
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic =
+unsigned int __read_mostly hardlockup_panic =
                        CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
 /*
  * We may not want to enable hard lockup detection by default in all cases,
  * for example when running the kernel as a guest on a hypervisor. In these
@@ -153,6 +176,13 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
        return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+       sysctl_hardlockup_all_cpu_backtrace =
+               !!simple_strtol(str, NULL, 0);
+       return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
 
 /*
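
[Annotation] The new knob mirrors the existing softlockup_all_cpu_backtrace= parameter just above; for example, booting with

	hardlockup_all_cpu_backtrace=1

requests a backtrace of all CPUs when a hard lockup is reported (see the handling in watchdog_overflow_callback() below).
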
@@ -243,15 +273,15 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(void)
+static bool is_hardlockup(void)
 {
        unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
        if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-               return 1;
+               return true;
 
        __this_cpu_write(hrtimer_interrupts_saved, hrint);
-       return 0;
+       return false;
 }
 #endif
 
@@ -298,17 +328,30 @@ static void watchdog_overflow_callback(struct perf_event *event,
         */
        if (is_hardlockup()) {
                int this_cpu = smp_processor_id();
+               struct pt_regs *regs = get_irq_regs();
 
                /* only print hardlockups once */
                if (__this_cpu_read(hard_watchdog_warn) == true)
                        return;
 
-               if (hardlockup_panic)
-                       panic("Watchdog detected hard LOCKUP on cpu %d",
-                             this_cpu);
+               pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+               print_modules();
+               print_irqtrace_events(current);
+               if (regs)
+                       show_regs(regs);
                else
-                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
-                            this_cpu);
+                       dump_stack();
+
+               /*
+                * Perform all-CPU dump only once to avoid multiple hardlockups
+                * generating interleaving traces
+                */
+               if (sysctl_hardlockup_all_cpu_backtrace &&
+                               !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+                       trigger_allbutself_cpu_backtrace();
+
+               if (hardlockup_panic)
+                       panic("Hard LOCKUP");
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
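
[Annotation] Two details in the hunk above are worth noting: the bit in 'hardlockup_allcpu_dumped' is never cleared, so only the first hard lockup triggers trigger_allbutself_cpu_backtrace() and later lockups report only their own CPU, keeping the traces readable; and the panic string shrinks to "Hard LOCKUP" because the detailed per-CPU report (modules, irqtrace events, registers or stack) has already been printed via pr_emerg() just above.
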
@@ -327,6 +370,9 @@ static void watchdog_interrupt_count(void)
 static int watchdog_nmi_enable(unsigned int cpu);
 static void watchdog_nmi_disable(unsigned int cpu);
 
+static int watchdog_enable_all_cpus(void);
+static void watchdog_disable_all_cpus(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -613,46 +659,9 @@ static void watchdog_nmi_disable(unsigned int cpu)
        }
 }
 
-void watchdog_nmi_enable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-               goto unlock;
-
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               watchdog_nmi_enable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
-
-void watchdog_nmi_disable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!watchdog_running)
-               goto unlock;
-
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               watchdog_nmi_disable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
 static void watchdog_nmi_disable(unsigned int cpu) { return; }
-void watchdog_nmi_enable_all(void) {}
-void watchdog_nmi_disable_all(void) {}
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 static struct smp_hotplug_thread watchdog_threads = {
@@ -666,46 +675,105 @@ static struct smp_hotplug_thread watchdog_threads = {
        .unpark                 = watchdog_enable,
 };
 
-static void restart_watchdog_hrtimer(void *info)
+/*
+ * park all watchdog threads that are specified in 'watchdog_cpumask'
+ *
+ * This function returns an error if kthread_park() of a watchdog thread
+ * fails. In this situation, the watchdog threads of some CPUs can already
+ * be parked and the watchdog threads of other CPUs can still be runnable.
+ * Callers are expected to handle this special condition as appropriate in
+ * their context.
+ */
+static int watchdog_park_threads(void)
 {
-       struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
-       int ret;
+       int cpu, ret = 0;
+
+       get_online_cpus();
+       for_each_watchdog_cpu(cpu) {
+               ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
+               if (ret)
+                       break;
+       }
+       put_online_cpus();
+
+       return ret;
+}
 
+/*
+ * unpark all watchdog threads that are specified in 'watchdog_cpumask'
+ */
+static void watchdog_unpark_threads(void)
+{
+       int cpu;
+
+       get_online_cpus();
+       for_each_watchdog_cpu(cpu)
+               kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+       put_online_cpus();
+}
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+       int ret = 0;
+
+       get_online_cpus();
+       mutex_lock(&watchdog_proc_mutex);
        /*
-        * No need to cancel and restart hrtimer if it is currently executing
-        * because it will reprogram itself with the new period now.
-        * We should never see it unqueued here because we are running per-cpu
-        * with interrupts disabled.
+        * Multiple suspend requests can be active in parallel (counted by
+        * the 'watchdog_suspended' variable). If the watchdog threads are
+        * running, the first caller takes care that they will be parked.
+        * The state of 'watchdog_running' cannot change while a suspend
+        * request is active (see related code in 'proc' handlers).
         */
-       ret = hrtimer_try_to_cancel(hrtimer);
-       if (ret == 1)
-               hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-                               HRTIMER_MODE_REL_PINNED);
+       if (watchdog_running && !watchdog_suspended)
+               ret = watchdog_park_threads();
+
+       if (ret == 0)
+               watchdog_suspended++;
+       else {
+               watchdog_disable_all_cpus();
+               pr_err("Failed to suspend lockup detectors, disabled\n");
+               watchdog_enabled = 0;
+       }
+
+       mutex_unlock(&watchdog_proc_mutex);
+
+       return ret;
 }
 
-static void update_watchdog(int cpu)
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
 {
+       mutex_lock(&watchdog_proc_mutex);
+
+       watchdog_suspended--;
        /*
-        * Make sure that perf event counter will adopt to a new
-        * sampling period. Updating the sampling period directly would
-        * be much nicer but we do not have an API for that now so
-        * let's use a big hammer.
-        * Hrtimer will adopt the new period on the next tick but this
-        * might be late already so we have to restart the timer as well.
+        * The watchdog threads are unparked if they were previously running
+        * and if there is no more active suspend request.
         */
-       watchdog_nmi_disable(cpu);
-       smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
-       watchdog_nmi_enable(cpu);
+       if (watchdog_running && !watchdog_suspended)
+               watchdog_unpark_threads();
+
+       mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
 }
 
-static void update_watchdog_all_cpus(void)
+static int update_watchdog_all_cpus(void)
 {
-       int cpu;
+       int ret;
 
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               update_watchdog(cpu);
-       put_online_cpus();
+       ret = watchdog_park_threads();
+       if (ret)
+               return ret;
+
+       watchdog_unpark_threads();
+
+       return 0;
 }
 
 static int watchdog_enable_all_cpus(void)
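
[Annotation] The park/unpark round trip in update_watchdog_all_cpus() is what makes 'on the fly' updates work: for smpboot threads, kthread_park() runs the thread's .park callback and kthread_unpark() runs .unpark on each CPU, i.e. the watchdog_disable()/watchdog_enable() pair (the .unpark hook is visible in the watchdog_threads definition above). A rough sketch of the effect on one CPU, not the literal call chain:

	kthread_park(worker);	/* -> watchdog_disable(): hrtimer stopped, NMI perf event released */
	kthread_unpark(worker);	/* -> watchdog_enable(): both re-created with the current
				 *    'watchdog_enabled' bits, 'sample_period' and 'watchdog_thresh' */
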
@@ -713,29 +781,31 @@ static int watchdog_enable_all_cpus(void)
        int err = 0;
 
        if (!watchdog_running) {
-               err = smpboot_register_percpu_thread(&watchdog_threads);
+               err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+                                                            &watchdog_cpumask);
                if (err)
                        pr_err("Failed to create watchdog threads, disabled\n");
-               else {
-                       if (smpboot_update_cpumask_percpu_thread(
-                                   &watchdog_threads, &watchdog_cpumask))
-                               pr_err("Failed to set cpumask for watchdog threads\n");
+               else
                        watchdog_running = 1;
-               }
        } else {
                /*
                 * Enable/disable the lockup detectors or
                 * change the sample period 'on the fly'.
                 */
-               update_watchdog_all_cpus();
+               err = update_watchdog_all_cpus();
+
+               if (err) {
+                       watchdog_disable_all_cpus();
+                       pr_err("Failed to update lockup detectors, disabled\n");
+               }
        }
 
+       if (err)
+               watchdog_enabled = 0;
+
        return err;
 }
 
-/* prepare/enable/disable routines */
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
 static void watchdog_disable_all_cpus(void)
 {
        if (watchdog_running) {
@@ -744,6 +814,8 @@ static void watchdog_disable_all_cpus(void)
        }
 }
 
+#ifdef CONFIG_SYSCTL
+
 /*
  * Update the run state of the lockup detectors.
  */
@@ -785,8 +857,15 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
        int err, old, new;
        int *watchdog_param = (int *)table->data;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        /*
         * If the parameter is being read return the state of the corresponding
         * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
@@ -820,15 +899,17 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                } while (cmpxchg(&watchdog_enabled, old, new) != old);
 
                /*
-                * Update the run state of the lockup detectors.
-                * Restore 'watchdog_enabled' on failure.
+                * Update the run state of the lockup detectors. There is _no_
+                * need to check the value returned by proc_watchdog_update()
+                * and to restore the previous value of 'watchdog_enabled' as
+                * both lockup detectors are disabled if proc_watchdog_update()
+                * returns an error.
                 */
                err = proc_watchdog_update();
-               if (err)
-                       watchdog_enabled = old;
        }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
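
[Annotation] From user space the new 'watchdog_suspended' check is visible as a transient failure: while a suspend request is active, accessing /proc/sys/kernel/watchdog, nmi_watchdog or soft_watchdog returns -EAGAIN instead of racing with the parked threads (the check sits before the read/write split, so reads are rejected as well). The same check is added to proc_watchdog_thresh() and proc_watchdog_cpumask() below.
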
 
@@ -870,8 +951,15 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
 {
        int err, old;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        old = ACCESS_ONCE(watchdog_thresh);
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
@@ -879,15 +967,17 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
                goto out;
 
        /*
-        * Update the sample period.
-        * Restore 'watchdog_thresh' on failure.
+        * Update the sample period. Restore on failure.
         */
        set_sample_period();
        err = proc_watchdog_update();
-       if (err)
+       if (err) {
                watchdog_thresh = old;
+               set_sample_period();
+       }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
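
[Annotation] Note why set_sample_period() is called a second time in the error path above: the sample period is derived from 'watchdog_thresh', so after restoring the old threshold the period has to be recomputed as well; otherwise the detectors would keep running with a period based on the rejected value.
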
 
@@ -902,7 +992,15 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 {
        int err;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
+
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
        if (!err && write) {
                /* Remove impossible cpus to keep sysctl output cleaner. */
@@ -920,7 +1018,9 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
                                pr_err("cpumask update failed\n");
                }
        }
+out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
 
@@ -932,10 +1032,8 @@ void __init lockup_detector_init(void)
 
 #ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_enabled()) {
-               if (!cpumask_empty(tick_nohz_full_mask))
-                       pr_info("Disabling watchdog on nohz_full cores by default\n");
-               cpumask_andnot(&watchdog_cpumask, cpu_possible_mask,
-                              tick_nohz_full_mask);
+               pr_info("Disabling watchdog on nohz_full cores by default\n");
+               cpumask_copy(&watchdog_cpumask, housekeeping_mask);
        } else
                cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
 #else