kernel/watchdog.c: avoid races between /proc handlers and CPU hotplug
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index a6ffa43f299301dd750e9be092975df0d5e83786..13fdda1a4c91b52e14076026304b4188b49a4ad7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
+#include <linux/kthread.h>
 
 /*
  * The run state of the lockup detectors is controlled by the content of the
@@ -56,8 +57,10 @@ int __read_mostly watchdog_thresh = 10;
 
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #else
 #define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -66,7 +69,26 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 #define for_each_watchdog_cpu(cpu) \
        for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
 
+/*
+ * The 'watchdog_running' variable is set to 1 when the watchdog threads
+ * are registered/started and is set to 0 when the watchdog threads are
+ * unregistered/stopped, so it is an indicator whether the threads exist.
+ */
 static int __read_mostly watchdog_running;
+/*
+ * If a subsystem has a need to deactivate the watchdog temporarily, it
+ * can use the suspend/resume interface to achieve this. The content of
+ * the 'watchdog_suspended' variable reflects this state. Existing threads
+ * are parked/unparked by the lockup_detector_{suspend|resume} functions
+ * (see comment blocks pertaining to those functions for further details).
+ *
+ * 'watchdog_suspended' also prevents threads from being registered/started
+ * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
+ * of 'watchdog_running' cannot change while the watchdog is deactivated
+ * temporarily (see related code in 'proc' handlers).
+ */
+static int __read_mostly watchdog_suspended;
+
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
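
[Annotation] The comment block above describes the new suspend/resume interface from the caller's side. A minimal sketch of the intended usage, assuming a hypothetical subsystem that must keep the NMI watchdog quiet for a while (the helper name is made up; lockup_detector_suspend() returns 0 on success, and only a successful call is paired with lockup_detector_resume()):

	if (!lockup_detector_suspend()) {
		/* watchdog threads are parked here */
		do_work_that_conflicts_with_the_nmi_watchdog();	/* hypothetical */
		lockup_detector_resume();
	}

Note that in this version the CPU hotplug lock is taken in lockup_detector_suspend() and only released in lockup_detector_resume(), so CPUs cannot come or go while the detectors are suspended.
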
@@ -90,8 +112,9 @@ static unsigned long soft_lockup_nmi_warn;
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic =
+unsigned int __read_mostly hardlockup_panic =
                        CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
 /*
  * We may not want to enable hard lockup detection by default in all cases,
  * for example when running the kernel as a guest on a hypervisor. In these
@@ -153,6 +176,13 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
        return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+       sysctl_hardlockup_all_cpu_backtrace =
+               !!simple_strtol(str, NULL, 0);
+       return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
 
 /*
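
[Annotation] The new knob mirrors the existing softlockup_all_cpu_backtrace= parameter just above; for example, booting with

	hardlockup_all_cpu_backtrace=1

requests a backtrace of all CPUs when a hard lockup is reported (see the handling in watchdog_overflow_callback() below).
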
@@ -243,15 +273,15 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(void)
+static bool is_hardlockup(void)
 {
        unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
        if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-               return 1;
+               return true;
 
        __this_cpu_write(hrtimer_interrupts_saved, hrint);
-       return 0;
+       return false;
 }
 #endif
 
@@ -298,17 +328,30 @@ static void watchdog_overflow_callback(struct perf_event *event,
         */
        if (is_hardlockup()) {
                int this_cpu = smp_processor_id();
+               struct pt_regs *regs = get_irq_regs();
 
                /* only print hardlockups once */
                if (__this_cpu_read(hard_watchdog_warn) == true)
                        return;
 
-               if (hardlockup_panic)
-                       panic("Watchdog detected hard LOCKUP on cpu %d",
-                             this_cpu);
+               pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+               print_modules();
+               print_irqtrace_events(current);
+               if (regs)
+                       show_regs(regs);
                else
-                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
-                            this_cpu);
+                       dump_stack();
+
+               /*
+                * Perform all-CPU dump only once to avoid multiple hardlockups
+                * generating interleaving traces
+                */
+               if (sysctl_hardlockup_all_cpu_backtrace &&
+                               !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+                       trigger_allbutself_cpu_backtrace();
+
+               if (hardlockup_panic)
+                       panic("Hard LOCKUP");
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
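
[Annotation] Two details in the hunk above are worth noting: the bit in 'hardlockup_allcpu_dumped' is never cleared, so only the first hard lockup triggers trigger_allbutself_cpu_backtrace() and later lockups report only their own CPU, keeping the traces readable; and the panic string shrinks to "Hard LOCKUP" because the detailed per-CPU report (modules, irqtrace events, registers or stack) has already been printed via pr_emerg() just above.
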
@@ -327,6 +370,9 @@ static void watchdog_interrupt_count(void)
 static int watchdog_nmi_enable(unsigned int cpu);
 static void watchdog_nmi_disable(unsigned int cpu);
 
+static int watchdog_enable_all_cpus(void);
+static void watchdog_disable_all_cpus(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -613,46 +659,9 @@ static void watchdog_nmi_disable(unsigned int cpu)
        }
 }
 
-void watchdog_nmi_enable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-               goto unlock;
-
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               watchdog_nmi_enable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
-
-void watchdog_nmi_disable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!watchdog_running)
-               goto unlock;
-
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               watchdog_nmi_disable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
 static void watchdog_nmi_disable(unsigned int cpu) { return; }
-void watchdog_nmi_enable_all(void) {}
-void watchdog_nmi_disable_all(void) {}
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 static struct smp_hotplug_thread watchdog_threads = {
@@ -666,46 +675,105 @@ static struct smp_hotplug_thread watchdog_threads = {
        .unpark                 = watchdog_enable,
 };
 
-static void restart_watchdog_hrtimer(void *info)
+/*
+ * park all watchdog threads that are specified in 'watchdog_cpumask'
+ *
+ * This function returns an error if kthread_park() of a watchdog thread
+ * fails. In this situation, the watchdog threads of some CPUs can already
+ * be parked and the watchdog threads of other CPUs can still be runnable.
+ * Callers are expected to handle this special condition as appropriate in
+ * their context.
+ */
+static int watchdog_park_threads(void)
 {
-       struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
-       int ret;
+       int cpu, ret = 0;
+
+       get_online_cpus();
+       for_each_watchdog_cpu(cpu) {
+               ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
+               if (ret)
+                       break;
+       }
+       put_online_cpus();
+
+       return ret;
+}
 
+/*
+ * unpark all watchdog threads that are specified in 'watchdog_cpumask'
+ */
+static void watchdog_unpark_threads(void)
+{
+       int cpu;
+
+       get_online_cpus();
+       for_each_watchdog_cpu(cpu)
+               kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+       put_online_cpus();
+}
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+       int ret = 0;
+
+       get_online_cpus();
+       mutex_lock(&watchdog_proc_mutex);
        /*
-        * No need to cancel and restart hrtimer if it is currently executing
-        * because it will reprogram itself with the new period now.
-        * We should never see it unqueued here because we are running per-cpu
-        * with interrupts disabled.
+        * Multiple suspend requests can be active in parallel (counted by
+        * the 'watchdog_suspended' variable). If the watchdog threads are
+        * running, the first caller takes care that they will be parked.
+        * The state of 'watchdog_running' cannot change while a suspend
+        * request is active (see related code in 'proc' handlers).
         */
-       ret = hrtimer_try_to_cancel(hrtimer);
-       if (ret == 1)
-               hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-                               HRTIMER_MODE_REL_PINNED);
+       if (watchdog_running && !watchdog_suspended)
+               ret = watchdog_park_threads();
+
+       if (ret == 0)
+               watchdog_suspended++;
+       else {
+               watchdog_disable_all_cpus();
+               pr_err("Failed to suspend lockup detectors, disabled\n");
+               watchdog_enabled = 0;
+       }
+
+       mutex_unlock(&watchdog_proc_mutex);
+
+       return ret;
 }
 
-static void update_watchdog(int cpu)
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
 {
+       mutex_lock(&watchdog_proc_mutex);
+
+       watchdog_suspended--;
        /*
-        * Make sure that perf event counter will adopt to a new
-        * sampling period. Updating the sampling period directly would
-        * be much nicer but we do not have an API for that now so
-        * let's use a big hammer.
-        * Hrtimer will adopt the new period on the next tick but this
-        * might be late already so we have to restart the timer as well.
+        * The watchdog threads are unparked if they were previously running
+        * and if there is no more active suspend request.
         */
-       watchdog_nmi_disable(cpu);
-       smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
-       watchdog_nmi_enable(cpu);
+       if (watchdog_running && !watchdog_suspended)
+               watchdog_unpark_threads();
+
+       mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
 }
 
-static void update_watchdog_all_cpus(void)
+static int update_watchdog_all_cpus(void)
 {
-       int cpu;
+       int ret;
 
-       get_online_cpus();
-       for_each_watchdog_cpu(cpu)
-               update_watchdog(cpu);
-       put_online_cpus();
+       ret = watchdog_park_threads();
+       if (ret)
+               return ret;
+
+       watchdog_unpark_threads();
+
+       return 0;
 }
 
 static int watchdog_enable_all_cpus(void)
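
[Annotation] The park/unpark round trip in update_watchdog_all_cpus() is what makes 'on the fly' updates work: for smpboot threads, kthread_park() runs the thread's .park callback and kthread_unpark() runs .unpark on each CPU, i.e. the watchdog_disable()/watchdog_enable() pair (the .unpark hook is visible in the watchdog_threads definition above). A rough sketch of the effect on one CPU, not the literal call chain:

	kthread_park(worker);	/* -> watchdog_disable(): hrtimer stopped, NMI perf event released */
	kthread_unpark(worker);	/* -> watchdog_enable(): both re-created with the current
				 *    'watchdog_enabled' bits, 'sample_period' and 'watchdog_thresh' */
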
@@ -713,29 +781,31 @@ static int watchdog_enable_all_cpus(void)
        int err = 0;
 
        if (!watchdog_running) {
-               err = smpboot_register_percpu_thread(&watchdog_threads);
+               err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+                                                            &watchdog_cpumask);
                if (err)
                        pr_err("Failed to create watchdog threads, disabled\n");
-               else {
-                       if (smpboot_update_cpumask_percpu_thread(
-                                   &watchdog_threads, &watchdog_cpumask))
-                               pr_err("Failed to set cpumask for watchdog threads\n");
+               else
                        watchdog_running = 1;
-               }
        } else {
                /*
                 * Enable/disable the lockup detectors or
                 * change the sample period 'on the fly'.
                 */
-               update_watchdog_all_cpus();
+               err = update_watchdog_all_cpus();
+
+               if (err) {
+                       watchdog_disable_all_cpus();
+                       pr_err("Failed to update lockup detectors, disabled\n");
+               }
        }
 
+       if (err)
+               watchdog_enabled = 0;
+
        return err;
 }
 
-/* prepare/enable/disable routines */
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
 static void watchdog_disable_all_cpus(void)
 {
        if (watchdog_running) {
@@ -744,6 +814,8 @@ static void watchdog_disable_all_cpus(void)
        }
 }
 
+#ifdef CONFIG_SYSCTL
+
 /*
  * Update the run state of the lockup detectors.
  */
@@ -785,8 +857,15 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
        int err, old, new;
        int *watchdog_param = (int *)table->data;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        /*
         * If the parameter is being read return the state of the corresponding
         * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
@@ -820,15 +899,17 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                } while (cmpxchg(&watchdog_enabled, old, new) != old);
 
                /*
-                * Update the run state of the lockup detectors.
-                * Restore 'watchdog_enabled' on failure.
+                * Update the run state of the lockup detectors. There is _no_
+                * need to check the value returned by proc_watchdog_update()
+                * and to restore the previous value of 'watchdog_enabled' as
+                * both lockup detectors are disabled if proc_watchdog_update()
+                * returns an error.
                 */
                err = proc_watchdog_update();
-               if (err)
-                       watchdog_enabled = old;
        }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
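
[Annotation] From user space the new 'watchdog_suspended' check is visible as a transient failure: while a suspend request is active, accessing /proc/sys/kernel/watchdog, nmi_watchdog or soft_watchdog returns -EAGAIN instead of racing with the parked threads (the check sits before the read/write split, so reads are rejected as well). The same check is added to proc_watchdog_thresh() and proc_watchdog_cpumask() below.
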
 
@@ -870,8 +951,15 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
 {
        int err, old;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        old = ACCESS_ONCE(watchdog_thresh);
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
@@ -879,15 +967,17 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
                goto out;
 
        /*
-        * Update the sample period.
-        * Restore 'watchdog_thresh' on failure.
+        * Update the sample period. Restore on failure.
         */
        set_sample_period();
        err = proc_watchdog_update();
-       if (err)
+       if (err) {
                watchdog_thresh = old;
+               set_sample_period();
+       }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
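
[Annotation] Note why set_sample_period() is called a second time in the error path above: the sample period is derived from 'watchdog_thresh', so after restoring the old threshold the period has to be recomputed as well; otherwise the detectors would keep running with a period based on the rejected value.
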
 
@@ -902,7 +992,15 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 {
        int err;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
+
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
        if (!err && write) {
                /* Remove impossible cpus to keep sysctl output cleaner. */
@@ -920,7 +1018,9 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
                                pr_err("cpumask update failed\n");
                }
        }
+out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
 
@@ -932,10 +1032,8 @@ void __init lockup_detector_init(void)
 
 #ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_enabled()) {
-               if (!cpumask_empty(tick_nohz_full_mask))
-                       pr_info("Disabling watchdog on nohz_full cores by default\n");
-               cpumask_andnot(&watchdog_cpumask, cpu_possible_mask,
-                              tick_nohz_full_mask);
+               pr_info("Disabling watchdog on nohz_full cores by default\n");
+               cpumask_copy(&watchdog_cpumask, housekeeping_mask);
        } else
                cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
 #else