Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Jul 2008 01:34:13 +0000 (18:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Jul 2008 01:34:13 +0000 (18:34 -0700)
* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: fix invalid proc_handler for softlockup_panic
  softlockup: fix watchdog task wakeup frequency
  softlockup: fix watchdog task wakeup frequency
  softlockup: show irqtrace
  softlockup: print a module list on being stuck
  softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
  softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
  softlockup: fix softlockup_thresh fix
  softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
  softlockup: allow panic on lockup

Documentation/kernel-parameters.txt
include/linux/sched.h
kernel/softlockup.c
kernel/sysctl.c
kernel/time/tick-sched.c
lib/Kconfig.debug

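diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30d44b7..47e7d87 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt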
@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
        snd-ymfpci=     [HW,ALSA]
 
+       softlockup_panic=
+                       [KNL] Should the soft-lockup detector generate panics.
+
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/sonypi.txt
 
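diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1941d8b..af443a0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h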
@@ -295,10 +295,11 @@ extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern unsigned long  softlockup_thresh;
+extern unsigned int  softlockup_panic;
 extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
+extern int softlockup_thresh;
 #else
 static inline void softlockup_tick(void)
 {
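diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78..7bd8d1a 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c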
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/lockdep.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
 
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 
 static int __read_mostly did_panic;
-unsigned long __read_mostly softlockup_thresh = 60;
+int __read_mostly softlockup_thresh = 60;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+                               CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+       softlockup_panic = simple_strtoul(str, NULL, 0);
+
+       return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
 
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
        struct pt_regs *regs = get_irq_regs();
        unsigned long now;
 
+       /* Is detection switched off? */
+       if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+               /* Be sure we don't false trigger if switched back on */
+               if (touch_timestamp)
+                       per_cpu(touch_timestamp, this_cpu) = 0;
+               return;
+       }
+
        if (touch_timestamp == 0) {
                __touch_softlockup_watchdog();
                return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
        print_timestamp = per_cpu(print_timestamp, this_cpu);
 
        /* report at most once a second */
-       if ((print_timestamp >= touch_timestamp &&
-                       print_timestamp < (touch_timestamp + 1)) ||
-                       did_panic || !per_cpu(watchdog_task, this_cpu)) {
+       if (print_timestamp == touch_timestamp || did_panic)
                return;
-       }
 
        /* do not print during early bootup: */
        if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
 
        now = get_timestamp(this_cpu);
 
-       /* Wake up the high-prio watchdog task every second: */
-       if (now > (touch_timestamp + 1))
+       /*
+        * Wake up the high-prio watchdog task twice per
+        * threshold timespan.
+        */
+       if (now > touch_timestamp + softlockup_thresh/2)
                wake_up_process(per_cpu(watchdog_task, this_cpu));
 
        /* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
                        this_cpu, now - touch_timestamp,
                        current->comm, task_pid_nr(current));
        print_modules();
+       print_irqtrace_events(current);
        if (regs)
                show_regs(regs);
        else
                dump_stack();
        spin_unlock(&print_lock);
+
+       if (softlockup_panic)
+               panic("softlockup: hung tasks");
 }
 
 /*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
 
        t->last_switch_timestamp = now;
        touch_nmi_watchdog();
+
+       if (softlockup_panic)
+               panic("softlockup: blocked tasks");
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b859e6b..2a7b9d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
 /* Constants used for minimum and  maximum */
-#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
 static int one = 1;
 #endif
 
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
+static int neg_one = -1;
 #endif
 
 #ifdef CONFIG_MMU
@@ -737,15 +738,26 @@ static struct ctl_table kern_table[] = {
        },
 #endif
 #ifdef CONFIG_DETECT_SOFTLOCKUP
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "softlockup_panic",
+               .data           = &softlockup_panic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "softlockup_thresh",
                .data           = &softlockup_thresh,
-               .maxlen         = sizeof(unsigned long),
+               .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
-               .extra1         = &one,
+               .extra1         = &neg_one,
                .extra2         = &sixty,
        },
        {
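diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7cc..942fc7c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c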
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
        if (!ts->tick_stopped)
                return;
 
-       touch_softlockup_watchdog();
-
        cpu_clear(cpu, nohz_cpu_mask);
        now = ktime_get();
        ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
        local_irq_save(flags);
        tick_do_update_jiffies64(now);
        local_irq_restore(flags);
+
+       touch_softlockup_watchdog();
 }
 
 void tick_nohz_stop_idle(int cpu)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba106db..882c510 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP
        help
          Say Y here to enable the kernel to detect "soft lockups",
          which are bugs that cause the kernel to loop in kernel
-         mode for more than 10 seconds, without giving other tasks a
+         mode for more than 60 seconds, without giving other tasks a
          chance to run.
 
          When a soft-lockup is detected, the kernel will print the
@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP
           can be detected via the NMI-watchdog, on platforms that
           support it.)
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+       bool "Panic (Reboot) On Soft Lockups"
+       depends on DETECT_SOFTLOCKUP
+       help
+         Say Y here to enable the kernel to panic on "soft lockups",
+         which are bugs that cause the kernel to loop in kernel
+         mode for more than 60 seconds, without giving other tasks a
+         chance to run.
+
+         The panic can be used in combination with panic_timeout,
+         to cause the system to reboot automatically after a
+         lockup has been detected. This feature is useful for
+         high-availability systems that have uptime guarantees and
+         where a lockup must be resolved ASAP.
+
+         Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+       int
+       depends on DETECT_SOFTLOCKUP
+       range 0 1
+       default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+       default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config SCHED_DEBUG
        bool "Collect scheduler debugging info"
        depends on DEBUG_KERNEL && PROC_FS