summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-24 03:34:13 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-24 03:34:13 +0200
commit: d7b6de14a0ef8a376f9d57b867545b47302b7bfb (patch)
tree: 46904d68a5a68f22e6c6baf3472edd4c37a39481
parent: Merge branch 'devel' of master.kernel.org:/home/rmk/linux-2.6-arm (diff)
parent: softlockup: fix invalid proc_handler for softlockup_panic (diff)
download: linux-d7b6de14a0ef8a376f9d57b867545b47302b7bfb.tar.xz
download: linux-d7b6de14a0ef8a376f9d57b867545b47302b7bfb.zip
Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: fix invalid proc_handler for softlockup_panic
  softlockup: fix watchdog task wakeup frequency
  softlockup: fix watchdog task wakeup frequency
  softlockup: show irqtrace
  softlockup: print a module list on being stuck
  softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
  softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
  softlockup: fix softlockup_thresh fix
  softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
  softlockup: allow panic on lockup
-rw-r--r-- Documentation/kernel-parameters.txt 3
-rw-r--r-- include/linux/sched.h 3
-rw-r--r-- kernel/softlockup.c 45
-rw-r--r-- kernel/sysctl.c 20
-rw-r--r-- kernel/time/tick-sched.c 4
-rw-r--r-- lib/Kconfig.debug 26
6 files changed, 86 insertions, 15 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 30d44b78171a..47e7d8794fc6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file
snd-ymfpci= [HW,ALSA]
+ softlockup_panic=
+ [KNL] Should the soft-lockup detector generate panics.
+
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/sonypi.txt
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1941d8b5cf11..af443a08431f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,10 +295,11 @@ extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern unsigned long softlockup_thresh;
+extern unsigned int softlockup_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
+extern int softlockup_thresh;
#else
static inline void softlockup_tick(void)
{
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78185eb..7bd8d1aadd5d 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
static int __read_mostly did_panic;
-unsigned long __read_mostly softlockup_thresh = 60;
+int __read_mostly softlockup_thresh = 60;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+ softlockup_panic = simple_strtoul(str, NULL, 0);
+
+ return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
struct pt_regs *regs = get_irq_regs();
unsigned long now;
+ /* Is detection switched off? */
+ if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+ /* Be sure we don't false trigger if switched back on */
+ if (touch_timestamp)
+ per_cpu(touch_timestamp, this_cpu) = 0;
+ return;
+ }
+
if (touch_timestamp == 0) {
__touch_softlockup_watchdog();
return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
print_timestamp = per_cpu(print_timestamp, this_cpu);
/* report at most once a second */
- if ((print_timestamp >= touch_timestamp &&
- print_timestamp < (touch_timestamp + 1)) ||
- did_panic || !per_cpu(watchdog_task, this_cpu)) {
+ if (print_timestamp == touch_timestamp || did_panic)
return;
- }
/* do not print during early bootup: */
if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
now = get_timestamp(this_cpu);
- /* Wake up the high-prio watchdog task every second: */
- if (now > (touch_timestamp + 1))
+ /*
+ * Wake up the high-prio watchdog task twice per
+ * threshold timespan.
+ */
+ if (now > touch_timestamp + softlockup_thresh/2)
wake_up_process(per_cpu(watchdog_task, this_cpu));
/* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
this_cpu, now - touch_timestamp,
current->comm, task_pid_nr(current));
print_modules();
+ print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
spin_unlock(&print_lock);
+
+ if (softlockup_panic)
+ panic("softlockup: hung tasks");
}
/*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
t->last_switch_timestamp = now;
touch_nmi_watchdog();
+
+ if (softlockup_panic)
+ panic("softlockup: blocked tasks");
}
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b859e6b5a767..2a7b9d88706b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
-#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60;
+static int neg_one = -1;
#endif
#ifdef CONFIG_MMU
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = {
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
- .maxlen = sizeof(unsigned long),
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &neg_one,
.extra2 = &sixty,
},
{
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7ccdf842..942fc7c85283 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
if (!ts->tick_stopped)
return;
- touch_softlockup_watchdog();
-
cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get();
ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
+
+ touch_softlockup_watchdog();
}
void tick_nohz_stop_idle(int cpu)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba106db5a65b..882c51048993 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP
help
Say Y here to enable the kernel to detect "soft lockups",
which are bugs that cause the kernel to loop in kernel
- mode for more than 10 seconds, without giving other tasks a
+ mode for more than 60 seconds, without giving other tasks a
chance to run.
When a soft-lockup is detected, the kernel will print the
@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP
can be detected via the NMI-watchdog, on platforms that
support it.)
+config BOOTPARAM_SOFTLOCKUP_PANIC
+ bool "Panic (Reboot) On Soft Lockups"
+ depends on DETECT_SOFTLOCKUP
+ help
+ Say Y here to enable the kernel to panic on "soft lockups",
+ which are bugs that cause the kernel to loop in kernel
+ mode for more than 60 seconds, without giving other tasks a
+ chance to run.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ lockup has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a lockup must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+ int
+ depends on DETECT_SOFTLOCKUP
+ range 0 1
+ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
config SCHED_DEBUG
bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS