From 29b0f591d678838435fbb3e15ef20266f1a9e01d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:56 +0200 Subject: x86, mce: default to panic timeout for machine checks Fatal machine checks can be logged to disk after boot, but only if the system did a warm reboot. That's unfortunately difficult with the default panic behaviour, which waits forever and the admin has to press the power button because modern systems usually miss a reset button. This clears the machine checks in the registers and make it impossible to log them. This patch changes the default for machine check panic to always reboot after 30s. Then the mce can be successfully logged after reboot. I believe this will improve machine check experience for any system running the X server. This is dependent on successfull boot logging of MCEs. This currently only works on Intel systems, on AMD there are quite a lot of systems around which leave junk in the machine check registers after boot, so it's disabled here. These systems will continue to default to endless waiting panic. v2: Only force panic timeout when it's shorter (H.Seto) v3: Only force timeout when there is no timeout (based on comment H.Seto) [ Fix changelog - HS ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7dc369a9974..79d243145b8f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -82,6 +82,7 @@ static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; static int monarch_timeout = -1; +static int mce_panic_timeout; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -216,6 +217,8 @@ static void wait_for_panic(void) local_irq_enable(); while (timeout-- > 0) udelay(1); + if (panic_timeout == 0) + panic_timeout = mce_panic_timeout; panic("Panicing machine check CPU died"); } @@ -253,6 +256,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp) printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); if (exp) printk(KERN_EMERG "Machine check: %s\n", exp); + if (panic_timeout == 0) + panic_timeout = mce_panic_timeout; panic(msg); } @@ -1117,6 +1122,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) } if (monarch_timeout < 0) monarch_timeout = 0; + if (mce_bootlog != 0) + mce_panic_timeout = 30; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) -- cgit v1.2.3