diff options
Diffstat (limited to 'drivers/char/ipmi/ipmi_watchdog.c')
-rw-r--r-- | drivers/char/ipmi/ipmi_watchdog.c | 136 |
1 files changed, 94 insertions, 42 deletions
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 6b634e8d9519..147c12047cf3 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -39,6 +39,7 @@ #include <linux/miscdevice.h> #include <linux/init.h> #include <linux/completion.h> +#include <linux/kdebug.h> #include <linux/rwsem.h> #include <linux/errno.h> #include <asm/uaccess.h> @@ -49,9 +50,18 @@ #include <linux/poll.h> #include <linux/string.h> #include <linux/ctype.h> +#include <linux/delay.h> #include <asm/atomic.h> -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/apic.h> + +#ifdef CONFIG_X86 +/* This is ugly, but I've determined that x86 is the only architecture + that can reasonably support the IPMI NMI watchdog timeout at this + time. If another architecture adds this capability somehow, it + will have to be a somewhat different mechanism and I have no idea + how it will work. So in the unlikely event that another + architecture supports this, we can figure out a good generic + mechanism for it at that time. */ +#define HAVE_DIE_NMI_POST #endif #define PFX "IPMI Watchdog: " @@ -317,6 +327,11 @@ static unsigned char ipmi_version_minor; /* If a pretimeout occurs, this is used to allow only one panic to happen. */ static atomic_t preop_panic_excl = ATOMIC_INIT(-1); +#ifdef HAVE_DIE_NMI_POST +static int testing_nmi; +static int nmi_handler_registered; +#endif + static int ipmi_heartbeat(void); static void panic_halt_ipmi_heartbeat(void); @@ -358,6 +373,10 @@ static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg, int hbnow = 0; + /* These can be cleared as we are setting the timeout. */ + ipmi_start_timer_on_heartbeat = 0; + pretimeout_since_last_heartbeat = 0; + data[0] = 0; WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); @@ -432,13 +451,12 @@ static int ipmi_set_timeout(int do_heartbeat) wait_for_completion(&set_timeout_wait); + mutex_unlock(&set_timeout_lock); + if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) || ((send_heartbeat_now) && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) - { rv = ipmi_heartbeat(); - } - mutex_unlock(&set_timeout_lock); out: return rv; @@ -518,12 +536,10 @@ static int ipmi_heartbeat(void) int rv; struct ipmi_system_interface_addr addr; - if (ipmi_ignore_heartbeat) { + if (ipmi_ignore_heartbeat) return 0; - } if (ipmi_start_timer_on_heartbeat) { - ipmi_start_timer_on_heartbeat = 0; ipmi_watchdog_state = action_val; return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); } else if (pretimeout_since_last_heartbeat) { @@ -531,7 +547,6 @@ static int ipmi_heartbeat(void) We don't want to set the action, though, we want to leave that alone (thus it can't be combined with the above operation. */ - pretimeout_since_last_heartbeat = 0; return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); } @@ -919,6 +934,45 @@ static void ipmi_register_watchdog(int ipmi_intf) printk(KERN_CRIT PFX "Unable to register misc device\n"); } +#ifdef HAVE_DIE_NMI_POST + if (nmi_handler_registered) { + int old_pretimeout = pretimeout; + int old_timeout = timeout; + int old_preop_val = preop_val; + + /* Set the pretimeout to go off in a second and give + ourselves plenty of time to stop the timer. */ + ipmi_watchdog_state = WDOG_TIMEOUT_RESET; + preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ + pretimeout = 99; + timeout = 100; + + testing_nmi = 1; + + rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); + if (rv) { + printk(KERN_WARNING PFX "Error starting timer to" + " test NMI: 0x%x. The NMI pretimeout will" + " likely not work\n", rv); + rv = 0; + goto out_restore; + } + + msleep(1500); + + if (testing_nmi != 2) { + printk(KERN_WARNING PFX "IPMI NMI didn't seem to" + " occur. The NMI pretimeout will" + " likely not work\n"); + } + out_restore: + testing_nmi = 0; + preop_val = old_preop_val; + pretimeout = old_pretimeout; + timeout = old_timeout; + } +#endif + out: up_write(®ister_sem); @@ -928,6 +982,10 @@ static void ipmi_register_watchdog(int ipmi_intf) ipmi_watchdog_state = action_val; ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); printk(KERN_INFO PFX "Starting now!\n"); + } else { + /* Stop the timer now. */ + ipmi_watchdog_state = WDOG_TIMEOUT_NONE; + ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } } @@ -964,17 +1022,28 @@ static void ipmi_unregister_watchdog(int ipmi_intf) up_write(®ister_sem); } -#ifdef HAVE_NMI_HANDLER +#ifdef HAVE_DIE_NMI_POST static int -ipmi_nmi(void *dev_id, int cpu, int handled) +ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) { + if (val != DIE_NMI_POST) + return NOTIFY_OK; + + if (testing_nmi) { + testing_nmi = 2; + return NOTIFY_STOP; + } + /* If we are not expecting a timeout, ignore it. */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) - return NOTIFY_DONE; + return NOTIFY_OK; + + if (preaction_val != WDOG_PRETIMEOUT_NMI) + return NOTIFY_OK; /* If no one else handled the NMI, we assume it was the IPMI watchdog. */ - if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) { + if (preop_val == WDOG_PREOP_PANIC) { /* On some machines, the heartbeat will give an error and not work unless we re-enable the timer. So do so. */ @@ -983,18 +1052,12 @@ ipmi_nmi(void *dev_id, int cpu, int handled) panic(PFX "pre-timeout"); } - return NOTIFY_DONE; + return NOTIFY_STOP; } -static struct nmi_handler ipmi_nmi_handler = -{ - .link = LIST_HEAD_INIT(ipmi_nmi_handler.link), - .dev_name = "ipmi_watchdog", - .dev_id = NULL, - .handler = ipmi_nmi, - .priority = 0, /* Call us last. */ +static struct notifier_block ipmi_nmi_handler = { + .notifier_call = ipmi_nmi }; -int nmi_handler_registered; #endif static int wdog_reboot_handler(struct notifier_block *this, @@ -1111,7 +1174,7 @@ static int preaction_op(const char *inval, char *outval) preaction_val = WDOG_PRETIMEOUT_NONE; else if (strcmp(inval, "pre_smi") == 0) preaction_val = WDOG_PRETIMEOUT_SMI; -#ifdef HAVE_NMI_HANDLER +#ifdef HAVE_DIE_NMI_POST else if (strcmp(inval, "pre_nmi") == 0) preaction_val = WDOG_PRETIMEOUT_NMI; #endif @@ -1145,7 +1208,7 @@ static int preop_op(const char *inval, char *outval) static void check_parms(void) { -#ifdef HAVE_NMI_HANDLER +#ifdef HAVE_DIE_NMI_POST int do_nmi = 0; int rv; @@ -1158,20 +1221,9 @@ static void check_parms(void) preop_op("preop_none", NULL); do_nmi = 0; } -#ifdef CONFIG_X86_LOCAL_APIC - if (nmi_watchdog == NMI_IO_APIC) { - printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC" - " mode (value is %d), that is incompatible" - " with using NMI in the IPMI watchdog." - " Disabling IPMI nmi pretimeout.\n", - nmi_watchdog); - preaction_val = WDOG_PRETIMEOUT_NONE; - do_nmi = 0; - } -#endif } if (do_nmi && !nmi_handler_registered) { - rv = request_nmi(&ipmi_nmi_handler); + rv = register_die_notifier(&ipmi_nmi_handler); if (rv) { printk(KERN_WARNING PFX "Can't register nmi handler\n"); @@ -1179,7 +1231,7 @@ static void check_parms(void) } else nmi_handler_registered = 1; } else if (!do_nmi && nmi_handler_registered) { - release_nmi(&ipmi_nmi_handler); + unregister_die_notifier(&ipmi_nmi_handler); nmi_handler_registered = 0; } #endif @@ -1215,9 +1267,9 @@ static int __init ipmi_wdog_init(void) rv = ipmi_smi_watcher_register(&smi_watcher); if (rv) { -#ifdef HAVE_NMI_HANDLER - if (preaction_val == WDOG_PRETIMEOUT_NMI) - release_nmi(&ipmi_nmi_handler); +#ifdef HAVE_DIE_NMI_POST + if (nmi_handler_registered) + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier); @@ -1236,9 +1288,9 @@ static void __exit ipmi_wdog_exit(void) ipmi_smi_watcher_unregister(&smi_watcher); ipmi_unregister_watchdog(watchdog_ifnum); -#ifdef HAVE_NMI_HANDLER +#ifdef HAVE_DIE_NMI_POST if (nmi_handler_registered) - release_nmi(&ipmi_nmi_handler); + unregister_die_notifier(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, |