diff options
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r-- | kernel/watchdog.c | 147 |
1 files changed, 70 insertions, 77 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 576d18045811..5470dce212c0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -18,18 +18,14 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> -#include <linux/smpboot.h> -#include <linux/sched/rt.h> -#include <uapi/linux/sched/types.h> #include <linux/tick.h> -#include <linux/workqueue.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/isolation.h> +#include <linux/stop_machine.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> -#include <linux/kthread.h> static DEFINE_MUTEX(watchdog_mutex); @@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void) unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; -static bool softlockup_threads_initialized __read_mostly; +static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); -static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); @@ -335,6 +330,27 @@ static void watchdog_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } +static DEFINE_PER_CPU(struct completion, softlockup_completion); +static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); + +/* + * The watchdog thread function - touches the timestamp. + * + * It only runs once every sample_period seconds (4 seconds by + * default) to reset the softlockup timestamp. If this gets delayed + * for more than 2*watchdog_thresh seconds then the debug-printout + * triggers in watchdog_timer_fn(). + */ +static int softlockup_fn(void *data) +{ + __this_cpu_write(soft_lockup_hrtimer_cnt, + __this_cpu_read(hrtimer_interrupts)); + __touch_watchdog(); + complete(this_cpu_ptr(&softlockup_completion)); + + return 0; +} + /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -350,7 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) watchdog_interrupt_count(); /* kick the softlockup detector */ - wake_up_process(__this_cpu_read(softlockup_watchdog)); + if (completion_done(this_cpu_ptr(&softlockup_completion))) { + reinit_completion(this_cpu_ptr(&softlockup_completion)); + stop_one_cpu_nowait(smp_processor_id(), + softlockup_fn, NULL, + this_cpu_ptr(&softlockup_stop_work)); + } /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); @@ -448,16 +469,15 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void watchdog_set_prio(unsigned int policy, unsigned int prio) -{ - struct sched_param param = { .sched_priority = prio }; - - sched_setscheduler(current, policy, ¶m); -} - static void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); + struct completion *done = this_cpu_ptr(&softlockup_completion); + + WARN_ON_ONCE(cpu != smp_processor_id()); + + init_completion(done); + complete(done); /* * Start the timer first to prevent the NMI watchdog triggering @@ -473,15 +493,14 @@ static void watchdog_enable(unsigned int cpu) /* Enable the perf event */ if (watchdog_enabled & NMI_WATCHDOG_ENABLED) watchdog_nmi_enable(cpu); - - watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); } static void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); - watchdog_set_prio(SCHED_NORMAL, 0); + WARN_ON_ONCE(cpu != smp_processor_id()); + /* * Disable the perf event first. That prevents that a large delay * between disabling the timer and disabling the perf event causes @@ -489,79 +508,66 @@ static void watchdog_disable(unsigned int cpu) */ watchdog_nmi_disable(cpu); hrtimer_cancel(hrtimer); + wait_for_completion(this_cpu_ptr(&softlockup_completion)); } -static void watchdog_cleanup(unsigned int cpu, bool online) +static int softlockup_stop_fn(void *data) { - watchdog_disable(cpu); + watchdog_disable(smp_processor_id()); + return 0; } -static int watchdog_should_run(unsigned int cpu) +static void softlockup_stop_all(void) { - return __this_cpu_read(hrtimer_interrupts) != - __this_cpu_read(soft_lockup_hrtimer_cnt); + int cpu; + + if (!softlockup_initialized) + return; + + for_each_cpu(cpu, &watchdog_allowed_mask) + smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false); + + cpumask_clear(&watchdog_allowed_mask); } -/* - * The watchdog thread function - touches the timestamp. - * - * It only runs once every sample_period seconds (4 seconds by - * default) to reset the softlockup timestamp. If this gets delayed - * for more than 2*watchdog_thresh seconds then the debug-printout - * triggers in watchdog_timer_fn(). - */ -static void watchdog(unsigned int cpu) +static int softlockup_start_fn(void *data) { - __this_cpu_write(soft_lockup_hrtimer_cnt, - __this_cpu_read(hrtimer_interrupts)); - __touch_watchdog(); + watchdog_enable(smp_processor_id()); + return 0; } -static struct smp_hotplug_thread watchdog_threads = { - .store = &softlockup_watchdog, - .thread_should_run = watchdog_should_run, - .thread_fn = watchdog, - .thread_comm = "watchdog/%u", - .setup = watchdog_enable, - .cleanup = watchdog_cleanup, - .park = watchdog_disable, - .unpark = watchdog_enable, -}; - -static void softlockup_update_smpboot_threads(void) +static void softlockup_start_all(void) { - lockdep_assert_held(&watchdog_mutex); - - if (!softlockup_threads_initialized) - return; + int cpu; - smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_allowed_mask); + cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); + for_each_cpu(cpu, &watchdog_allowed_mask) + smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false); } -/* Temporarily park all watchdog threads */ -static void softlockup_park_all_threads(void) +int lockup_detector_online_cpu(unsigned int cpu) { - cpumask_clear(&watchdog_allowed_mask); - softlockup_update_smpboot_threads(); + watchdog_enable(cpu); + return 0; } -/* Unpark enabled threads */ -static void softlockup_unpark_threads(void) +int lockup_detector_offline_cpu(unsigned int cpu) { - cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); - softlockup_update_smpboot_threads(); + watchdog_disable(cpu); + return 0; } static void lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); - softlockup_park_all_threads(); + + softlockup_stop_all(); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) - softlockup_unpark_threads(); + softlockup_start_all(); + watchdog_nmi_start(); cpus_read_unlock(); /* @@ -580,8 +586,6 @@ static void lockup_detector_reconfigure(void) */ static __init void lockup_detector_setup(void) { - int ret; - /* * If sysctl is off and watchdog got disabled on the command line, * nothing to do here. @@ -592,24 +596,13 @@ static __init void lockup_detector_setup(void) !(watchdog_enabled && watchdog_thresh)) return; - ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, - &watchdog_allowed_mask); - if (ret) { - pr_err("Failed to initialize soft lockup detector threads\n"); - return; - } - mutex_lock(&watchdog_mutex); - softlockup_threads_initialized = true; lockup_detector_reconfigure(); + softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ -static inline int watchdog_park_threads(void) { return 0; } -static inline void watchdog_unpark_threads(void) { } -static inline int watchdog_enable_all_cpus(void) { return 0; } -static inline void watchdog_disable_all_cpus(void) { } static void lockup_detector_reconfigure(void) { cpus_read_lock(); |