diff options
author | Dexuan Cui <decui@microsoft.com> | 2020-12-22 07:55:41 +0100 |
---|---|---|
committer | Wei Liu <wei.liu@kernel.org> | 2021-01-05 18:52:04 +0100 |
commit | dfe94d4086e40e92b1926bddcefa629b791e9b28 (patch) | |
tree | fb6fe7ef7968b71ca528444f532ecb757a358432 | |
parent | Linux 5.11-rc2 (diff) | |
download | linux-dfe94d4086e40e92b1926bddcefa629b791e9b28.tar.xz linux-dfe94d4086e40e92b1926bddcefa629b791e9b28.zip |
x86/hyperv: Fix kexec panic/hang issues
Currently the kexec kernel can panic or hang due to 2 causes:
1) hv_cpu_die() is not called upon kexec, so the hypervisor corrupts the
old VP Assist Pages when the kexec kernel runs. The same issue is fixed
for hibernation in commit 421f090c819d ("x86/hyperv: Suspend/resume the
VP assist page for hibernation"). Now fix it for kexec.
2) hyperv_cleanup() is called too early. In the kexec path, the other CPUs
are stopped in hv_machine_shutdown() -> native_machine_shutdown(), so
between hv_kexec_handler() and native_machine_shutdown(), the other CPUs
can still try to access the hypercall page and cause panic. The workaround
"hv_hypercall_pg = NULL;" in hyperv_cleanup() is unreliabe. Move
hyperv_cleanup() to a better place.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20201222065541.24312-1-decui@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 18 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 2 |
4 files changed, 24 insertions, 2 deletions
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e04d90af4c27..4638a52d8eae 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -16,6 +16,7 @@ #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/idtentry.h> +#include <linux/kexec.h> #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> @@ -26,6 +27,8 @@ #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> +int hyperv_init_cpuhp; + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -401,6 +404,7 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); + hyperv_init_cpuhp = cpuhp; return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ffc289992d1b..30f76b966857 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f628e3dc150f..43b54bef5448 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) hv_kexec_handler(); + + /* + * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor + * corrupts the old VP Assist Pages and can crash the kexec kernel. + */ + if (kexec_in_progress && hyperv_init_cpuhp > 0) + cpuhp_remove_state(hyperv_init_cpuhp); + + /* The function calls stop_other_cpus(). */ native_machine_shutdown(); + + /* Disable the hypercall page when there is only 1 active CPU. */ + if (kexec_in_progress) + hyperv_cleanup(); } static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) hv_crash_handler(regs); + + /* The function calls crash_smp_send_stop(). */ native_machine_crash_shutdown(regs); + + /* Disable the hypercall page when there is only 1 active CPU. */ + hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 502f8cd95f6d..d491fdcee61f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs) cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); - hyperv_cleanup(); }; static int hv_synic_suspend(void) |