From 00a8f886dbdaeea1d93543d5311ddf3a2680bf2b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 15 Sep 2017 16:24:31 +0200 Subject: s390/nmi: use smp_emergency_stop instead of smp_send_stop The smp_send_stop() function can be called from s390_handle_damage while DAT is off. This happens if a machine check indicates that kernel gprs or control registers can not be restored. The function smp_send_stop reenables DAT via __load_psw_mask. That should work for the case of lost kernel gprs and the system will do the expected stop of all CPUs. But if control registers are lost, in particular CR13 with the home space ASCE, interesting secondary crashes may occur. Make smp_emergency_stop callable from nmi.c and remove the cpumask argument. Replace the smp_send_stop call with smp_emergency_stop in the s390_handle_damage function. In addition add notrace and NOKPROBE_SYMBOL annotations for all functions required for the emergency shutdown. Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nmi.c | 9 +++++++-- arch/s390/kernel/smp.c | 30 +++++++++++++++++------------- 2 files changed, 24 insertions(+), 15 deletions(-) (limited to 'arch/s390/kernel') diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 31d03a84126c..15e28eefe7e9 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -38,12 +39,13 @@ struct mcck_struct { static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static void s390_handle_damage(void) +static notrace void s390_handle_damage(void) { - smp_send_stop(); + smp_emergency_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); while (1); } +NOKPROBE_SYMBOL(s390_handle_damage); /* * Main machine check handler function. Will be called with interrupts enabled @@ -275,6 +277,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) return kill_task; } +NOKPROBE_SYMBOL(s390_validate_registers); /* * Backup the guest's machine check info to its description block @@ -300,6 +303,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs) mcck_backup->failing_storage_address = S390_lowcore.failing_storage_address; } +NOKPROBE_SYMBOL(s390_backup_mcck_info); #define MAX_IPD_COUNT 29 #define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ @@ -443,6 +447,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) clear_cpu_flag(CIF_MCCK_GUEST); nmi_exit(); } +NOKPROBE_SYMBOL(s390_do_machine_check); static int __init machine_check_init(void) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cc04b74fbb84..2dba3e88a972 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -422,13 +423,17 @@ void smp_yield_cpu(int cpu) * Send cpus emergency shutdown signal. This gives the cpus the * opportunity to complete outstanding interrupts. */ -static void smp_emergency_stop(cpumask_t *cpumask) +void notrace smp_emergency_stop(void) { + cpumask_t cpumask; u64 end; int cpu; + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + end = get_tod_clock() + (1000000UL << 12); - for_each_cpu(cpu, cpumask) { + for_each_cpu(cpu, &cpumask) { struct pcpu *pcpu = pcpu_devices + cpu; set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, @@ -437,21 +442,21 @@ static void smp_emergency_stop(cpumask_t *cpumask) cpu_relax(); } while (get_tod_clock() < end) { - for_each_cpu(cpu, cpumask) + for_each_cpu(cpu, &cpumask) if (pcpu_stopped(pcpu_devices + cpu)) - cpumask_clear_cpu(cpu, cpumask); - if (cpumask_empty(cpumask)) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) break; cpu_relax(); } } +NOKPROBE_SYMBOL(smp_emergency_stop); /* * Stop all cpus but the current one. */ void smp_send_stop(void) { - cpumask_t cpumask; int cpu; /* Disable all interrupts/machine checks */ @@ -459,17 +464,16 @@ void smp_send_stop(void) trace_hardirqs_off(); debug_set_critical(); - cpumask_copy(&cpumask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &cpumask); if (oops_in_progress) - smp_emergency_stop(&cpumask); + smp_emergency_stop(); /* stop all processors */ - for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; - pcpu_sigp_retry(pcpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu)) + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu_devices + cpu)) cpu_relax(); } } -- cgit v1.2.3