diff options
-rw-r--r-- | arch/x86/include/asm/mce.h | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 82 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 84 |
4 files changed, 137 insertions, 44 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ba1f8890cf51..afd3cdf6f8ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -15,6 +15,7 @@ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ @@ -27,6 +28,15 @@ #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ + +/* MISC register defines */ +#define MCM_ADDR_SEGOFF 0 /* segment offset */ +#define MCM_ADDR_LINEAR 1 /* linear address */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCM_ADDR_MEM 3 /* memory address */ +#define MCM_ADDR_GENERIC 7 /* generic */ /* Fields are zero when not available */ struct mce { diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index f126b4ae7a25..54dcb8ff12e5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -2,9 +2,14 @@ enum severity_level { MCE_NO_SEVERITY, + MCE_KEEP_SEVERITY, MCE_SOME_SEVERITY, + MCE_AO_SEVERITY, MCE_UC_SEVERITY, + MCE_AR_SEVERITY, MCE_PANIC_SEVERITY, }; int mce_severity(struct mce *a, int tolerant, char **msg); + +extern int mce_ser; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c189e89a89ae..4f4d2caf4043 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -19,43 +19,117 @@ * first. 
Since there are quite a lot of combinations test the bits in a * table-driven way. The rules are simply processed in order, first * match wins. + * + * Note this is only used for machine check exceptions, the corrected + * errors use much simpler rules. The exceptions still check for the corrected + * errors, but only to leave them alone for the CMCI handler (except for + * panic situations) */ +enum context { IN_KERNEL = 1, IN_USER = 2 }; +enum ser { SER_REQUIRED = 1, NO_SER = 2 }; + static struct severity { u64 mask; u64 result; unsigned char sev; unsigned char mcgmask; unsigned char mcgres; + unsigned char ser; + unsigned char context; char *msg; } severities[] = { +#define KERNEL .context = IN_KERNEL +#define USER .context = IN_USER +#define SER .ser = SER_REQUIRED +#define NOSER .ser = NO_SER #define SEV(s) .sev = MCE_ ## s ## _SEVERITY #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } #define MCGMASK(x, res, s, m, r...) \ { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } +#define MASK(x, y, s, m, r...) 
\ + { .mask = x, .result = y, SEV(s), .msg = m, ## r } +#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) +#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) +#define MCACOD 0xffff + BITCLR(MCI_STATUS_VAL, NO, "Invalid"), BITCLR(MCI_STATUS_EN, NO, "Not enabled"), BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), - MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), + /* When MCIP is not set something is very confused */ + MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), + /* Neither restart nor error IP -- no chance to recover -> PANIC */ + MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, + "Neither restart nor error IP"), + MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", + KERNEL), + BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), + MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, + "Spurious not enabled", SER), + + /* ignore OVER for UCNA */ + MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, + "Uncorrected no action required", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, + "Illegal combination (UCNA with AR=1)", SER), + MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), + + /* AR add known MCACODs here */ + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, + "Action required with lost events", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, + "Action required; unknown MCACOD", SER), + + /* known AO MCACODs: */ + MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, + "Action optional: memory scrubbing error", SER), + MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, + "Action optional: last level cache writeback error", SER), + + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, + "Action optional unknown MCACOD", SER), + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, + "Action optional with lost events", SER), BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed 
uncorrected"), BITSET(MCI_STATUS_UC, UC, "Uncorrected"), BITSET(0, SOME, "No match") /* always matches. keep at end */ }; +/* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ +static int error_context(struct mce *m) +{ + if (m->mcgstatus & MCG_STATUS_EIPV) + return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; + /* Unknown, assume kernel */ + return IN_KERNEL; +} + int mce_severity(struct mce *a, int tolerant, char **msg) { + enum context ctx = error_context(a); struct severity *s; + for (s = severities;; s++) { if ((a->status & s->mask) != s->result) continue; if ((a->mcgstatus & s->mcgmask) != s->mcgres) continue; - if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && - tolerant < 1) - return MCE_PANIC_SEVERITY; + if (s->ser == SER_REQUIRED && !mce_ser) + continue; + if (s->ser == NO_SER && mce_ser) + continue; + if (s->context && ctx != s->context) + continue; if (msg) *msg = s->msg; + if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { + if (panic_on_oops || tolerant < 1) + return MCE_PANIC_SEVERITY; + } return s->sev; } } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff9c732989de..f051a7807ab4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -83,6 +83,7 @@ static int rip_msr; static int mce_bootlog = -1; static int monarch_timeout = -1; static int mce_panic_timeout; +int mce_ser; static char trigger[128]; static char *trigger_argv[2] = { trigger, NULL }; @@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); * Those are just logged through /dev/mcelog. * * This is executed in standard interrupt context. + * + * Note: spec recommends to panic for fatal unsignalled + * errors here. However this would be quite problematic -- + * we would need to reimplement the Monarch handling and + * it would mess up the exclusion between exception handler + * and poll handler -- so we skip this for now. 
+ * These cases should not happen anyways, or only when the CPU + * is already totally confused. In this case it's likely it will + * not fully execute the machine check handler either. */ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { @@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) continue; /* - * Uncorrected events are handled by the exception handler - * when it is enabled. But when the exception is disabled log - * everything. + * Uncorrected or signalled events are handled by the exception + * handler when it is enabled, so don't process those here. * * TBD do the same check for MCI_STATUS_EN here? */ - if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) + if (!(flags & MCP_UC) && + (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; if (m.status & MCI_STATUS_MISCV) @@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) barrier(); /* + * When there is no restart IP we must always kill or panic. + */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + kill_it = 1; + + /* * Go through all the banks in exclusion of the other CPUs. * This way we don't report duplicated events on shared banks * because the first one to see it will clear it. @@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; /* - * Non uncorrected errors are handled by machine_check_poll - * Leave them alone, unless this panics. + * Non uncorrected or non signaled errors are handled by + * machine_check_poll. Leave them alone, unless this panics. */ - if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) + if (!(m.status & (mce_ser ? 
MCI_STATUS_S : MCI_STATUS_UC)) && + !no_way_out) continue; /* @@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); - __set_bit(i, toclear); + severity = mce_severity(&m, tolerant, NULL); - if (m.status & MCI_STATUS_EN) { - /* - * If this error was uncorrectable and there was - * an overflow, we're in trouble. If no overflow, - * we might get away with just killing a task. - */ - if (m.status & MCI_STATUS_UC) - kill_it = 1; - } else { + /* + * When the machine check was for the corrected handler don't touch it, + * unless we're panicking. + */ + if (severity == MCE_KEEP_SEVERITY && !no_way_out) + continue; + __set_bit(i, toclear); + if (severity == MCE_NO_SEVERITY) { /* * Machine check event was not enabled. Clear, but * ignore. @@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; } + /* + * Kill on action required. + */ + if (severity == MCE_AR_SEVERITY) + kill_it = 1; + if (m.status & MCI_STATUS_MISCV) m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); if (m.status & MCI_STATUS_ADDRV) @@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_get_rip(&m, regs); mce_log(&m); - severity = mce_severity(&m, tolerant, NULL); if (severity > worst) { *final = m; worst = severity; @@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) * one task, do that. If the user has set the tolerance very * high, don't try to do anything at all. */ - if (kill_it && tolerant < 3) { - int user_space = 0; - - /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. - */ - if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = final->ip && (final->cs & 3); - /* - * If we know that the error was in user space, send a - * SIGBUS. Otherwise, panic if tolerance is low. - * - * force_sig() takes an awful lot of locks and has a slight - * risk of deadlocking. 
- */ - if (user_space) { - force_sig(SIGBUS, current); - } else if (panic_on_oops || tolerant < 2) { - mce_panic("Uncorrected machine check", final, msg); - } - } + if (kill_it && tolerant < 3) + force_sig(SIGBUS, current); /* notify userspace ASAP */ set_thread_flag(TIF_MCE_NOTIFY); @@ -1049,6 +1050,9 @@ static int mce_cap_init(void) if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) rip_msr = MSR_IA32_MCG_EIP; + if (cap & MCG_SER_P) + mce_ser = 1; + return 0; } |