From 8d21d4c91efaed3eb60cc64d43889665a4bd7a36 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 28 Jul 2014 02:50:59 -0400 Subject: APEI, GHES: Cleanup unnecessary function for lockless list We have a generic function to reverse a lockless list, kill homegrown copy. Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1406530260-26078-2-git-send-email-gong.chen@linux.intel.com Acked-by: Tony Luck Acked-by: Naoya Horiguchi [ Boris: correct commit msg ] Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fc5f780bb61d..9dcc9158c8e2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -738,20 +738,6 @@ static LIST_HEAD(ghes_nmi); static int ghes_panic_timeout __read_mostly = 30; -static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) -{ - struct llist_node *next, *tail = NULL; - - while (llnode) { - next = llnode->next; - llnode->next = tail; - tail = llnode; - llnode = next; - } - - return tail; -} - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -765,7 +751,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) * Because the time order of estatus in list is reversed, * revert it back to proper order. */ - llnode = llist_nodes_reverse(llnode); + llnode = llist_reverse_order(llnode); while (llnode) { next = llnode->next; estatus_node = llist_entry(llnode, struct ghes_estatus_node, @@ -798,7 +784,7 @@ static void ghes_print_queued_estatus(void) * Because the time order of estatus in list is reversed, * revert it back to proper order. */ - llnode = llist_nodes_reverse(llnode); + llnode = llist_reverse_order(llnode); while (llnode) { estatus_node = llist_entry(llnode, struct ghes_estatus_node, llnode); -- cgit v1.2.3 From 8f7c31f6cd499877aa6b96decd31b406a6cd4ddf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 29 Sep 2014 13:33:17 +0200 Subject: GHES: Make ghes_estatus_caches static It is used only in ghes.c. Signed-off-by: Borislav Petkov --- drivers/acpi/apei/ghes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9dcc9158c8e2..1b6aa514848f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -128,7 +128,7 @@ static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; -struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; static int ghes_ioremap_init(void) -- cgit v1.2.3 From 6dc52cbe0b181818450fc1de4c0c850226ce0e68 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 28 Jul 2014 02:51:00 -0400 Subject: RAS, HWPOISON: Fix wrong error recovery status When Uncorrected error happens, if the poisoned page is referenced by more than one user after error recovery, the recovery is not successful. But currently the display result is wrong. Before this patch: MCE 0x44e336: dirty mlocked LRU page recovery: Recovered MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users mce: Memory error not recovered After this patch: MCE 0x44e336: dirty mlocked LRU page recovery: Failed MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users mce: Memory error not recovered Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1406530260-26078-3-git-send-email-gong.chen@linux.intel.com Acked-by: Naoya Horiguchi Acked-by: Tony Luck Signed-off-by: Borislav Petkov --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8639f6b28746..b852b10ec76d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -860,7 +860,6 @@ static int page_action(struct page_state *ps, struct page *p, int count; result = ps->action(p, pfn); - action_result(pfn, ps->msg, result); count = page_count(p) - 1; if (ps->action == me_swapcache_dirty && result == DELAYED) @@ -871,6 +870,7 @@ static int page_action(struct page_state *ps, struct page *p, pfn, ps->msg, count); result = FAILED; } + action_result(pfn, ps->msg, result); /* Could do more checks here if page looks ok */ /* -- cgit v1.2.3 From 4b737d78a8081cb2def7ced262a212c31363539a Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 23 Sep 2014 10:16:01 +0800 Subject: x86, MCE, AMD: Use macros to compute bank MSRs Avoid open coded calculations for bank MSRs by hiding the index of higher bank MSRs in well-defined macros. No semantic changes. Signed-off-by: Chen Yucong Link: http://lkml.kernel.org/r/1411438561-24319-1-git-send-email-slaoub@gmail.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5d4999f95aec..f8c56bd7b74d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -217,7 +217,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) - address = MSR_IA32_MC0_MISC + bank * 4; + address = MSR_IA32_MCx_MISC(bank); else if (block == 1) { address = (low & MASK_BLKPTR_LO) >> 21; if (!address) @@ -281,7 +281,7 @@ static void amd_threshold_interrupt(void) continue; for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) { - address = MSR_IA32_MC0_MISC + bank * 4; + address = MSR_IA32_MCx_MISC(bank); } else if (block == 1) { address = (low & MASK_BLKPTR_LO) >> 21; if (!address) @@ -314,8 +314,7 @@ static void amd_threshold_interrupt(void) if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); - rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, - m.status); + rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; @@ -617,8 +616,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); + err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank)); if (!err) goto out; -- cgit v1.2.3 From 44612a3ac671d7b9a9b987ab73dcc776204ac4d5 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Thu, 2 Oct 2014 14:48:19 +0200 Subject: x86, MCE, AMD: Correct thresholding error logging mce_setup() does not gather the content of IA32_MCG_STATUS, so it should be read explicitly. Moreover, we need to clear IA32_MCx_STATUS to avoid that mce_log() logs the processed threshold event again at next time. But we do the logging ourselves and machine_check_poll() is completely useless there. So kill it. Signed-off-by: Chen Yucong Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f8c56bd7b74d..9ce64955559d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -270,14 +270,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; + int cpu = smp_processor_id(); unsigned int bank, block; struct mce m; - mce_setup(&m); - /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { - if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) + if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) { @@ -309,20 +308,21 @@ static void amd_threshold_interrupt(void) * Log the machine check that caused the threshold * event. */ - machine_check_poll(MCP_TIMESTAMP, - this_cpu_ptr(&mce_poll_banks)); - - if (high & MASK_OVERFLOW_HI) { - rdmsrl(address, m.misc); - rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); - m.bank = K8_MCE_THRESHOLD_BASE - + bank * NR_BLOCKS - + block; - mce_log(&m); - return; - } + if (high & MASK_OVERFLOW_HI) + goto log; } } + return; + +log: + mce_setup(&m); + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + rdmsrl(address, m.misc); + rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); + m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; + mce_log(&m); + + wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); } /* -- cgit v1.2.3 From 69b957583580bf40624553c64d802fefb54199cb Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Thu, 2 Oct 2014 23:20:12 +0800 Subject: x86, MCE, AMD: Move invariant code out from loop body Assigning to mce_threshold_vector is loop-invariant code in mce_amd_feature_init(). So do it only once, out of loop body. Signed-off-by: Chen Yucong Link: http://lkml.kernel.org/r/1412263212.8085.6.camel@debian [ Boris: commit message corrections. ] Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9ce64955559d..9af7bd74828b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -253,7 +253,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) } mce_threshold_block_init(&b, offset); - mce_threshold_vector = amd_threshold_interrupt; + + if (mce_threshold_vector != amd_threshold_interrupt) + mce_threshold_vector = amd_threshold_interrupt; } } } -- cgit v1.2.3 From a3a529d104ec5149fb9a667dce988635941be1ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Oct 2014 22:19:59 +0200 Subject: x86, MCE, AMD: Drop software-defined bank in error thresholding Aravind had the good question about why we're assigning a software-defined bank when reporting error thresholding errors instead of simply using the bank which reports the last error causing the overflow. Digging through git history, it pointed to 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") which added that functionality. The problem with this, however, is that tools don't know about software-defined banks and get puzzled. So drop that K8_MCE_THRESHOLD_BASE and simply use the hw bank reporting the thresholding interrupt. Save us a couple of MSR reads while at it. Reported-by: Aravind Gopalakrishnan Link: https://lkml.kernel.org/r/5435B206.60402@amd.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 1 - arch/x86/kernel/cpu/mcheck/mce_amd.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 958b90f761e5..276392f121fb 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -78,7 +78,6 @@ /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9af7bd74828b..6606523ff1c1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -318,10 +318,9 @@ static void amd_threshold_interrupt(void) log: mce_setup(&m); - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - rdmsrl(address, m.misc); rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); - m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; + m.misc = ((u64)high << 32) | low; + m.bank = bank; mce_log(&m); wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); -- cgit v1.2.3 From 8dcf32ea220d87ca517e164de85d336480c9d172 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Sat, 1 Nov 2014 11:23:32 +0100 Subject: x86, MCE, AMD: Assign interrupt handler only when bank supports it There are some AMD CPU models which have thresholding banks but which cannot generate a thresholding interrupt. This is denoted by the bit MCi_MISC[IntP]. Make sure to check that bit before assigning the thresholding interrupt handler. Signed-off-by: Chen Yucong [ Boris: save an indentation level and rewrite commit message. ] Link: http://lkml.kernel.org/r/1412662128.28440.18.camel@debian Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 6606523ff1c1..f1c3769bbd64 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -212,7 +212,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int offset = -1; + int offset = -1, new; for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -247,15 +247,18 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) b.address = address; b.interrupt_capable = lvt_interrupt_supported(bank, high); - if (b.interrupt_capable) { - int new = (high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce(offset, new); - } + if (!b.interrupt_capable) + goto init; - mce_threshold_block_init(&b, offset); + new = (high & MASK_LVTOFF_HI) >> 20; + offset = setup_APIC_mce(offset, new); - if (mce_threshold_vector != amd_threshold_interrupt) + if ((offset == new) && + (mce_threshold_vector != amd_threshold_interrupt)) mce_threshold_vector = amd_threshold_interrupt; + +init: + mce_threshold_block_init(&b, offset); } } } -- cgit v1.2.3 From e3480271f59253cb60d030aa5e615bf00b731fea Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 18 Nov 2014 10:09:19 +0800 Subject: x86, mce, severity: Extend the the mce_severity mechanism to handle UCNA/DEFERRED error Until now, the mce_severity mechanism can only identify the severity of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter out DEFERRED error for AMD platform. This patch extends the mce_severity mechanism for handling UCNA/DEFERRED error. In order to do this, the patch introduces a new severity level - MCE_UCNA/DEFERRED_SEVERITY. In addition, mce_severity is specific to machine check exception, and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity mechanism in non-exception context, the patch also introduces a new argument (is_excp) for mce_severity. `is_excp' is used to explicitly specify the calling context of mce_severity. Reviewed-by: Aravind Gopalakrishnan Signed-off-by: Chen Yucong Signed-off-by: Tony Luck --- arch/x86/include/asm/mce.h | 4 ++++ arch/x86/kernel/cpu/mcheck/mce-internal.h | 4 +++- arch/x86/kernel/cpu/mcheck/mce-severity.c | 23 +++++++++++++++++------ arch/x86/kernel/cpu/mcheck/mce.c | 14 ++++++++------ drivers/edac/mce_amd.h | 3 --- 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 276392f121fb..51b26e895933 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -34,6 +34,10 @@ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ +/* AMD-specific bits */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ +#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ + /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is * bits 15:0. But bit 12 is the 'F' bit, defined for corrected diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 09edd0b65fef..10b46906767f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -3,6 +3,8 @@ enum severity_level { MCE_NO_SEVERITY, + MCE_DEFERRED_SEVERITY, + MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY, MCE_KEEP_SEVERITY, MCE_SOME_SEVERITY, MCE_AO_SEVERITY, @@ -21,7 +23,7 @@ struct mce_bank { char attrname[ATTR_LEN]; /* attribute name */ }; -int mce_severity(struct mce *a, int tolerant, char **msg); +int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); extern struct mce_bank *mce_banks; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c370e1c4468b..8bb433043a7f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -31,6 +31,7 @@ enum context { IN_KERNEL = 1, IN_USER = 2 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 }; +enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; static struct severity { u64 mask; @@ -40,6 +41,7 @@ static struct severity { unsigned char mcgres; unsigned char ser; unsigned char context; + unsigned char excp; unsigned char covered; char *msg; } severities[] = { @@ -48,6 +50,8 @@ static struct severity { #define USER .context = IN_USER #define SER .ser = SER_REQUIRED #define NOSER .ser = NO_SER +#define EXCP .excp = EXCP_CONTEXT +#define NOEXCP .excp = NO_EXCP #define BITCLR(x) .mask = x, .result = 0 #define BITSET(x) .mask = x, .result = x #define MCGMASK(x, y) .mcgmask = x, .mcgres = y @@ -62,7 +66,7 @@ static struct severity { ), MCESEV( NO, "Not enabled", - BITCLR(MCI_STATUS_EN) + EXCP, BITCLR(MCI_STATUS_EN) ), MCESEV( PANIC, "Processor context corrupt", @@ -71,16 +75,20 @@ static struct severity { /* When MCIP is not set something is very confused */ MCESEV( PANIC, "MCIP not set in MCA handler", - MCGMASK(MCG_STATUS_MCIP, 0) + EXCP, MCGMASK(MCG_STATUS_MCIP, 0) ), /* Neither return not error IP -- no chance to recover -> PANIC */ MCESEV( PANIC, "Neither restart nor error IP", - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) + EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) ), MCESEV( PANIC, "In kernel and no restart IP", - KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) + EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) + ), + MCESEV( + DEFERRED, "Deferred error", + NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) ), MCESEV( KEEP, "Corrected error", @@ -89,7 +97,7 @@ static struct severity { /* ignore OVER for UCNA */ MCESEV( - KEEP, "Uncorrected no action required", + UCNA, "Uncorrected no action required", SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) ), MCESEV( @@ -178,8 +186,9 @@ static int error_context(struct mce *m) return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; } -int mce_severity(struct mce *m, int tolerant, char **msg) +int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) { + enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); enum context ctx = error_context(m); struct severity *s; @@ -194,6 +203,8 @@ int mce_severity(struct mce *m, int tolerant, char **msg) continue; if (s->context && ctx != s->context) continue; + if (s->excp && excp != s->excp) + continue; if (msg) *msg = s->msg; s->covered = 1; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 61a9668cebfd..453e9bf90968 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -668,7 +668,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY) + if (mce_severity(m, mca_cfg.tolerant, msg, true) >= + MCE_PANIC_SEVERITY) ret = 1; } return ret; @@ -754,7 +755,7 @@ static void mce_reign(void) for_each_possible_cpu(cpu) { int severity = mce_severity(&per_cpu(mces_seen, cpu), mca_cfg.tolerant, - &nmsg); + &nmsg, true); if (severity > global_worst) { msg = nmsg; global_worst = severity; @@ -1095,13 +1096,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - severity = mce_severity(&m, cfg->tolerant, NULL); + severity = mce_severity(&m, cfg->tolerant, NULL, true); /* - * When machine check was for corrected handler don't touch, - * unless we're panicing. + * When machine check was for corrected/deferred handler don't + * touch, unless we're panicing. */ - if (severity == MCE_KEEP_SEVERITY && !no_way_out) + if ((severity == MCE_KEEP_SEVERITY || + severity == MCE_UCNA_SEVERITY) && !no_way_out) continue; __set_bit(i, toclear); if (severity == MCE_NO_SEVERITY) { diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 51b7e3a36e37..c2359a1ea6b3 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -32,9 +32,6 @@ #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") -#define MCI_STATUS_DEFERRED BIT_64(44) -#define MCI_STATUS_POISON BIT_64(43) - extern const char * const pp_msgs[]; enum tt_ids { -- cgit v1.2.3 From fa92c58694268a7e9f7fa2c6881c1482221c2788 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 18 Nov 2014 10:09:20 +0800 Subject: x86, mce: Support memory error recovery for both UCNA and Deferred error in machine_check_poll Uncorrected no action required (UCNA) - is a uncorrected recoverable machine check error that is not signaled via a machine check exception and, instead, is reported to system software as a corrected machine check error. UCNA errors indicate that some data in the system is corrupted, but the data has not been consumed and the processor state is valid and you may continue execution on this processor. UCNA errors require no action from system software to continue execution. Note that UCNA errors are supported by the processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set. -- Intel SDM Volume 3B Deferred errors are errors that cannot be corrected by hardware, but do not cause an immediate interruption in program flow, loss of data integrity, or corruption of processor state. These errors indicate that data has been corrupted but not consumed. Hardware writes information to the status and address registers in the corresponding bank that identifies the source of the error if deferred errors are enabled for logging. Deferred errors are not reported via machine check exceptions; they can be seen by polling the MCi_STATUS registers. -- AMD64 APM Volume 2 Above two items, both UCNA and Deferred errors belong to detected errors, but they can't be corrected by hardware, and this is very similar to Software Recoverable Action Optional (SRAO) errors. Therefore, we can take some actions that have been used for handling SRAO errors to handle UCNA and Deferred errors. Acked-by: Borislav Petkov Signed-off-by: Chen Yucong Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 453e9bf90968..cfb16f631d52 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -575,6 +575,37 @@ static void mce_read_aux(struct mce *m, int i) } } +static bool memory_error(struct mce *m) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86_vendor == X86_VENDOR_AMD) { + /* + * coming soon + */ + return false; + } else if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * Intel SDM Volume 3B - 15.9.2 Compound Error Codes + * + * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for + * indicating a memory error. Bit 8 is used for indicating a + * cache hierarchy error. The combination of bit 2 and bit 3 + * is used for indicating a `generic' cache hierarchy error + * But we can't just blindly check the above bits, because if + * bit 11 is set, then it is a bus/interconnect error - and + * either way the above bits just gives more detail on what + * bus/interconnect error happened. Note that bit 12 can be + * ignored, as it's the "filter" bit. + */ + return (m->status & 0xef80) == BIT(7) || + (m->status & 0xef00) == BIT(8) || + (m->status & 0xeffc) == 0xc; + } + + return false; +} + DEFINE_PER_CPU(unsigned, mce_poll_count); /* @@ -595,6 +626,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct mce m; + int severity; int i; this_cpu_inc(mce_poll_count); @@ -630,6 +662,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; + + severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); + + /* + * In the cases where we don't have a valid address after all, + * do not add it into the ring buffer. + */ + if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { + if (m.status & MCI_STATUS_ADDRV) { + mce_ring_add(m.addr >> PAGE_SHIFT); + mce_schedule_work(); + } + } + /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. -- cgit v1.2.3 From c7c9b3929b6a57ad47ab4021c77e46f7ff21c007 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 3 Dec 2014 22:36:45 +0100 Subject: x86/mce: Spell "panicked" correctly We need the additional "k" to make it a hard-c: https://en.wiktionary.org/wiki/panicked Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1417642605-15730-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index cfb16f631d52..d2c611699cd9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -292,10 +292,10 @@ static void print_mce(struct mce *m) #define PANIC_TIMEOUT 5 /* 5 seconds */ -static atomic_t mce_paniced; +static atomic_t mce_panicked; static int fake_panic; -static atomic_t mce_fake_paniced; +static atomic_t mce_fake_panicked; /* Panic in progress. Enable interrupts and wait for final IPI */ static void wait_for_panic(void) @@ -319,7 +319,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) /* * Make sure only one CPU runs in machine check panic */ - if (atomic_inc_return(&mce_paniced) > 1) + if (atomic_inc_return(&mce_panicked) > 1) wait_for_panic(); barrier(); @@ -327,7 +327,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) console_verbose(); } else { /* Don't log too much for fake panic */ - if (atomic_inc_return(&mce_fake_paniced) > 1) + if (atomic_inc_return(&mce_fake_panicked) > 1) return; } /* First print corrected ones that are still unlogged */ @@ -744,7 +744,7 @@ static int mce_timed_out(u64 *t) * might have been modified by someone else. */ rmb(); - if (atomic_read(&mce_paniced)) + if (atomic_read(&mce_panicked)) wait_for_panic(); if (!mca_cfg.monarch_timeout) goto out; @@ -2568,7 +2568,7 @@ struct dentry *mce_get_debugfs_dir(void) static void mce_reset(void) { cpu_missing = 0; - atomic_set(&mce_fake_paniced, 0); + atomic_set(&mce_fake_panicked, 0); atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); -- cgit v1.2.3