diff options
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- | drivers/edac/amd64_edac.c | 98 |
1 files changed, 24 insertions, 74 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 82f48ee90f11..2080b1e2e8a2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2282,8 +2282,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, } } -static void amd64_decode_bus_error(struct mem_ctl_info *mci, - struct err_regs *info, int ecc_type) +static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, + struct err_regs *info, int ecc_type) { u32 ec = ERROR_CODE(info->nbsl); u32 xec = EXT_ERROR_CODE(info->nbsl); @@ -2316,86 +2316,23 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci, edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow"); } -void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs, - int handle_errors) +void amd64_decode_bus_error(int node_id, struct err_regs *regs, + int ecc_type) { - struct amd64_pvt *pvt = mci->pvt_info; - int ecc; - u32 ec = ERROR_CODE(regs->nbsl); - u32 xec = EXT_ERROR_CODE(regs->nbsl); - - if (!handle_errors) - return; - - pr_emerg(" Northbridge ERROR, mc node %d", pvt->mc_node_id); - - /* - * F10h, revD can disable ErrCpu[3:0] so check that first and also the - * value encoding has changed so interpret those differently - */ - if ((boot_cpu_data.x86 == 0x10) && - (boot_cpu_data.x86_model > 8)) { - if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) - pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); - } else { - pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf))); - } - - pr_emerg(" Error: %sorrected", - ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C")); - pr_cont(", Report Error: %s", - ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no")); - pr_cont(", MiscV: %svalid, CPU context corrupt: %s", - ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"), - ((regs->nbsh & K8_NBSH_PCC) ? "yes" : "no")); - - /* do the two bits[14:13] together */ - ecc = regs->nbsh & (0x3 << 13); - if (ecc) - pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); - - pr_cont("\n"); - - if (TLB_ERROR(ec)) { - /* - * GART errors are intended to help graphics driver developers - * to detect bad GART PTEs. It is recommended by AMD to disable - * GART table walk error reporting by default[1] (currently - * being disabled in mce_cpu_quirks()) and according to the - * comment in mce_cpu_quirks(), such GART errors can be - * incorrectly triggered. We may see these errors anyway and - * unless requested by the user, they won't be reported. - * - * [1] section 13.10.1 on BIOS and Kernel Developers Guide for - * AMD NPT family 0Fh processors - */ - if (!report_gart_errors) - return; - - pr_emerg(" GART TLB error, Transaction: %s, Cache Level %s\n", - TT_MSG(ec), LL_MSG(ec)); - } else if (MEM_ERROR(ec)) { - pr_emerg(" Memory/Cache error, Transaction: %s, Type: %s," - " Cache Level: %s", - RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); - } else if (BUS_ERROR(ec)) { - pr_emerg(" Bus (Link/DRAM) error\n"); - amd64_decode_bus_error(mci, regs, ecc); - } else { - /* shouldn't reach here! */ - amd64_mc_printk(mci, KERN_WARNING, - "%s(): unknown MCE error 0x%x\n", __func__, ec); - } + struct mem_ctl_info *mci = mci_lookup[node_id]; - pr_emerg("%s.\n", EXT_ERR_MSG(xec)); + __amd64_decode_bus_error(mci, regs, ecc_type); /* * Check the UE bit of the NB status high register, if set generate some * logs. If NOT a GART error, then process the event as a NO-INFO event. * If it was a GART error, skip that process. + * + * FIXME: this should go somewhere else, if at all. */ if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) edac_mc_handle_ue_no_info(mci, "UE bit is set"); + } /* @@ -2406,8 +2343,10 @@ static void amd64_check(struct mem_ctl_info *mci) { struct err_regs regs; - if (amd64_get_error_info(mci, ®s)) - amd64_decode_nb_mce(mci, ®s, 1); + if (amd64_get_error_info(mci, ®s)) { + struct amd64_pvt *pvt = mci->pvt_info; + amd_decode_nb_mce(pvt->mc_node_id, ®s, 1); + } } /* @@ -3103,6 +3042,13 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) mci_lookup[node_id] = mci; pvt_lookup[node_id] = NULL; + + /* register stuff with EDAC MCE */ + if (report_gart_errors) + amd_report_gart_errors(true); + + amd_register_ecc_decoder(amd64_decode_bus_error); + return 0; err_add_mc: @@ -3169,6 +3115,10 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) mci_lookup[pvt->mc_node_id] = NULL; + /* unregister from EDAC MCE */ + amd_report_gart_errors(false); + amd_unregister_ecc_decoder(amd64_decode_bus_error); + /* Free the EDAC CORE resources */ edac_mc_free(mci); } |