diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/altera_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 19 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 49 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 21 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.c | 17 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 14 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/xgene_edac.c | 6 |
8 files changed, 94 insertions, 43 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 58d3e2b39b5b..6421cc3c7dc1 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -153,13 +153,17 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, if (count == 3) { edac_printk(KERN_ALERT, EDAC_MC, "Inject Double bit error\n"); + local_irq_disable(); regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, (read_reg | priv->ue_set_mask)); + local_irq_enable(); } else { edac_printk(KERN_ALERT, EDAC_MC, "Inject Single bit error\n"); + local_irq_disable(); regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, (read_reg | priv->ce_set_mask)); + local_irq_enable(); } ptemp[0] = 0x5A5A5A5A; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ee181c53626f..ca1d63aa4e59 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2155,7 +2155,7 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); } -static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, +static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err, u8 ecc_type) { enum hw_event_mc_err_type err_type; @@ -2165,6 +2165,8 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, err_type = HW_EVENT_ERR_CORRECTED; else if (ecc_type == 1) err_type = HW_EVENT_ERR_UNCORRECTED; + else if (ecc_type == 3) + err_type = HW_EVENT_ERR_DEFERRED; else { WARN(1, "Something is rotten in the state of Denmark.\n"); return; @@ -2227,7 +2229,7 @@ static inline void decode_bus_error(int node_id, struct mce *m) pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); - __log_bus_error(mci, &err, ecc_type); + __log_ecc_error(mci, &err, ecc_type); } /* @@ -2627,7 +2629,6 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, { u32 value, mask = 0x3; /* UECC/CECC enable */ - if (!s->nbctl_valid) return; @@ -2703,7 +2704,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->mod_name = EDAC_MOD_STR; mci->mod_ver = EDAC_AMD64_VERSION; mci->ctl_name = fam->ctl_name; - mci->dev_name = pci_name(pvt->F2); + mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; /* memory scrubber interface */ @@ -2893,7 +2894,11 @@ static int probe_one_instance(unsigned int nid) if (!ecc_enable_override) goto err_enable; - amd64_warn("Forcing ECC on!\n"); + if (boot_cpu_data.x86 >= 0x17) { + amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS."); + goto err_enable; + } else + amd64_warn("Forcing ECC on!\n"); if (!enable_ecc_error_reporting(s, nid, F3)) goto err_enable; @@ -2902,7 +2907,9 @@ static int probe_one_instance(unsigned int nid) ret = init_one_instance(nid); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); - restore_ecc_error_reporting(s, nid, F3); + + if (boot_cpu_data.x86 < 0x17) + restore_ecc_error_reporting(s, nid, F3); } return ret; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index c3ee3ad98a63..d2ea9c4f1824 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -482,15 +482,8 @@ void edac_mc_free(struct mem_ctl_info *mci) } EXPORT_SYMBOL_GPL(edac_mc_free); - -/** - * find_mci_by_dev - * - * scan list of controllers looking for the one that manages - * the 'dev' device - * @dev: pointer to a struct device related with the MCI - */ -struct mem_ctl_info *find_mci_by_dev(struct device *dev) +/* Caller must hold mem_ctls_mutex */ +static struct mem_ctl_info *__find_mci_by_dev(struct device *dev) { struct mem_ctl_info *mci; struct list_head *item; @@ -506,6 +499,24 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) return NULL; } + +/** + * find_mci_by_dev + * + * scan list of controllers looking for the one that manages + * the 'dev' device + * @dev: pointer to a struct device related with the MCI + */ +struct mem_ctl_info *find_mci_by_dev(struct device *dev) +{ + struct mem_ctl_info *ret; + + mutex_lock(&mem_ctls_mutex); + ret = __find_mci_by_dev(dev); + mutex_unlock(&mem_ctls_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(find_mci_by_dev); /* @@ -588,7 +599,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) insert_before = &mc_devices; - p = find_mci_by_dev(mci->pdev); + p = __find_mci_by_dev(mci->pdev); if (unlikely(p != NULL)) goto fail0; @@ -640,26 +651,28 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) * * If found, return a pointer to the structure. * Else return NULL. - * - * Caller must hold mem_ctls_mutex. */ struct mem_ctl_info *edac_mc_find(int idx) { + struct mem_ctl_info *mci = NULL; struct list_head *item; - struct mem_ctl_info *mci; + + mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); if (mci->mc_idx >= idx) { - if (mci->mc_idx == idx) - return mci; - + if (mci->mc_idx == idx) { + goto unlock; + } break; } } - return NULL; +unlock: + mutex_unlock(&mem_ctls_mutex); + return mci; } EXPORT_SYMBOL(edac_mc_find); @@ -779,7 +792,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) mutex_lock(&mem_ctls_mutex); /* find the requested mci struct in the global list */ - mci = find_mci_by_dev(dev); + mci = __find_mci_by_dev(dev); if (mci == NULL) { mutex_unlock(&mem_ctls_mutex); return NULL; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 80762acd8cc8..ba2995c2cc23 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -8,7 +8,7 @@ static struct amd_decoder_ops *fam_ops; static u8 xec_mask = 0xf; static bool report_gart_errors; -static void (*nb_bus_decoder)(int node_id, struct mce *m); +static void (*decode_dram_ecc)(int node_id, struct mce *m); void amd_report_gart_errors(bool v) { @@ -18,16 +18,16 @@ EXPORT_SYMBOL_GPL(amd_report_gart_errors); void amd_register_ecc_decoder(void (*f)(int, struct mce *)) { - nb_bus_decoder = f; + decode_dram_ecc = f; } EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)) { - if (nb_bus_decoder) { - WARN_ON(nb_bus_decoder != f); + if (decode_dram_ecc) { + WARN_ON(decode_dram_ecc != f); - nb_bus_decoder = NULL; + decode_dram_ecc = NULL; } } EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); @@ -763,8 +763,8 @@ static void decode_mc4_mce(struct mce *m) pr_cont("%s.\n", mc4_mce_desc[xec]); - if (nb_bus_decoder) - nb_bus_decoder(node_id, m); + if (decode_dram_ecc) + decode_dram_ecc(node_id, m); return; } break; @@ -877,6 +877,13 @@ static void decode_smca_errors(struct mce *m) pr_emerg(HW_ERR "%s Error: ", ip_name); pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); } + + /* + * amd_get_nb_id() returns the last level cache id. + * The last level cache on Fam17h is 1 level below the node. + */ + if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) + decode_dram_ecc(amd_get_nb_id(m->extcpu) >> 1, m); } static inline void amd_decode_err_code(u16 ec) diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index ff0567526ee3..c62602141f95 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -300,6 +300,22 @@ err: return res; } +static int mpc85xx_pci_err_remove(struct platform_device *op) +{ + struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); + struct mpc85xx_pci_pdata *pdata = pci->pvt_info; + + edac_dbg(0, "\n"); + + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, orig_pci_err_cap_dr); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en); + + edac_pci_del_device(&op->dev); + edac_pci_free_ctl_info(pci); + + return 0; +} + static const struct platform_device_id mpc85xx_pci_err_match[] = { { .name = "mpc85xx-pci-edac" @@ -309,6 +325,7 @@ static const struct platform_device_id mpc85xx_pci_err_match[] = { static struct platform_driver mpc85xx_pci_err_driver = { .probe = mpc85xx_pci_err_probe, + .remove = mpc85xx_pci_err_remove, .id_table = mpc85xx_pci_err_match, .driver = { .name = "mpc85xx_pci_err", diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 54775221a01f..c1ad0eb7d5dd 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -23,6 +23,7 @@ #include <linux/math64.h> #include <linux/mod_devicetable.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/mce.h> @@ -3365,12 +3366,13 @@ fail0: { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } static const struct x86_cpu_id sbridge_cpuids[] = { - ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ - ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ - ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ - ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ - ICPU(0x56, pci_dev_descr_broadwell_table), /* BROADWELL-DE */ - ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ + ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table), + ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table), + ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table), + ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table), + ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table), + ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 0ff4878c2aa1..9edcb29b3001 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -25,6 +25,7 @@ #include <linux/math64.h> #include <linux/mod_devicetable.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/mce.h> @@ -262,8 +263,8 @@ fail: return -ENODEV; } -const struct x86_cpu_id skx_cpuids[] = { - { X86_VENDOR_INTEL, 6, 0x55, 0, 0 }, /* Skylake */ +static const struct x86_cpu_id skx_cpuids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); @@ -1036,7 +1037,7 @@ static void skx_remove(void) * search for all the devices we need * check which DIMMs are present. */ -int __init skx_init(void) +static int __init skx_init(void) { const struct x86_cpu_id *id; const struct munit *m; diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index bf19b6e3bd12..5569391ea800 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1602,16 +1602,16 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) dev_err(edac_dev->dev, "IOB PA read data RAM error\n"); if (reg & IOBPA_M_RDATA_CORRUPT_MASK) dev_err(edac_dev->dev, - "Mutilple IOB PA read data RAM error\n"); + "Multiple IOB PA read data RAM error\n"); if (reg & IOBPA_WDATA_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA write data RAM error\n"); if (reg & IOBPA_M_WDATA_CORRUPT_MASK) dev_err(edac_dev->dev, - "Mutilple IOB PA write data RAM error\n"); + "Multiple IOB PA write data RAM error\n"); if (reg & IOBPA_TRANS_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA transaction error\n"); if (reg & IOBPA_M_TRANS_CORRUPT_MASK) - dev_err(edac_dev->dev, "Mutilple IOB PA transaction error\n"); + dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n"); if (reg & IOBPA_REQIDRAM_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n"); if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK) |