summaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/altera_edac.c667
-rw-r--r--drivers/edac/altera_edac.h73
-rw-r--r--drivers/edac/amd64_edac.c24
-rw-r--r--drivers/edac/amd64_edac.h3
-rw-r--r--drivers/edac/ghes_edac.c23
-rw-r--r--drivers/edac/i3200_edac.c2
-rw-r--r--drivers/edac/i7core_edac.c5
-rw-r--r--drivers/edac/mce_amd.c4
-rw-r--r--drivers/edac/sb_edac.c204
-rw-r--r--drivers/edac/skx_edac.c7
-rw-r--r--drivers/edac/thunderx_edac.c4
11 files changed, 462 insertions, 554 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index ab7c5a937ab0..c89d82aa2776 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -69,25 +69,6 @@ static const struct altr_sdram_prv_data a10_data = {
.ue_set_mask = A10_DIAGINT_TDERRA_MASK,
};
-static const struct altr_sdram_prv_data s10_data = {
- .ecc_ctrl_offset = S10_ECCCTRL1_OFST,
- .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN,
- .ecc_stat_offset = S10_INTSTAT_OFST,
- .ecc_stat_ce_mask = A10_INTSTAT_SBEERR,
- .ecc_stat_ue_mask = A10_INTSTAT_DBEERR,
- .ecc_saddr_offset = S10_SERRADDR_OFST,
- .ecc_daddr_offset = S10_DERRADDR_OFST,
- .ecc_irq_en_offset = S10_ERRINTEN_OFST,
- .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK,
- .ecc_irq_clr_offset = S10_INTSTAT_OFST,
- .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR),
- .ecc_cnt_rst_offset = S10_ECCCTRL1_OFST,
- .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK,
- .ce_ue_trgr_offset = S10_DIAGINTTEST_OFST,
- .ce_set_mask = A10_DIAGINT_TSERRA_MASK,
- .ue_set_mask = A10_DIAGINT_TDERRA_MASK,
-};
-
/*********************** EDAC Memory Controller Functions ****************/
/* The SDRAM controller uses the EDAC Memory Controller framework. */
@@ -239,7 +220,7 @@ static unsigned long get_total_mem(void)
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
{ .compatible = "altr,sdram-edac", .data = &c5_data},
{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
- { .compatible = "altr,sdram-edac-s10", .data = &s10_data},
+ { .compatible = "altr,sdram-edac-s10", .data = &a10_data},
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -293,6 +274,7 @@ release:
return ret;
}
+static int socfpga_is_a10(void);
static int altr_sdram_probe(struct platform_device *pdev)
{
const struct of_device_id *id;
@@ -416,7 +398,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
goto err;
/* Only the Arria10 has separate IRQs */
- if (irq2 > 0) {
+ if (socfpga_is_a10()) {
/* Arria10 specific initialization */
res = a10_init(mc_vbase);
if (res < 0)
@@ -502,8 +484,9 @@ static int s10_protected_reg_write(void *context, unsigned int reg,
unsigned int val)
{
struct arm_smccc_res result;
+ unsigned long offset = (unsigned long)context;
- arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, reg, val, 0, 0,
+ arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0,
0, 0, 0, &result);
return (int)result.a0;
@@ -523,8 +506,9 @@ static int s10_protected_reg_read(void *context, unsigned int reg,
unsigned int *val)
{
struct arm_smccc_res result;
+ unsigned long offset = (unsigned long)context;
- arm_smccc_smc(INTEL_SIP_SMC_REG_READ, reg, 0, 0, 0,
+ arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0,
0, 0, 0, &result);
*val = (unsigned int)result.a1;
@@ -532,246 +516,18 @@ static int s10_protected_reg_read(void *context, unsigned int reg,
return (int)result.a0;
}
-static bool s10_sdram_writeable_reg(struct device *dev, unsigned int reg)
-{
- switch (reg) {
- case S10_ECCCTRL1_OFST:
- case S10_ERRINTEN_OFST:
- case S10_INTMODE_OFST:
- case S10_INTSTAT_OFST:
- case S10_DIAGINTTEST_OFST:
- case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
- case S10_SYSMGR_ECC_INTMASK_SET_OFST:
- case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
- return true;
- }
- return false;
-}
-
-static bool s10_sdram_readable_reg(struct device *dev, unsigned int reg)
-{
- switch (reg) {
- case S10_ECCCTRL1_OFST:
- case S10_ERRINTEN_OFST:
- case S10_INTMODE_OFST:
- case S10_INTSTAT_OFST:
- case S10_DERRADDR_OFST:
- case S10_SERRADDR_OFST:
- case S10_DIAGINTTEST_OFST:
- case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
- case S10_SYSMGR_ECC_INTMASK_SET_OFST:
- case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
- case S10_SYSMGR_ECC_INTSTAT_SERR_OFST:
- case S10_SYSMGR_ECC_INTSTAT_DERR_OFST:
- return true;
- }
- return false;
-}
-
-static bool s10_sdram_volatile_reg(struct device *dev, unsigned int reg)
-{
- switch (reg) {
- case S10_ECCCTRL1_OFST:
- case S10_ERRINTEN_OFST:
- case S10_INTMODE_OFST:
- case S10_INTSTAT_OFST:
- case S10_DERRADDR_OFST:
- case S10_SERRADDR_OFST:
- case S10_DIAGINTTEST_OFST:
- case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
- case S10_SYSMGR_ECC_INTMASK_SET_OFST:
- case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
- case S10_SYSMGR_ECC_INTSTAT_SERR_OFST:
- case S10_SYSMGR_ECC_INTSTAT_DERR_OFST:
- return true;
- }
- return false;
-}
-
static const struct regmap_config s10_sdram_regmap_cfg = {
.name = "s10_ddr",
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = 0xffffffff,
- .writeable_reg = s10_sdram_writeable_reg,
- .readable_reg = s10_sdram_readable_reg,
- .volatile_reg = s10_sdram_volatile_reg,
+ .max_register = 0xffd12228,
.reg_read = s10_protected_reg_read,
.reg_write = s10_protected_reg_write,
.use_single_read = true,
.use_single_write = true,
};
-static int altr_s10_sdram_probe(struct platform_device *pdev)
-{
- const struct of_device_id *id;
- struct edac_mc_layer layers[2];
- struct mem_ctl_info *mci;
- struct altr_sdram_mc_data *drvdata;
- const struct altr_sdram_prv_data *priv;
- struct regmap *regmap;
- struct dimm_info *dimm;
- u32 read_reg;
- int irq, ret = 0;
- unsigned long mem_size;
-
- id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
- if (!id)
- return -ENODEV;
-
- /* Grab specific offsets and masks for Stratix10 */
- priv = of_match_node(altr_sdram_ctrl_of_match,
- pdev->dev.of_node)->data;
-
- regmap = devm_regmap_init(&pdev->dev, NULL, (void *)priv,
- &s10_sdram_regmap_cfg);
- if (IS_ERR(regmap))
- return PTR_ERR(regmap);
-
- /* Validate the SDRAM controller has ECC enabled */
- if (regmap_read(regmap, priv->ecc_ctrl_offset, &read_reg) ||
- ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "No ECC/ECC disabled [0x%08X]\n", read_reg);
- return -ENODEV;
- }
-
- /* Grab memory size from device tree. */
- mem_size = get_total_mem();
- if (!mem_size) {
- edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n");
- return -ENODEV;
- }
-
- /* Ensure the SDRAM Interrupt is disabled */
- if (regmap_update_bits(regmap, priv->ecc_irq_en_offset,
- priv->ecc_irq_en_mask, 0)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Error disabling SDRAM ECC IRQ\n");
- return -ENODEV;
- }
-
- /* Toggle to clear the SDRAM Error count */
- if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset,
- priv->ecc_cnt_rst_mask,
- priv->ecc_cnt_rst_mask)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Error clearing SDRAM ECC count\n");
- return -ENODEV;
- }
-
- if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset,
- priv->ecc_cnt_rst_mask, 0)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Error clearing SDRAM ECC count\n");
- return -ENODEV;
- }
-
- irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- edac_printk(KERN_ERR, EDAC_MC,
- "No irq %d in DT\n", irq);
- return -ENODEV;
- }
-
- layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = 1;
- layers[0].is_virt_csrow = true;
- layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = 1;
- layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
- sizeof(struct altr_sdram_mc_data));
- if (!mci)
- return -ENOMEM;
-
- mci->pdev = &pdev->dev;
- drvdata = mci->pvt_info;
- drvdata->mc_vbase = regmap;
- drvdata->data = priv;
- platform_set_drvdata(pdev, mci);
-
- if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Unable to get managed device resource\n");
- ret = -ENOMEM;
- goto free;
- }
-
- mci->mtype_cap = MEM_FLAG_DDR3;
- mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
- mci->edac_cap = EDAC_FLAG_SECDED;
- mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = dev_name(&pdev->dev);
- mci->scrub_mode = SCRUB_SW_SRC;
- mci->dev_name = dev_name(&pdev->dev);
-
- dimm = *mci->dimms;
- dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1;
- dimm->grain = 8;
- dimm->dtype = DEV_X8;
- dimm->mtype = MEM_DDR3;
- dimm->edac_mode = EDAC_SECDED;
-
- ret = edac_mc_add_mc(mci);
- if (ret < 0)
- goto err;
-
- ret = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
- IRQF_SHARED, dev_name(&pdev->dev), mci);
- if (ret < 0) {
- edac_mc_printk(mci, KERN_ERR,
- "Unable to request irq %d\n", irq);
- ret = -ENODEV;
- goto err2;
- }
-
- if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST,
- S10_DDR0_IRQ_MASK)) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Error clearing SDRAM ECC count\n");
- ret = -ENODEV;
- goto err2;
- }
-
- if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
- priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
- edac_mc_printk(mci, KERN_ERR,
- "Error enabling SDRAM ECC IRQ\n");
- ret = -ENODEV;
- goto err2;
- }
-
- altr_sdr_mc_create_debugfs_nodes(mci);
-
- devres_close_group(&pdev->dev, NULL);
-
- return 0;
-
-err2:
- edac_mc_del_mc(&pdev->dev);
-err:
- devres_release_group(&pdev->dev, NULL);
-free:
- edac_mc_free(mci);
- edac_printk(KERN_ERR, EDAC_MC,
- "EDAC Probe Failed; Error %d\n", ret);
-
- return ret;
-}
-
-static int altr_s10_sdram_remove(struct platform_device *pdev)
-{
- struct mem_ctl_info *mci = platform_get_drvdata(pdev);
-
- edac_mc_del_mc(&pdev->dev);
- edac_mc_free(mci);
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
/************** </Stratix10 EDAC Memory Controller Functions> ***********/
/*
@@ -805,20 +561,6 @@ static struct platform_driver altr_sdram_edac_driver = {
module_platform_driver(altr_sdram_edac_driver);
-static struct platform_driver altr_s10_sdram_edac_driver = {
- .probe = altr_s10_sdram_probe,
- .remove = altr_s10_sdram_remove,
- .driver = {
- .name = "altr_s10_sdram_edac",
-#ifdef CONFIG_PM
- .pm = &altr_sdram_pm_ops,
-#endif
- .of_match_table = altr_sdram_ctrl_of_match,
- },
-};
-
-module_platform_driver(altr_s10_sdram_edac_driver);
-
/************************* EDAC Parent Probe *************************/
static const struct of_device_id altr_edac_device_of_match[];
@@ -972,6 +714,16 @@ static const struct file_operations altr_edac_a10_device_inject_fops = {
.llseek = generic_file_llseek,
};
+static ssize_t altr_edac_a10_device_trig2(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos);
+
+static const struct file_operations altr_edac_a10_device_inject2_fops = {
+ .open = simple_open,
+ .write = altr_edac_a10_device_trig2,
+ .llseek = generic_file_llseek,
+};
+
static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
const struct edac_device_prv_data *priv)
{
@@ -1253,6 +1005,16 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port)
return ret;
}
+static int socfpga_is_a10(void)
+{
+ return of_machine_is_compatible("altr,socfpga-arria10");
+}
+
+static int socfpga_is_s10(void)
+{
+ return of_machine_is_compatible("altr,socfpga-stratix10");
+}
+
static __init int __maybe_unused
altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
u32 ecc_ctrl_en_mask, bool dual_port)
@@ -1267,8 +1029,32 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
/* Get the ECC Manager - parent of the device EDACs */
np_eccmgr = of_get_parent(np);
- ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr,
- "altr,sysmgr-syscon");
+
+ if (socfpga_is_a10()) {
+ ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr,
+ "altr,sysmgr-syscon");
+ } else {
+ struct device_node *sysmgr_np;
+ struct resource res;
+ uintptr_t base;
+
+ sysmgr_np = of_parse_phandle(np_eccmgr,
+ "altr,sysmgr-syscon", 0);
+ if (!sysmgr_np) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to find altr,sysmgr-syscon\n");
+ return -ENODEV;
+ }
+
+ if (of_address_to_resource(sysmgr_np, 0, &res))
+ return -ENOMEM;
+
+ /* Need physical address for SMCC call */
+ base = res.start;
+
+ ecc_mgr_map = regmap_init(NULL, NULL, (void *)base,
+ &s10_sdram_regmap_cfg);
+ }
of_node_put(np_eccmgr);
if (IS_ERR(ecc_mgr_map)) {
edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -1326,11 +1112,6 @@ out:
return ret;
}
-static int socfpga_is_a10(void)
-{
- return of_machine_is_compatible("altr,socfpga-arria10");
-}
-
static int validate_parent_available(struct device_node *np);
static const struct of_device_id altr_edac_a10_device_of_match[];
static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
@@ -1338,7 +1119,7 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
int irq;
struct device_node *child, *np;
- if (!socfpga_is_a10())
+ if (!socfpga_is_a10() && !socfpga_is_s10())
return -ENODEV;
np = of_find_compatible_node(NULL, NULL,
@@ -1584,7 +1365,7 @@ static const struct edac_device_prv_data a10_enetecc_data = {
.ue_set_mask = ALTR_A10_ECC_TDERRA,
.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
.ecc_irq_handler = altr_edac_a10_ecc_irq,
- .inject_fops = &altr_edac_a10_device_inject_fops,
+ .inject_fops = &altr_edac_a10_device_inject2_fops,
};
static int __init socfpga_init_ethernet_ecc(void)
@@ -1662,7 +1443,7 @@ static const struct edac_device_prv_data a10_usbecc_data = {
.ue_set_mask = ALTR_A10_ECC_TDERRA,
.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
.ecc_irq_handler = altr_edac_a10_ecc_irq,
- .inject_fops = &altr_edac_a10_device_inject_fops,
+ .inject_fops = &altr_edac_a10_device_inject2_fops,
};
static int __init socfpga_init_usb_ecc(void)
@@ -1860,7 +1641,7 @@ static int __init socfpga_init_sdmmc_ecc(void)
int rc = -ENODEV;
struct device_node *child;
- if (!socfpga_is_a10())
+ if (!socfpga_is_a10() && !socfpga_is_s10())
return -ENODEV;
child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
@@ -1944,6 +1725,74 @@ static ssize_t altr_edac_a10_device_trig(struct file *file,
writel(priv->ue_set_mask, set_addr);
else
writel(priv->ce_set_mask, set_addr);
+
+ /* Ensure the interrupt test bits are set */
+ wmb();
+ local_irq_restore(flags);
+
+ return count;
+}
+
+/*
+ * The Stratix10 EDAC Error Injection Functions differ from Arria10
+ * slightly. A few Arria10 peripherals can use this injection function.
+ * Inject the error into the memory and then readback to trigger the IRQ.
+ */
+static ssize_t altr_edac_a10_device_trig2(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct edac_device_ctl_info *edac_dci = file->private_data;
+ struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+ const struct edac_device_prv_data *priv = drvdata->data;
+ void __iomem *set_addr = (drvdata->base + priv->set_err_ofst);
+ unsigned long flags;
+ u8 trig_type;
+
+ if (!user_buf || get_user(trig_type, user_buf))
+ return -EFAULT;
+
+ local_irq_save(flags);
+ if (trig_type == ALTR_UE_TRIGGER_CHAR) {
+ writel(priv->ue_set_mask, set_addr);
+ } else {
+ /* Setup write of 0 to first 4 bytes */
+ writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST);
+ /* Setup write of 4 bytes */
+ writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST);
+ /* Setup Address to 0 */
+ writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST);
+ /* Setup accctrl to write & data override */
+ writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
+ /* Kick it. */
+ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
+ /* Setup accctrl to read & ecc override */
+ writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
+ /* Kick it. */
+ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
+ /* Setup write for single bit change */
+ writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST);
+ writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST);
+ /* Copy Read ECC to Write ECC */
+ writel(readl(drvdata->base + ECC_BLK_RECC0_OFST),
+ drvdata->base + ECC_BLK_WECC0_OFST);
+ writel(readl(drvdata->base + ECC_BLK_RECC1_OFST),
+ drvdata->base + ECC_BLK_WECC1_OFST);
+ /* Setup accctrl to write & ecc override & data override */
+ writel(ECC_WRITE_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
+ /* Kick it. */
+ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
+ /* Setup accctrl to read & ecc overwrite & data overwrite */
+ writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
+ /* Kick it. */
+ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
+ }
+
/* Ensure the interrupt test bits are set */
wmb();
local_irq_restore(flags);
@@ -2147,6 +1996,35 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = {
.xlate = irq_domain_xlate_twocell,
};
+/************** Stratix 10 EDAC Double Bit Error Handler ************/
+#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m)
+
+/*
+ * The double bit error is handled through SError which is fatal. This is
+ * called as a panic notifier to printout ECC error info as part of the panic.
+ */
+static int s10_edac_dberr_handler(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct altr_arria10_edac *edac = to_a10edac(this, panic_notifier);
+ int err_addr, dberror;
+
+ regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
+ &dberror);
+ regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror);
+ if (dberror & S10_DDR0_IRQ_MASK) {
+ regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr);
+ regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
+ err_addr);
+ edac_printk(KERN_ERR, EDAC_MC,
+ "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
+ err_addr);
+ }
+
+ return NOTIFY_DONE;
+}
+
+/****************** Arria 10 EDAC Probe Function *********************/
static int altr_edac_a10_probe(struct platform_device *pdev)
{
struct altr_arria10_edac *edac;
@@ -2160,8 +2038,34 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, edac);
INIT_LIST_HEAD(&edac->a10_ecc_devices);
- edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ if (socfpga_is_a10()) {
+ edac->ecc_mgr_map =
+ syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"altr,sysmgr-syscon");
+ } else {
+ struct device_node *sysmgr_np;
+ struct resource res;
+ uintptr_t base;
+
+ sysmgr_np = of_parse_phandle(pdev->dev.of_node,
+ "altr,sysmgr-syscon", 0);
+ if (!sysmgr_np) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to find altr,sysmgr-syscon\n");
+ return -ENODEV;
+ }
+
+ if (of_address_to_resource(sysmgr_np, 0, &res))
+ return -ENOMEM;
+
+ /* Need physical address for SMCC call */
+ base = res.start;
+
+ edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL,
+ (void *)base,
+ &s10_sdram_regmap_cfg);
+ }
+
if (IS_ERR(edac->ecc_mgr_map)) {
edac_printk(KERN_ERR, EDAC_DEVICE,
"Unable to get syscon altr,sysmgr-syscon\n");
@@ -2188,14 +2092,38 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_irq_handler,
edac);
- edac->db_irq = platform_get_irq(pdev, 1);
- if (edac->db_irq < 0) {
- dev_err(&pdev->dev, "No DBERR IRQ resource\n");
- return edac->db_irq;
+ if (socfpga_is_a10()) {
+ edac->db_irq = platform_get_irq(pdev, 1);
+ if (edac->db_irq < 0) {
+ dev_err(&pdev->dev, "No DBERR IRQ resource\n");
+ return edac->db_irq;
+ }
+ irq_set_chained_handler_and_data(edac->db_irq,
+ altr_edac_a10_irq_handler,
+ edac);
+ } else {
+ int dberror, err_addr;
+
+ edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &edac->panic_notifier);
+
+ /* Printout a message if uncorrectable error previously. */
+ regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST,
+ &dberror);
+ if (dberror) {
+ regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
+ &err_addr);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Previous Boot UE detected[0x%X] @ 0x%X\n",
+ dberror, err_addr);
+ /* Reset the sticky registers */
+ regmap_write(edac->ecc_mgr_map,
+ S10_SYSMGR_UE_VAL_OFST, 0);
+ regmap_write(edac->ecc_mgr_map,
+ S10_SYSMGR_UE_ADDR_OFST, 0);
+ }
}
- irq_set_chained_handler_and_data(edac->db_irq,
- altr_edac_a10_irq_handler,
- edac);
for_each_child_of_node(pdev->dev.of_node, child) {
if (!of_device_is_available(child))
@@ -2212,7 +2140,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_device_add(edac, child);
- else if (of_device_is_compatible(child, "altr,sdram-edac-a10"))
+ else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) ||
+ (of_device_is_compatible(child, "altr,sdram-edac-s10")))
of_platform_populate(pdev->dev.of_node,
altr_sdram_ctrl_of_match,
NULL, &pdev->dev);
@@ -2223,6 +2152,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
static const struct of_device_id altr_edac_a10_of_match[] = {
{ .compatible = "altr,socfpga-a10-ecc-manager" },
+ { .compatible = "altr,socfpga-s10-ecc-manager" },
{},
};
MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match);
@@ -2236,171 +2166,6 @@ static struct platform_driver altr_edac_a10_driver = {
};
module_platform_driver(altr_edac_a10_driver);
-/************** Stratix 10 EDAC Device Controller Functions> ************/
-
-#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m)
-
-/*
- * The double bit error is handled through SError which is fatal. This is
- * called as a panic notifier to printout ECC error info as part of the panic.
- */
-static int s10_edac_dberr_handler(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier);
- int err_addr, dberror;
-
- s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
- &dberror);
- /* Remember the UE Errors for a reboot */
- s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror);
- if (dberror & S10_DDR0_IRQ_MASK) {
- s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr);
- /* Remember the UE Error address */
- s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST,
- err_addr);
- edac_printk(KERN_ERR, EDAC_MC,
- "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
- err_addr);
- }
-
- return NOTIFY_DONE;
-}
-
-static void altr_edac_s10_irq_handler(struct irq_desc *desc)
-{
- struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc);
- struct irq_chip *chip = irq_desc_get_chip(desc);
- int irq = irq_desc_get_irq(desc);
- int bit, sm_offset, irq_status;
-
- sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST;
-
- chained_irq_enter(chip, desc);
-
- s10_protected_reg_read(NULL, sm_offset, &irq_status);
-
- for_each_set_bit(bit, (unsigned long *)&irq_status, 32) {
- irq = irq_linear_revmap(edac->domain, bit);
- if (irq)
- generic_handle_irq(irq);
- }
-
- chained_irq_exit(chip, desc);
-}
-
-static void s10_eccmgr_irq_mask(struct irq_data *d)
-{
- struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d);
-
- s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_SET_OFST,
- BIT(d->hwirq));
-}
-
-static void s10_eccmgr_irq_unmask(struct irq_data *d)
-{
- struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d);
-
- s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_CLR_OFST,
- BIT(d->hwirq));
-}
-
-static int s10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq,
- irq_hw_number_t hwirq)
-{
- struct altr_stratix10_edac *edac = d->host_data;
-
- irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq);
- irq_set_chip_data(irq, edac);
- irq_set_noprobe(irq);
-
- return 0;
-}
-
-static const struct irq_domain_ops s10_eccmgr_ic_ops = {
- .map = s10_eccmgr_irqdomain_map,
- .xlate = irq_domain_xlate_twocell,
-};
-
-static int altr_edac_s10_probe(struct platform_device *pdev)
-{
- struct altr_stratix10_edac *edac;
- struct device_node *child;
- int dberror, err_addr;
-
- edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
- if (!edac)
- return -ENOMEM;
-
- edac->dev = &pdev->dev;
- platform_set_drvdata(pdev, edac);
- INIT_LIST_HEAD(&edac->s10_ecc_devices);
-
- edac->irq_chip.name = pdev->dev.of_node->name;
- edac->irq_chip.irq_mask = s10_eccmgr_irq_mask;
- edac->irq_chip.irq_unmask = s10_eccmgr_irq_unmask;
- edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64,
- &s10_eccmgr_ic_ops, edac);
- if (!edac->domain) {
- dev_err(&pdev->dev, "Error adding IRQ domain\n");
- return -ENOMEM;
- }
-
- edac->sb_irq = platform_get_irq(pdev, 0);
- if (edac->sb_irq < 0) {
- dev_err(&pdev->dev, "No SBERR IRQ resource\n");
- return edac->sb_irq;
- }
-
- irq_set_chained_handler_and_data(edac->sb_irq,
- altr_edac_s10_irq_handler,
- edac);
-
- edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
- atomic_notifier_chain_register(&panic_notifier_list,
- &edac->panic_notifier);
-
- /* Printout a message if uncorrectable error previously. */
- s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror);
- if (dberror) {
- s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST,
- &err_addr);
- edac_printk(KERN_ERR, EDAC_DEVICE,
- "Previous Boot UE detected[0x%X] @ 0x%X\n",
- dberror, err_addr);
- /* Reset the sticky registers */
- s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0);
- s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0);
- }
-
- for_each_child_of_node(pdev->dev.of_node, child) {
- if (!of_device_is_available(child))
- continue;
-
- if (of_device_is_compatible(child, "altr,sdram-edac-s10"))
- of_platform_populate(pdev->dev.of_node,
- altr_sdram_ctrl_of_match,
- NULL, &pdev->dev);
- }
-
- return 0;
-}
-
-static const struct of_device_id altr_edac_s10_of_match[] = {
- { .compatible = "altr,socfpga-s10-ecc-manager" },
- {},
-};
-MODULE_DEVICE_TABLE(of, altr_edac_s10_of_match);
-
-static struct platform_driver altr_edac_s10_driver = {
- .probe = altr_edac_s10_probe,
- .driver = {
- .name = "socfpga_s10_ecc_manager",
- .of_match_table = altr_edac_s10_of_match,
- },
-};
-module_platform_driver(altr_edac_s10_driver);
-
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 81f0554e09de..4213cb0bb2a7 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -156,34 +156,6 @@
#define A10_INTMASK_CLR_OFST 0x10
#define A10_DDR0_IRQ_MASK BIT(17)
-/************* Stratix10 Defines **************/
-
-/* SDRAM Controller EccCtrl Register */
-#define S10_ECCCTRL1_OFST 0xF8011100
-
-/* SDRAM Controller DRAM IRQ Register */
-#define S10_ERRINTEN_OFST 0xF8011110
-
-/* SDRAM Interrupt Mode Register */
-#define S10_INTMODE_OFST 0xF801111C
-
-/* SDRAM Controller Error Status Register */
-#define S10_INTSTAT_OFST 0xF8011120
-
-/* SDRAM Controller ECC Error Address Register */
-#define S10_DERRADDR_OFST 0xF801112C
-#define S10_SERRADDR_OFST 0xF8011130
-
-/* SDRAM Controller ECC Diagnostic Register */
-#define S10_DIAGINTTEST_OFST 0xF8011124
-
-/* SDRAM Single Bit Error Count Compare Set Register */
-#define S10_SERRCNTREG_OFST 0xF801113C
-
-/* Sticky registers for Uncorrected Errors */
-#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220
-#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224
-
struct altr_sdram_prv_data {
int ecc_ctrl_offset;
int ecc_ctl_en_mask;
@@ -319,15 +291,40 @@ struct altr_sdram_mc_data {
/************* Stratix10 Defines **************/
/* Stratix10 ECC Manager Defines */
-#define S10_SYSMGR_ECC_INTMASK_VAL_OFST 0xFFD12090
-#define S10_SYSMGR_ECC_INTMASK_SET_OFST 0xFFD12094
-#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0xFFD12098
+#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
+#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
-#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C
-#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0
+/* Sticky registers for Uncorrected Errors */
+#define S10_SYSMGR_UE_VAL_OFST 0x120
+#define S10_SYSMGR_UE_ADDR_OFST 0x124
#define S10_DDR0_IRQ_MASK BIT(16)
+/* Define ECC Block Offsets for peripherals */
+#define ECC_BLK_ADDRESS_OFST 0x40
+#define ECC_BLK_RDATA0_OFST 0x44
+#define ECC_BLK_RDATA1_OFST 0x48
+#define ECC_BLK_RDATA2_OFST 0x4C
+#define ECC_BLK_RDATA3_OFST 0x50
+#define ECC_BLK_WDATA0_OFST 0x54
+#define ECC_BLK_WDATA1_OFST 0x58
+#define ECC_BLK_WDATA2_OFST 0x5C
+#define ECC_BLK_WDATA3_OFST 0x60
+#define ECC_BLK_RECC0_OFST 0x64
+#define ECC_BLK_RECC1_OFST 0x68
+#define ECC_BLK_WECC0_OFST 0x6C
+#define ECC_BLK_WECC1_OFST 0x70
+#define ECC_BLK_DBYTECTRL_OFST 0x74
+#define ECC_BLK_ACCCTRL_OFST 0x78
+#define ECC_BLK_STARTACC_OFST 0x7C
+
+#define ECC_XACT_KICK 0x10000
+#define ECC_WORD_WRITE 0xF
+#define ECC_WRITE_DOVR 0x101
+#define ECC_WRITE_EDOVR 0x103
+#define ECC_READ_EOVR 0x2
+#define ECC_READ_EDOVR 0x3
+
struct altr_edac_device_dev;
struct edac_device_prv_data {
@@ -370,6 +367,7 @@ struct altr_arria10_edac {
struct irq_domain *domain;
struct irq_chip irq_chip;
struct list_head a10_ecc_devices;
+ struct notifier_block panic_notifier;
};
/*
@@ -437,13 +435,4 @@ struct altr_arria10_edac {
#define INTEL_SIP_SMC_REG_WRITE \
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
-struct altr_stratix10_edac {
- struct device *dev;
- int sb_irq;
- struct irq_domain *domain;
- struct irq_chip irq_chip;
- struct list_head s10_ecc_devices;
- struct notifier_block panic_notifier;
-};
-
#endif /* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 18aeabb1d5ee..6ea98575a402 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -211,7 +211,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
scrubval = scrubrates[i].scrubval;
- if (pvt->fam == 0x17) {
+ if (pvt->fam == 0x17 || pvt->fam == 0x18) {
__f17h_set_scrubval(pvt, scrubval);
} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0);
@@ -264,6 +264,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
break;
case 0x17:
+ case 0x18:
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
if (scrubval & BIT(0)) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -1044,6 +1045,7 @@ static void determine_memory_type(struct amd64_pvt *pvt)
goto ddr3;
case 0x17:
+ case 0x18:
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
pvt->dram_type = MEM_LRDDR4;
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
@@ -2200,6 +2202,15 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_base_addr_to_cs_size,
}
},
+ [F17_M10H_CPUS] = {
+ .ctl_name = "F17h_M10h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_base_addr_to_cs_size,
+ }
+ },
};
/*
@@ -3188,8 +3199,18 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
break;
case 0x17:
+ if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
+ fam_type = &family_types[F17_M10H_CPUS];
+ pvt->ops = &family_types[F17_M10H_CPUS].ops;
+ break;
+ }
+ /* fall through */
+ case 0x18:
fam_type = &family_types[F17_CPUS];
pvt->ops = &family_types[F17_CPUS].ops;
+
+ if (pvt->fam == 0x18)
+ family_types[F17_CPUS].ctl_name = "F18h";
break;
default:
@@ -3428,6 +3449,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
{ X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
+ { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 1d4b74e9a037..4242f8e39c18 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -115,6 +115,8 @@
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
#define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460
#define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
/*
* Function 1 - Address Map
@@ -281,6 +283,7 @@ enum amd_families {
F16_CPUS,
F16_M30H_CPUS,
F17_CPUS,
+ F17_M10H_CPUS,
NUM_FAMILIES,
};
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 473aeec4b1da..49396bf6ad88 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
(*num_dimm)++;
}
+static int get_dimm_smbios_index(u16 handle)
+{
+ struct mem_ctl_info *mci = ghes_pvt->mci;
+ int i;
+
+ for (i = 0; i < mci->tot_dimms; i++) {
+ if (mci->dimms[i]->smbios_handle == handle)
+ return i;
+ }
+ return -1;
+}
+
static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
struct ghes_edac_dimm_fill *dimm_fill = arg;
@@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
entry->total_width, entry->data_width);
}
+ dimm->smbios_handle = entry->handle;
+
dimm_fill->count++;
}
}
@@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
const char *bank = NULL, *device = NULL;
+ int index = -1;
+
dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
if (bank != NULL && device != NULL)
p += sprintf(p, "DIMM location:%s %s ", bank, device);
else
p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
mem_err->mem_dev_handle);
+
+ index = get_dimm_smbios_index(mem_err->mem_dev_handle);
+ if (index >= 0) {
+ e->top_layer = index;
+ e->enable_per_layer_report = true;
+ }
+
}
if (p > e->location)
*(p - 1) = '\0';
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index d92d56cee101..299b441647cd 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -399,7 +399,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
if (nr_pages == 0)
continue;
- edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
+ edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j,
stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
dimm->nr_pages = nr_pages;
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 8e120bf60624..9ef448fef12f 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -597,7 +597,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
/* DDR3 has 8 I/O banks */
size = (rows * cols * banks * ranks) >> (20 - 3);
- edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
+ edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
j, size,
RANKOFFSET(dimm_dod[j]),
banks, ranks, rows, cols);
@@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
u32 errnum = find_first_bit(&error, 32);
if (uncorrected_error) {
+ core_err_cnt = 1;
if (ripv)
tp_event = HW_EVENT_ERR_FATAL;
else
@@ -1815,14 +1816,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
struct i7core_dev *i7_dev;
struct mem_ctl_info *mci;
- struct i7core_pvt *pvt;
i7_dev = get_i7core_dev(mce->socketid);
if (!i7_dev)
return NOTIFY_DONE;
mci = i7_dev->mci;
- pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 2ab4d61ee47e..c605089d899f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1059,7 +1059,8 @@ static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- if (c->x86_vendor != X86_VENDOR_AMD)
+ if (c->x86_vendor != X86_VENDOR_AMD &&
+ c->x86_vendor != X86_VENDOR_HYGON)
return -ENODEV;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
@@ -1113,6 +1114,7 @@ static int __init mce_amd_init(void)
break;
case 0x17:
+ case 0x18:
xec_mask = 0x3f;
if (!boot_cpu_has(X86_FEATURE_SMCA)) {
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 07726fb00321..9353c3fc7c05 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -326,6 +326,7 @@ struct sbridge_info {
const struct interleave_pkg *interleave_pkg;
u8 max_sad;
u8 (*get_node_id)(struct sbridge_pvt *pvt);
+ u8 (*get_ha)(u8 bank);
enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
struct pci_dev *pci_vtd;
@@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt)
return GET_BITFIELD(reg, 0, 2);
}
+/*
+ * Use the reporting bank number to determine which memory
+ * controller (also known as "ha" for "home agent"). Sandy
+ * Bridge only has one memory controller per socket, so the
+ * answer is always zero.
+ */
+static u8 sbridge_get_ha(u8 bank)
+{
+ return 0;
+}
+
+/*
+ * On Ivy Bridge, Haswell and Broadwell the error may be in a
+ * home agent bank (7, 8), or one of the per-channel memory
+ * controller banks (9 .. 16).
+ */
+static u8 ibridge_get_ha(u8 bank)
+{
+ switch (bank) {
+ case 7 ... 8:
+ return bank - 7;
+ case 9 ... 16:
+ return (bank - 9) / 4;
+ default:
+ return 0xff;
+ }
+}
+
+/* Not used, but included for safety/symmetry */
+static u8 knl_get_ha(u8 bank)
+{
+ return 0xff;
+}
static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
{
@@ -1622,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
- edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+ edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
size, npages,
banks, ranks, rows, cols);
@@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
return 0;
}
+static int get_memory_error_data_from_mce(struct mem_ctl_info *mci,
+ const struct mce *m, u8 *socket,
+ u8 *ha, long *channel_mask,
+ char *msg)
+{
+ u32 reg, channel = GET_BITFIELD(m->status, 0, 3);
+ struct mem_ctl_info *new_mci;
+ struct sbridge_pvt *pvt;
+ struct pci_dev *pci_ha;
+ bool tad0;
+
+ if (channel >= NUM_CHANNELS) {
+ sprintf(msg, "Invalid channel 0x%x", channel);
+ return -EINVAL;
+ }
+
+ pvt = mci->pvt_info;
+ if (!pvt->info.get_ha) {
+ sprintf(msg, "No get_ha()");
+ return -EINVAL;
+ }
+ *ha = pvt->info.get_ha(m->bank);
+ if (*ha != 0 && *ha != 1) {
+ sprintf(msg, "Impossible bank %d", m->bank);
+ return -EINVAL;
+ }
+
+ *socket = m->socketid;
+ new_mci = get_mci_for_node_id(*socket, *ha);
+ if (!new_mci) {
+ strcpy(msg, "mci socket got corrupted!");
+ return -EINVAL;
+ }
+
+ pvt = new_mci->pvt_info;
+ pci_ha = pvt->pci_ha;
+ pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg);
+ tad0 = m->addr <= TAD_LIMIT(reg);
+
+ *channel_mask = 1 << channel;
+ if (pvt->mirror_mode == FULL_MIRRORING ||
+ (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) {
+ *channel_mask |= 1 << ((channel + 2) % 4);
+ pvt->is_cur_addr_mirrored = true;
+ } else {
+ pvt->is_cur_addr_mirrored = false;
+ }
+
+ if (pvt->is_lockstep)
+ *channel_mask |= 1 << ((channel + 1) % 4);
+
+ return 0;
+}
+
/****************************************************************************
Device initialization routines: put/get, init/exit
****************************************************************************/
@@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
u32 errcode = GET_BITFIELD(m->status, 0, 15);
u32 channel = GET_BITFIELD(m->status, 0, 3);
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+ /*
+ * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
+ * A value 6 is for cache line aligned address, a value 12 is for page
+ * aligned address reported by patrol scrubber.
+ */
+ u32 lsb = GET_BITFIELD(m->misc, 0, 5);
long channel_mask, first_channel;
- u8 rank, socket, ha;
+ u8 rank = 0xff, socket, ha;
int rc, dimm;
- char *area_type = NULL;
+ char *area_type = "DRAM";
if (pvt->info.type != SANDY_BRIDGE)
recoverable = true;
@@ -2888,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
+ core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
@@ -2911,35 +3006,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
* cccc = channel
* If the mask doesn't match, report an error to the parsing logic
*/
- if (! ((errcode & 0xef80) == 0x80)) {
- optype = "Can't parse: it is not a mem";
- } else {
- switch (optypenum) {
- case 0:
- optype = "generic undef request error";
- break;
- case 1:
- optype = "memory read error";
- break;
- case 2:
- optype = "memory write error";
- break;
- case 3:
- optype = "addr/cmd error";
- break;
- case 4:
- optype = "memory scrubbing error";
- break;
- default:
- optype = "reserved";
- break;
- }
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ break;
+ default:
+ optype = "reserved";
+ break;
}
- /* Only decode errors with an valid address (ADDRV) */
- if (!GET_BITFIELD(m->status, 58, 58))
- return;
-
if (pvt->info.type == KNIGHTS_LANDING) {
if (channel == 14) {
edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n",
@@ -2972,9 +3059,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
optype, msg);
}
return;
- } else {
+ } else if (lsb < 12) {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
- &channel_mask, &rank, &area_type, msg);
+ &channel_mask, &rank,
+ &area_type, msg);
+ } else {
+ rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
+ &channel_mask, msg);
}
if (rc < 0)
@@ -2989,14 +3080,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
- if (rank < 4)
+ if (rank == 0xff)
+ dimm = -1;
+ else if (rank < 4)
dimm = 0;
else if (rank < 8)
dimm = 1;
else
dimm = 2;
-
/*
* FIXME: On some memory configurations (mirror, lockstep), the
* Memory Controller can't point the error to a single DIMM. The
@@ -3045,17 +3137,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
{
struct mce *mce = (struct mce *)data;
struct mem_ctl_info *mci;
- struct sbridge_pvt *pvt;
char *type;
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
- mci = get_mci_for_node_id(mce->socketid, IMC0);
- if (!mci)
- return NOTIFY_DONE;
- pvt = mci->pvt_info;
-
/*
* Just let mcelog handle it if the error is
* outside the memory controller. A memory error
@@ -3065,6 +3151,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1)
return NOTIFY_DONE;
+ /* Check ADDRV bit in STATUS */
+ if (!GET_BITFIELD(mce->status, 58, 58))
+ return NOTIFY_DONE;
+
+ /* Check MISCV bit in STATUS */
+ if (!GET_BITFIELD(mce->status, 59, 59))
+ return NOTIFY_DONE;
+
+ /* Check address type in MISC (physical address only) */
+ if (GET_BITFIELD(mce->misc, 6, 8) != 2)
+ return NOTIFY_DONE;
+
+ mci = get_mci_for_node_id(mce->socketid, IMC0);
+ if (!mci)
+ return NOTIFY_DONE;
+
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
else
@@ -3173,6 +3275,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3197,6 +3300,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = sbridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
+ pvt->info.get_ha = sbridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3221,6 +3325,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3245,6 +3350,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
+ pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
@@ -3269,6 +3375,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = knl_dram_rule;
pvt->info.get_memory_type = knl_get_memory_type;
pvt->info.get_node_id = knl_get_node_id;
+ pvt->info.get_ha = knl_get_ha;
pvt->info.rir_limit = NULL;
pvt->info.sad_limit = knl_sad_limit;
pvt->info.interleave_mode = knl_interleave_mode;
@@ -3320,17 +3427,14 @@ fail0:
return rc;
}
-#define ICPU(model, table) \
- { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
-
static const struct x86_cpu_id sbridge_cpuids[] = {
- ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
- ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table),
- ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table),
- ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table),
- ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
- ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table),
- ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table),
+ INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
+ INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table),
+ INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table),
+ INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table),
+ INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
+ INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table),
+ INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
index fae095162c01..dd209e0dd9ab 100644
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -364,7 +364,7 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
- edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+ edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
imc->mc, chan, dimmno, size, npages,
banks, 1 << ranks, rows, cols);
@@ -424,7 +424,7 @@ unknown_size:
dimm->mtype = MEM_NVDIMM;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
- edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n",
+ edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
@@ -668,7 +668,7 @@ sad_found:
break;
case 2:
lchan = (addr >> shift) % 2;
- lchan = (lchan << 1) | ~lchan;
+ lchan = (lchan << 1) | !lchan;
break;
case 3:
lchan = ((addr >> shift) % 2) << 1;
@@ -959,6 +959,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
+ core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index c009d94f40c5..34be60fe6892 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
default:
dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
l2c->pdev->device);
- return IRQ_NONE;
+ goto err_free;
}
while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
@@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
l2c->ring_tail++;
}
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
err_free:
kfree(other);