diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/altera_edac.c | 667 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 73 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 24 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 3 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 23 | ||||
-rw-r--r-- | drivers/edac/i3200_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 5 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 204 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/thunderx_edac.c | 4 |
11 files changed, 462 insertions, 554 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index ab7c5a937ab0..c89d82aa2776 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -69,25 +69,6 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; -static const struct altr_sdram_prv_data s10_data = { - .ecc_ctrl_offset = S10_ECCCTRL1_OFST, - .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN, - .ecc_stat_offset = S10_INTSTAT_OFST, - .ecc_stat_ce_mask = A10_INTSTAT_SBEERR, - .ecc_stat_ue_mask = A10_INTSTAT_DBEERR, - .ecc_saddr_offset = S10_SERRADDR_OFST, - .ecc_daddr_offset = S10_DERRADDR_OFST, - .ecc_irq_en_offset = S10_ERRINTEN_OFST, - .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK, - .ecc_irq_clr_offset = S10_INTSTAT_OFST, - .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR), - .ecc_cnt_rst_offset = S10_ECCCTRL1_OFST, - .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK, - .ce_ue_trgr_offset = S10_DIAGINTTEST_OFST, - .ce_set_mask = A10_DIAGINT_TSERRA_MASK, - .ue_set_mask = A10_DIAGINT_TDERRA_MASK, -}; - /*********************** EDAC Memory Controller Functions ****************/ /* The SDRAM controller uses the EDAC Memory Controller framework. */ @@ -239,7 +220,7 @@ static unsigned long get_total_mem(void) static const struct of_device_id altr_sdram_ctrl_of_match[] = { { .compatible = "altr,sdram-edac", .data = &c5_data}, { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, - { .compatible = "altr,sdram-edac-s10", .data = &s10_data}, + { .compatible = "altr,sdram-edac-s10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -293,6 +274,7 @@ release: return ret; } +static int socfpga_is_a10(void); static int altr_sdram_probe(struct platform_device *pdev) { const struct of_device_id *id; @@ -416,7 +398,7 @@ static int altr_sdram_probe(struct platform_device *pdev) goto err; /* Only the Arria10 has separate IRQs */ - if (irq2 > 0) { + if (socfpga_is_a10()) { /* Arria10 specific initialization */ res = a10_init(mc_vbase); if (res < 0) @@ -502,8 +484,9 @@ static int s10_protected_reg_write(void *context, unsigned int reg, unsigned int val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, reg, val, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0, 0, 0, 0, &result); return (int)result.a0; @@ -523,8 +506,9 @@ static int s10_protected_reg_read(void *context, unsigned int reg, unsigned int *val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_READ, reg, 0, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0, 0, 0, 0, &result); *val = (unsigned int)result.a1; @@ -532,246 +516,18 @@ static int s10_protected_reg_read(void *context, unsigned int reg, return (int)result.a0; } -static bool s10_sdram_writeable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_readable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - static const struct regmap_config s10_sdram_regmap_cfg = { .name = "s10_ddr", .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0xffffffff, - .writeable_reg = s10_sdram_writeable_reg, - .readable_reg = s10_sdram_readable_reg, - .volatile_reg = s10_sdram_volatile_reg, + .max_register = 0xffd12228, .reg_read = s10_protected_reg_read, .reg_write = s10_protected_reg_write, .use_single_read = true, .use_single_write = true, }; -static int altr_s10_sdram_probe(struct platform_device *pdev) -{ - const struct of_device_id *id; - struct edac_mc_layer layers[2]; - struct mem_ctl_info *mci; - struct altr_sdram_mc_data *drvdata; - const struct altr_sdram_prv_data *priv; - struct regmap *regmap; - struct dimm_info *dimm; - u32 read_reg; - int irq, ret = 0; - unsigned long mem_size; - - id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev); - if (!id) - return -ENODEV; - - /* Grab specific offsets and masks for Stratix10 */ - priv = of_match_node(altr_sdram_ctrl_of_match, - pdev->dev.of_node)->data; - - regmap = devm_regmap_init(&pdev->dev, NULL, (void *)priv, - &s10_sdram_regmap_cfg); - if (IS_ERR(regmap)) - return PTR_ERR(regmap); - - /* Validate the SDRAM controller has ECC enabled */ - if (regmap_read(regmap, priv->ecc_ctrl_offset, &read_reg) || - ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "No ECC/ECC disabled [0x%08X]\n", read_reg); - return -ENODEV; - } - - /* Grab memory size from device tree. */ - mem_size = get_total_mem(); - if (!mem_size) { - edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n"); - return -ENODEV; - } - - /* Ensure the SDRAM Interrupt is disabled */ - if (regmap_update_bits(regmap, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error disabling SDRAM ECC IRQ\n"); - return -ENODEV; - } - - /* Toggle to clear the SDRAM Error count */ - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, - priv->ecc_cnt_rst_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - edac_printk(KERN_ERR, EDAC_MC, - "No irq %d in DT\n", irq); - return -ENODEV; - } - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, - sizeof(struct altr_sdram_mc_data)); - if (!mci) - return -ENOMEM; - - mci->pdev = &pdev->dev; - drvdata = mci->pvt_info; - drvdata->mc_vbase = regmap; - drvdata->data = priv; - platform_set_drvdata(pdev, mci); - - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { - edac_printk(KERN_ERR, EDAC_MC, - "Unable to get managed device resource\n"); - ret = -ENOMEM; - goto free; - } - - mci->mtype_cap = MEM_FLAG_DDR3; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = dev_name(&pdev->dev); - mci->scrub_mode = SCRUB_SW_SRC; - mci->dev_name = dev_name(&pdev->dev); - - dimm = *mci->dimms; - dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1; - dimm->grain = 8; - dimm->dtype = DEV_X8; - dimm->mtype = MEM_DDR3; - dimm->edac_mode = EDAC_SECDED; - - ret = edac_mc_add_mc(mci); - if (ret < 0) - goto err; - - ret = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, - IRQF_SHARED, dev_name(&pdev->dev), mci); - if (ret < 0) { - edac_mc_printk(mci, KERN_ERR, - "Unable to request irq %d\n", irq); - ret = -ENODEV; - goto err2; - } - - if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - S10_DDR0_IRQ_MASK)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - ret = -ENODEV; - goto err2; - } - - if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { - edac_mc_printk(mci, KERN_ERR, - "Error enabling SDRAM ECC IRQ\n"); - ret = -ENODEV; - goto err2; - } - - altr_sdr_mc_create_debugfs_nodes(mci); - - devres_close_group(&pdev->dev, NULL); - - return 0; - -err2: - edac_mc_del_mc(&pdev->dev); -err: - devres_release_group(&pdev->dev, NULL); -free: - edac_mc_free(mci); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC Probe Failed; Error %d\n", ret); - - return ret; -} - -static int altr_s10_sdram_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = platform_get_drvdata(pdev); - - edac_mc_del_mc(&pdev->dev); - edac_mc_free(mci); - platform_set_drvdata(pdev, NULL); - - return 0; -} - /************** </Stratix10 EDAC Memory Controller Functions> ***********/ /* @@ -805,20 +561,6 @@ static struct platform_driver altr_sdram_edac_driver = { module_platform_driver(altr_sdram_edac_driver); -static struct platform_driver altr_s10_sdram_edac_driver = { - .probe = altr_s10_sdram_probe, - .remove = altr_s10_sdram_remove, - .driver = { - .name = "altr_s10_sdram_edac", -#ifdef CONFIG_PM - .pm = &altr_sdram_pm_ops, -#endif - .of_match_table = altr_sdram_ctrl_of_match, - }, -}; - -module_platform_driver(altr_s10_sdram_edac_driver); - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -972,6 +714,16 @@ static const struct file_operations altr_edac_a10_device_inject_fops = { .llseek = generic_file_llseek, }; +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject2_fops = { + .open = simple_open, + .write = altr_edac_a10_device_trig2, + .llseek = generic_file_llseek, +}; + static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, const struct edac_device_prv_data *priv) { @@ -1253,6 +1005,16 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + +static int socfpga_is_s10(void) +{ + return of_machine_is_compatible("altr,socfpga-stratix10"); +} + static __init int __maybe_unused altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, u32 ecc_ctrl_en_mask, bool dual_port) @@ -1267,8 +1029,32 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, /* Get the ECC Manager - parent of the device EDACs */ np_eccmgr = of_get_parent(np); - ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, - "altr,sysmgr-syscon"); + + if (socfpga_is_a10()) { + ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); + } else { + struct device_node *sysmgr_np; + struct resource res; + uintptr_t base; + + sysmgr_np = of_parse_phandle(np_eccmgr, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = res.start; + + ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, + &s10_sdram_regmap_cfg); + } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -1326,11 +1112,6 @@ out: return ret; } -static int socfpga_is_a10(void) -{ - return of_machine_is_compatible("altr,socfpga-arria10"); -} - static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) @@ -1338,7 +1119,7 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) int irq; struct device_node *child, *np; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; np = of_find_compatible_node(NULL, NULL, @@ -1584,7 +1365,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_ethernet_ecc(void) @@ -1662,7 +1443,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_usb_ecc(void) @@ -1860,7 +1641,7 @@ static int __init socfpga_init_sdmmc_ecc(void) int rc = -ENODEV; struct device_node *child; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); @@ -1944,6 +1725,74 @@ static ssize_t altr_edac_a10_device_trig(struct file *file, writel(priv->ue_set_mask, set_addr); else writel(priv->ce_set_mask, set_addr); + + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +/* + * The Stratix10 EDAC Error Injection Functions differ from Arria10 + * slightly. A few Arria10 peripherals can use this injection function. + * Inject the error into the memory and then readback to trigger the IRQ. + */ +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) { + writel(priv->ue_set_mask, set_addr); + } else { + /* Setup write of 0 to first 4 bytes */ + writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Setup write of 4 bytes */ + writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); + /* Setup Address to 0 */ + writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to write & data override */ + writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc override */ + writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup write for single bit change */ + writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ + writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), + drvdata->base + ECC_BLK_WECC0_OFST); + writel(readl(drvdata->base + ECC_BLK_RECC1_OFST), + drvdata->base + ECC_BLK_WECC1_OFST); + /* Setup accctrl to write & ecc override & data override */ + writel(ECC_WRITE_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc overwrite & data overwrite */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + } + /* Ensure the interrupt test bits are set */ wmb(); local_irq_restore(flags); @@ -2147,6 +1996,35 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { .xlate = irq_domain_xlate_twocell, }; +/************** Stratix 10 EDAC Double Bit Error Handler ************/ +#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) + +/* + * The double bit error is handled through SError which is fatal. This is + * called as a panic notifier to printout ECC error info as part of the panic. + */ +static int s10_edac_dberr_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct altr_arria10_edac *edac = to_a10edac(this, panic_notifier); + int err_addr, dberror; + + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, + &dberror); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); + if (dberror & S10_DDR0_IRQ_MASK) { + regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + err_addr); + edac_printk(KERN_ERR, EDAC_MC, + "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", + err_addr); + } + + return NOTIFY_DONE; +} + +/****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) { struct altr_arria10_edac *edac; @@ -2160,8 +2038,34 @@ static int altr_edac_a10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, edac); INIT_LIST_HEAD(&edac->a10_ecc_devices); - edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + if (socfpga_is_a10()) { + edac->ecc_mgr_map = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "altr,sysmgr-syscon"); + } else { + struct device_node *sysmgr_np; + struct resource res; + uintptr_t base; + + sysmgr_np = of_parse_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = res.start; + + edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, + (void *)base, + &s10_sdram_regmap_cfg); + } + if (IS_ERR(edac->ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, "Unable to get syscon altr,sysmgr-syscon\n"); @@ -2188,14 +2092,38 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; + if (socfpga_is_a10()) { + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, + edac); + } else { + int dberror, err_addr; + + edac->panic_notifier.notifier_call = s10_edac_dberr_handler; + atomic_notifier_chain_register(&panic_notifier_list, + &edac->panic_notifier); + + /* Printout a message if uncorrectable error previously. */ + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, + &dberror); + if (dberror) { + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + &err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "Previous Boot UE detected[0x%X] @ 0x%X\n", + dberror, err_addr); + /* Reset the sticky registers */ + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_VAL_OFST, 0); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, 0); + } } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) @@ -2212,7 +2140,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_device_add(edac, child); - else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) + else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || + (of_device_is_compatible(child, "altr,sdram-edac-s10"))) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); @@ -2223,6 +2152,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev) static const struct of_device_id altr_edac_a10_of_match[] = { { .compatible = "altr,socfpga-a10-ecc-manager" }, + { .compatible = "altr,socfpga-s10-ecc-manager" }, {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); @@ -2236,171 +2166,6 @@ static struct platform_driver altr_edac_a10_driver = { }; module_platform_driver(altr_edac_a10_driver); -/************** Stratix 10 EDAC Device Controller Functions> ************/ - -#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m) - -/* - * The double bit error is handled through SError which is fatal. This is - * called as a panic notifier to printout ECC error info as part of the panic. - */ -static int s10_edac_dberr_handler(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier); - int err_addr, dberror; - - s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, - &dberror); - /* Remember the UE Errors for a reboot */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr); - /* Remember the UE Error address */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); - } - - return NOTIFY_DONE; -} - -static void altr_edac_s10_irq_handler(struct irq_desc *desc) -{ - struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - int irq = irq_desc_get_irq(desc); - int bit, sm_offset, irq_status; - - sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST; - - chained_irq_enter(chip, desc); - - s10_protected_reg_read(NULL, sm_offset, &irq_status); - - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { - irq = irq_linear_revmap(edac->domain, bit); - if (irq) - generic_handle_irq(irq); - } - - chained_irq_exit(chip, desc); -} - -static void s10_eccmgr_irq_mask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_SET_OFST, - BIT(d->hwirq)); -} - -static void s10_eccmgr_irq_unmask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - BIT(d->hwirq)); -} - -static int s10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - struct altr_stratix10_edac *edac = d->host_data; - - irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq); - irq_set_chip_data(irq, edac); - irq_set_noprobe(irq); - - return 0; -} - -static const struct irq_domain_ops s10_eccmgr_ic_ops = { - .map = s10_eccmgr_irqdomain_map, - .xlate = irq_domain_xlate_twocell, -}; - -static int altr_edac_s10_probe(struct platform_device *pdev) -{ - struct altr_stratix10_edac *edac; - struct device_node *child; - int dberror, err_addr; - - edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); - if (!edac) - return -ENOMEM; - - edac->dev = &pdev->dev; - platform_set_drvdata(pdev, edac); - INIT_LIST_HEAD(&edac->s10_ecc_devices); - - edac->irq_chip.name = pdev->dev.of_node->name; - edac->irq_chip.irq_mask = s10_eccmgr_irq_mask; - edac->irq_chip.irq_unmask = s10_eccmgr_irq_unmask; - edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64, - &s10_eccmgr_ic_ops, edac); - if (!edac->domain) { - dev_err(&pdev->dev, "Error adding IRQ domain\n"); - return -ENOMEM; - } - - edac->sb_irq = platform_get_irq(pdev, 0); - if (edac->sb_irq < 0) { - dev_err(&pdev->dev, "No SBERR IRQ resource\n"); - return edac->sb_irq; - } - - irq_set_chained_handler_and_data(edac->sb_irq, - altr_edac_s10_irq_handler, - edac); - - edac->panic_notifier.notifier_call = s10_edac_dberr_handler; - atomic_notifier_chain_register(&panic_notifier_list, - &edac->panic_notifier); - - /* Printout a message if uncorrectable error previously. */ - s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror); - if (dberror) { - s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST, - &err_addr); - edac_printk(KERN_ERR, EDAC_DEVICE, - "Previous Boot UE detected[0x%X] @ 0x%X\n", - dberror, err_addr); - /* Reset the sticky registers */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0); - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0); - } - - for_each_child_of_node(pdev->dev.of_node, child) { - if (!of_device_is_available(child)) - continue; - - if (of_device_is_compatible(child, "altr,sdram-edac-s10")) - of_platform_populate(pdev->dev.of_node, - altr_sdram_ctrl_of_match, - NULL, &pdev->dev); - } - - return 0; -} - -static const struct of_device_id altr_edac_s10_of_match[] = { - { .compatible = "altr,socfpga-s10-ecc-manager" }, - {}, -}; -MODULE_DEVICE_TABLE(of, altr_edac_s10_of_match); - -static struct platform_driver altr_edac_s10_driver = { - .probe = altr_edac_s10_probe, - .driver = { - .name = "socfpga_s10_ecc_manager", - .of_match_table = altr_edac_s10_of_match, - }, -}; -module_platform_driver(altr_edac_s10_driver); - MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 81f0554e09de..4213cb0bb2a7 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -156,34 +156,6 @@ #define A10_INTMASK_CLR_OFST 0x10 #define A10_DDR0_IRQ_MASK BIT(17) -/************* Stratix10 Defines **************/ - -/* SDRAM Controller EccCtrl Register */ -#define S10_ECCCTRL1_OFST 0xF8011100 - -/* SDRAM Controller DRAM IRQ Register */ -#define S10_ERRINTEN_OFST 0xF8011110 - -/* SDRAM Interrupt Mode Register */ -#define S10_INTMODE_OFST 0xF801111C - -/* SDRAM Controller Error Status Register */ -#define S10_INTSTAT_OFST 0xF8011120 - -/* SDRAM Controller ECC Error Address Register */ -#define S10_DERRADDR_OFST 0xF801112C -#define S10_SERRADDR_OFST 0xF8011130 - -/* SDRAM Controller ECC Diagnostic Register */ -#define S10_DIAGINTTEST_OFST 0xF8011124 - -/* SDRAM Single Bit Error Count Compare Set Register */ -#define S10_SERRCNTREG_OFST 0xF801113C - -/* Sticky registers for Uncorrected Errors */ -#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220 -#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224 - struct altr_sdram_prv_data { int ecc_ctrl_offset; int ecc_ctl_en_mask; @@ -319,15 +291,40 @@ struct altr_sdram_mc_data { /************* Stratix10 Defines **************/ /* Stratix10 ECC Manager Defines */ -#define S10_SYSMGR_ECC_INTMASK_VAL_OFST 0xFFD12090 -#define S10_SYSMGR_ECC_INTMASK_SET_OFST 0xFFD12094 -#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0xFFD12098 +#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 -#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C -#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0 +/* Sticky registers for Uncorrected Errors */ +#define S10_SYSMGR_UE_VAL_OFST 0x120 +#define S10_SYSMGR_UE_ADDR_OFST 0x124 #define S10_DDR0_IRQ_MASK BIT(16) +/* Define ECC Block Offsets for peripherals */ +#define ECC_BLK_ADDRESS_OFST 0x40 +#define ECC_BLK_RDATA0_OFST 0x44 +#define ECC_BLK_RDATA1_OFST 0x48 +#define ECC_BLK_RDATA2_OFST 0x4C +#define ECC_BLK_RDATA3_OFST 0x50 +#define ECC_BLK_WDATA0_OFST 0x54 +#define ECC_BLK_WDATA1_OFST 0x58 +#define ECC_BLK_WDATA2_OFST 0x5C +#define ECC_BLK_WDATA3_OFST 0x60 +#define ECC_BLK_RECC0_OFST 0x64 +#define ECC_BLK_RECC1_OFST 0x68 +#define ECC_BLK_WECC0_OFST 0x6C +#define ECC_BLK_WECC1_OFST 0x70 +#define ECC_BLK_DBYTECTRL_OFST 0x74 +#define ECC_BLK_ACCCTRL_OFST 0x78 +#define ECC_BLK_STARTACC_OFST 0x7C + +#define ECC_XACT_KICK 0x10000 +#define ECC_WORD_WRITE 0xF +#define ECC_WRITE_DOVR 0x101 +#define ECC_WRITE_EDOVR 0x103 +#define ECC_READ_EOVR 0x2 +#define ECC_READ_EDOVR 0x3 + struct altr_edac_device_dev; struct edac_device_prv_data { @@ -370,6 +367,7 @@ struct altr_arria10_edac { struct irq_domain *domain; struct irq_chip irq_chip; struct list_head a10_ecc_devices; + struct notifier_block panic_notifier; }; /* @@ -437,13 +435,4 @@ struct altr_arria10_edac { #define INTEL_SIP_SMC_REG_WRITE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) -struct altr_stratix10_edac { - struct device *dev; - int sb_irq; - struct irq_domain *domain; - struct irq_chip irq_chip; - struct list_head s10_ecc_devices; - struct notifier_block panic_notifier; -}; - #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18aeabb1d5ee..6ea98575a402 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -211,7 +211,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17) { + if (pvt->fam == 0x17 || pvt->fam == 0x18) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -264,6 +264,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) break; case 0x17: + case 0x18: amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -1044,6 +1045,7 @@ static void determine_memory_type(struct amd64_pvt *pvt) goto ddr3; case 0x17: + case 0x18: if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) pvt->dram_type = MEM_LRDDR4; else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) @@ -2200,6 +2202,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M10H_CPUS] = { + .ctl_name = "F17h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3188,8 +3199,18 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x17: + if (pvt->model >= 0x10 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M10H_CPUS]; + pvt->ops = &family_types[F17_M10H_CPUS].ops; + break; + } + /* fall through */ + case 0x18: fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; + + if (pvt->fam == 0x18) + family_types[F17_CPUS].ctl_name = "F18h"; break; default: @@ -3428,6 +3449,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1d4b74e9a037..4242f8e39c18 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -115,6 +115,8 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee /* * Function 1 - Address Map @@ -281,6 +283,7 @@ enum amd_families { F16_CPUS, F16_M30H_CPUS, F17_CPUS, + F17_M10H_CPUS, NUM_FAMILIES, }; diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 473aeec4b1da..49396bf6ad88 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) (*num_dimm)++; } +static int get_dimm_smbios_index(u16 handle) +{ + struct mem_ctl_info *mci = ghes_pvt->mci; + int i; + + for (i = 0; i < mci->tot_dimms; i++) { + if (mci->dimms[i]->smbios_handle == handle) + return i; + } + return -1; +} + static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) { struct ghes_edac_dimm_fill *dimm_fill = arg; @@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) entry->total_width, entry->data_width); } + dimm->smbios_handle = entry->handle; + dimm_fill->count++; } } @@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; + int index = -1; + dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); if (bank != NULL && device != NULL) p += sprintf(p, "DIMM location:%s %s ", bank, device); else p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); + + index = get_dimm_smbios_index(mem_err->mem_dev_handle); + if (index >= 0) { + e->top_layer = index; + e->enable_per_layer_report = true; + } + } if (p > e->location) *(p - 1) = '\0'; diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index d92d56cee101..299b441647cd 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -399,7 +399,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) if (nr_pages == 0) continue; - edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j, stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 8e120bf60624..9ef448fef12f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -597,7 +597,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) /* DDR3 has 8 I/O banks */ size = (rows * cols * banks * ranks) >> (20 - 3); - edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", j, size, RANKOFFSET(dimm_dod[j]), banks, ranks, rows, cols); @@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) tp_event = HW_EVENT_ERR_FATAL; else @@ -1815,14 +1816,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct i7core_dev *i7_dev; struct mem_ctl_info *mci; - struct i7core_pvt *pvt; i7_dev = get_i7core_dev(mce->socketid); if (!i7_dev) return NOTIFY_DONE; mci = i7_dev->mci; - pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 2ab4d61ee47e..c605089d899f 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1059,7 +1059,8 @@ static int __init mce_amd_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_AMD) + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); @@ -1113,6 +1114,7 @@ static int __init mce_amd_init(void) break; case 0x17: + case 0x18: xec_mask = 0x3f; if (!boot_cpu_has(X86_FEATURE_SMCA)) { printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 07726fb00321..9353c3fc7c05 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -326,6 +326,7 @@ struct sbridge_info { const struct interleave_pkg *interleave_pkg; u8 max_sad; u8 (*get_node_id)(struct sbridge_pvt *pvt); + u8 (*get_ha)(u8 bank); enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); struct pci_dev *pci_vtd; @@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt) return GET_BITFIELD(reg, 0, 2); } +/* + * Use the reporting bank number to determine which memory + * controller (also known as "ha" for "home agent"). Sandy + * Bridge only has one memory controller per socket, so the + * answer is always zero. + */ +static u8 sbridge_get_ha(u8 bank) +{ + return 0; +} + +/* + * On Ivy Bridge, Haswell and Broadwell the error may be in a + * home agent bank (7, 8), or one of the per-channel memory + * controller banks (9 .. 16). + */ +static u8 ibridge_get_ha(u8 bank) +{ + switch (bank) { + case 7 ... 8: + return bank - 7; + case 9 ... 16: + return (bank - 9) / 4; + default: + return 0xff; + } +} + +/* Not used, but included for safety/symmetry */ +static u8 knl_get_ha(u8 bank) +{ + return 0xff; +} static u64 haswell_get_tolm(struct sbridge_pvt *pvt) { @@ -1622,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j, size, npages, banks, ranks, rows, cols); @@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return 0; } +static int get_memory_error_data_from_mce(struct mem_ctl_info *mci, + const struct mce *m, u8 *socket, + u8 *ha, long *channel_mask, + char *msg) +{ + u32 reg, channel = GET_BITFIELD(m->status, 0, 3); + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt; + struct pci_dev *pci_ha; + bool tad0; + + if (channel >= NUM_CHANNELS) { + sprintf(msg, "Invalid channel 0x%x", channel); + return -EINVAL; + } + + pvt = mci->pvt_info; + if (!pvt->info.get_ha) { + sprintf(msg, "No get_ha()"); + return -EINVAL; + } + *ha = pvt->info.get_ha(m->bank); + if (*ha != 0 && *ha != 1) { + sprintf(msg, "Impossible bank %d", m->bank); + return -EINVAL; + } + + *socket = m->socketid; + new_mci = get_mci_for_node_id(*socket, *ha); + if (!new_mci) { + strcpy(msg, "mci socket got corrupted!"); + return -EINVAL; + } + + pvt = new_mci->pvt_info; + pci_ha = pvt->pci_ha; + pci_read_config_dword(pci_ha, tad_dram_rule[0], ®); + tad0 = m->addr <= TAD_LIMIT(reg); + + *channel_mask = 1 << channel; + if (pvt->mirror_mode == FULL_MIRRORING || + (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) { + *channel_mask |= 1 << ((channel + 2) % 4); + pvt->is_cur_addr_mirrored = true; + } else { + pvt->is_cur_addr_mirrored = false; + } + + if (pvt->is_lockstep) + *channel_mask |= 1 << ((channel + 1) % 4); + + return 0; +} + /**************************************************************************** Device initialization routines: put/get, init/exit ****************************************************************************/ @@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 errcode = GET_BITFIELD(m->status, 0, 15); u32 channel = GET_BITFIELD(m->status, 0, 3); u32 optypenum = GET_BITFIELD(m->status, 4, 6); + /* + * Bits 5-0 of MCi_MISC give the least significant bit that is valid. + * A value 6 is for cache line aligned address, a value 12 is for page + * aligned address reported by patrol scrubber. + */ + u32 lsb = GET_BITFIELD(m->misc, 0, 5); long channel_mask, first_channel; - u8 rank, socket, ha; + u8 rank = 0xff, socket, ha; int rc, dimm; - char *area_type = NULL; + char *area_type = "DRAM"; if (pvt->info.type != SANDY_BRIDGE) recoverable = true; @@ -2888,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; @@ -2911,35 +3006,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! ((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; } - /* Only decode errors with an valid address (ADDRV) */ - if (!GET_BITFIELD(m->status, 58, 58)) - return; - if (pvt->info.type == KNIGHTS_LANDING) { if (channel == 14) { edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", @@ -2972,9 +3059,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, optype, msg); } return; - } else { + } else if (lsb < 12) { rc = get_memory_error_data(mci, m->addr, &socket, &ha, - &channel_mask, &rank, &area_type, msg); + &channel_mask, &rank, + &area_type, msg); + } else { + rc = get_memory_error_data_from_mce(mci, m, &socket, &ha, + &channel_mask, msg); } if (rc < 0) @@ -2989,14 +3080,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); - if (rank < 4) + if (rank == 0xff) + dimm = -1; + else if (rank < 4) dimm = 0; else if (rank < 8) dimm = 1; else dimm = 2; - /* * FIXME: On some memory configurations (mirror, lockstep), the * Memory Controller can't point the error to a single DIMM. The @@ -3045,17 +3137,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; char *type; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; - mci = get_mci_for_node_id(mce->socketid, IMC0); - if (!mci) - return NOTIFY_DONE; - pvt = mci->pvt_info; - /* * Just let mcelog handle it if the error is * outside the memory controller. A memory error @@ -3065,6 +3151,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; + + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; + + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) + return NOTIFY_DONE; + + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) + return NOTIFY_DONE; + if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; else @@ -3173,6 +3275,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3197,6 +3300,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = sbridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = sbridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3221,6 +3325,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3245,6 +3350,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3269,6 +3375,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = knl_dram_rule; pvt->info.get_memory_type = knl_get_memory_type; pvt->info.get_node_id = knl_get_node_id; + pvt->info.get_ha = knl_get_ha; pvt->info.rir_limit = NULL; pvt->info.sad_limit = knl_sad_limit; pvt->info.interleave_mode = knl_interleave_mode; @@ -3320,17 +3427,14 @@ fail0: return rc; } -#define ICPU(model, table) \ - { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } - static const struct x86_cpu_id sbridge_cpuids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table), - ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table), - ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table), - ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table), - ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table), + INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table), + INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table), + INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table), + INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table), + INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index fae095162c01..dd209e0dd9ab 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -364,7 +364,7 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", imc->mc, chan, dimmno, size, npages, banks, 1 << ranks, rows, cols); @@ -424,7 +424,7 @@ unknown_size: dimm->mtype = MEM_NVDIMM; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", @@ -668,7 +668,7 @@ sad_found: break; case 2: lchan = (addr >> shift) % 2; - lchan = (lchan << 1) | ~lchan; + lchan = (lchan << 1) | !lchan; break; case 3: lchan = ((addr >> shift) % 2) << 1; @@ -959,6 +959,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index c009d94f40c5..34be60fe6892 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) default: dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", l2c->pdev->device); - return IRQ_NONE; + goto err_free; } while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, @@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) l2c->ring_tail++; } - return IRQ_HANDLED; + ret = IRQ_HANDLED; err_free: kfree(other); |