diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/amd64_edac.c | 39 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 3 | ||||
-rw-r--r-- | drivers/edac/e752x_edac.c | 117 | ||||
-rw-r--r-- | drivers/edac/edac_device_sysfs.c | 6 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 4 | ||||
-rw-r--r-- | drivers/edac/edac_pci_sysfs.c | 4 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.c | 163 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.h | 3 |
8 files changed, 292 insertions, 47 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 3391e6739d06..cf17dbb8014f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,7 +13,7 @@ module_param(report_gart_errors, int, 0644); static int ecc_enable_override; module_param(ecc_enable_override, int, 0644); -static struct msr *msrs; +static struct msr __percpu *msrs; /* Lookup table for all possible MC control instances */ struct amd64_pvt; @@ -2553,14 +2553,14 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) if (on) { if (reg->l & K8_MSR_MCGCTL_NBE) - pvt->flags.ecc_report = 1; + pvt->flags.nb_mce_enable = 1; reg->l |= K8_MSR_MCGCTL_NBE; } else { /* - * Turn off ECC reporting only when it was off before + * Turn off NB MCE reporting only when it was off before */ - if (!pvt->flags.ecc_report) + if (!pvt->flags.nb_mce_enable) reg->l &= ~K8_MSR_MCGCTL_NBE; } } @@ -2571,22 +2571,11 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) return 0; } -/* - * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we" - * enable it. - */ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; - if (!ecc_enable_override) - return; - - amd64_printk(KERN_WARNING, - "'ecc_enable_override' parameter is active, " - "Enabling AMD ECC hardware now: CAUTION\n"); - amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value); /* turn on UECCn and CECCEn bits */ @@ -2611,6 +2600,8 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) "This node reports that DRAM ECC is " "currently Disabled; ENABLING now\n"); + pvt->flags.nb_ecc_prev = 0; + /* Attempt to turn on DRAM ECC Enable */ value |= K8_NBCFG_ECC_ENABLE; pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); @@ -2625,7 +2616,10 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) amd64_printk(KERN_DEBUG, "Hardware accepted DRAM ECC Enable\n"); } + } else { + pvt->flags.nb_ecc_prev = 1; } + debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); @@ -2644,12 +2638,18 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) value &= ~mask; value |= pvt->old_nbctl; - /* restore the NB Enable MCGCTL bit */ pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); + /* restore previous BIOS DRAM ECC "off" setting which we force-enabled */ + if (!pvt->flags.nb_ecc_prev) { + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); + value &= ~K8_NBCFG_ECC_ENABLE; + pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); + } + + /* restore the NB Enable MCGCTL bit */ if (amd64_toggle_ecc_err_reporting(pvt, OFF)) - amd64_printk(KERN_WARNING, "Error restoring ECC reporting over " - "MCGCTL!\n"); + amd64_printk(KERN_WARNING, "Error restoring NB MCGCTL settings!\n"); } /* @@ -2690,8 +2690,9 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) if (!ecc_enable_override) { amd64_printk(KERN_NOTICE, "%s", ecc_msg); return -ENODEV; + } else { + amd64_printk(KERN_WARNING, "Forcing ECC checking on!\n"); } - ecc_enable_override = 0; } return 0; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 41bc561e5981..0d4bf5638243 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -487,7 +487,8 @@ struct amd64_pvt { /* misc settings */ struct flags { unsigned long cf8_extcfg:1; - unsigned long ecc_report:1; + unsigned long nb_mce_enable:1; + unsigned long nb_ecc_prev:1; } flags; }; diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index d205d493a68a..243e9aacad69 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -75,6 +75,14 @@ static struct edac_pci_ctl_info *e752x_pci; #define E752X_NR_CSROWS 8 /* number of csrows */ /* E752X register addresses - device 0 function 0 */ +#define E752X_MCHSCRB 0x52 /* Memory Scrub register (16b) */ + /* + * 6:5 Scrub Completion Count + * 3:2 Scrub Rate (i3100 only) + * 01=fast 10=normal + * 1:0 Scrub Mode enable + * 00=off 10=on + */ #define E752X_DRB 0x60 /* DRAM row boundary register (8b) */ #define E752X_DRA 0x70 /* DRAM row attribute register (8b) */ /* @@ -240,6 +248,41 @@ static const struct e752x_dev_info e752x_devs[] = { .ctl_name = "3100"}, }; +/* Valid scrub rates for the e752x/3100 hardware memory scrubber. We + * map the scrubbing bandwidth to a hardware register value. The 'set' + * operation finds the 'matching or higher value'. Note that scrubbing + * on the e752x can only be enabled/disabled. The 3100 supports + * a normal and fast mode. + */ + +#define SDRATE_EOT 0xFFFFFFFF + +struct scrubrate { + u32 bandwidth; /* bandwidth consumed by scrubbing in bytes/sec */ + u16 scrubval; /* register value for scrub rate */ +}; + +/* Rate below assumes same performance as i3100 using PC3200 DDR2 in + * normal mode. e752x bridges don't support choosing normal or fast mode, + * so the scrubbing bandwidth value isn't all that important - scrubbing is + * either on or off. + */ +static const struct scrubrate scrubrates_e752x[] = { + {0, 0x00}, /* Scrubbing Off */ + {500000, 0x02}, /* Scrubbing On */ + {SDRATE_EOT, 0x00} /* End of Table */ +}; + +/* Fast mode: 2 GByte PC3200 DDR2 scrubbed in 33s = 63161283 bytes/s + * Normal mode: 125 (32000 / 256) times slower than fast mode. + */ +static const struct scrubrate scrubrates_i3100[] = { + {0, 0x00}, /* Scrubbing Off */ + {500000, 0x0a}, /* Normal mode - 32k clocks */ + {62500000, 0x06}, /* Fast mode - 256 clocks */ + {SDRATE_EOT, 0x00} /* End of Table */ +}; + static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, unsigned long page) { @@ -915,6 +958,68 @@ static void e752x_check(struct mem_ctl_info *mci) e752x_process_error_info(mci, &info, 1); } +/* Program byte/sec bandwidth scrub rate to hardware */ +static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw) +{ + const struct scrubrate *scrubrates; + struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct pci_dev *pdev = pvt->dev_d0f0; + int i; + + if (pvt->dev_info->ctl_dev == PCI_DEVICE_ID_INTEL_3100_0) + scrubrates = scrubrates_i3100; + else + scrubrates = scrubrates_e752x; + + /* Translate the desired scrub rate to a e752x/3100 register value. + * Search for the bandwidth that is equal or greater than the + * desired rate and program the cooresponding register value. + */ + for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++) + if (scrubrates[i].bandwidth >= *new_bw) + break; + + if (scrubrates[i].bandwidth == SDRATE_EOT) + return -1; + + pci_write_config_word(pdev, E752X_MCHSCRB, scrubrates[i].scrubval); + + return 0; +} + +/* Convert current scrub rate value into byte/sec bandwidth */ +static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) +{ + const struct scrubrate *scrubrates; + struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct pci_dev *pdev = pvt->dev_d0f0; + u16 scrubval; + int i; + + if (pvt->dev_info->ctl_dev == PCI_DEVICE_ID_INTEL_3100_0) + scrubrates = scrubrates_i3100; + else + scrubrates = scrubrates_e752x; + + /* Find the bandwidth matching the memory scrubber configuration */ + pci_read_config_word(pdev, E752X_MCHSCRB, &scrubval); + scrubval = scrubval & 0x0f; + + for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++) + if (scrubrates[i].scrubval == scrubval) + break; + + if (scrubrates[i].bandwidth == SDRATE_EOT) { + e752x_printk(KERN_WARNING, + "Invalid sdram scrub control value: 0x%x\n", scrubval); + return -1; + } + + *bw = scrubrates[i].bandwidth; + + return 0; +} + /* Return 1 if dual channel mode is active. Else return 0. */ static inline int dual_channel_active(u16 ddrcsr) { @@ -1073,10 +1178,7 @@ fail: /* Setup system bus parity mask register. * Sysbus parity supported on: - * e7320/e7520/e7525 + Xeon - * i3100 + Xeon/Celeron - * Sysbus parity not supported on: - * i3100 + Pentium M/Celeron M/Core Duo/Core2 Duo + * e7320/e7520/e7525 + Xeon */ static void e752x_init_sysbus_parity_mask(struct e752x_pvt *pvt) { @@ -1087,10 +1189,7 @@ static void e752x_init_sysbus_parity_mask(struct e752x_pvt *pvt) /* Allow module parameter override, else see if CPU supports parity */ if (sysbus_parity != -1) { enable = sysbus_parity; - } else if (cpu_id[0] && - ((strstr(cpu_id, "Pentium") && strstr(cpu_id, " M ")) || - (strstr(cpu_id, "Celeron") && strstr(cpu_id, " M ")) || - (strstr(cpu_id, "Core") && strstr(cpu_id, "Duo")))) { + } else if (cpu_id[0] && !strstr(cpu_id, "Xeon")) { e752x_printk(KERN_INFO, "System Bus Parity not " "supported by CPU, disabling\n"); enable = 0; @@ -1187,6 +1286,8 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) mci->dev_name = pci_name(pdev); mci->edac_check = e752x_check; mci->ctl_page_to_phys = ctl_page_to_phys; + mci->set_sdram_scrub_rate = set_sdram_scrub_rate; + mci->get_sdram_scrub_rate = get_sdram_scrub_rate; /* set the map type. 1 = normal, 0 = reversed * Must be set before e752x_init_csrows in case csrow mapping diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 53764577035f..5fdedbc0f545 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -137,7 +137,7 @@ static ssize_t edac_dev_ctl_info_store(struct kobject *kobj, } /* edac_dev file operations for an 'ctl_info' */ -static struct sysfs_ops device_ctl_info_ops = { +static const struct sysfs_ops device_ctl_info_ops = { .show = edac_dev_ctl_info_show, .store = edac_dev_ctl_info_store }; @@ -373,7 +373,7 @@ static ssize_t edac_dev_instance_store(struct kobject *kobj, } /* edac_dev file operations for an 'instance' */ -static struct sysfs_ops device_instance_ops = { +static const struct sysfs_ops device_instance_ops = { .show = edac_dev_instance_show, .store = edac_dev_instance_store }; @@ -476,7 +476,7 @@ static ssize_t edac_dev_block_store(struct kobject *kobj, } /* edac_dev file operations for a 'block' */ -static struct sysfs_ops device_block_ops = { +static const struct sysfs_ops device_block_ops = { .show = edac_dev_block_show, .store = edac_dev_block_store }; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index e1d4ce083481..88840e9fa3e0 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -245,7 +245,7 @@ static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops csrowfs_ops = { +static const struct sysfs_ops csrowfs_ops = { .show = csrowdev_show, .store = csrowdev_store }; @@ -575,7 +575,7 @@ static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, } /* Intermediate show/store table */ -static struct sysfs_ops mci_ops = { +static const struct sysfs_ops mci_ops = { .show = mcidev_show, .store = mcidev_store }; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index fb60a877d768..bef94e3d9944 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -121,7 +121,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj, } /* fs_ops table */ -static struct sysfs_ops pci_instance_ops = { +static const struct sysfs_ops pci_instance_ops = { .show = edac_pci_instance_show, .store = edac_pci_instance_store }; @@ -261,7 +261,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, return -EIO; } -static struct sysfs_ops edac_pci_sysfs_ops = { +static const struct sysfs_ops edac_pci_sysfs_ops = { .show = edac_pci_dev_show, .store = edac_pci_dev_store }; diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index ecd5928d7110..94cac0aacea3 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -239,16 +239,15 @@ static int __devinit mpc85xx_pci_err_probe(struct of_device *op, /* we only need the error registers */ r.start += 0xe00; - if (!devm_request_mem_region(&op->dev, r.start, - r.end - r.start + 1, pdata->name)) { + if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), + pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } - pdata->pci_vbase = devm_ioremap(&op->dev, r.start, - r.end - r.start + 1); + pdata->pci_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); if (!pdata->pci_vbase) { printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; @@ -668,15 +667,125 @@ static struct of_platform_driver mpc85xx_l2_err_driver = { /**************************** MC Err device ***************************/ +/* + * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the + * MPC8572 User's Manual. Each line represents a syndrome bit column as a + * 64-bit value, but split into an upper and lower 32-bit chunk. The labels + * below correspond to Freescale's manuals. + */ +static unsigned int ecc_table[16] = { + /* MSB LSB */ + /* [0:31] [32:63] */ + 0xf00fe11e, 0xc33c0ff7, /* Syndrome bit 7 */ + 0x00ff00ff, 0x00fff0ff, + 0x0f0f0f0f, 0x0f0fff00, + 0x11113333, 0x7777000f, + 0x22224444, 0x8888222f, + 0x44448888, 0xffff4441, + 0x8888ffff, 0x11118882, + 0xffff1111, 0x22221114, /* Syndrome bit 0 */ +}; + +/* + * Calculate the correct ECC value for a 64-bit value specified by high:low + */ +static u8 calculate_ecc(u32 high, u32 low) +{ + u32 mask_low; + u32 mask_high; + int bit_cnt; + u8 ecc = 0; + int i; + int j; + + for (i = 0; i < 8; i++) { + mask_high = ecc_table[i * 2]; + mask_low = ecc_table[i * 2 + 1]; + bit_cnt = 0; + + for (j = 0; j < 32; j++) { + if ((mask_high >> j) & 1) + bit_cnt ^= (high >> j) & 1; + if ((mask_low >> j) & 1) + bit_cnt ^= (low >> j) & 1; + } + + ecc |= bit_cnt << i; + } + + return ecc; +} + +/* + * Create the syndrome code which is generated if the data line specified by + * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641 + * User's Manual and 9-61 in the MPC8572 User's Manual. + */ +static u8 syndrome_from_bit(unsigned int bit) { + int i; + u8 syndrome = 0; + + /* + * Cycle through the upper or lower 32-bit portion of each value in + * ecc_table depending on if 'bit' is in the upper or lower half of + * 64-bit data. + */ + for (i = bit < 32; i < 16; i += 2) + syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2); + + return syndrome; +} + +/* + * Decode data and ecc syndrome to determine what went wrong + * Note: This can only decode single-bit errors + */ +static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, + int *bad_data_bit, int *bad_ecc_bit) +{ + int i; + u8 syndrome; + + *bad_data_bit = -1; + *bad_ecc_bit = -1; + + /* + * Calculate the ECC of the captured data and XOR it with the captured + * ECC to find an ECC syndrome value we can search for + */ + syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc; + + /* Check if a data line is stuck... */ + for (i = 0; i < 64; i++) { + if (syndrome == syndrome_from_bit(i)) { + *bad_data_bit = i; + return; + } + } + + /* If data is correct, check ECC bits for errors... */ + for (i = 0; i < 8; i++) { + if ((syndrome >> i) & 0x1) { + *bad_ecc_bit = i; + return; + } + } +} + static void mpc85xx_mc_check(struct mem_ctl_info *mci) { struct mpc85xx_mc_pdata *pdata = mci->pvt_info; struct csrow_info *csrow; + u32 bus_width; u32 err_detect; u32 syndrome; u32 err_addr; u32 pfn; int row_index; + u32 cap_high; + u32 cap_low; + int bad_data_bit; + int bad_ecc_bit; err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); if (!err_detect) @@ -692,6 +801,15 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) } syndrome = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ECC); + + /* Mask off appropriate bits of syndrome based on bus width */ + bus_width = (in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG) & + DSC_DBW_MASK) ? 32 : 64; + if (bus_width == 64) + syndrome &= 0xff; + else + syndrome &= 0xffff; + err_addr = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS); pfn = err_addr >> PAGE_SHIFT; @@ -701,14 +819,35 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) break; } - mpc85xx_mc_printk(mci, KERN_ERR, "Capture Data High: %#8.8x\n", - in_be32(pdata->mc_vbase + - MPC85XX_MC_CAPTURE_DATA_HI)); - mpc85xx_mc_printk(mci, KERN_ERR, "Capture Data Low: %#8.8x\n", - in_be32(pdata->mc_vbase + - MPC85XX_MC_CAPTURE_DATA_LO)); - mpc85xx_mc_printk(mci, KERN_ERR, "syndrome: %#8.8x\n", syndrome); - mpc85xx_mc_printk(mci, KERN_ERR, "err addr: %#8.8x\n", err_addr); + cap_high = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_HI); + cap_low = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_LO); + + /* + * Analyze single-bit errors on 64-bit wide buses + * TODO: Add support for 32-bit wide buses + */ + if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + sbe_ecc_decode(cap_high, cap_low, syndrome, + &bad_data_bit, &bad_ecc_bit); + + if (bad_data_bit != -1) + mpc85xx_mc_printk(mci, KERN_ERR, + "Faulty Data bit: %d\n", bad_data_bit); + if (bad_ecc_bit != -1) + mpc85xx_mc_printk(mci, KERN_ERR, + "Faulty ECC bit: %d\n", bad_ecc_bit); + + mpc85xx_mc_printk(mci, KERN_ERR, + "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + cap_high ^ (1 << (bad_data_bit - 32)), + cap_low ^ (1 << bad_data_bit), + syndrome ^ (1 << bad_ecc_bit)); + } + + mpc85xx_mc_printk(mci, KERN_ERR, + "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + cap_high, cap_low, syndrome); + mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8x\n", err_addr); mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); /* we are out of range */ diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 52432ee7c4b9..cb24df839460 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -48,6 +48,9 @@ #define DSC_MEM_EN 0x80000000 #define DSC_ECC_EN 0x20000000 #define DSC_RD_EN 0x10000000 +#define DSC_DBW_MASK 0x00180000 +#define DSC_DBW_32 0x00080000 +#define DSC_DBW_64 0x00000000 #define DSC_SDTYPE_MASK 0x07000000 |