From f92cae45263b25cdb4c4d24e297e07945d3bc01b Mon Sep 17 00:00:00 2001 From: Ashish Shenoy Date: Wed, 22 Feb 2012 17:20:38 -0800 Subject: amd64_edac: Fix missing csrows sysfs nodes While initializing the array of csrow attribute instances, a few csrows were uninitialized. This happened because the module only performed a check for DRAM base ctl register0's and not DRAM base ctl register1's chip select enable bit. There could be systems with DIMMs populated on only single memory channel whereas the module also assumed that a dual channel dimm had double the memory size of a single memory channel instead of checking the memory on each channel. This patch fixes these above issues. Signed-off-by: Ashish Shenoy Signed-off-by: Prasanna S. Panchamukhi Link: http://lkml.kernel.org/r/4F459CFA.5090604@riverbed.com Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c9eee6d33e9a..b9424dcde906 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2133,6 +2133,7 @@ static void read_mc_regs(struct amd64_pvt *pvt) static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { u32 cs_mode, nr_pages; + u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; /* * The math on this doesn't look right on the surface because x/2*4 can @@ -2141,16 +2142,10 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) * number of bits to shift the DBAM register to extract the proper CSROW * field. */ - cs_mode = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF; + cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF; nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); - /* - * If dual channel then double the memory size of single channel. - * Channel count is 1 or 2 - */ - nr_pages <<= (pvt->channel_count - 1); - debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); debugf0(" nr_pages= %u channel-count = %d\n", nr_pages, pvt->channel_count); @@ -2181,7 +2176,7 @@ static int init_csrows(struct mem_ctl_info *mci) for_each_chip_select(i, 0, pvt) { csrow = &mci->csrows[i]; - if (!csrow_enabled(i, 0, pvt)) { + if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) { debugf1("----CSROW %d EMPTY for node %d\n", i, pvt->mc_node_id); continue; @@ -2191,7 +2186,10 @@ static int init_csrows(struct mem_ctl_info *mci) i, pvt->mc_node_id); empty = 0; - csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + if (csrow_enabled(i, 0, pvt)) + csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + if (csrow_enabled(i, 1, pvt)) + csrow->nr_pages += amd64_csrow_nr_pages(pvt, 1, i); find_csrow_limits(mci, i, &input_addr_min, &input_addr_max); sys_addr = input_addr_to_sys_addr(mci, input_addr_min); csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT); -- cgit v1.2.3 From 11b0a31473edf74b70ab6f8fe857b61bff82d7cc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 9 Nov 2011 21:28:43 +0100 Subject: amd64_edac: Fix K8 revD and later chip select sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix DRAM chip select sizes calculation for K8, revisions D and E. Reported-by: Niklas Söderlund --- drivers/edac/amd64_edac.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index b9424dcde906..03807283aca4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1132,12 +1132,36 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr2_cs_size(cs_mode, dclr & WIDTH_128); } else if (pvt->ext_model >= K8_REV_D) { + unsigned diff; WARN_ON(cs_mode > 10); - if (cs_mode == 3 || cs_mode == 8) - return 32 << (cs_mode - 1); - else - return 32 << cs_mode; + /* + * the below calculation, besides trying to win an obfuscated C + * contest, maps cs_mode values to DIMM chip select sizes. The + * mappings are: + * + * cs_mode CS size (mb) + * ======= ============ + * 0 32 + * 1 64 + * 2 128 + * 3 128 + * 4 256 + * 5 512 + * 6 256 + * 7 512 + * 8 1024 + * 9 1024 + * 10 2048 + * + * Basically, it calculates a value with which to shift the + * smallest CS size of 32MB. + * + * ddr[23]_cs_size have a similar purpose. + */ + diff = cs_mode/3 + (unsigned)(cs_mode > 5); + + return 32 << (cs_mode - diff); } else { WARN_ON(cs_mode > 6); -- cgit v1.2.3 From 5e8e19bf6c3c9d8ecf74e2a7fdae99a76949bdf6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 21 Sep 2011 14:10:43 +0200 Subject: EDAC: Correct scrub rate API The original scrub rate API definition states that if scrub rate accessors are not implemented, a negative value (-1) should be written to the sysfs file (/sys/devices/system/edac/mc/mc/sdram_scrub_rate, where N is the memory controller number on the system). This is counter-intuitive and awkward at the very least because, when setting the scrub rate, userspace has to write to sysfs and then read it back to check error status of the operation. As Tony notes, best it would be to not have the sdram_scrub_rate in sysfs if scrub rate support is not implemented. It is too late about that and a bunch of drivers on a bunch of arches would need to be changed and tested which is not a trivial task ATM. Instead, settle for the next best thing of returning -ENODEV when implementation is missing and -EINVAL when there was an error encountered while setting the scrub rate. Reported-by: Han Pingtian Cc: Tony Luck Link: http://lkml.kernel.org/r/20110916105856.GA13253@hpt.nay.redhat.com Signed-off-by: Borislav Petkov --- Documentation/edac.txt | 4 ++-- drivers/edac/edac_mc_sysfs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 249822cde82b..fdcc49fad8e1 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -334,8 +334,8 @@ Sdram memory scrubbing rate: Reading the file will return the actual scrubbing rate employed. - If configuration fails or memory scrubbing is not implemented, the value - of the attribute file will be -1. + If configuration fails or memory scrubbing is not implemented, accessing + that attribute will fail. diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index d56e63477d5c..e9a28f576d14 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -452,7 +452,7 @@ static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, int new_bw = 0; if (!mci->set_sdram_scrub_rate) - return -EINVAL; + return -ENODEV; if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -475,7 +475,7 @@ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) int bandwidth = 0; if (!mci->get_sdram_scrub_rate) - return -EINVAL; + return -ENODEV; bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { -- cgit v1.2.3 From 36c46f31df910b092aaaed27c7c616bb8e2302a1 Mon Sep 17 00:00:00 2001 From: Lionel Debroux Date: Mon, 27 Feb 2012 07:41:47 +0100 Subject: EDAC: Make pci_device_id tables __devinitconst. These const tables are currently marked __devinitdata, but Documentation/PCI/pci.txt says: "o The ID table array should be marked __devinitconst; this is done automatically if the table is declared with DEFINE_PCI_DEVICE_TABLE()." So use DEFINE_PCI_DEVICE_TABLE(x). Based on PaX and earlier work by Andi Kleen. Signed-off-by: Lionel Debroux Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- drivers/edac/amd76x_edac.c | 2 +- drivers/edac/e752x_edac.c | 2 +- drivers/edac/e7xxx_edac.c | 2 +- drivers/edac/i3000_edac.c | 2 +- drivers/edac/i3200_edac.c | 2 +- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5100_edac.c | 2 +- drivers/edac/i5400_edac.c | 2 +- drivers/edac/i7300_edac.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/i82443bxgx_edac.c | 2 +- drivers/edac/i82860_edac.c | 2 +- drivers/edac/i82875p_edac.c | 2 +- drivers/edac/i82975x_edac.c | 2 +- drivers/edac/r82600_edac.c | 2 +- drivers/edac/sb_edac.c | 2 +- drivers/edac/x38_edac.c | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 03807283aca4..7ef73c919c5d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2707,7 +2707,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) * PCI core identifies what devices are on a system during boot, and then * inquiry this table to see if this driver is for a given device found. */ -static const struct pci_device_id amd64_pci_table[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = { { .vendor = PCI_VENDOR_ID_AMD, .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index e47e73bbbcc5..f8fd3c807bde 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -321,7 +321,7 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(amd76x_pci_tbl) = { { PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762}, diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 1af531a11d21..41223261ede9 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1380,7 +1380,7 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(e752x_pci_tbl) = { { PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520}, diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 6ffb6d23281f..68dea87b72e6 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -525,7 +525,7 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(e7xxx_pci_tbl) = { { PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205}, diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index c0510b3d7035..277689a68841 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -470,7 +470,7 @@ static void __devexit i3000_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i3000_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i3000_pci_tbl) = { { PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3000}, diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 73f55e2008c2..046808c6357d 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -445,7 +445,7 @@ static void __devexit i3200_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i3200_pci_tbl) = { { PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3200}, diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 4dc3ac25a422..a2680d8e744b 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1516,7 +1516,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static const struct pci_device_id i5000_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i5000_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), .driver_data = I5000P}, diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index bcbdeeca48b8..2e23547b2f24 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -1051,7 +1051,7 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i5100_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i5100_pci_tbl) = { /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, { 0, } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 74d6ec342afb..67ec9626a330 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1383,7 +1383,7 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static const struct pci_device_id i5400_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i5400_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, {0,} /* 0 terminated list. */ }; diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 6104dba380b6..3bafa3bca148 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -1192,7 +1192,7 @@ static void __devexit i7300_remove_one(struct pci_dev *pdev) * * Has only 8086:360c PCI ID */ -static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i7300_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, {0,} /* 0 terminated list. */ }; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 8568d9b61875..85226ccf5290 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -391,7 +391,7 @@ static const struct pci_id_table pci_dev_table[] = { /* * pci_device_id table for which devices we are looking for */ -static const struct pci_device_id i7core_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, {0,} /* 0 terminated list. */ diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 4329d39f902c..3bf2b2f490e7 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -380,7 +380,7 @@ static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); -static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i82443bxgx_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 931a05775049..c779092d18d1 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -270,7 +270,7 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82860_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i82860_pci_tbl) = { { PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82860}, diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 33864c63c684..10f15d85fb5e 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -511,7 +511,7 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i82875p_pci_tbl) = { { PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P}, diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 4184e0171f00..0cd8368f88f8 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -612,7 +612,7 @@ static void __devexit i82975x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(i82975x_pci_tbl) = { { PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82975X diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index e294e1b3616c..6d908ad72d64 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -373,7 +373,7 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(r82600_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) }, diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 1dc118d83cc6..3a605f777712 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -367,7 +367,7 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = { /* * pci_device_id table for which devices we are looking for */ -static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, {0,} /* 0 terminated list. */ }; diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index b6f47de152f3..a438297389e5 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -440,7 +440,7 @@ static void __devexit x38_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id x38_pci_tbl[] __devinitdata = { +static DEFINE_PCI_DEVICE_TABLE(x38_pci_tbl) = { { PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, X38}, -- cgit v1.2.3 From 344f0a0631e1b2784859fbe2351d99dce2652b77 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 15 Nov 2011 17:10:58 +0100 Subject: MCE, AMD: Correct some MC0 error types Use "System Read Data Error" as a more general name for MC0 bus errors on F15h and update some error definitions. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index bd926ea2e00c..0ee1c0a115a2 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -255,10 +255,9 @@ static bool f15h_dc_mce(u16 ec, u8 xec) } else if (BUS_ERROR(ec)) { if (!xec) - pr_cont("during system linefill.\n"); + pr_cont("System Read Data Error.\n"); else - pr_cont(" Internal %s condition.\n", - ((xec == 1) ? "livelock" : "deadlock")); + pr_cont(" Internal error condition type %d.\n", xec); } else ret = false; -- cgit v1.2.3 From 6c1173a61e63c32bd40cb1e6dd16343240a328eb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Nov 2011 19:45:34 +0100 Subject: MCE, AMD: Correct ucode patch buffer description This MC1 error signature is called differently now, fix it. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 0ee1c0a115a2..5626e17a6b91 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -88,7 +88,7 @@ static const char * const f15h_ic_mce_desc[] = { "Parity error for IC probe tag valid bit", "PFB non-cacheable bit parity error", "PFB valid bit parity error", /* xec = 0xd */ - "patch RAM", /* xec = 010 */ + "Microcode Patch Buffer", /* xec = 010 */ "uop queue", "insn buffer", "predecode buffer", @@ -354,7 +354,11 @@ static bool f15h_ic_mce(u16 ec, u8 xec) pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); break; - case 0x10 ... 0x14: + case 0x10: + pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]); + break; + + case 0x11 ... 0x14: pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); break; -- cgit v1.2.3 From b64a99c1752d2b6525a5011a8e473f8f8a4bdd79 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 23 Nov 2011 14:50:44 +0100 Subject: MCE, AMD: Correct VB data error description Sync with latest BKDG error types. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 5626e17a6b91..bf6dd9978aa7 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -104,7 +104,7 @@ static const char * const f15h_cu_mce_desc[] = { "WCC Tag ECC error", "WCC Data ECC error", "WCB Data parity error", - "VB Data/ECC error", + "VB Data ECC or parity error", "L2 Tag ECC error", /* xec = 0x10 */ "Hard L2 Tag ECC error", "Multiple hits on L2 tag", -- cgit v1.2.3 From 68782673e6dd69054a9b75b0983a5e45e16f6625 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 24 Nov 2011 21:29:57 +0100 Subject: MCE, AMD: Rework NB MCE signatures Correct their formulation, replace per-family functions with a single, unified lookup table. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 176 ++++++++++++++----------------------------------- drivers/edac/mce_amd.h | 1 - 2 files changed, 48 insertions(+), 129 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index bf6dd9978aa7..f6ebe5e9a57f 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -64,17 +64,6 @@ EXPORT_SYMBOL_GPL(to_msgs); const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; EXPORT_SYMBOL_GPL(ii_msgs); -static const char *f10h_nb_mce_desc[] = { - "HT link data error", - "Protocol error (link, L3, probe filter, etc.)", - "Parity error in NB-internal arrays", - "Link Retry due to IO link transmission error", - "L3 ECC data cache error", - "ECC error in L3 cache tag", - "L3 LRU parity bits error", - "ECC Error in the Probe Filter directory" -}; - static const char * const f15h_ic_mce_desc[] = { "UC during a demand linefill from L2", "Parity error during data load from IC", @@ -112,6 +101,28 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; +static const char *nb_mce_desc[] = { + "DRAM ECC error detected on the NB", + "CRC error detected on HT link", + "Link-defined sync error packets detected on HT link", + "HT Master abort", + "HT Target abort", + "Invalid GART PTE entry during GART table walk", + "Unsupported atomic RMW received from an IO link", + "Watchdog timeout due to lack of progress", + "DRAM ECC error detected on the NB", + "SVM DMA Exclusion Vector error", + "HT data error detected on link", + "Protocol error (link, L3, probe filter)", + "NB internal arrays parity error", + "DRAM addr/ctl signals parity error", + "IO link transmission error", + "L3 data cache ECC error", /* xec = 0x1c */ + "L3 cache tag error", + "L3 LRU parity bits error", + "ECC Error in the Probe Filter directory" +}; + static const char * const fr_ex_mce_desc[] = { "CPU Watchdog timer expire", "Wakeup array dest tag", @@ -499,58 +510,31 @@ wrong_ls_mce: pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); } -static bool k8_nb_mce(u16 ec, u8 xec) +void amd_decode_nb_mce(struct mce *m) { - bool ret = true; - - switch (xec) { - case 0x1: - pr_cont("CRC error detected on HT link.\n"); - break; - - case 0x5: - pr_cont("Invalid GART PTE entry during GART table walk.\n"); - break; - - case 0x6: - pr_cont("Unsupported atomic RMW received from an IO link.\n"); - break; - - case 0x0: - case 0x8: - if (boot_cpu_data.x86 == 0x11) - return false; - - pr_cont("DRAM ECC error detected on the NB.\n"); - break; - - case 0xd: - pr_cont("Parity error on the DRAM addr/ctl signals.\n"); - break; - - default: - ret = false; - break; - } + struct cpuinfo_x86 *c = &boot_cpu_data; + int node_id = amd_get_nb_id(m->extcpu); + u16 ec = EC(m->status); + u8 xec = XEC(m->status, 0x1f); + u8 offset = 0; - return ret; -} + pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); -static bool f10h_nb_mce(u16 ec, u8 xec) -{ - bool ret = true; - u8 offset = 0; + switch (xec) { + case 0x0 ... 0xe: - if (k8_nb_mce(ec, xec)) - return true; + /* special handling for DRAM ECCs */ + if (xec == 0x0 || xec == 0x8) { + /* no ECCs on F11h */ + if (c->x86 == 0x11) + goto wrong_nb_mce; - switch(xec) { - case 0xa ... 0xc: - offset = 10; - break; + pr_cont("%s.\n", nb_mce_desc[xec]); - case 0xe: - offset = 11; + if (nb_bus_decoder) + nb_bus_decoder(node_id, m); + return; + } break; case 0xf: @@ -559,83 +543,25 @@ static bool f10h_nb_mce(u16 ec, u8 xec) else if (BUS_ERROR(ec)) pr_cont("DMA Exclusion Vector Table Walk error.\n"); else - ret = false; - - goto out; - break; + goto wrong_nb_mce; + return; case 0x19: if (boot_cpu_data.x86 == 0x15) pr_cont("Compute Unit Data Error.\n"); else - ret = false; - - goto out; - break; + goto wrong_nb_mce; + return; case 0x1c ... 0x1f: - offset = 24; + offset = 13; break; default: - ret = false; - - goto out; - break; - } - - pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); - -out: - return ret; -} - -static bool nb_noop_mce(u16 ec, u8 xec) -{ - return false; -} - -void amd_decode_nb_mce(struct mce *m) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - int node_id = amd_get_nb_id(m->extcpu); - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); - - pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); - - switch (xec) { - case 0x2: - pr_cont("Sync error (sync packets on HT link detected).\n"); - return; - - case 0x3: - pr_cont("HT Master abort.\n"); - return; - - case 0x4: - pr_cont("HT Target abort.\n"); - return; - - case 0x7: - pr_cont("NB Watchdog timeout.\n"); - return; - - case 0x9: - pr_cont("SVM DMA Exclusion Vector error.\n"); - return; - - default: - break; - } - - if (!fam_ops->nb_mce(ec, xec)) goto wrong_nb_mce; + } - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x15) - if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) - nb_bus_decoder(node_id, m); - + pr_cont("%s.\n", nb_mce_desc[xec - offset]); return; wrong_nb_mce: @@ -844,39 +770,33 @@ static int __init mce_amd_init(void) case 0xf: fam_ops->dc_mce = k8_dc_mce; fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = k8_nb_mce; break; case 0x10: fam_ops->dc_mce = f10h_dc_mce; fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; break; case 0x11: fam_ops->dc_mce = k8_dc_mce; fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; break; case 0x12: fam_ops->dc_mce = f12h_dc_mce; fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = nb_noop_mce; break; case 0x14: nb_err_cpumask = 0x3; fam_ops->dc_mce = f14h_dc_mce; fam_ops->ic_mce = f14h_ic_mce; - fam_ops->nb_mce = nb_noop_mce; break; case 0x15: xec_mask = 0x1f; fam_ops->dc_mce = f15h_dc_mce; fam_ops->ic_mce = f15h_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; break; default: diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 0106747e240c..6fcf599e691f 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -82,7 +82,6 @@ extern const char *ii_msgs[]; struct amd_decoder_ops { bool (*dc_mce)(u16, u8); bool (*ic_mce)(u16, u8); - bool (*nb_mce)(u16, u8); }; void amd_report_gart_errors(bool); -- cgit v1.2.3 From ae615b4b5f0b875cbe8a029239436c6aed8c0ef4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 25 Nov 2011 15:42:59 +0100 Subject: MCE, AMD: Correct bank 5 error signatures ... and remove superfluous ErrorCodeExt check. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index f6ebe5e9a57f..88a92974b78c 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -136,7 +136,7 @@ static const char * const fr_ex_mce_desc[] = { "Physical register file AG0 port", "Physical register file AG1 port", "Flag register file", - "DE correctable error could not be corrected" + "DE error occurred" }; static bool f12h_dc_mce(u16 ec, u8 xec) @@ -577,9 +577,6 @@ static void amd_decode_fr_mce(struct mce *m) if (c->x86 == 0xf || c->x86 == 0x11) goto wrong_fr_mce; - if (c->x86 != 0x15 && xec != 0x0) - goto wrong_fr_mce; - pr_emerg(HW_ERR "%s Error: ", (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); -- cgit v1.2.3 From ebe2aea86872622d4352cd71d55298fedf69a7bb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Nov 2011 19:03:25 +0100 Subject: MCE, AMD: Constify error tables ... so that checkpatch can chill out. Signed-off-by: Borislav Petkov Reviewed-by: Andreas Herrmann --- drivers/edac/mce_amd.c | 14 +++++++------- drivers/edac/mce_amd.h | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 88a92974b78c..36e1486eb9aa 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -39,29 +39,29 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); */ /* transaction type */ -const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; +const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; EXPORT_SYMBOL_GPL(tt_msgs); /* cache level */ -const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; +const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; EXPORT_SYMBOL_GPL(ll_msgs); /* memory transaction type */ -const char *rrrr_msgs[] = { +const char * const rrrr_msgs[] = { "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" }; EXPORT_SYMBOL_GPL(rrrr_msgs); /* participating processor */ -const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; +const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; EXPORT_SYMBOL_GPL(pp_msgs); /* request timeout */ -const char *to_msgs[] = { "no timeout", "timed out" }; +const char * const to_msgs[] = { "no timeout", "timed out" }; EXPORT_SYMBOL_GPL(to_msgs); /* memory or i/o */ -const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; +const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; EXPORT_SYMBOL_GPL(ii_msgs); static const char * const f15h_ic_mce_desc[] = { @@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; -static const char *nb_mce_desc[] = { +static const char * const nb_mce_desc[] = { "DRAM ECC error detected on the NB", "CRC error detected on HT link", "Link-defined sync error packets detected on HT link", diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 6fcf599e691f..c6074c5cd1ef 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -69,12 +69,12 @@ enum rrrr_ids { R4_SNOOP, }; -extern const char *tt_msgs[]; -extern const char *ll_msgs[]; -extern const char *rrrr_msgs[]; -extern const char *pp_msgs[]; -extern const char *to_msgs[]; -extern const char *ii_msgs[]; +extern const char * const tt_msgs[]; +extern const char * const ll_msgs[]; +extern const char * const rrrr_msgs[]; +extern const char * const pp_msgs[]; +extern const char * const to_msgs[]; +extern const char * const ii_msgs[]; /* * per-family decoder ops -- cgit v1.2.3