diff options
-rw-r--r-- | arch/x86/include/asm/mce.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 64 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 6 |
6 files changed, 72 insertions, 38 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 98165835dc68..6add827381c9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -121,7 +121,8 @@ struct mce_log { #ifdef __KERNEL__ -extern struct atomic_notifier_head x86_mce_decoder_chain; +extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_unregister_decode_chain(struct notifier_block *nb); #include <linux/percpu.h> #include <linux/init.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2af127d4c3d1..5be2464cce6a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -/* - * CPU/chipset specific EDAC code can register a notifier call here to print - * MCE errors in a human-readable form. - */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); -EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); - /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL @@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { static DEFINE_PER_CPU(struct work_struct, mce_work); +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); + /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) { @@ -190,6 +189,57 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } +static void drain_mcelog_buffer(void) +{ + unsigned int next, i, prev = 0; + + next = rcu_dereference_check_mce(mcelog.next); + + do { + struct mce *m; + + /* drain what was logged during boot */ + for (i = prev; i < next; i++) { + unsigned long start = jiffies; + unsigned retries = 1; + + m = &mcelog.entry[i]; + + while (!m->finished) { + if (time_after_eq(jiffies, start + 2*retries)) + retries++; + + cpu_relax(); + + if (!m->finished && retries >= 4) { + pr_err("MCE: skipping error being logged currently!\n"); + break; + } + } + smp_rmb(); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); + } + + memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); + prev = next; + next = cmpxchg(&mcelog.next, prev, 0); + } while (next != prev); +} + + +void mce_register_decode_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); + drain_mcelog_buffer(); +} +EXPORT_SYMBOL_GPL(mce_register_decode_chain); + +void mce_unregister_decode_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); + static void print_mce(struct mce *m) { int ret = 0; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea6..ce04b5804085 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ -/* - * Set up the most two significant bit to notify mce log that this thermal - * event type. - * This is a temp solution. May be changed in the future with mce log - * infrasture. - */ -#define CORE_THROTTLED (0) -#define CORE_POWER_LIMIT ((__u64)1 << 62) -#define PACKAGE_THROTTLED ((__u64)2 << 62) -#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) - static void notify_thresholds(__u64 msr_val) { /* check whether the interrupt handler is defined; @@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void) if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, CORE_LEVEL) != 0) - mce_log_therm_throt_event(CORE_THROTTLED | msr_val); + mce_log_therm_throt_event(msr_val); if (this_cpu_has(X86_FEATURE_PLN)) - if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, + therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PTS)) { rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, + therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, - PACKAGE_LEVEL) != 0) - mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); + PACKAGE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN)) - if (therm_throt_process(msr_val & + therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, - PACKAGE_LEVEL) != 0) - mce_log_therm_throt_event(PACKAGE_POWER_LIMIT - | msr_val); + PACKAGE_LEVEL); } } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 70ad8923f1d7..8568d9b61875 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) if (pvt->enable_scrub) disable_sdram_scrub_setting(mci); - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); + mce_unregister_decode_chain(&i7_mce_dec); /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); @@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* DCLK for scrub rate setting */ pvt->dclk_freq = get_dclk_freq(); - atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); + mce_register_decode_chain(&i7_mce_dec); return 0; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index d0864d9c38ad..bd926ea2e00c 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -884,7 +884,7 @@ static int __init mce_amd_init(void) pr_info("MCE: In-kernel MCE decoding enabled.\n"); - atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); + mce_register_decode_chain(&amd_mce_dec_nb); return 0; } @@ -893,7 +893,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); + mce_unregister_decode_chain(&amd_mce_dec_nb); kfree(fam_ops); } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 7a402bfbee7d..965bc0cc0cf6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1661,8 +1661,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &sbridge_dev->pdev[0]->dev); - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, - &sbridge_mce_dec); + mce_unregister_decode_chain(&sbridge_mce_dec); /* Remove MC sysfs nodes */ edac_mc_del_mc(mci->dev); @@ -1731,8 +1730,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) goto fail0; } - atomic_notifier_chain_register(&x86_mce_decoder_chain, - &sbridge_mce_dec); + mce_register_decode_chain(&sbridge_mce_dec); return 0; fail0: |