diff options
45 files changed, 750 insertions, 250 deletions
@@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 -SUBLEVEL = 29 -EXTRAVERSION = +SUBLEVEL = 30 +EXTRAVERSION = -rc1 NAME = Temporary Tasmanian Devil # *DOCUMENTATION* diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0beba0d1468d..bb83b1c397aa 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -154,6 +154,7 @@ * CPUID levels like 0x6, 0xA etc */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ /* Virtualization flags: Linux defined */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 098ec84b8c00..f2870920f246 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); + if (cpu_has(¤t_cpu_data, X86_FEATURE_ARAT)) { + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; + /* Make LAPIC timer preferrable over percpu HPET */ + lapic_clockevent.rating = 150; + } + memcpy(levt, &lapic_clockevent, sizeof(*levt)); levt->cpumask = cpumask_of(smp_processor_id()); diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 8220ae69849d..c965e5212714 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, { 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 19f6b9d27e83..9d3af380c6bd 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,6 +68,7 @@ struct acpi_cpufreq_data { unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; + u64 saved_aperf, saved_mperf; }; static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); @@ -241,26 +242,23 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } -struct perf_cur { +struct perf_pair { union { struct { u32 lo; u32 hi; } split; u64 whole; - } aperf_cur, mperf_cur; + } aperf, mperf; }; static long read_measured_perf_ctrs(void *_cur) { - struct perf_cur *cur = _cur; + struct perf_pair *cur = _cur; - rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0, 0); - wrmsr(MSR_IA32_MPERF, 0, 0); + rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); + rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); return 0; } @@ -281,52 +279,57 @@ static long read_measured_perf_ctrs(void *_cur) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - struct perf_cur cur; + struct perf_pair readin, cur; unsigned int perf_percent; unsigned int retval; - if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur)) + if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin)) return 0; + cur.aperf.whole = readin.aperf.whole - + per_cpu(drv_data, cpu)->saved_aperf; + cur.mperf.whole = readin.mperf.whole - + per_cpu(drv_data, cpu)->saved_mperf; + per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole; + per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole; + #ifdef __i386__ /* * We dont want to do 64 bit divide with 32 bit kernel * Get an approximate value. Return failure in case we cannot get * an approximate value. */ - if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) { + if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { int shift_count; u32 h; - h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi); + h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); shift_count = fls(h); - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) { + if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { int shift_count = 7; - cur.aperf_cur.split.lo >>= shift_count; - cur.mperf_cur.split.lo >>= shift_count; + cur.aperf.split.lo >>= shift_count; + cur.mperf.split.lo >>= shift_count; } - if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo) - perf_percent = (cur.aperf_cur.split.lo * 100) / - cur.mperf_cur.split.lo; + if (cur.aperf.split.lo && cur.mperf.split.lo) + perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; else perf_percent = 0; #else - if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) { + if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { int shift_count = 7; - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (cur.aperf_cur.whole && cur.mperf_cur.whole) - perf_percent = (cur.aperf_cur.whole * 100) / - cur.mperf_cur.whole; + if (cur.aperf.whole && cur.mperf.whole) + perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; else perf_percent = 0; diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 0bd48e65a0ca..ce2ed3e4aad9 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -33,7 +33,6 @@ #include <linux/timex.h> #include <linux/io.h> #include <linux/acpi.h> -#include <linux/kernel.h> #include <asm/msr.h> #include <acpi/processor.h> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 61df77532120..70a10ca100f6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -20,7 +20,6 @@ #include <asm/cacheflush.h> #include <asm/ftrace.h> -#include <linux/ftrace.h> #include <asm/nops.h> #include <asm/nmi.h> diff --git a/block/blk-core.c b/block/blk-core.c index 43fdedc524ee..07ab75403e1a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -131,6 +131,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; + rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->ref_count = 1; } diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c index bd3c937b0ac0..7737afb157c3 100644 --- a/drivers/acpi/acpica/hwvalid.c +++ b/drivers/acpi/acpica/hwvalid.c @@ -90,7 +90,6 @@ static const struct acpi_port_info acpi_protected_ports[] = { {"PIT2", 0x0048, 0x004B, ACPI_OSI_WIN_XP}, {"RTC", 0x0070, 0x0071, ACPI_OSI_WIN_XP}, {"CMOS", 0x0074, 0x0076, ACPI_OSI_WIN_XP}, - {"DMA1", 0x0081, 0x0083, ACPI_OSI_WIN_XP}, {"DMA1L", 0x0087, 0x0087, ACPI_OSI_WIN_XP}, {"DMA2", 0x0089, 0x008B, ACPI_OSI_WIN_XP}, {"DMA2L", 0x008F, 0x008F, ACPI_OSI_WIN_XP}, diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index b0de6312919a..3c7d8942f23b 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -903,7 +903,7 @@ static struct acpi_driver acpi_battery_driver = { }, }; -static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie) +static void acpi_battery_init_async(void *unused, async_cookie_t cookie) { if (acpi_disabled) return; diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index 05dfdc96802e..d0d550d22a6d 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -343,9 +343,6 @@ acpi_system_write_alarm(struct file *file, } #endif /* HAVE_ACPI_LEGACY_ALARM */ -extern struct list_head acpi_wakeup_device_list; -extern spinlock_t acpi_device_lock; - static int acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) { @@ -353,7 +350,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "Device\tS-state\t Status Sysfs node\n"); - spin_lock(&acpi_device_lock); + mutex_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); @@ -361,7 +358,6 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) if (!dev->wakeup.flags.valid) continue; - spin_unlock(&acpi_device_lock); ldev = acpi_get_physical_device(dev->handle); seq_printf(seq, "%s\t S%d\t%c%-8s ", @@ -376,9 +372,8 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "\n"); put_device(ldev); - spin_lock(&acpi_device_lock); } - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); return 0; } @@ -409,7 +404,7 @@ acpi_system_write_wakeup_device(struct file *file, strbuf[len] = '\0'; sscanf(strbuf, "%s", str); - spin_lock(&acpi_device_lock); + mutex_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); @@ -446,7 +441,7 @@ acpi_system_write_wakeup_device(struct file *file, } } } - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); return count; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4e6e758bd397..6fe121434ffb 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -145,6 +145,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr, struct acpi_processor_power *pwr = &pr->power; u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2; + if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) + return; + /* * Check, if one of the previous states already marked the lapic * unstable diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 20c23c049207..8ff510b91d88 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -24,7 +24,7 @@ extern struct acpi_device *acpi_root; static LIST_HEAD(acpi_device_list); static LIST_HEAD(acpi_bus_id_list); -DEFINE_SPINLOCK(acpi_device_lock); +DEFINE_MUTEX(acpi_device_lock); LIST_HEAD(acpi_wakeup_device_list); struct acpi_device_bus_id{ @@ -491,7 +491,6 @@ static int acpi_device_register(struct acpi_device *device, */ INIT_LIST_HEAD(&device->children); INIT_LIST_HEAD(&device->node); - INIT_LIST_HEAD(&device->g_list); INIT_LIST_HEAD(&device->wakeup_list); new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL); @@ -500,7 +499,7 @@ static int acpi_device_register(struct acpi_device *device, return -ENOMEM; } - spin_lock(&acpi_device_lock); + mutex_lock(&acpi_device_lock); /* * Find suitable bus_id and instance number in acpi_bus_id_list * If failed, create one and link it into acpi_bus_id_list @@ -521,14 +520,12 @@ static int acpi_device_register(struct acpi_device *device, } dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no); - if (device->parent) { + if (device->parent) list_add_tail(&device->node, &device->parent->children); - list_add_tail(&device->g_list, &device->parent->g_list); - } else - list_add_tail(&device->g_list, &acpi_device_list); + if (device->wakeup.flags.valid) list_add_tail(&device->wakeup_list, &acpi_wakeup_device_list); - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); if (device->parent) device->dev.parent = &parent->dev; @@ -549,28 +546,22 @@ static int acpi_device_register(struct acpi_device *device, device->removal_type = ACPI_BUS_REMOVAL_NORMAL; return 0; end: - spin_lock(&acpi_device_lock); - if (device->parent) { + mutex_lock(&acpi_device_lock); + if (device->parent) list_del(&device->node); - list_del(&device->g_list); - } else - list_del(&device->g_list); list_del(&device->wakeup_list); - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); return result; } static void acpi_device_unregister(struct acpi_device *device, int type) { - spin_lock(&acpi_device_lock); - if (device->parent) { + mutex_lock(&acpi_device_lock); + if (device->parent) list_del(&device->node); - list_del(&device->g_list); - } else - list_del(&device->g_list); list_del(&device->wakeup_list); - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); acpi_detach_data(device->handle, acpi_bus_data_handler); diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index cfaf8f5b0a14..8a8f3b3382a6 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -5,3 +5,6 @@ extern int acpi_suspend (u32 state); extern void acpi_enable_wakeup_device_prep(u8 sleep_state); extern void acpi_enable_wakeup_device(u8 sleep_state); extern void acpi_disable_wakeup_device(u8 sleep_state); + +extern struct list_head acpi_wakeup_device_list; +extern struct mutex acpi_device_lock; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 0914eaa9a097..9cd15e8c8932 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -194,6 +194,7 @@ struct acpi_thermal { struct acpi_handle_list devices; struct thermal_zone_device *thermal_zone; int tz_enabled; + int kelvin_offset; struct mutex lock; }; @@ -583,7 +584,7 @@ static void acpi_thermal_check(void *data) } /* sys I/F for generic thermal sysfs support */ -#define KELVIN_TO_MILLICELSIUS(t) (t * 100 - 273200) +#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100) static int thermal_get_temp(struct thermal_zone_device *thermal, unsigned long *temp) @@ -598,7 +599,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, if (result) return result; - *temp = KELVIN_TO_MILLICELSIUS(tz->temperature); + *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset); return 0; } @@ -704,7 +705,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.critical.flags.valid) { if (!trip) { *temp = KELVIN_TO_MILLICELSIUS( - tz->trips.critical.temperature); + tz->trips.critical.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -713,7 +715,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.hot.flags.valid) { if (!trip) { *temp = KELVIN_TO_MILLICELSIUS( - tz->trips.hot.temperature); + tz->trips.hot.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -722,7 +725,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, if (tz->trips.passive.flags.valid) { if (!trip) { *temp = KELVIN_TO_MILLICELSIUS( - tz->trips.passive.temperature); + tz->trips.passive.temperature, + tz->kelvin_offset); return 0; } trip--; @@ -732,7 +736,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, tz->trips.active[i].flags.valid; i++) { if (!trip) { *temp = KELVIN_TO_MILLICELSIUS( - tz->trips.active[i].temperature); + tz->trips.active[i].temperature, + tz->kelvin_offset); return 0; } trip--; @@ -747,7 +752,8 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal, if (tz->trips.critical.flags.valid) { *temperature = KELVIN_TO_MILLICELSIUS( - tz->trips.critical.temperature); + tz->trips.critical.temperature, + tz->kelvin_offset); return 0; } else return -EINVAL; @@ -1331,6 +1337,25 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz) return 0; } +/* + * The exact offset between Kelvin and degree Celsius is 273.15. However ACPI + * handles temperature values with a single decimal place. As a consequence, + * some implementations use an offset of 273.1 and others use an offset of + * 273.2. Try to find out which one is being used, to present the most + * accurate and visually appealing number. + * + * The heuristic below should work for all ACPI thermal zones which have a + * critical trip point with a value being a multiple of 0.5 degree Celsius. + */ +static void acpi_thermal_guess_offset(struct acpi_thermal *tz) +{ + if (tz->trips.critical.flags.valid && + (tz->trips.critical.temperature % 5) == 1) + tz->kelvin_offset = 2731; + else + tz->kelvin_offset = 2732; +} + static int acpi_thermal_add(struct acpi_device *device) { int result = 0; @@ -1356,6 +1381,8 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; + acpi_thermal_guess_offset(tz); + result = acpi_thermal_register_thermal_zone(tz); if (result) goto free_memory; diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c index 5aee8c26cc9f..88725dcdf8bc 100644 --- a/drivers/acpi/wakeup.c +++ b/drivers/acpi/wakeup.c @@ -12,12 +12,14 @@ #include "internal.h" #include "sleep.h" +/* + * We didn't lock acpi_device_lock in the file, because it invokes oops in + * suspend/resume and isn't really required as this is called in S-state. At + * that time, there is no device hotplug + **/ #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("wakeup_devices") -extern struct list_head acpi_wakeup_device_list; -extern spinlock_t acpi_device_lock; - /** * acpi_enable_wakeup_device_prep - prepare wakeup devices * @sleep_state: ACPI state @@ -29,7 +31,6 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state) { struct list_head *node, *next; - spin_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, @@ -40,11 +41,8 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state) (sleep_state > (u32) dev->wakeup.sleep_state)) continue; - spin_unlock(&acpi_device_lock); acpi_enable_wakeup_device_power(dev, sleep_state); - spin_lock(&acpi_device_lock); } - spin_unlock(&acpi_device_lock); } /** @@ -60,7 +58,6 @@ void acpi_enable_wakeup_device(u8 sleep_state) * Caution: this routine must be invoked when interrupt is disabled * Refer ACPI2.0: P212 */ - spin_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); @@ -74,22 +71,17 @@ void acpi_enable_wakeup_device(u8 sleep_state) if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { - spin_unlock(&acpi_device_lock); /* set_gpe_type will disable GPE, leave it like that */ acpi_set_gpe_type(dev->wakeup.gpe_device, dev->wakeup.gpe_number, ACPI_GPE_TYPE_RUNTIME); - spin_lock(&acpi_device_lock); } continue; } - spin_unlock(&acpi_device_lock); if (!dev->wakeup.flags.run_wake) acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number); - spin_lock(&acpi_device_lock); } - spin_unlock(&acpi_device_lock); } /** @@ -101,7 +93,6 @@ void acpi_disable_wakeup_device(u8 sleep_state) { struct list_head *node, *next; - spin_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, wakeup_list); @@ -112,19 +103,16 @@ void acpi_disable_wakeup_device(u8 sleep_state) if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared) || sleep_state > (u32) dev->wakeup.sleep_state) { if (dev->wakeup.flags.run_wake) { - spin_unlock(&acpi_device_lock); acpi_set_gpe_type(dev->wakeup.gpe_device, dev->wakeup.gpe_number, ACPI_GPE_TYPE_WAKE_RUN); /* Re-enable it, since set_gpe_type will disable it */ acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number); - spin_lock(&acpi_device_lock); } continue; } - spin_unlock(&acpi_device_lock); acpi_disable_wakeup_device_power(dev); /* Never disable run-wake GPE */ if (!dev->wakeup.flags.run_wake) { @@ -133,16 +121,14 @@ void acpi_disable_wakeup_device(u8 sleep_state) acpi_clear_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number, ACPI_NOT_ISR); } - spin_lock(&acpi_device_lock); } - spin_unlock(&acpi_device_lock); } int __init acpi_wakeup_device_init(void) { struct list_head *node, *next; - spin_lock(&acpi_device_lock); + mutex_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct acpi_device, @@ -150,15 +136,13 @@ int __init acpi_wakeup_device_init(void) /* In case user doesn't load button driver */ if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled) continue; - spin_unlock(&acpi_device_lock); acpi_set_gpe_type(dev->wakeup.gpe_device, dev->wakeup.gpe_number, ACPI_GPE_TYPE_WAKE_RUN); acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number); dev->wakeup.state.enabled = 1; - spin_lock(&acpi_device_lock); } - spin_unlock(&acpi_device_lock); + mutex_unlock(&acpi_device_lock); return 0; } diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 1a11de0d3e6d..fe7cf0188acc 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -273,7 +273,7 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc, u32 *sinf) union acpi_object *hkey = NULL; int i; - status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0, + status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, NULL, &buffer); if (ACPI_FAILURE(status)) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3523b895eb4b..5a97bcfe03e5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_unlock; ret = nfs_updatepage(filp, page, 0, pagelen); - if (ret == 0) - ret = pagelen; out_unlock: unlock_page(page); if (ret) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a2228511d4be..c34b11022908 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -270,7 +270,6 @@ struct acpi_device { struct list_head children; struct list_head node; struct list_head wakeup_list; - struct list_head g_list; struct acpi_device_status status; struct acpi_device_flags flags; struct acpi_device_pnp pnp; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index cebfdcd3dbdd..37bcb50a4d7c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -76,7 +76,8 @@ struct ftrace_branch_data { * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code * to disable branch tracing on a per file basis. */ -#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +#if defined(CONFIG_TRACE_BRANCH_PROFILING) \ + && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__) void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf74..da5405dce347 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -356,6 +356,9 @@ struct ftrace_graph_ret { #ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* for init task */ +#define INIT_FTRACE_GRAPH .ret_stack = NULL + /* * Stack of return addresses for functions * of a thread. @@ -430,10 +433,11 @@ static inline void unpause_graph_tracing(void) { atomic_dec(¤t->tracing_graph_pause); } -#else +#else /* !CONFIG_FUNCTION_GRAPH_TRACER */ #define __notrace_funcgraph #define __irq_entry +#define INIT_FTRACE_GRAPH static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } @@ -445,7 +449,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) static inline void pause_graph_tracing(void) { } static inline void unpause_graph_tracing(void) { } -#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef CONFIG_TRACING #include <linux/sched.h> diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index faa1cf848bcd..45257475623c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -116,7 +116,7 @@ # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET #endif -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) extern void synchronize_irq(unsigned int irq); #else # define synchronize_irq(irq) barrier() diff --git a/include/linux/init_task.h b/include/linux/init_task.h index af1de95e711e..dcfb93337e9a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -5,6 +5,7 @@ #include <linux/irqflags.h> #include <linux/utsname.h> #include <linux/lockdep.h> +#include <linux/ftrace.h> #include <linux/ipc.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> @@ -185,6 +186,7 @@ extern struct cred init_cred; INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + INIT_FTRACE_GRAPH \ } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8a9613d0c674..91bb76f44f14 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -59,6 +59,18 @@ #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 +/* + * Bits used by threaded handlers: + * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run + * IRQTF_DIED - handler thread died + * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed + */ +enum { + IRQTF_RUNTHREAD, + IRQTF_DIED, + IRQTF_WARNED, +}; + typedef irqreturn_t (*irq_handler_t)(int, void *); /** @@ -71,6 +83,9 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * @next: pointer to the next irqaction for shared interrupts * @irq: interrupt number * @dir: pointer to the proc/irq/NN/name entry + * @thread_fn: interupt handler function for threaded interrupts + * @thread: thread pointer for threaded interrupts + * @thread_flags: flags related to @thread */ struct irqaction { irq_handler_t handler; @@ -81,18 +96,68 @@ struct irqaction { struct irqaction *next; int irq; struct proc_dir_entry *dir; + irq_handler_t thread_fn; + struct task_struct *thread; + unsigned long thread_flags; }; extern irqreturn_t no_action(int cpl, void *dev_id); -extern int __must_check request_irq(unsigned int, irq_handler_t handler, - unsigned long, const char *, void *); + +#ifdef CONFIG_GENERIC_HARDIRQS +extern int __must_check +request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long flags, const char *name, void *dev); + +static inline int __must_check +request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, + const char *name, void *dev) +{ + return request_threaded_irq(irq, handler, NULL, flags, name, dev); +} + +extern void exit_irq_thread(void); +#else + +extern int __must_check +request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, + const char *name, void *dev); + +/* + * Special function to avoid ifdeffery in kernel/irq/devres.c which + * gets magically built by GENERIC_HARDIRQS=n architectures (sparc, + * m68k). I really love these $@%#!* obvious Makefile references: + * ../../../kernel/irq/devres.o + */ +static inline int __must_check +request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long flags, const char *name, void *dev) +{ + return request_irq(irq, handler, flags, name, dev); +} + +static inline void exit_irq_thread(void) { } +#endif + extern void free_irq(unsigned int, void *); struct device; -extern int __must_check devm_request_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id); +extern int __must_check +devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id); + +static inline int __must_check +devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +{ + return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, + devname, dev_id); +} + extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); /* diff --git a/include/linux/irq.h b/include/linux/irq.h index 974890b3c52f..ca507c9426b0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -22,6 +22,7 @@ #include <linux/irqnr.h> #include <linux/errno.h> #include <linux/topology.h> +#include <linux/wait.h> #include <asm/irq.h> #include <asm/ptrace.h> @@ -158,6 +159,8 @@ struct irq_2_iommu; * @affinity: IRQ affinity on SMP * @cpu: cpu index useful for balancing * @pending_mask: pending rebalanced interrupts + * @threads_active: number of irqaction threads currently running + * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -189,6 +192,8 @@ struct irq_desc { cpumask_var_t pending_mask; #endif #endif + atomic_t threads_active; + wait_queue_head_t wait_for_threads; #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index c5584ca5b8c9..819acaaac3f5 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -5,10 +5,12 @@ * enum irqreturn * @IRQ_NONE interrupt was not from this device * @IRQ_HANDLED interrupt was handled by this device + * @IRQ_WAKE_THREAD handler requests to wake the handler thread */ enum irqreturn { IRQ_NONE, IRQ_HANDLED, + IRQ_WAKE_THREAD, }; typedef enum irqreturn irqreturn_t; diff --git a/include/linux/sched.h b/include/linux/sched.h index b94f3541f67b..98e1fe51601d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern unsigned long sysctl_hung_task_check_count; -extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_warnings; extern int softlockup_thresh; #else static inline void softlockup_tick(void) { } -static inline void spawn_softlockup_task(void) -{ -} static inline void touch_softlockup_watchdog(void) { } @@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void) } #endif +#ifdef CONFIG_DETECT_HUNG_TASK +extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos); +#endif /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) @@ -1255,9 +1258,8 @@ struct task_struct { /* ipc stuff */ struct sysv_sem sysvsem; #endif -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ - unsigned long last_switch_timestamp; unsigned long last_switch_count; #endif /* CPU-specific state of this task */ @@ -1294,6 +1296,11 @@ struct task_struct { /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ spinlock_t alloc_lock; +#ifdef CONFIG_GENERIC_HARDIRQS + /* IRQ handler threads */ + struct irqaction *irqaction; +#endif + /* Protection of the PI data structures: */ spinlock_t pi_lock; diff --git a/kernel/Makefile b/kernel/Makefile index bab1dffe37e9..42423665660a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o +obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o diff --git a/kernel/exit.c b/kernel/exit.c index 32cbf2607cb0..abf9cf3b95c6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code) schedule(); } + exit_irq_thread(); + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against diff --git a/kernel/fork.c b/kernel/fork.c index 660c2b8765bc..989c7c202b3d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; +#ifdef CONFIG_DETECT_HUNG_TASK + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; +#endif tsk->mm = NULL; tsk->active_mm = NULL; @@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->default_timer_slack_ns = current->timer_slack_ns; -#ifdef CONFIG_DETECT_SOFTLOCKUP - p->last_switch_count = 0; - p->last_switch_timestamp = 0; -#endif - task_io_accounting_init(&p->ioac); acct_clear_integrals(p); diff --git a/kernel/hung_task.c b/kernel/hung_task.c new file mode 100644 index 000000000000..022a4927b785 --- /dev/null +++ b/kernel/hung_task.c @@ -0,0 +1,217 @@ +/* + * Detect Hung Task + * + * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state + * + */ + +#include <linux/mm.h> +#include <linux/cpu.h> +#include <linux/nmi.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include <linux/lockdep.h> +#include <linux/module.h> +#include <linux/sysctl.h> + +/* + * The number of tasks checked: + */ +unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; + +/* + * Limit number of tasks checked in a batch. + * + * This value controls the preemptibility of khungtaskd since preemption + * is disabled during the critical section. It also controls the size of + * the RCU grace period. So it needs to be upper-bound. + */ +#define HUNG_TASK_BATCHING 1024 + +/* + * Zero means infinite timeout - no checking done: + */ +unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; + +unsigned long __read_mostly sysctl_hung_task_warnings = 10; + +static int __read_mostly did_panic; + +static struct task_struct *watchdog_task; + +/* + * Should we panic (and reboot, if panic_timeout= is set) when a + * hung task is detected: + */ +unsigned int __read_mostly sysctl_hung_task_panic = + CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; + +static int __init hung_task_panic_setup(char *str) +{ + sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("hung_task_panic=", hung_task_panic_setup); + +static int +hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) +{ + did_panic = 1; + + return NOTIFY_DONE; +} + +static struct notifier_block panic_block = { + .notifier_call = hung_task_panic, +}; + +static void check_hung_task(struct task_struct *t, unsigned long timeout) +{ + unsigned long switch_count = t->nvcsw + t->nivcsw; + + /* + * Ensure the task is not frozen. + * Also, when a freshly created task is scheduled once, changes + * its state to TASK_UNINTERRUPTIBLE without having ever been + * switched out once, it musn't be checked. + */ + if (unlikely(t->flags & PF_FROZEN || !switch_count)) + return; + + if (switch_count != t->last_switch_count) { + t->last_switch_count = switch_count; + return; + } + if (!sysctl_hung_task_warnings) + return; + sysctl_hung_task_warnings--; + + /* + * Ok, the task did not get scheduled for more than 2 minutes, + * complain: + */ + printk(KERN_ERR "INFO: task %s:%d blocked for more than " + "%ld seconds.\n", t->comm, t->pid, timeout); + printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" + " disables this message.\n"); + sched_show_task(t); + __debug_show_held_locks(t); + + touch_nmi_watchdog(); + + if (sysctl_hung_task_panic) + panic("hung_task: blocked tasks"); +} + +/* + * To avoid extending the RCU grace period for an unbounded amount of time, + * periodically exit the critical section and enter a new one. + * + * For preemptible RCU it is sufficient to call rcu_read_unlock in order + * exit the grace period. For classic RCU, a reschedule is required. + */ +static void rcu_lock_break(struct task_struct *g, struct task_struct *t) +{ + get_task_struct(g); + get_task_struct(t); + rcu_read_unlock(); + cond_resched(); + rcu_read_lock(); + put_task_struct(t); + put_task_struct(g); +} + +/* + * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for + * a really long time (120 seconds). If that happens, print out + * a warning. + */ +static void check_hung_uninterruptible_tasks(unsigned long timeout) +{ + int max_count = sysctl_hung_task_check_count; + int batch_count = HUNG_TASK_BATCHING; + struct task_struct *g, *t; + + /* + * If the system crashed already then all bets are off, + * do not report extra hung tasks: + */ + if (test_taint(TAINT_DIE) || did_panic) + return; + + rcu_read_lock(); + do_each_thread(g, t) { + if (!--max_count) + goto unlock; + if (!--batch_count) { + batch_count = HUNG_TASK_BATCHING; + rcu_lock_break(g, t); + /* Exit if t or g was unhashed during refresh. */ + if (t->state == TASK_DEAD || g->state == TASK_DEAD) + goto unlock; + } + /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ + if (t->state == TASK_UNINTERRUPTIBLE) + check_hung_task(t, timeout); + } while_each_thread(g, t); + unlock: + rcu_read_unlock(); +} + +static unsigned long timeout_jiffies(unsigned long timeout) +{ + /* timeout of 0 will disable the watchdog */ + return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT; +} + +/* + * Process updating of timeout sysctl + */ +int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + + if (ret || !write) + goto out; + + wake_up_process(watchdog_task); + + out: + return ret; +} + +/* + * kthread which checks for tasks stuck in D state + */ +static int watchdog(void *dummy) +{ + set_user_nice(current, 0); + + for ( ; ; ) { + unsigned long timeout = sysctl_hung_task_timeout_secs; + + while (schedule_timeout_interruptible(timeout_jiffies(timeout))) + timeout = sysctl_hung_task_timeout_secs; + + check_hung_uninterruptible_tasks(timeout); + } + + return 0; +} + +static int __init hung_task_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); + + return 0; +} + +module_init(hung_task_init); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 38a25b8d8bff..d06df9c41cba 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data) } /** - * devm_request_irq - allocate an interrupt line for a managed device + * devm_request_threaded_irq - allocate an interrupt line for a managed device * @dev: device to request interrupt for * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs + * @thread_fn: function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function @@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data) * If an IRQ allocated with this function needs to be freed * separately, dev_free_irq() must be used. */ -int devm_request_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) { struct irq_devres *dr; int rc; @@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq, if (!dr) return -ENOMEM; - rc = request_irq(irq, handler, irqflags, devname, dev_id); + rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, + dev_id); if (rc) { devres_free(dr); return rc; @@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, return 0; } -EXPORT_SYMBOL(devm_request_irq); +EXPORT_SYMBOL(devm_request_threaded_irq); /** * devm_free_irq - free an interrupt diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 343acecae629..d82142be8dd2 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id) return IRQ_NONE; } +static void warn_no_thread(unsigned int irq, struct irqaction *action) +{ + if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) + return; + + printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " + "but no thread function available.", irq, action->name); +} + DEFINE_TRACE(irq_handler_entry); DEFINE_TRACE(irq_handler_exit); @@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) trace_irq_handler_entry(irq, action); ret = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, ret); - if (ret == IRQ_HANDLED) + + switch (ret) { + case IRQ_WAKE_THREAD: + /* + * Set result to handled so the spurious check + * does not trigger. + */ + ret = IRQ_HANDLED; + + /* + * Catch drivers which return WAKE_THREAD but + * did not set up a thread function + */ + if (unlikely(!action->thread_fn)) { + warn_no_thread(irq, action); + break; + } + + /* + * Wake up the handler thread for this + * action. In case the thread crashed and was + * killed we just pretend that we handled the + * interrupt. The hardirq handler above has + * disabled the device interrupt, so no irq + * storm is lurking. + */ + if (likely(!test_bit(IRQTF_DIED, + &action->thread_flags))) { + set_bit(IRQTF_RUNTHREAD, &action->thread_flags); + wake_up_process(action->thread); + } + + /* Fall through to add to randomness */ + case IRQ_HANDLED: status |= action->flags; + break; + + default: + break; + } + retval |= ret; action = action->next; } while (action); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1516ab77355c..7e2e7dd4cd2f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -8,16 +8,15 @@ */ #include <linux/irq.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/sched.h> #include "internals.h" -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) -cpumask_var_t irq_default_affinity; - /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for @@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq) /* Oops, that failed? */ } while (status & IRQ_INPROGRESS); + + /* + * We made sure that no hardirq handler is running. Now verify + * that no threaded handlers are active. + */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); } EXPORT_SYMBOL(synchronize_irq); +#ifdef CONFIG_SMP +cpumask_var_t irq_default_affinity; + /** * irq_can_set_affinity - Check if the affinity of a given irq can be set * @irq: Interrupt to check @@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq) return 1; } +static void +irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) +{ + struct irqaction *action = desc->action; + + while (action) { + if (action->thread) + set_cpus_allowed_ptr(action->thread, cpumask); + action = action->next; + } +} + /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity @@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif + irq_set_thread_affinity(desc, cpumask); desc->status |= IRQ_AFFINITY_SET; spin_unlock_irqrestore(&desc->lock, flags); return 0; @@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); ret = setup_affinity(irq, desc); + if (!ret) + irq_set_thread_affinity(desc, desc->affinity); spin_unlock_irqrestore(&desc->lock, flags); return ret; @@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, return ret; } +static int irq_wait_for_interrupt(struct irqaction *action) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + + if (test_and_clear_bit(IRQTF_RUNTHREAD, + &action->thread_flags)) { + __set_current_state(TASK_RUNNING); + return 0; + } + schedule(); + } + return -1; +} + +/* + * Interrupt handler thread + */ +static int irq_thread(void *data) +{ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; + struct irqaction *action = data; + struct irq_desc *desc = irq_to_desc(action->irq); + int wake; + + sched_setscheduler(current, SCHED_FIFO, ¶m); + current->irqaction = action; + + while (!irq_wait_for_interrupt(action)) { + + atomic_inc(&desc->threads_active); + + spin_lock_irq(&desc->lock); + if (unlikely(desc->status & IRQ_DISABLED)) { + /* + * CHECKME: We might need a dedicated + * IRQ_THREAD_PENDING flag here, which + * retriggers the thread in check_irq_resend() + * but AFAICT IRQ_PENDING should be fine as it + * retriggers the interrupt itself --- tglx + */ + desc->status |= IRQ_PENDING; + spin_unlock_irq(&desc->lock); + } else { + spin_unlock_irq(&desc->lock); + + action->thread_fn(action->irq, action->dev_id); + } + + wake = atomic_dec_and_test(&desc->threads_active); + + if (wake && waitqueue_active(&desc->wait_for_threads)) + wake_up(&desc->wait_for_threads); + } + + /* + * Clear irqaction. Otherwise exit_irq_thread() would make + * fuzz about an active irq thread going into nirvana. + */ + current->irqaction = NULL; + return 0; +} + +/* + * Called from do_exit() + */ +void exit_irq_thread(void) +{ + struct task_struct *tsk = current; + + if (!tsk->irqaction) + return; + + printk(KERN_ERR + "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", + tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); + + /* + * Set the THREAD DIED flag to prevent further wakeups of the + * soon to be gone threaded handler. + */ + set_bit(IRQTF_DIED, &tsk->irqaction->flags); +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } /* + * Threaded handler ? + */ + if (new->thread_fn) { + struct task_struct *t; + + t = kthread_create(irq_thread, new, "irq/%d-%s", irq, + new->name); + if (IS_ERR(t)) + return PTR_ERR(t); + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already freed task_struct. + */ + get_task_struct(t); + new->thread = t; + wake_up_process(t); + } + + /* * The following block of code has to be executed atomically */ spin_lock_irqsave(&desc->lock, flags); @@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!shared) { irq_chip_set_defaults(desc->chip); + init_waitqueue_head(&desc->wait_for_threads); + /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, irq, new->flags & IRQF_TRIGGER_MASK); - if (ret) { - spin_unlock_irqrestore(&desc->lock, flags); - return ret; - } + if (ret) + goto out_thread; } else compat_irq_chip_set_default_handler(desc); #if defined(CONFIG_IRQ_PER_CPU) @@ -549,8 +676,19 @@ mismatch: dump_stack(); } #endif + ret = -EBUSY; + +out_thread: spin_unlock_irqrestore(&desc->lock, flags); - return -EBUSY; + if (new->thread) { + struct task_struct *t = new->thread; + + new->thread = NULL; + if (likely(!test_bit(IRQTF_DIED, &new->thread_flags))) + kthread_stop(t); + put_task_struct(t); + } + return ret; } /** @@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action, **action_ptr; + struct task_struct *irqthread; unsigned long flags; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); @@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) else desc->chip->disable(irq); } + + irqthread = action->thread; + action->thread = NULL; + spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Make sure it's not being used on another CPU: */ synchronize_irq(irq); + if (irqthread) { + if (!test_bit(IRQTF_DIED, &action->thread_flags)) + kthread_stop(irqthread); + put_task_struct(irqthread); + } + #ifdef CONFIG_DEBUG_SHIRQ /* * It's a shared IRQ -- the driver ought to be prepared for an IRQ @@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id) EXPORT_SYMBOL(free_irq); /** - * request_irq - allocate an interrupt line + * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function @@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq); * raises, you must take care both to initialise your hardware * and to set up the interrupt handler in the right order. * + * If you want to set up a threaded irq handler for your device + * then you need to supply @handler and @thread_fn. @handler ist + * still called in hard interrupt context and has to check + * whether the interrupt originates from the device. If yes it + * needs to disable the interrupt on the device and return + * IRQ_THREAD_WAKE which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support + * shared interrupts. + * * Dev_id must be globally unique. Normally the address of the * device data structure is used as the cookie. Since the handler * receives this value it makes sense to use it. @@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq); * IRQF_TRIGGER_* Specify active edge(s) or level * */ -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) +int request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, unsigned long irqflags, + const char *devname, void *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler, return -ENOMEM; action->handler = handler; + action->thread_fn = thread_fn; action->flags = irqflags; action->name = devname; action->dev_id = dev_id; @@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler, #endif return retval; } -EXPORT_SYMBOL(request_irq); +EXPORT_SYMBOL(request_threaded_irq); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 85d5a2455103..88796c330838 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -166,97 +166,11 @@ void softlockup_tick(void) } /* - * Have a reasonable limit on the number of tasks checked: - */ -unsigned long __read_mostly sysctl_hung_task_check_count = 1024; - -/* - * Zero means infinite timeout - no checking done: - */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; - -unsigned long __read_mostly sysctl_hung_task_warnings = 10; - -/* - * Only do the hung-tasks check on one CPU: - */ -static int check_cpu __read_mostly = -1; - -static void check_hung_task(struct task_struct *t, unsigned long now) -{ - unsigned long switch_count = t->nvcsw + t->nivcsw; - - if (t->flags & PF_FROZEN) - return; - - if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { - t->last_switch_count = switch_count; - t->last_switch_timestamp = now; - return; - } - if ((long)(now - t->last_switch_timestamp) < - sysctl_hung_task_timeout_secs) - return; - if (!sysctl_hung_task_warnings) - return; - sysctl_hung_task_warnings--; - - /* - * Ok, the task did not get scheduled for more than 2 minutes, - * complain: - */ - printk(KERN_ERR "INFO: task %s:%d blocked for more than " - "%ld seconds.\n", t->comm, t->pid, - sysctl_hung_task_timeout_secs); - printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" - " disables this message.\n"); - sched_show_task(t); - __debug_show_held_locks(t); - - t->last_switch_timestamp = now; - touch_nmi_watchdog(); - - if (softlockup_panic) - panic("softlockup: blocked tasks"); -} - -/* - * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for - * a really long time (120 seconds). If that happens, print out - * a warning. - */ -static void check_hung_uninterruptible_tasks(int this_cpu) -{ - int max_count = sysctl_hung_task_check_count; - unsigned long now = get_timestamp(this_cpu); - struct task_struct *g, *t; - - /* - * If the system crashed already then all bets are off, - * do not report extra hung tasks: - */ - if (test_taint(TAINT_DIE) || did_panic) - return; - - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (!--max_count) - goto unlock; - /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ - if (t->state == TASK_UNINTERRUPTIBLE) - check_hung_task(t, now); - } while_each_thread(g, t); - unlock: - read_unlock(&tasklist_lock); -} - -/* * The watchdog thread - runs every second and touches the timestamp. */ static int watchdog(void *__bind_cpu) { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - int this_cpu = (long)__bind_cpu; sched_setscheduler(current, SCHED_FIFO, ¶m); @@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) if (kthread_should_stop()) break; - if (this_cpu == check_cpu) { - if (sysctl_hung_task_timeout_secs) - check_hung_uninterruptible_tasks(this_cpu); - } - set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); @@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - check_cpu = cpumask_any(cpu_online_mask); wake_up_process(per_cpu(watchdog_task, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - if (hotcpu == check_cpu) { - /* Pick any other online cpu. */ - check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); - } - break; - case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: if (!per_cpu(watchdog_task, hotcpu)) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 72eb1a41dcab..4286b62b34a0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = { .extra1 = &neg_one, .extra2 = &sixty, }, +#endif +#ifdef CONFIG_DETECT_HUNG_TASK + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_panic", + .data = &sysctl_hung_task_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, { .ctl_name = CTL_UNNUMBERED, .procname = "hung_task_check_count", @@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_hung_task_timeout_secs, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &proc_dohung_task_timeout_secs, .strategy = &sysctl_intvec, }, { diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c4..b32ff446c3fb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG) + if (count >= BLK_TN_MAX_MSG) return -EINVAL; - msg = kmalloc(count, GFP_KERNEL); + msg = kmalloc(count + 1, GFP_KERNEL); if (msg == NULL) return -ENOMEM; @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return -EFAULT; } + msg[count] = '\0'; bt = filp->private_data; __trace_note_message(bt, "%s", msg); kfree(msg); @@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - sizeof(rq->cmd), rq->cmd); + rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c563..9d28476a9851 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include <linux/percpu.h> #include <linux/splice.h> #include <linux/kdebug.h> +#include <linux/string.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> @@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); -long -ns2usecs(cycle_t nsec) +unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; do_div(nsec, 1000); @@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter) return; cpumask_set_cpu(iter->cpu, iter->started); - trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); + + /* Don't print started cpu buffer for the first entry of the trace */ + if (iter->idx > 1) + trace_seq_printf(s, "##### CPU %u buffer started ####\n", + iter->cpu); } static enum print_line_t print_trace_fmt(struct trace_iterator *iter) @@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file) if (current_trace) *iter->trace = *current_trace; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) + goto fail; + + cpumask_clear(iter->started); + if (current_trace && current_trace->print_max) iter->tr = &max_tr; else @@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } + free_cpumask_var(iter->started); fail: mutex_unlock(&trace_types_lock); kfree(iter->trace); @@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file) seq_release(inode, file); mutex_destroy(&iter->mutex); + free_cpumask_var(iter->started); kfree(iter->trace); kfree(iter); return 0; @@ -2358,9 +2369,9 @@ static const char readme_msg[] = "# mkdir /debug\n" "# mount -t debugfs nodev /debug\n\n" "# cat /debug/tracing/available_tracers\n" - "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" + "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" "# cat /debug/tracing/current_tracer\n" - "none\n" + "nop\n" "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cbc168f1e43d..e685ac2b2ba1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern long ns2usecs(cycle_t nsec); +extern unsigned long long ns2usecs(cycle_t nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4d9952d3df50..07a22c33ebf3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -40,7 +40,7 @@ #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ "offset:%u;\tsize:0;\n", \ (unsigned int)offsetof(typeof(field), item)); \ if (!ret) \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b247..64b54a59c55b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx, diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index de35f200abd3..9117cea6f1ae 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) pc = preempt_count(); tracing_record_cmdline(current); + if (sched_stopped) + return; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c5ad6b2ec84..5bc00e8f153e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); /* @@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) data = wakeup_trace->data[wakeup_cpu]; data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); + + /* + * We must be careful in using CALLER_ADDR2. But since wake_up + * is not called by an assembly function (where as schedule is) + * it should be safe to use it here. + */ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9638d99644af..c6e854f215fa 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC +config DETECT_HUNG_TASK + bool "Detect Hung Tasks" + depends on DEBUG_KERNEL + default DETECT_SOFTLOCKUP + help + Say Y here to enable the kernel to detect "hung tasks", + which are bugs that cause the task to be stuck in + uninterruptible "D" state indefinitiley. + + When a hung task is detected, the kernel will print the + current stack trace (which you should report), but the + task will stay in uninterruptible state. If lockdep is + enabled then all held locks will also be reported. This + feature has negligible overhead. + +config BOOTPARAM_HUNG_TASK_PANIC + bool "Panic (Reboot) On Hung Tasks" + depends on DETECT_HUNG_TASK + help + Say Y here to enable the kernel to panic on "hung tasks", + which are bugs that cause the kernel to leave a task stuck + in uninterruptible "D" state. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + hung task has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a hung tasks must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_HUNG_TASK_PANIC_VALUE + int + depends on DETECT_HUNG_TASK + range 0 1 + default 0 if !BOOTPARAM_HUNG_TASK_PANIC + default 1 if BOOTPARAM_HUNG_TASK_PANIC + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS diff --git a/scripts/trace/power.pl b/scripts/tracing/power.pl index 4f729b3501e0..4f729b3501e0 100644 --- a/scripts/trace/power.pl +++ b/scripts/tracing/power.pl |