diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 123 | ||||
-rw-r--r-- | kernel/exit.c | 16 | ||||
-rw-r--r-- | kernel/fork.c | 22 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 828 | ||||
-rw-r--r-- | kernel/kprobes.c | 6 | ||||
-rw-r--r-- | kernel/params.c | 3 | ||||
-rw-r--r-- | kernel/power/power.h | 24 | ||||
-rw-r--r-- | kernel/power/process.c | 7 | ||||
-rw-r--r-- | kernel/power/user.c | 6 | ||||
-rw-r--r-- | kernel/relay.c | 10 | ||||
-rw-r--r-- | kernel/sched/core.c | 19 | ||||
-rw-r--r-- | kernel/sched/fair.c | 34 | ||||
-rw-r--r-- | kernel/sched/rt.c | 5 | ||||
-rw-r--r-- | kernel/watchdog.c | 2 |
14 files changed, 921 insertions, 184 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 32b48c889711..1b5c081d8b9f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2300,7 +2300,10 @@ do { \ return div64_u64(dividend, divisor); } -static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) +static DEFINE_PER_CPU(int, perf_throttled_count); +static DEFINE_PER_CPU(u64, perf_throttled_seq); + +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable) { struct hw_perf_event *hwc = &event->hw; s64 period, sample_period; @@ -2319,22 +2322,40 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - event->pmu->stop(event, PERF_EF_UPDATE); + if (disable) + event->pmu->stop(event, PERF_EF_UPDATE); + local64_set(&hwc->period_left, 0); - event->pmu->start(event, PERF_EF_RELOAD); + + if (disable) + event->pmu->start(event, PERF_EF_RELOAD); } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) +/* + * combine freq adjustment with unthrottling to avoid two passes over the + * events. At the same time, make sure, having freq events does not change + * the rate of unthrottling as that would introduce bias. + */ +static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, + int needs_unthr) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, now; + u64 now, period = TICK_NSEC; s64 delta; - if (!ctx->nr_freq) + /* + * only need to iterate over all events iff: + * - context have events in frequency mode (needs freq adjust) + * - there are events to unthrottle on this cpu + */ + if (!(ctx->nr_freq || needs_unthr)) return; + raw_spin_lock(&ctx->lock); + perf_pmu_disable(ctx->pmu); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2344,13 +2365,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) hwc = &event->hw; - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle events on the tick - */ - if (interrupts == MAX_INTERRUPTS) { + if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) { + hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); } @@ -2358,14 +2374,30 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (!event->attr.freq || !event->attr.sample_freq) continue; - event->pmu->read(event); + /* + * stop the event and update event->count + */ + event->pmu->stop(event, PERF_EF_UPDATE); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; + /* + * restart the event + * reload only if value has changed + * we have stopped the event so tell that + * to perf_adjust_period() to avoid stopping it + * twice. + */ if (delta > 0) - perf_adjust_period(event, period, delta); + perf_adjust_period(event, period, delta, false); + + event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); } + + perf_pmu_enable(ctx->pmu); + raw_spin_unlock(&ctx->lock); } /* @@ -2388,16 +2420,13 @@ static void rotate_ctx(struct perf_event_context *ctx) */ static void perf_rotate_context(struct perf_cpu_context *cpuctx) { - u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; struct perf_event_context *ctx = NULL; - int rotate = 0, remove = 1, freq = 0; + int rotate = 0, remove = 1; if (cpuctx->ctx.nr_events) { remove = 0; if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) rotate = 1; - if (cpuctx->ctx.nr_freq) - freq = 1; } ctx = cpuctx->task_ctx; @@ -2405,37 +2434,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) remove = 0; if (ctx->nr_events != ctx->nr_active) rotate = 1; - if (ctx->nr_freq) - freq = 1; } - if (!rotate && !freq) + if (!rotate) goto done; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); - if (freq) { - perf_ctx_adjust_freq(&cpuctx->ctx, interval); - if (ctx) - perf_ctx_adjust_freq(ctx, interval); - } - - if (rotate) { - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - if (ctx) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + if (ctx) + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); + rotate_ctx(&cpuctx->ctx); + if (ctx) + rotate_ctx(ctx); - perf_event_sched_in(cpuctx, ctx, current); - } + perf_event_sched_in(cpuctx, ctx, current); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - done: if (remove) list_del_init(&cpuctx->rotation_list); @@ -2445,10 +2463,22 @@ void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); struct perf_cpu_context *cpuctx, *tmp; + struct perf_event_context *ctx; + int throttled; WARN_ON(!irqs_disabled()); + __this_cpu_inc(perf_throttled_seq); + throttled = __this_cpu_xchg(perf_throttled_count, 0); + list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { + ctx = &cpuctx->ctx; + perf_adjust_freq_unthr_context(ctx, throttled); + + ctx = cpuctx->task_ctx; + if (ctx) + perf_adjust_freq_unthr_context(ctx, throttled); + if (cpuctx->jiffies_interval == 1 || !(jiffies % cpuctx->jiffies_interval)) perf_rotate_context(cpuctx); @@ -4509,6 +4539,7 @@ static int __perf_event_overflow(struct perf_event *event, { int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; + u64 seq; int ret = 0; /* @@ -4518,14 +4549,20 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(!is_sampling_event(event))) return 0; - if (unlikely(hwc->interrupts >= max_samples_per_tick)) { - if (throttle) { + seq = __this_cpu_read(perf_throttled_seq); + if (seq != hwc->interrupts_seq) { + hwc->interrupts_seq = seq; + hwc->interrupts = 1; + } else { + hwc->interrupts++; + if (unlikely(throttle + && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); ret = 1; } - } else - hwc->interrupts++; + } if (event->attr.freq) { u64 now = perf_clock(); @@ -4534,7 +4571,7 @@ static int __perf_event_overflow(struct perf_event *event, hwc->freq_time_stamp = now; if (delta > 0 && delta < 2*TICK_NSEC) - perf_adjust_period(event, delta, hwc->last_period); + perf_adjust_period(event, delta, hwc->last_period, true); } /* diff --git a/kernel/exit.c b/kernel/exit.c index 294b1709170d..4b4042f9bc6a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1038,6 +1038,22 @@ void do_exit(long code) if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); + + /* + * The setting of TASK_RUNNING by try_to_wake_up() may be delayed + * when the following two conditions become true. + * - There is race condition of mmap_sem (It is acquired by + * exit_mm()), and + * - SMI occurs before setting TASK_RUNINNG. + * (or hypervisor of virtual machine switches to other guest) + * As a result, we may become TASK_RUNNING after becoming TASK_DEAD + * + * To avoid it, we have to wait for releasing tsk->pi_lock which + * is held by try_to_wake_up() + */ + smp_mb(); + raw_spin_unlock_wait(&tsk->pi_lock); + /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ diff --git a/kernel/fork.c b/kernel/fork.c index 051f090d40c1..b77fd559c78e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task) } EXPORT_SYMBOL_GPL(get_task_mm); +struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) +{ + struct mm_struct *mm; + int err; + + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, mode)) { + mmput(mm); + mm = ERR_PTR(-EACCES); + } + mutex_unlock(&task->signal->cred_guard_mutex); + + return mm; +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. @@ -890,7 +910,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; new_ioc->ioprio = ioc->ioprio; - put_io_context(new_ioc, NULL); + put_io_context(new_ioc); } #endif return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1f9e26526b69..af48e59bc2ff 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1,189 +1,793 @@ +#include <linux/debugfs.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqdesc.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/smp.h> +#include <linux/fs.h> + +#define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs. + * ie. legacy 8259, gets irqs 1..15 */ +#define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ +#define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ +#define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); +static DEFINE_MUTEX(revmap_trees_mutex); +static unsigned int irq_virq_count = NR_IRQS; +static struct irq_domain *irq_default_domain; + /** - * irq_domain_add() - Register an irq_domain - * @domain: ptr to initialized irq_domain structure + * irq_domain_alloc() - Allocate a new irq_domain data structure + * @of_node: optional device-tree node of the interrupt controller + * @revmap_type: type of reverse mapping to use + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer * - * Registers an irq_domain structure. The irq_domain must at a minimum be - * initialized with an ops structure pointer, and either a ->to_irq hook or - * a valid irq_base value. Everything else is optional. + * Allocates and initialize and irq_domain structure. Caller is expected to + * register allocated irq_domain with irq_domain_register(). Returns pointer + * to IRQ domain, or NULL on failure. */ -void irq_domain_add(struct irq_domain *domain) +static struct irq_domain *irq_domain_alloc(struct device_node *of_node, + unsigned int revmap_type, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_data *d; - int hwirq, irq; + struct irq_domain *domain; - /* - * This assumes that the irq_domain owner has already allocated - * the irq_descs. This block will be removed when support for dynamic - * allocation of irq_descs is added to irq_domain. - */ - irq_domain_for_each_irq(domain, hwirq, irq) { - d = irq_get_irq_data(irq); - if (!d) { - WARN(1, "error: assigning domain to non existant irq_desc"); - return; - } - if (d->domain) { - /* things are broken; just report, don't clean up */ - WARN(1, "error: irq_desc already assigned to a domain"); - return; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (WARN_ON(!domain)) + return NULL; + + /* Fill structure */ + domain->revmap_type = revmap_type; + domain->ops = ops; + domain->host_data = host_data; + domain->of_node = of_node_get(of_node); + + return domain; +} + +static void irq_domain_add(struct irq_domain *domain) +{ + mutex_lock(&irq_domain_mutex); + list_add(&domain->link, &irq_domain_list); + mutex_unlock(&irq_domain_mutex); + pr_debug("irq: Allocated domain of type %d @0x%p\n", + domain->revmap_type, domain); +} + +static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; + int size = domain->revmap_data.legacy.size; + + if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) + return 0; + return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; +} + +/** + * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @size: total number of irqs in legacy mapping + * @first_irq: first number of irq block assigned to the domain + * @first_hwirq: first hwirq number to use for the translation. Should normally + * be '0', but a positive integer can be used if the effective + * hwirqs numbering does not begin at zero. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Note: the map() callback will be called before this function returns + * for all legacy interrupts except 0 (which is always the invalid irq for + * a legacy controller). + */ +struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + unsigned int i; + + domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); + if (!domain) + return NULL; + + domain->revmap_data.legacy.first_irq = first_irq; + domain->revmap_data.legacy.first_hwirq = first_hwirq; + domain->revmap_data.legacy.size = size; + + mutex_lock(&irq_domain_mutex); + /* Verify that all the irqs are available */ + for (i = 0; i < size; i++) { + int irq = first_irq + i; + struct irq_data *irq_data = irq_get_irq_data(irq); + + if (WARN_ON(!irq_data || irq_data->domain)) { + mutex_unlock(&irq_domain_mutex); + of_node_put(domain->of_node); + kfree(domain); + return NULL; } - d->domain = domain; - d->hwirq = hwirq; } - mutex_lock(&irq_domain_mutex); - list_add(&domain->list, &irq_domain_list); + /* Claim all of the irqs before registering a legacy domain */ + for (i = 0; i < size; i++) { + struct irq_data *irq_data = irq_get_irq_data(first_irq + i); + irq_data->hwirq = first_hwirq + i; + irq_data->domain = domain; + } mutex_unlock(&irq_domain_mutex); + + for (i = 0; i < size; i++) { + int irq = first_irq + i; + int hwirq = first_hwirq + i; + + /* IRQ0 gets ignored */ + if (!irq) + continue; + + /* Legacy flags are left to default at this point, + * one can then use irq_create_mapping() to + * explicitly change them + */ + ops->map(domain, irq, hwirq); + + /* Clear norequest flags */ + irq_clear_status_flags(irq, IRQ_NOREQUEST); + } + + irq_domain_add(domain); + return domain; +} + +/** + * irq_domain_add_linear() - Allocate and register a legacy revmap irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + */ +struct irq_domain *irq_domain_add_linear(struct device_node *of_node, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + unsigned int *revmap; + + revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL); + if (WARN_ON(!revmap)) + return NULL; + + domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); + if (!domain) { + kfree(revmap); + return NULL; + } + domain->revmap_data.linear.size = size; + domain->revmap_data.linear.revmap = revmap; + irq_domain_add(domain); + return domain; +} + +struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain = irq_domain_alloc(of_node, + IRQ_DOMAIN_MAP_NOMAP, ops, host_data); + if (domain) + irq_domain_add(domain); + return domain; +} + +/** + * irq_domain_add_tree() + * @of_node: pointer to interrupt controller's device tree node. + * @ops: map/unmap domain callbacks + * + * Note: The radix tree will be allocated later during boot automatically + * (the reverse mapping will use the slow path until that happens). + */ +struct irq_domain *irq_domain_add_tree(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain = irq_domain_alloc(of_node, + IRQ_DOMAIN_MAP_TREE, ops, host_data); + if (domain) { + INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); + irq_domain_add(domain); + } + return domain; } /** - * irq_domain_del() - Unregister an irq_domain - * @domain: ptr to registered irq_domain. + * irq_find_host() - Locates a domain for a given device node + * @node: device-tree node of the interrupt controller */ -void irq_domain_del(struct irq_domain *domain) +struct irq_domain *irq_find_host(struct device_node *node) { - struct irq_data *d; - int hwirq, irq; + struct irq_domain *h, *found = NULL; + int rc; + /* We might want to match the legacy controller last since + * it might potentially be set to match all interrupts in + * the absence of a device node. This isn't a problem so far + * yet though... + */ mutex_lock(&irq_domain_mutex); - list_del(&domain->list); + list_for_each_entry(h, &irq_domain_list, link) { + if (h->ops->match) + rc = h->ops->match(h, node); + else + rc = (h->of_node != NULL) && (h->of_node == node); + + if (rc) { + found = h; + break; + } + } mutex_unlock(&irq_domain_mutex); + return found; +} +EXPORT_SYMBOL_GPL(irq_find_host); + +/** + * irq_set_default_host() - Set a "default" irq domain + * @domain: default domain pointer + * + * For convenience, it's possible to set a "default" domain that will be used + * whenever NULL is passed to irq_create_mapping(). It makes life easier for + * platforms that want to manipulate a few hard coded interrupt numbers that + * aren't properly represented in the device-tree. + */ +void irq_set_default_host(struct irq_domain *domain) +{ + pr_debug("irq: Default domain set to @0x%p\n", domain); + + irq_default_domain = domain; +} + +/** + * irq_set_virq_count() - Set the maximum number of linux irqs + * @count: number of linux irqs, capped with NR_IRQS + * + * This is mainly for use by platforms like iSeries who want to program + * the virtual irq number in the controller to avoid the reverse mapping + */ +void irq_set_virq_count(unsigned int count) +{ + pr_debug("irq: Trying to set virq count to %d\n", count); - /* Clear the irq_domain assignments */ - irq_domain_for_each_irq(domain, hwirq, irq) { - d = irq_get_irq_data(irq); - d->domain = NULL; + BUG_ON(count < NUM_ISA_INTERRUPTS); + if (count < NR_IRQS) + irq_virq_count = count; +} + +static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + + irq_data->hwirq = hwirq; + irq_data->domain = domain; + if (domain->ops->map(domain, virq, hwirq)) { + pr_debug("irq: -> mapping failed, freeing\n"); + irq_data->domain = NULL; + irq_data->hwirq = 0; + return -1; } + + irq_clear_status_flags(virq, IRQ_NOREQUEST); + + return 0; } -#if defined(CONFIG_OF_IRQ) /** - * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec + * irq_create_direct_mapping() - Allocate an irq for direct mapping + * @domain: domain to allocate the irq for or NULL for default domain * - * Used by the device tree interrupt mapping code to translate a device tree - * interrupt specifier to a valid linux irq number. Returns either a valid - * linux IRQ number or 0. + * This routine is used for irq controllers which can choose the hardware + * interrupt numbers they generate. In such a case it's simplest to use + * the linux irq as the hardware interrupt number. + */ +unsigned int irq_create_direct_mapping(struct irq_domain *domain) +{ + unsigned int virq; + + if (domain == NULL) + domain = irq_default_domain; + + BUG_ON(domain == NULL); + WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP); + + virq = irq_alloc_desc_from(1, 0); + if (!virq) { + pr_debug("irq: create_direct virq allocation failed\n"); + return 0; + } + if (virq >= irq_virq_count) { + pr_err("ERROR: no free irqs available below %i maximum\n", + irq_virq_count); + irq_free_desc(virq); + return 0; + } + + pr_debug("irq: create_direct obtained virq %d\n", virq); + + if (irq_setup_virq(domain, virq, virq)) { + irq_free_desc(virq); + return 0; + } + + return virq; +} + +/** + * irq_create_mapping() - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space * - * When the caller no longer need the irq number returned by this function it - * should arrange to call irq_dispose_mapping(). + * Only one mapping per hardware interrupt is permitted. Returns a linux + * irq number. + * If the sense/trigger is to be specified, set_irq_type() should be called + * on the number returned from that call. */ +unsigned int irq_create_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + unsigned int virq, hint; + + pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + + /* Look for default domain if nececssary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) { + printk(KERN_WARNING "irq_create_mapping called for" + " NULL domain, hwirq=%lx\n", hwirq); + WARN_ON(1); + return 0; + } + pr_debug("irq: -> using domain @%p\n", domain); + + /* Check if mapping already exists */ + virq = irq_find_mapping(domain, hwirq); + if (virq) { + pr_debug("irq: -> existing mapping on virq %d\n", virq); + return virq; + } + + /* Get a virtual interrupt number */ + if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) + return irq_domain_legacy_revmap(domain, hwirq); + + /* Allocate a virtual interrupt number */ + hint = hwirq % irq_virq_count; + if (hint == 0) + hint++; + virq = irq_alloc_desc_from(hint, 0); + if (!virq) + virq = irq_alloc_desc_from(1, 0); + if (!virq) { + pr_debug("irq: -> virq allocation failed\n"); + return 0; + } + + if (irq_setup_virq(domain, virq, hwirq)) { + if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY) + irq_free_desc(virq); + return 0; + } + + pr_debug("irq: irq %lu on domain %s mapped to virtual irq %u\n", + hwirq, domain->of_node ? domain->of_node->full_name : "null", virq); + + return virq; +} +EXPORT_SYMBOL_GPL(irq_create_mapping); + unsigned int irq_create_of_mapping(struct device_node *controller, const u32 *intspec, unsigned int intsize) { struct irq_domain *domain; - unsigned long hwirq; - unsigned int irq, type; - int rc = -EINVAL; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + unsigned int virq; - /* Find a domain which can translate the irq spec */ - mutex_lock(&irq_domain_mutex); - list_for_each_entry(domain, &irq_domain_list, list) { - if (!domain->ops->dt_translate) - continue; - rc = domain->ops->dt_translate(domain, controller, - intspec, intsize, &hwirq, &type); - if (rc == 0) - break; + domain = controller ? irq_find_host(controller) : irq_default_domain; + if (!domain) { +#ifdef CONFIG_MIPS + /* + * Workaround to avoid breaking interrupt controller drivers + * that don't yet register an irq_domain. This is temporary + * code. ~~~gcl, Feb 24, 2012 + * + * Scheduled for removal in Linux v3.6. That should be enough + * time. + */ + if (intsize > 0) + return intspec[0]; +#endif + printk(KERN_WARNING "irq: no irq domain found for %s !\n", + controller->full_name); + return 0; } - mutex_unlock(&irq_domain_mutex); - if (rc != 0) - return 0; + /* If domain has no translation, then we assume interrupt line */ + if (domain->ops->xlate == NULL) + hwirq = intspec[0]; + else { + if (domain->ops->xlate(domain, controller, intspec, intsize, + &hwirq, &type)) + return 0; + } + + /* Create mapping */ + virq = irq_create_mapping(domain, hwirq); + if (!virq) + return virq; - irq = irq_domain_to_irq(domain, hwirq); - if (type != IRQ_TYPE_NONE) - irq_set_irq_type(irq, type); - pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", - controller->full_name, (int)hwirq, irq, type); - return irq; + /* Set type if specified and different than the current one */ + if (type != IRQ_TYPE_NONE && + type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) + irq_set_irq_type(virq, type); + return virq; } EXPORT_SYMBOL_GPL(irq_create_of_mapping); /** - * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() - * @irq: linux irq number to be discarded + * irq_dispose_mapping() - Unmap an interrupt + * @virq: linux irq number of the interrupt to unmap + */ +void irq_dispose_mapping(unsigned int virq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_domain *domain; + irq_hw_number_t hwirq; + + if (!virq || !irq_data) + return; + + domain = irq_data->domain; + if (WARN_ON(domain == NULL)) + return; + + /* Never unmap legacy interrupts */ + if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) + return; + + irq_set_status_flags(virq, IRQ_NOREQUEST); + + /* remove chip and handler */ + irq_set_chip_and_handler(virq, NULL, NULL); + + /* Make sure it's completed */ + synchronize_irq(virq); + + /* Tell the PIC about it */ + if (domain->ops->unmap) + domain->ops->unmap(domain, virq); + smp_mb(); + + /* Clear reverse map */ + hwirq = irq_data->hwirq; + switch(domain->revmap_type) { + case IRQ_DOMAIN_MAP_LINEAR: + if (hwirq < domain->revmap_data.linear.size) + domain->revmap_data.linear.revmap[hwirq] = 0; + break; + case IRQ_DOMAIN_MAP_TREE: + mutex_lock(&revmap_trees_mutex); + radix_tree_delete(&domain->revmap_data.tree, hwirq); + mutex_unlock(&revmap_trees_mutex); + break; + } + + irq_free_desc(virq); +} +EXPORT_SYMBOL_GPL(irq_dispose_mapping); + +/** + * irq_find_mapping() - Find a linux irq from an hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * This is a slow path, for use by generic code. It's expected that an + * irq controller implementation directly calls the appropriate low level + * mapping function. + */ +unsigned int irq_find_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + unsigned int i; + unsigned int hint = hwirq % irq_virq_count; + + /* Look for default domain if nececssary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) + return 0; + + /* legacy -> bail early */ + if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) + return irq_domain_legacy_revmap(domain, hwirq); + + /* Slow path does a linear search of the map */ + if (hint == 0) + hint = 1; + i = hint; + do { + struct irq_data *data = irq_get_irq_data(i); + if (data && (data->domain == domain) && (data->hwirq == hwirq)) + return i; + i++; + if (i >= irq_virq_count) + i = 1; + } while(i != hint); + return 0; +} +EXPORT_SYMBOL_GPL(irq_find_mapping); + +/** + * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space * - * Calling this function indicates the caller no longer needs a reference to - * the linux irq number returned by a prior call to irq_create_of_mapping(). + * This is a fast path, for use by irq controller code that uses radix tree + * revmaps */ -void irq_dispose_mapping(unsigned int irq) +unsigned int irq_radix_revmap_lookup(struct irq_domain *domain, + irq_hw_number_t hwirq) { + struct irq_data *irq_data; + + if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) + return irq_find_mapping(domain, hwirq); + + /* + * Freeing an irq can delete nodes along the path to + * do the lookup via call_rcu. + */ + rcu_read_lock(); + irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); + rcu_read_unlock(); + /* - * nothing yet; will be filled when support for dynamic allocation of - * irq_descs is added to irq_domain + * If found in radix tree, then fine. + * Else fallback to linear lookup - this should not happen in practice + * as it means that we failed to insert the node in the radix tree. */ + return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq); } -EXPORT_SYMBOL_GPL(irq_dispose_mapping); -int irq_domain_simple_dt_translate(struct irq_domain *d, - struct device_node *controller, - const u32 *intspec, unsigned int intsize, - unsigned long *out_hwirq, unsigned int *out_type) +/** + * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping. + * @domain: domain owning this hardware interrupt + * @virq: linux irq number + * @hwirq: hardware irq number in that domain space + * + * This is for use by irq controllers that use a radix tree reverse + * mapping for fast lookup. + */ +void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { - if (d->of_node != controller) - return -EINVAL; - if (intsize < 1) - return -EINVAL; - if (d->nr_irq && ((intspec[0] < d->hwirq_base) || - (intspec[0] >= d->hwirq_base + d->nr_irq))) - return -EINVAL; + struct irq_data *irq_data = irq_get_irq_data(virq); + + if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) + return; + + if (virq) { + mutex_lock(&revmap_trees_mutex); + radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); + mutex_unlock(&revmap_trees_mutex); + } +} + +/** + * irq_linear_revmap() - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * This is a fast path, for use by irq controller code that uses linear + * revmaps. It does fallback to the slow path if the revmap doesn't exist + * yet and will create the revmap entry with appropriate locking + */ +unsigned int irq_linear_revmap(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + unsigned int *revmap; + + if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR)) + return irq_find_mapping(domain, hwirq); + + /* Check revmap bounds */ + if (unlikely(hwirq >= domain->revmap_data.linear.size)) + return irq_find_mapping(domain, hwirq); + + /* Check if revmap was allocated */ + revmap = domain->revmap_data.linear.revmap; + if (unlikely(revmap == NULL)) + return irq_find_mapping(domain, hwirq); + + /* Fill up revmap with slow path if no mapping found */ + if (unlikely(!revmap[hwirq])) + revmap[hwirq] = irq_find_mapping(domain, hwirq); + + return revmap[hwirq]; +} + +#ifdef CONFIG_VIRQ_DEBUG +static int virq_debug_show(struct seq_file *m, void *private) +{ + unsigned long flags; + struct irq_desc *desc; + const char *p; + static const char none[] = "none"; + void *data; + int i; + + seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", + "chip name", "chip data", "domain name"); + + for (i = 1; i < nr_irqs; i++) { + desc = irq_to_desc(i); + if (!desc) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + + if (desc->action && desc->action->handler) { + struct irq_chip *chip; + + seq_printf(m, "%5d ", i); + seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); + + chip = irq_desc_get_chip(desc); + if (chip && chip->name) + p = chip->name; + else + p = none; + seq_printf(m, "%-15s ", p); + + data = irq_desc_get_chip_data(desc); + seq_printf(m, "0x%16p ", data); + + if (desc->irq_data.domain->of_node) + p = desc->irq_data.domain->of_node->full_name; + else + p = none; + seq_printf(m, "%s\n", p); + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + } + + return 0; +} +static int virq_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, virq_debug_show, inode->i_private); +} + +static const struct file_operations virq_debug_fops = { + .open = virq_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init irq_debugfs_init(void) +{ + if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root, + NULL, &virq_debug_fops) == NULL) + return -ENOMEM; + + return 0; +} +__initcall(irq_debugfs_init); +#endif /* CONFIG_VIRQ_DEBUG */ + +int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + return 0; +} + +/** + * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings + * + * Device Tree IRQ specifier translation function which works with one cell + * bindings where the cell value maps directly to the hwirq number. + */ +int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (WARN_ON(intsize < 1)) + return -EINVAL; *out_hwirq = intspec[0]; *out_type = IRQ_TYPE_NONE; - if (intsize > 1) - *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; return 0; } +EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); /** - * irq_domain_create_simple() - Set up a 'simple' translation range + * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings + * + * Device Tree IRQ specifier translation function which works with two cell + * bindings where the cell values map directly to the hwirq number + * and linux irq flags. */ -void irq_domain_add_simple(struct device_node *controller, int irq_base) +int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) { - struct irq_domain *domain; - - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) { - WARN_ON(1); - return; - } + if (WARN_ON(intsize < 2)) + return -EINVAL; + *out_hwirq = intspec[0]; + *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); - domain->irq_base = irq_base; - domain->of_node = of_node_get(controller); - domain->ops = &irq_domain_simple_ops; - irq_domain_add(domain); +/** + * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings + * + * Device Tree IRQ specifier translation function which works with either one + * or two cell bindings where the cell values map directly to the hwirq number + * and linux irq flags. + * + * Note: don't use this function unless your interrupt controller explicitly + * supports both one and two cell bindings. For the majority of controllers + * the _onecell() or _twocell() variants above should be used. + */ +int irq_domain_xlate_onetwocell(struct irq_domain *d, + struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (WARN_ON(intsize < 1)) + return -EINVAL; + *out_hwirq = intspec[0]; + *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE; + return 0; } -EXPORT_SYMBOL_GPL(irq_domain_add_simple); +EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); +const struct irq_domain_ops irq_domain_simple_ops = { + .map = irq_domain_simple_map, + .xlate = irq_domain_xlate_onetwocell, +}; +EXPORT_SYMBOL_GPL(irq_domain_simple_ops); + +#ifdef CONFIG_OF_IRQ void irq_domain_generate_simple(const struct of_device_id *match, u64 phys_base, unsigned int irq_start) { struct device_node *node; - pr_info("looking for phys_base=%llx, irq_start=%i\n", + pr_debug("looking for phys_base=%llx, irq_start=%i\n", (unsigned long long) phys_base, (int) irq_start); node = of_find_matching_node_by_address(NULL, match, phys_base); if (node) - irq_domain_add_simple(node, irq_start); - else - pr_info("no node found\n"); + irq_domain_add_legacy(node, 32, irq_start, 0, + &irq_domain_simple_ops, NULL); } EXPORT_SYMBOL_GPL(irq_domain_generate_simple); -#endif /* CONFIG_OF_IRQ */ - -struct irq_domain_ops irq_domain_simple_ops = { -#ifdef CONFIG_OF_IRQ - .dt_translate = irq_domain_simple_dt_translate, -#endif /* CONFIG_OF_IRQ */ -}; -EXPORT_SYMBOL_GPL(irq_domain_simple_ops); +#endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 29f5b65bee29..9788c0ec6f43 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1673,8 +1673,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, ri->rp = rp; ri->task = current; - if (rp->entry_handler && rp->entry_handler(ri, regs)) + if (rp->entry_handler && rp->entry_handler(ri, regs)) { + raw_spin_lock_irqsave(&rp->lock, flags); + hlist_add_head(&ri->hlist, &rp->free_instances); + raw_spin_unlock_irqrestore(&rp->lock, flags); return 0; + } arch_prepare_kretprobe(ri, regs); diff --git a/kernel/params.c b/kernel/params.c index 32ee04308285..4bc965d8a1fe 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -97,7 +97,8 @@ static int parse_one(char *param, for (i = 0; i < num_params; i++) { if (parameq(param, params[i].name)) { /* No one handled NULL, so do it here. */ - if (!val && params[i].ops->set != param_set_bool) + if (!val && params[i].ops->set != param_set_bool + && params[i].ops->set != param_set_bint) return -EINVAL; pr_debug("They are equal! Calling %p\n", params[i].ops->set); diff --git a/kernel/power/power.h b/kernel/power/power.h index 0c4defe6d3b8..21724eee5206 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -231,8 +231,28 @@ extern int pm_test_level; #ifdef CONFIG_SUSPEND_FREEZER static inline int suspend_freeze_processes(void) { - int error = freeze_processes(); - return error ? : freeze_kernel_threads(); + int error; + + error = freeze_processes(); + + /* + * freeze_processes() automatically thaws every task if freezing + * fails. So we need not do anything extra upon error. + */ + if (error) + goto Finish; + + error = freeze_kernel_threads(); + + /* + * freeze_kernel_threads() thaws only kernel threads upon freezing + * failure. So we have to thaw the userspace tasks ourselves. + */ + if (error) + thaw_processes(); + + Finish: + return error; } static inline void suspend_thaw_processes(void) diff --git a/kernel/power/process.c b/kernel/power/process.c index eeca00311f39..7e426459e60a 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -143,7 +143,10 @@ int freeze_processes(void) /** * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. * - * On success, returns 0. On failure, -errno and system is fully thawed. + * On success, returns 0. On failure, -errno and only the kernel threads are + * thawed, so as to give a chance to the caller to do additional cleanups + * (if any) before thawing the userspace tasks. So, it is the responsibility + * of the caller to thaw the userspace tasks, when the time is right. */ int freeze_kernel_threads(void) { @@ -159,7 +162,7 @@ int freeze_kernel_threads(void) BUG_ON(in_atomic()); if (error) - thaw_processes(); + thaw_kernel_threads(); return error; } diff --git a/kernel/power/user.c b/kernel/power/user.c index e5a21a857302..3e100075b13c 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -249,13 +249,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } pm_restore_gfp_mask(); error = hibernation_snapshot(data->platform_support); - if (!error) { + if (error) { + thaw_kernel_threads(); + } else { error = put_user(in_suspend, (int __user *)arg); if (!error && !freezer_test_done) data->ready = 1; if (freezer_test_done) { freezer_test_done = false; - thaw_processes(); + thaw_kernel_threads(); } } break; diff --git a/kernel/relay.c b/kernel/relay.c index 4335e1d7ee2d..ab56a1764d4d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -164,10 +164,14 @@ depopulate: */ static struct rchan_buf *relay_create_buf(struct rchan *chan) { - struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); - if (!buf) + struct rchan_buf *buf; + + if (chan->n_subbufs > UINT_MAX / sizeof(size_t *)) return NULL; + buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); + if (!buf) + return NULL; buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); if (!buf->padding) goto free_buf; @@ -574,6 +578,8 @@ struct rchan *relay_open(const char *base_filename, if (!(subbuf_size && n_subbufs)) return NULL; + if (subbuf_size > UINT_MAX / n_subbufs) + return NULL; chan = kzalloc(sizeof(struct rchan), GFP_KERNEL); if (!chan) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df00cb09263e..5255c9d2e053 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -74,6 +74,7 @@ #include <asm/tlb.h> #include <asm/irq_regs.h> +#include <asm/mutex.h> #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #endif @@ -723,9 +724,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) p->sched_class->dequeue_task(rq, p, flags); } -/* - * activate_task - move a task to the runqueue. - */ void activate_task(struct rq *rq, struct task_struct *p, int flags) { if (task_contributes_to_load(p)) @@ -734,9 +732,6 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags) enqueue_task(rq, p, flags); } -/* - * deactivate_task - remove a task from the runqueue. - */ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) { if (task_contributes_to_load(p)) @@ -4134,7 +4129,7 @@ recheck: on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) - deactivate_task(rq, p, 0); + dequeue_task(rq, p, 0); if (running) p->sched_class->put_prev_task(rq, p); @@ -4147,7 +4142,7 @@ recheck: if (running) p->sched_class->set_curr_task(rq); if (on_rq) - activate_task(rq, p, 0); + enqueue_task(rq, p, 0); check_class_changed(rq, p, prev_class, oldprio); task_rq_unlock(rq, p, &flags); @@ -4998,9 +4993,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * placed properly. */ if (p->on_rq) { - deactivate_task(rq_src, p, 0); + dequeue_task(rq_src, p, 0); set_task_cpu(p, dest_cpu); - activate_task(rq_dest, p, 0); + enqueue_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p, 0); } done: @@ -7032,10 +7027,10 @@ static void normalize_task(struct rq *rq, struct task_struct *p) on_rq = p->on_rq; if (on_rq) - deactivate_task(rq, p, 0); + dequeue_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { - activate_task(rq, p, 0); + enqueue_task(rq, p, 0); resched_task(rq->curr); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 84adb2d66cbd..7c6414fc669d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4866,6 +4866,15 @@ static void nohz_balancer_kick(int cpu) return; } +static inline void clear_nohz_tick_stopped(int cpu) +{ + if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { + cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); + } +} + static inline void set_cpu_sd_state_busy(void) { struct sched_domain *sd; @@ -4904,6 +4913,12 @@ void select_nohz_load_balancer(int stop_tick) { int cpu = smp_processor_id(); + /* + * If this cpu is going down, then nothing needs to be done. + */ + if (!cpu_active(cpu)) + return; + if (stop_tick) { if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) return; @@ -4914,6 +4929,18 @@ void select_nohz_load_balancer(int stop_tick) } return; } + +static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DYING: + clear_nohz_tick_stopped(smp_processor_id()); + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} #endif static DEFINE_SPINLOCK(balancing); @@ -5070,11 +5097,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) * busy tick after returning from idle, we will update the busy stats. */ set_cpu_sd_state_busy(); - if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { - clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); - cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); - } + clear_nohz_tick_stopped(cpu); /* * None are in tickless mode and hence no need for NOHZ idle load @@ -5590,6 +5613,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_NO_HZ zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); + cpu_notifier(sched_ilb_notifier, 0); #endif #endif /* SMP */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 3640ebbb466b..f42ae7fb5ec5 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1587,6 +1587,11 @@ static int push_rt_task(struct rq *rq) if (!next_task) return 0; +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + if (unlikely(task_running(rq, next_task))) + return 0; +#endif + retry: if (unlikely(next_task == rq->curr)) { WARN_ON(1); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1d7bca7f4f52..d117262deba3 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -296,7 +296,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; - printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); print_modules(); |