diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/auditsc.c | 3 | ||||
-rw-r--r-- | kernel/cpu.c | 13 | ||||
-rw-r--r-- | kernel/events/core.c | 99 | ||||
-rw-r--r-- | kernel/events/internal.h | 10 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 37 | ||||
-rw-r--r-- | kernel/fork.c | 7 | ||||
-rw-r--r-- | kernel/irq/internals.h | 4 | ||||
-rw-r--r-- | kernel/irq/resend.c | 18 | ||||
-rw-r--r-- | kernel/kthread.c | 4 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 11 | ||||
-rw-r--r-- | kernel/module.c | 9 | ||||
-rw-r--r-- | kernel/resource.c | 6 | ||||
-rw-r--r-- | kernel/sched/fair.c | 2 | ||||
-rw-r--r-- | kernel/signal.c | 13 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 24 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 164 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 22 | ||||
-rw-r--r-- | kernel/time/tick-sched.h | 10 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 52 | ||||
-rw-r--r-- | kernel/trace/trace.h | 1 | ||||
-rw-r--r-- | kernel/trace/trace_branch.c | 17 |
21 files changed, 367 insertions, 159 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 09c65640cad6..e85bdfd15fed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { - WARN_ON(1); + if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..5644ec5582b9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,7 @@ #include <linux/suspend.h> #include <linux/lockdep.h> #include <linux/tick.h> +#include <linux/irq.h> #include <trace/events/power.h> #include "smpboot.h" @@ -392,13 +393,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smpboot_park_threads(cpu); /* - * So now all preempt/rcu users must observe !cpu_active(). + * Prevent irq alloc/free while the dying cpu reorganizes the + * interrupt affinities. */ + irq_lock_sparse(); + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + irq_unlock_sparse(); goto out_release; } BUG_ON(cpu_online(cpu)); @@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ per_cpu(cpu_dead_idle, cpu) = false; + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -519,6 +529,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); + if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); diff --git a/kernel/events/core.c b/kernel/events/core.c index e965cfae4207..e6feb5114134 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1868,8 +1868,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_running += tstamp - event->tstamp_stopped; - perf_set_shadow_time(event, ctx, tstamp); perf_log_itrace_start(event); @@ -1881,6 +1879,8 @@ event_sched_in(struct perf_event *event, goto out; } + event->tstamp_running += tstamp - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) @@ -3958,28 +3958,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -3998,11 +3991,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); -unlock: + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if (!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } + +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; @@ -4358,14 +4393,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; @@ -4748,12 +4775,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -6132,7 +6167,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c7a40d..2bbad9c1274c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 96472824a752..c8aa3f75bc4d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -547,17 +559,30 @@ static void __rb_free_aux(struct ring_buffer *rb) rb->aux_priv = NULL; } - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } } void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..dbd9b8d7b7cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -287,6 +287,11 @@ static void set_max_threads(unsigned int max_threads_suggested) max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +/* Initialized by the architecture: */ +int arch_task_struct_size __read_mostly; +#endif + void __init fork_init(void) { #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR @@ -295,7 +300,7 @@ void __init fork_init(void) #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", sizeof(struct task_struct), + kmem_cache_create("task_struct", arch_task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee828c41..61008b8433ab 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } -extern void irq_lock_sparse(void); -extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); -static inline void irq_lock_sparse(void) { } -static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 9065107f083e..7a5237a1bce5 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -75,13 +75,21 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND /* - * If the interrupt has a parent irq and runs - * in the thread context of the parent irq, - * retrigger the parent. + * If the interrupt is running in the thread + * context of the parent irq we need to be + * careful, because we cannot trigger it + * directly. */ - if (desc->parent_irq && - irq_settings_is_nested_thread(desc)) + if (irq_settings_is_nested_thread(desc)) { + /* + * If the parent_irq is valid, we + * retrigger the parent, otherwise we + * do nothing. + */ + if (!desc->parent_irq) + return; irq = desc->parent_irq; + } /* Set it pending and activate the softirq: */ set_bit(irq, irqs_resend); tasklet_schedule(&resend_tasklet); diff --git a/kernel/kthread.c b/kernel/kthread.c index 10e489c448fe..fdea0bee7b5a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -97,6 +97,7 @@ bool kthread_should_park(void) { return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags); } +EXPORT_SYMBOL_GPL(kthread_should_park); /** * kthread_freezable_should_stop - should this freezable kthread return now? @@ -171,6 +172,7 @@ void kthread_parkme(void) { __kthread_parkme(to_kthread(current)); } +EXPORT_SYMBOL_GPL(kthread_parkme); static int kthread(void *_create) { @@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k) if (kthread) __kthread_unpark(k, kthread); } +EXPORT_SYMBOL_GPL(kthread_unpark); /** * kthread_park - park a thread created by kthread_create(). @@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k) } return ret; } +EXPORT_SYMBOL_GPL(kthread_park); /** * kthread_stop - stop a thread created by kthread_create(). diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 04ab18151cc8..df19ae4debd0 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,6 +4,7 @@ #include <linux/hash.h> #include <linux/bootmem.h> +#include <linux/debug_locks.h> /* * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead @@ -286,15 +287,23 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { struct __qspinlock *l = (void *)lock; struct pv_node *node; + u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); /* * We must not unlock if SLOW, because in that case we must first * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL)) + if (likely(lockval == _Q_LOCKED_VAL)) return; + if (unlikely(lockval != _Q_SLOW_VAL)) { + if (debug_locks_silent) + return; + WARN(1, "pvqspinlock: lock %p has corrupted value 0x%x!\n", lock, atomic_read(&lock->val)); + return; + } + /* * Since the above failed to release, this must be the SLOW path. * Therefore start by looking up the blocked node and unhashing it. diff --git a/kernel/module.c b/kernel/module.c index 3e0e19763d24..b86b7bf1be38 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -602,13 +602,16 @@ const struct kernel_symbol *find_symbol(const char *name, } EXPORT_SYMBOL_GPL(find_symbol); -/* Search for module by name: must hold module_mutex. */ +/* + * Search for module by name: must hold module_mutex (or preempt disabled + * for read-only access). + */ static struct module *find_module_all(const char *name, size_t len, bool even_unformed) { struct module *mod; - module_assert_mutex(); + module_assert_mutex_or_preempt(); list_for_each_entry(mod, &modules, list) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) @@ -621,6 +624,7 @@ static struct module *find_module_all(const char *name, size_t len, struct module *find_module(const char *name) { + module_assert_mutex(); return find_module_all(name, strlen(name), false); } EXPORT_SYMBOL_GPL(find_module); @@ -3557,6 +3561,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_sched(); diff --git a/kernel/resource.c b/kernel/resource.c index 90552aab5f2d..fed052a1bc9f 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -504,13 +504,13 @@ int region_is_ram(resource_size_t start, unsigned long size) { struct resource *p; resource_size_t end = start + size - 1; - int flags = IORESOURCE_MEM | IORESOURCE_BUSY; + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; const char *name = "System RAM"; int ret = -1; read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { - if (end < p->start) + if (p->end < start) continue; if (p->start <= start && end <= p->end) { @@ -521,7 +521,7 @@ int region_is_ram(resource_size_t start, unsigned long size) ret = 1; break; } - if (p->end < start) + if (end < p->start) break; /* not found */ } read_unlock(&resource_lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 65c8f3ebdc3c..d113c3ba8bc4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3683,7 +3683,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled = 1; cfs_rq->throttled_clock = rq_clock(rq); raw_spin_lock(&cfs_b->lock); - empty = list_empty(&cfs_rq->throttled_list); + empty = list_empty(&cfs_b->throttled_cfs_rq); /* * Add to the _head_ of the list, so that an already-started diff --git a/kernel/signal.c b/kernel/signal.c index 836df8dac6cc..0f6bbbe77b46 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2748,12 +2748,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif #ifdef SEGV_BNDERR - err |= __put_user(from->si_lower, &to->si_lower); - err |= __put_user(from->si_upper, &to->si_upper); + if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) { + err |= __put_user(from->si_lower, &to->si_lower); + err |= __put_user(from->si_upper, &to->si_upper); + } #endif break; case __SI_CHLD: @@ -3017,7 +3020,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; int ret = copy_siginfo_from_user32(&info, uinfo); if (unlikely(ret)) return ret; @@ -3061,7 +3064,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 08ccc3da3ca0..50eb107f1198 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev, /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: - return dev->set_state_shutdown(dev); + if (dev->set_state_shutdown) + return dev->set_state_shutdown(dev); + return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_state_periodic(dev); + if (dev->set_state_periodic) + return dev->set_state_periodic(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_state_oneshot(dev); + if (dev->set_state_oneshot) + return dev->set_state_oneshot(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ @@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New state-specific callbacks */ - if (!dev->set_state_shutdown) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_state_periodic) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_state_oneshot) - return -EINVAL; - return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d39f32cdd1b5..f6aae7977824 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret; + int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt - * is delivered by the broadcast device. + * is delivered by the broadcast device, if + * the broadcast device exists and is not + * hrtimer based. */ - ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: - /* Nothing to do */ - ret = 0; break; } } @@ -265,8 +266,22 @@ static bool tick_do_broadcast(struct cpumask *mask) * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; + cpumask_clear_cpu(cpu, mask); - local = true; + /* + * We only run the local handler, if the broadcast + * device is not hrtimer based. Otherwise we run into + * a hrtimer recursion. + * + * local timer_interrupt() + * local_handler() + * expire_hrtimers() + * bc_handler() + * local_handler() + * expire_hrtimers() + */ + local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { @@ -301,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + + /* Handle spurious interrupts gracefully */ + if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { + raw_spin_unlock(&tick_broadcast_lock); + return; + } + bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { @@ -359,8 +381,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { - if (tick_broadcast_device.mode == - TICKDEV_MODE_PERIODIC) + /* + * Only shutdown the cpu local device, if: + * + * - the broadcast device exists + * - the broadcast device is not a hrtimer based one + * - the broadcast device is in periodic mode to + * avoid a hickup during switch to oneshot mode + */ + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && + tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; @@ -379,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) break; } - if (cpumask_empty(tick_broadcast_mask)) { - if (!bc_stopped) - clockevents_shutdown(bc); - } else if (bc_stopped) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - tick_broadcast_start_periodic(bc); - else - tick_broadcast_setup_oneshot(bc); + if (bc) { + if (cpumask_empty(tick_broadcast_mask)) { + if (!bc_stopped) + clockevents_shutdown(bc); + } else if (bc_stopped) { + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); + } } raw_spin_unlock(&tick_broadcast_lock); } @@ -662,71 +694,82 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -/** - * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode - * @state: The target state (enter/exit) - * - * The system enters/leaves a state, where affected devices might stop - * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. - */ -int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; - struct tick_device *td; int cpu, ret = 0; ktime_t now; /* - * Periodic mode does not care about the enter/exit of power - * states + * If there is no broadcast device, tell the caller not to go + * into deep idle. */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return 0; + if (!tick_broadcast_device.evtdev) + return -EBUSY; - /* - * We are called with preemtion disabled from the depth of the - * idle code, so we can't be moved away. - */ - td = this_cpu_ptr(&tick_cpu_device); - dev = td->evtdev; - - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we do not add + * the CPU to the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + goto out; + + /* + * If the broadcast device is in periodic mode, we + * return. + */ + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { + /* If it is a hrtimer based broadcast, return busy */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) + ret = -EBUSY; + goto out; + } + if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); + + /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); + /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be - * woken by the IPI right away. + * woken by the IPI right away; we return + * busy, so the CPU does not try to go deep + * idle. */ - if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && - dev->next_event.tv64 < bc->next_event.tv64) + if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { + ret = -EBUSY; + } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + /* + * In case of hrtimer broadcasts the + * programming might have moved the + * timer to this cpu. If yes, remove + * us from the broadcast mask and + * return busy. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) { + cpumask_clear_cpu(cpu, + tick_broadcast_oneshot_mask); + } + } } - /* - * If the current CPU owns the hrtimer broadcast - * mechanism, it cannot go deep idle and we remove the - * CPU from the broadcast mask. We don't have to go - * through the EXIT path as the local timer is not - * shutdown. - */ - ret = broadcast_needs_cpu(bc, cpu); - if (ret) - cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); @@ -796,7 +839,6 @@ out: raw_spin_unlock(&tick_broadcast_lock); return ret; } -EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu @@ -938,6 +980,16 @@ bool tick_broadcast_oneshot_available(void) return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } +#else +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return -EBUSY; + + return 0; +} #endif void __init tick_broadcast_init(void) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 76446cb5dfe1..f8bf47571dda 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -343,6 +343,28 @@ out_bc: tick_install_broadcast_device(newdev); } +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. + */ +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + + if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) + return 0; + + return __tick_broadcast_oneshot_control(state); +} +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); + #ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 42fdf4958bcc..a4a8d4e9baa1 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu); static inline void tick_cancel_sched_timer(int cpu) { } #endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int +__tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return -EBUSY; +} +#endif + #endif diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea..eb11011b5292 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! */ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f060716b02ae..74bde81601a9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index a87b43f49eb4..e2e12ad3186f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline |