summary | refs | log | tree | commit | diff | stats
path: root/kernel/events
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de>, 2020-11-04 18:14:52 +0100
committer: Thomas Gleixner <tglx@linutronix.de>, 2020-11-04 18:14:52 +0100
commit: 01be83eea08d6d9f9209843e2e084505fba4053f (patch)
tree: 95b456e1ac40399fd5f55b57ae0936643bea1836 /kernel/events
parent: entry: Fixup irqentry_enter() comment (diff)
parent: entry: Fix the incorrect ordering of lockdep and RCU check (diff)
download: linux-01be83eea08d6d9f9209843e2e084505fba4053f.tar.xz
download: linux-01be83eea08d6d9f9209843e2e084505fba4053f.zip
Merge branch 'core/urgent' into core/entry
Pick up the entry fix before further modifications.
Diffstat (limited to 'kernel/events')
-rw-r--r-- kernel/events/core.c 128
-rw-r--r-- kernel/events/uprobes.c 4
2 files changed, 80 insertions, 52 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5bfe8e3c6e44..da467e1dd49a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -99,7 +99,7 @@ static void remote_function(void *data)
* retry due to any failures in smp_call_function_single(), such as if the
* task_cpu() goes offline concurrently.
*
- * returns @func return value or -ESRCH when the process isn't running
+ * returns @func return value or -ESRCH or -ENXIO when the process isn't running
*/
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
@@ -115,7 +115,8 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
for (;;) {
ret = smp_call_function_single(task_cpu(p), remote_function,
&data, 1);
- ret = !ret ? data.ret : -EAGAIN;
+ if (!ret)
+ ret = data.ret;
if (ret != -EAGAIN)
break;
@@ -382,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -2133,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
}
+/*
+ * Events that have PERF_EV_CAP_SIBLING require being part of a group and
+ * cannot exist on their own, schedule them out and move them into the ERROR
+ * state. Also see _perf_event_enable(), it will not be able to recover
+ * this ERROR state.
+ */
+static inline void perf_remove_sibling_event(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+
+ event_sched_out(event, cpuctx, ctx);
+ perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+}
+
static void perf_group_detach(struct perf_event *event)
{
+ struct perf_event *leader = event->group_leader;
struct perf_event *sibling, *tmp;
struct perf_event_context *ctx = event->ctx;
@@ -2153,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
/*
* If this is a sibling, remove it from its group.
*/
- if (event->group_leader != event) {
+ if (leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
goto out;
@@ -2166,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
+ if (sibling->event_caps & PERF_EV_CAP_SIBLING)
+ perf_remove_sibling_event(sibling);
+
sibling->group_leader = sibling;
list_del_init(&sibling->sibling_list);
@@ -2183,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
}
out:
- perf_event__header_size(event->group_leader);
-
- for_each_sibling_event(tmp, event->group_leader)
+ for_each_sibling_event(tmp, leader)
perf_event__header_size(tmp);
+
+ perf_event__header_size(leader);
}
static bool is_orphaned_event(struct perf_event *event)
@@ -2979,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
raw_spin_lock_irq(&ctx->lock);
if (event->state >= PERF_EVENT_STATE_INACTIVE ||
event->state < PERF_EVENT_STATE_ERROR) {
+out:
raw_spin_unlock_irq(&ctx->lock);
return;
}
@@ -2990,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
* has gone back into error state, as distinct from the task having
* been scheduled away before the cross-call arrived.
*/
- if (event->state == PERF_EVENT_STATE_ERROR)
+ if (event->state == PERF_EVENT_STATE_ERROR) {
+ /*
+ * Detached SIBLING events cannot leave ERROR state.
+ */
+ if (event->event_caps & PERF_EV_CAP_SIBLING &&
+ event->group_leader == event)
+ goto out;
+
event->state = PERF_EVENT_STATE_OFF;
+ }
raw_spin_unlock_irq(&ctx->lock);
event_function_call(event, __perf_event_enable, NULL);
@@ -3356,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;
+ struct pmu *pmu;
if (likely(!ctx))
return;
+ pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
if (!cpuctx->task_ctx)
return;
@@ -3389,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
- struct pmu *pmu = ctx->pmu;
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
+ perf_pmu_disable(pmu);
+
+ if (cpuctx->sched_cb_usage && pmu->sched_task)
+ pmu->sched_task(ctx, false);
+
/*
* PMU specific parts of task perf context can require
* additional synchronization. As an example of such
@@ -3405,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
else
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+ perf_pmu_enable(pmu);
+
/*
* RCU_INIT_POINTER here is safe because we've not
* modified the ctx and the above modification of
@@ -3427,21 +3463,22 @@ unlock:
if (do_switch) {
raw_spin_lock(&ctx->lock);
+ perf_pmu_disable(pmu);
+
+ if (cpuctx->sched_cb_usage && pmu->sched_task)
+ pmu->sched_task(ctx, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+
+ perf_pmu_enable(pmu);
raw_spin_unlock(&ctx->lock);
}
}
-static DEFINE_PER_CPU(struct list_head, sched_cb_list);
-
void perf_sched_cb_dec(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- this_cpu_dec(perf_sched_cb_usages);
-
- if (!--cpuctx->sched_cb_usage)
- list_del(&cpuctx->sched_cb_entry);
+ --cpuctx->sched_cb_usage;
}
@@ -3449,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- if (!cpuctx->sched_cb_usage++)
- list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
-
- this_cpu_inc(perf_sched_cb_usages);
+ cpuctx->sched_cb_usage++;
}
/*
@@ -3463,30 +3497,22 @@ void perf_sched_cb_inc(struct pmu *pmu)
* PEBS requires this to provide PID/TID information. This requires we flush
* all queued PEBS records before we context switch to a new task.
*/
-static void perf_pmu_sched_task(struct task_struct *prev,
- struct task_struct *next,
- bool sched_in)
+static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
{
- struct perf_cpu_context *cpuctx;
struct pmu *pmu;
- if (prev == next)
- return;
-
- list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
- pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
+ pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
- if (WARN_ON_ONCE(!pmu->sched_task))
- continue;
+ if (WARN_ON_ONCE(!pmu->sched_task))
+ return;
- perf_ctx_lock(cpuctx, cpuctx->task_ctx);
- perf_pmu_disable(pmu);
+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ perf_pmu_disable(pmu);
- pmu->sched_task(cpuctx->task_ctx, sched_in);
+ pmu->sched_task(cpuctx->task_ctx, sched_in);
- perf_pmu_enable(pmu);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
- }
+ perf_pmu_enable(pmu);
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
static void perf_event_switch(struct task_struct *task,
@@ -3511,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;
- if (__this_cpu_read(perf_sched_cb_usages))
- perf_pmu_sched_task(task, next, false);
-
if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);
@@ -3745,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
+ struct pmu *pmu = ctx->pmu;
cpuctx = __get_cpu_context(ctx);
- if (cpuctx->task_ctx == ctx)
+ if (cpuctx->task_ctx == ctx) {
+ if (cpuctx->sched_cb_usage)
+ __perf_pmu_sched_task(cpuctx, true);
return;
+ }
perf_ctx_lock(cpuctx, ctx);
/*
@@ -3758,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!ctx->nr_events)
goto unlock;
- perf_pmu_disable(ctx->pmu);
+ perf_pmu_disable(pmu);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
@@ -3770,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
perf_event_sched_in(cpuctx, ctx, task);
- perf_pmu_enable(ctx->pmu);
+
+ if (cpuctx->sched_cb_usage && pmu->sched_task)
+ pmu->sched_task(cpuctx->task_ctx, true);
+
+ perf_pmu_enable(pmu);
unlock:
perf_ctx_unlock(cpuctx, ctx);
@@ -3813,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);
-
- if (__this_cpu_read(perf_sched_cb_usages))
- perf_pmu_sched_task(prev, task, true);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -5868,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
-
struct perf_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
+ bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5903,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
- atomic_dec(&rb->mmap_count);
+ if (atomic_dec_and_test(&rb->mmap_count))
+ detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
@@ -5912,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
- if (atomic_read(&rb->mmap_count))
+ if (!detach_rest)
goto out_put;
/*
@@ -10034,7 +10063,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
case IF_SRC_KERNELADDR:
case IF_SRC_KERNEL:
kernel = 1;
- /* fall through */
+ fallthrough;
case IF_SRC_FILEADDR:
case IF_SRC_FILE:
@@ -12828,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
- INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index edd0c985a939..bf9edd8d75be 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -205,7 +205,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
try_to_free_swap(old_page);
page_vma_mapped_walk_done(&pvmw);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
munlock_vma_page(old_page);
put_page(old_page);
@@ -1823,7 +1823,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
t->utask->dup_xol_addr = area->vaddr;
init_task_work(&t->utask->dup_xol_work, dup_xol_work);
- task_work_add(t, &t->utask->dup_xol_work, true);
+ task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
}
/*