summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/dispatcher.c28
-rw-r--r--kernel/bpf/percpu_freelist.c23
-rw-r--r--kernel/bpf/verifier.c14
-rw-r--r--kernel/events/core.c25
-rw-r--r--kernel/rseq.c19
-rw-r--r--kernel/sched/core.c52
6 files changed, 99 insertions, 62 deletions
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 04f0a045dcaa..c19719f48ce0 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -4,7 +4,7 @@
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
-#include <linux/init.h>
+#include <linux/static_call.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
@@ -91,11 +91,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n
return -ENOTSUPP;
}
-int __weak __init bpf_arch_init_dispatcher_early(void *ip)
-{
- return -ENOTSUPP;
-}
-
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
{
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
@@ -110,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
- void *old, *new, *tmp;
- u32 noff;
- int err;
-
- if (!prev_num_progs) {
- old = NULL;
- noff = 0;
- } else {
- old = d->image + d->image_off;
+ void *new, *tmp;
+ u32 noff = 0;
+
+ if (prev_num_progs)
noff = d->image_off ^ (PAGE_SIZE / 2);
- }
new = d->num_progs ? d->image + noff : NULL;
tmp = d->num_progs ? d->rw_image + noff : NULL;
@@ -134,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
return;
}
- err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new);
- if (err || !new)
- return;
+ __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func);
- d->image_off = noff;
+ if (new)
+ d->image_off = noff;
}
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index b6e7f5c5b9ab..034cf87b54e9 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -100,22 +100,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
u32 nr_elems)
{
struct pcpu_freelist_head *head;
- int i, cpu, pcpu_entries;
+ unsigned int cpu, cpu_idx, i, j, n, m;
- pcpu_entries = nr_elems / num_possible_cpus() + 1;
- i = 0;
+ n = nr_elems / num_possible_cpus();
+ m = nr_elems % num_possible_cpus();
+ cpu_idx = 0;
for_each_possible_cpu(cpu) {
-again:
head = per_cpu_ptr(s->freelist, cpu);
- /* No locking required as this is not visible yet. */
- pcpu_freelist_push_node(head, buf);
- i++;
- buf += elem_size;
- if (i == nr_elems)
- break;
- if (i % pcpu_entries)
- goto again;
+ j = n + (cpu_idx < m ? 1 : 0);
+ for (i = 0; i < j; i++) {
+ /* No locking required as this is not visible yet. */
+ pcpu_freelist_push_node(head, buf);
+ buf += elem_size;
+ }
+ cpu_idx++;
}
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 225666307bba..264b3dc714cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6745,11 +6745,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* Transfer references to the callee */
err = copy_reference_state(callee, caller);
if (err)
- return err;
+ goto err_out;
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err)
- return err;
+ goto err_out;
clear_caller_saved_regs(env, caller->regs);
@@ -6766,6 +6766,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
print_verifier_state(env, callee, true);
}
return 0;
+
+err_out:
+ free_func_state(callee);
+ state->frame[state->curframe + 1] = NULL;
+ return err;
}
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
@@ -6979,8 +6984,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return -EINVAL;
}
- state->curframe--;
- caller = state->frame[state->curframe];
+ caller = state->frame[state->curframe - 1];
if (callee->in_callback_fn) {
/* enforce R0 return value range [0, 1]. */
struct tnum range = callee->callback_ret_range;
@@ -7019,7 +7023,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
}
/* clear everything in the callee */
free_func_state(callee);
- state->frame[state->curframe + 1] = NULL;
+ state->frame[state->curframe--] = NULL;
return 0;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ec3717003d5..884871427a94 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9306,14 +9306,27 @@ static int __perf_event_overflow(struct perf_event *event,
}
if (event->attr.sigtrap) {
- /*
- * Should not be able to return to user space without processing
- * pending_sigtrap (kernel events can overflow multiple times).
- */
- WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+ unsigned int pending_id = 1;
+
+ if (regs)
+ pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
if (!event->pending_sigtrap) {
- event->pending_sigtrap = 1;
+ event->pending_sigtrap = pending_id;
local_inc(&event->ctx->nr_pending);
+ } else if (event->attr.exclude_kernel) {
+ /*
+ * Should not be able to return to user space without
+ * consuming pending_sigtrap; with exceptions:
+ *
+ * 1. Where !exclude_kernel, events can overflow again
+ * in the kernel without returning to user space.
+ *
+ * 2. Events that can overflow again before the IRQ-
+ * work without user space progress (e.g. hrtimer).
+ * To approximate progress (with false negatives),
+ * check 32-bit hash of the current IP.
+ */
+ WARN_ON_ONCE(event->pending_sigtrap != pending_id);
}
event->pending_addr = data->addr;
irq_work_queue(&event->pending_irq);
diff --git a/kernel/rseq.c b/kernel/rseq.c
index bda8175f8f99..d38ab944105d 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -171,12 +171,27 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
return 0;
}
+static bool rseq_warn_flags(const char *str, u32 flags)
+{
+ u32 test_flags;
+
+ if (!flags)
+ return false;
+ test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
+ test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
+ return true;
+}
+
static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
{
u32 flags, event_mask;
int ret;
- if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags)
+ if (rseq_warn_flags("rseq_cs", cs_flags))
return -EINVAL;
/* Get thread flags. */
@@ -184,7 +199,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
if (ret)
return ret;
- if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags)
+ if (rseq_warn_flags("rseq", flags))
return -EINVAL;
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cb2aa2b54c7a..daff72f00385 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4200,6 +4200,40 @@ out:
return success;
}
+static bool __task_needs_rq_lock(struct task_struct *p)
+{
+ unsigned int state = READ_ONCE(p->__state);
+
+ /*
+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
+ * the task is blocked. Make sure to check @state since ttwu() can drop
+ * locks at the end, see ttwu_queue_wakelist().
+ */
+ if (state == TASK_RUNNING || state == TASK_WAKING)
+ return true;
+
+ /*
+ * Ensure we load p->on_rq after p->__state, otherwise it would be
+ * possible to, falsely, observe p->on_rq == 0.
+ *
+ * See try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ if (p->on_rq)
+ return true;
+
+#ifdef CONFIG_SMP
+ /*
+ * Ensure the task has finished __schedule() and will not be referenced
+ * anymore. Again, see try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
+#endif
+
+ return false;
+}
+
/**
* task_call_func - Invoke a function on task in fixed state
* @p: Process for which the function is to be invoked, can be @current.
@@ -4217,28 +4251,12 @@ out:
int task_call_func(struct task_struct *p, task_call_f func, void *arg)
{
struct rq *rq = NULL;
- unsigned int state;
struct rq_flags rf;
int ret;
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
- state = READ_ONCE(p->__state);
-
- /*
- * Ensure we load p->on_rq after p->__state, otherwise it would be
- * possible to, falsely, observe p->on_rq == 0.
- *
- * See try_to_wake_up() for a longer comment.
- */
- smp_rmb();
-
- /*
- * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
- * the task is blocked. Make sure to check @state since ttwu() can drop
- * locks at the end, see ttwu_queue_wakelist().
- */
- if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq)
+ if (__task_needs_rq_lock(p))
rq = __task_rq_lock(p, &rf);
/*