author     Jakub Kicinski <kuba@kernel.org>  2024-06-27 21:12:07 +0200
committer  Jakub Kicinski <kuba@kernel.org>  2024-06-27 21:14:11 +0200
commit     193b9b200224dab4eaec407eb2955303a521f5b6
tree       95366182cc2871667a60613a921f7bfae0b9e2c7 /kernel
parent     Merge branch 'series-to-deliver-ethernet-for-stm32mp25'
parent     Merge tag 'wireless-2024-06-27' of git://git.kernel.org/pub/scm/linux/kernel/...
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.

No conflicts.

Adjacent changes:
  e3f02f32a050 ("ionic: fix kernel panic due to multi-buffer handling")
  d9c04209990b ("ionic: Mark error paths in the data path as unlikely")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
 kernel/bpf/arena.c    | 16
 kernel/bpf/ringbuf.c  | 31
 kernel/bpf/verifier.c | 61
 kernel/workqueue.c    | 51
 4 files changed, 131 insertions(+), 28 deletions(-)
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 583ee4fe48ef..e52b3ad231b9 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
struct vma_list {
struct vm_area_struct *vma;
struct list_head head;
+ atomic_t mmap_count;
};
static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
vml = kmalloc(sizeof(*vml), GFP_KERNEL);
if (!vml)
return -ENOMEM;
+ atomic_set(&vml->mmap_count, 1);
vma->vm_private_data = vml;
vml->vma = vma;
list_add(&vml->head, &arena->vma_list);
return 0;
}
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+ struct vma_list *vml = vma->vm_private_data;
+
+ atomic_inc(&vml->mmap_count);
+}
+
static void arena_vm_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
- struct vma_list *vml;
+ struct vma_list *vml = vma->vm_private_data;
+ if (!atomic_dec_and_test(&vml->mmap_count))
+ return;
guard(mutex)(&arena->lock);
- vml = vma->vm_private_data;
+ /* update linked list under lock */
list_del(&vml->head);
vma->vm_private_data = NULL;
kfree(vml);
@@ -287,6 +298,7 @@ out:
}
static const struct vm_operations_struct arena_vm_ops = {
+ .open = arena_vm_open,
.close = arena_vm_close,
.fault = arena_vm_fault,
};
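
The .open handler matters because the kernel duplicates VMAs behind the driver's back: fork(), mremap(), and a partial munmap() all clone or split a VMA and call vm_ops->open on the copy, while both copies keep pointing at the same vm_private_data. Without the reference count, closing either VMA would free the shared vma_list entry and leave the survivor with a dangling pointer. Below is a minimal userspace sketch of the same pattern using C11 atomics; struct shared_priv and the priv_* helpers are hypothetical stand-ins, not kernel API.

#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

/* Hypothetical stand-in for the shared per-mapping bookkeeping. */
struct shared_priv {
	atomic_int mmap_count;
};

static struct shared_priv *priv_create(void)
{
	struct shared_priv *p = malloc(sizeof(*p));

	if (p)
		atomic_init(&p->mmap_count, 1);	/* the initial mmap() */
	return p;
}

/* VMA duplicated (fork, split, mremap): take another reference. */
static void priv_open(struct shared_priv *p)
{
	atomic_fetch_add(&p->mmap_count, 1);
}

/* VMA going away: free only when the last reference drops. */
static void priv_close(struct shared_priv *p)
{
	if (atomic_fetch_sub(&p->mmap_count, 1) == 1)
		free(p);
}

int main(void)
{
	struct shared_priv *p = priv_create();

	assert(p);
	priv_open(p);	/* a second VMA now shares p */
	priv_close(p);	/* first VMA unmapped: p must survive */
	priv_close(p);	/* last VMA unmapped: p freed exactly once */
	return 0;
}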
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring that the span from the oldest not-yet-committed
+ * record to the newest record does not exceed (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
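
The new pending_pos check exists because consumer_pos cannot be trusted: its page can be mapped writable by the user-space consumer, so a buggy or malicious consumer can advance it past a record that a producer has reserved but not yet committed. The producer-vs-consumer check alone would then let a later reservation wrap the ring and scribble over the still-busy record. pending_pos tracks the oldest uncommitted record, and the second check bounds the producer against it. The following is a small userspace model of the two checks, assuming a 4 KiB ring; can_reserve is a hypothetical helper mirroring the arithmetic, not kernel code.

#include <assert.h>

#define RB_SIZE 4096UL			/* power of two, as in the kernel */
#define RB_MASK (RB_SIZE - 1)

/* Mirrors the two reservation checks; positions grow without wrapping. */
static int can_reserve(unsigned long cons_pos, unsigned long pend_pos,
		       unsigned long prod_pos, unsigned long len)
{
	unsigned long new_prod_pos = prod_pos + len;

	return new_prod_pos - cons_pos <= RB_MASK &&	/* old check */
	       new_prod_pos - pend_pos <= RB_MASK;	/* new check */
}

int main(void)
{
	/*
	 * A record reserved at 512 is still uncommitted (pend_pos lags),
	 * but a misbehaving consumer has pushed cons_pos all the way to
	 * prod_pos, claiming everything was consumed.
	 */
	unsigned long pend_pos = 512, cons_pos = 4096, prod_pos = 4096;

	/* The old producer-vs-consumer check alone would admit this... */
	assert(prod_pos + 512 - cons_pos <= RB_MASK);
	/* ...but it would wrap onto the busy record; the new check refuses. */
	assert(!can_reserve(cons_pos, pend_pos, prod_pos, 512));
	return 0;
}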
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c955f98c9a2b..fcecaba8668d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6236,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
}
reg->u32_min_value = 0;
reg->u32_max_value = U32_MAX;
+ reg->var_off = tnum_subreg(tnum_unknown);
}
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6280,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
reg->s32_max_value = s32_max;
reg->u32_min_value = (u32)s32_min;
reg->u32_max_value = (u32)s32_max;
+ reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
return;
}
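
Both verifier hunks fix the same class of bug: after narrowing the 32-bit bounds, var_off (the per-bit known-bits tracking) must be re-derived as well, or the register state carries contradictory information that later bound arithmetic can exploit. The real helpers live in kernel/bpf/tnum.c; what follows is a hedged userspace re-sketch of the tnum_range() and tnum_subreg() behavior, to show what var_off ends up holding.

#include <assert.h>
#include <stdint.h>

/* A tnum: bits set in 'mask' are unknown, all other bits equal 'value'. */
struct tnum { uint64_t value; uint64_t mask; };

/* Re-sketch of tnum_range(): the tightest tnum covering [min, max]. */
static struct tnum tnum_range(uint64_t min, uint64_t max)
{
	uint64_t chi = min ^ max, delta;
	int bits = chi ? 64 - __builtin_clzll(chi) : 0;

	if (bits > 63)			/* bit 63 differs: nothing is known */
		return (struct tnum){ 0, ~0ULL };
	delta = (1ULL << bits) - 1;	/* the low 'bits' bits may vary */
	return (struct tnum){ min & ~delta, delta };
}

/* Re-sketch of tnum_subreg(): keep knowledge about the low 32 bits only. */
static struct tnum tnum_subreg(struct tnum a)
{
	return (struct tnum){ (uint32_t)a.value, (uint32_t)a.mask };
}

int main(void)
{
	/* [16, 23]: the value is known to look like 10xxx in binary */
	struct tnum t = tnum_range(16, 23);

	assert(t.value == 16 && t.mask == 7);

	/* A negative s32 min sign-extends past bit 63, so after the subreg
	 * cast all 32 low bits are unknown -- matching what the
	 * coerce_subreg_to_size_sx() fix stores for a range like [-128, 127].
	 */
	t = tnum_subreg(tnum_range((uint64_t)(int64_t)-128, 127));
	assert(t.value == 0 && t.mask == 0xffffffffULL);
	return 0;
}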
@@ -12719,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
return res < a;
}
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+ /* Do the add in u16, where overflow is well-defined */
+ s16 res = (s16)((u16)a + (u16)b);
+
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
+
static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
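
signed_add16_overflows() follows the existing 32/64-bit helpers: do the addition on the unsigned type so wraparound is well-defined, then detect overflow by the result moving against the sign of b. The 16-bit variant is needed because insn->off is a s16 field. A standalone sanity check in hosted C, assuming the usual wrapping conversion back to int16_t:

#include <assert.h>
#include <stdint.h>

/* Same logic as signed_add16_overflows() above, in hosted C types. */
static int s16_add_overflows(int16_t a, int16_t b)
{
	int16_t res = (int16_t)((uint16_t)a + (uint16_t)b);

	return b < 0 ? res > a : res < a;
}

int main(void)
{
	assert(!s16_add_overflows(100, 200));		  /* 300 fits */
	assert(s16_add_overflows(INT16_MAX, 1));	  /* wraps negative */
	assert(s16_add_overflows(INT16_MIN, -1));	  /* wraps positive */
	assert(!s16_add_overflows(INT16_MIN, INT16_MAX)); /* -1 fits */
	return 0;
}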
@@ -17448,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
update_loop_entry(cur, &sl->state);
goto hit;
}
- goto skip_inf_loop_check;
}
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18730,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (signed_add32_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->imm += delta;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (signed_add16_overflows(insn->off, delta))
+ return -ERANGE;
+ insn->off += delta;
+ }
+ }
+ return 0;
+}
+
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
u32 off, u32 cnt)
{
@@ -20004,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
stack_depth_extra = 8;
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
cnt = 4;
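
The asymmetry comes from where the patched-in conditional jump lands. A may_goto at index i expands to four instructions with the JEQ at i + 1, and bpf_patch_insn_data() rewrites offsets only in instructions outside the patch. A forward target (off >= 0) is pushed three slots down by the expansion; a backward target stays put, but the JEQ still sits one slot later than the original insn. A small model verifying that arithmetic (jeq_target() is a hypothetical helper, not kernel code):

#include <assert.h>

/*
 * Model: may_goto at index i expands to four insns; the JEQ that takes
 * the exit branch sits at index i + 1. jeq_target() returns the absolute
 * index its rewritten offset reaches in the patched program.
 */
static long jeq_target(long i, int orig_off)
{
	int new_off = orig_off >= 0 ? orig_off + 2 : orig_off - 1;

	return (i + 1) + 1 + new_off;	/* insn after the JEQ, plus offset */
}

int main(void)
{
	long i = 10;

	/* forward target: the original i + 1 + off moved down by 3 insns */
	assert(jeq_target(i, 5) == i + 1 + 5 + 3);
	/* backward target: insns before the patch site did not move */
	assert(jeq_target(i, -4) == i + 1 + (-4));
	return 0;
}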
@@ -20546,6 +20594,13 @@ next_insn:
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
+ /*
+ * If may_goto is a first insn of a prog there could be a jmp
+ * insn that points to it, hence adjust all such jmps to point
+ * to insn after BPF_ST that inits may_goto count.
+ * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
}
/* Since poke tab is now finalized, publish aux to tracker. */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 003474c9a77d..3fbaecfc88c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,6 +125,7 @@ enum wq_internal_consts {
HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 32,
+ WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
};
/*
@@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(struct worker *worker)
complete(detach_completion);
}
+static int format_worker_id(char *buf, size_t size, struct worker *worker,
+ struct worker_pool *pool)
+{
+ if (worker->rescue_wq)
+ return scnprintf(buf, size, "kworker/R-%s",
+ worker->rescue_wq->name);
+
+ if (pool) {
+ if (pool->cpu >= 0)
+ return scnprintf(buf, size, "kworker/%d:%d%s",
+ pool->cpu, worker->id,
+ pool->attrs->nice < 0 ? "H" : "");
+ else
+ return scnprintf(buf, size, "kworker/u%d:%d",
+ pool->id, worker->id);
+ } else {
+ return scnprintf(buf, size, "kworker/dying");
+ }
+}
+
/**
* create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to
@@ -2758,7 +2779,6 @@ static struct worker *create_worker(struct worker_pool *pool)
{
struct worker *worker;
int id;
- char id_buf[23];
/* ID is needed to determine kthread name */
id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
@@ -2777,17 +2797,14 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->id = id;
if (!(pool->flags & POOL_BH)) {
- if (pool->cpu >= 0)
- snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
- pool->attrs->nice < 0 ? "H" : "");
- else
- snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
+ char id_buf[WORKER_ID_LEN];
+ format_worker_id(id_buf, sizeof(id_buf), worker, pool);
worker->task = kthread_create_on_node(worker_thread, worker,
- pool->node, "kworker/%s", id_buf);
+ pool->node, "%s", id_buf);
if (IS_ERR(worker->task)) {
if (PTR_ERR(worker->task) == -EINTR) {
- pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
+ pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n",
id_buf);
} else {
pr_err_once("workqueue: Failed to create a worker thread: %pe",
@@ -3350,7 +3367,6 @@ woke_up:
raw_spin_unlock_irq(&pool->lock);
set_pf_worker(false);
- set_task_comm(worker->task, "kworker/dying");
ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
WARN_ON_ONCE(!list_empty(&worker->entry));
@@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
static int init_rescuer(struct workqueue_struct *wq)
{
struct worker *rescuer;
+ char id_buf[WORKER_ID_LEN];
int ret;
if (!(wq->flags & WQ_MEM_RECLAIM))
@@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue_struct *wq)
}
rescuer->rescue_wq = wq;
- rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name);
+ format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL);
+
+ rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf);
if (IS_ERR(rescuer->task)) {
ret = PTR_ERR(rescuer->task);
pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
@@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void)
/* used to show worker information through /proc/PID/{comm,stat,status} */
void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
{
- int off;
-
- /* always show the actual comm */
- off = strscpy(buf, task->comm, size);
- if (off < 0)
- return;
-
/* stabilize PF_WQ_WORKER and worker pool association */
mutex_lock(&wq_pool_attach_mutex);
if (task->flags & PF_WQ_WORKER) {
struct worker *worker = kthread_data(task);
struct worker_pool *pool = worker->pool;
+ int off;
+
+ off = format_worker_id(buf, size, worker, pool);
if (pool) {
raw_spin_lock_irq(&pool->lock);
@@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
}
raw_spin_unlock_irq(&pool->lock);
}
+ } else {
+ strscpy(buf, task->comm, size);
}
mutex_unlock(&wq_pool_attach_mutex);
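
format_worker_id() makes the naming scheme a single source of truth: "kworker/<cpu>:<id>[H]" for CPU-bound pools, "kworker/u<pool>:<id>" for unbound pools, "kworker/R-<wq>" for rescuers, and "kworker/dying" for exiting workers, which wq_worker_comm() now reports directly instead of rewriting the task comm. A userspace check of the formats and of the WORKER_ID_LEN sizing; the workqueue name "writeback" is just an illustrative value.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define WQ_NAME_LEN	32
#define WORKER_ID_LEN	(10 + WQ_NAME_LEN)	/* "kworker/R-" + WQ_NAME_LEN */

int main(void)
{
	char buf[WORKER_ID_LEN];

	/* bound pool on CPU 3, worker 0, high-priority ("H" suffix) */
	snprintf(buf, sizeof(buf), "kworker/%d:%d%s", 3, 0, "H");
	assert(strcmp(buf, "kworker/3:0H") == 0);

	/* unbound pool 8, worker 1 */
	snprintf(buf, sizeof(buf), "kworker/u%d:%d", 8, 1);
	assert(strcmp(buf, "kworker/u8:1") == 0);

	/* rescuer: "kworker/R-" is 10 chars, so the buffer fits any
	 * workqueue name of up to WQ_NAME_LEN - 1 chars plus the NUL */
	snprintf(buf, sizeof(buf), "kworker/R-%s", "writeback");
	assert(strcmp(buf, "kworker/R-writeback") == 0);
	return 0;
}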