diff options
author | Alexei Starovoitov <ast@kernel.org> | 2020-01-25 16:12:41 +0100 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2020-01-25 16:12:46 +0100 |
commit | e9f02a8027675e3957d463d7f8422d79fa90f2ba (patch) | |
tree | 79f53b2c1eeaef4807494ba3de30fb2192ff3edc | |
parent | libbpf: Fix realloc usage in bpf_core_find_cands (diff) | |
parent | selftest/bpf: Add test for allowed trampolines count (diff) | |
download | linux-e9f02a8027675e3957d463d7f8422d79fa90f2ba.tar.xz linux-e9f02a8027675e3957d463d7f8422d79fa90f2ba.zip |
Merge branch 'trampoline-fixes'
Jiri Olsa says:
====================
hi,
sending 2 fixes for kernel support for loading
trampoline programs in bcc/bpftrace and for allowing
unwinding through trampoline/dispatcher.
Original rfc post [1].
Speedup shown by perf bench output while running klockstat.py
on kprobes vs trampolines:
Without:
$ perf bench sched messaging -l 50000
...
Total time: 18.571 [sec]
With current kprobe tracing:
$ perf bench sched messaging -l 50000
...
Total time: 183.395 [sec]
With kfunc tracing:
$ perf bench sched messaging -l 50000
...
Total time: 39.773 [sec]
v4 changes:
- rebased on latest bpf-next/master
- removed image tree mutex and use trampoline_mutex instead
- checking directly for string pointer in patch 1 [Alexei]
- skipped helpers patches, as they are no longer needed [Alexei]
v3 changes:
- added ack from John Fastabend for patch 1
- moved is_bpf_image_address out of the is_bpf_text_address call [David]
v2 changes:
- make the unwind work for dispatcher as well
- added test for allowed trampolines count
- used raw tp pt_regs nest-arrays for trampoline helpers
thanks,
jirka
[1] https://lore.kernel.org/netdev/20191229143740.29143-1-jolsa@kernel.org/
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/linux/bpf.h | 12 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 16 | ||||
-rw-r--r-- | kernel/bpf/dispatcher.c | 4 | ||||
-rw-r--r-- | kernel/bpf/trampoline.c | 80 | ||||
-rw-r--r-- | kernel/extable.c | 7 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/trampoline_count.c | 112 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_trampoline_count.c | 21 |
7 files changed, 239 insertions, 13 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9687861fd7e..8e9ad3943cd9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .func = &name##func, \ @@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_PTR(name) (&name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); +struct bpf_image { + struct latch_tree_node tnode; + unsigned char data[]; +}; +#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) +bool is_bpf_image_address(unsigned long address); +void *bpf_image_alloc(void); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to) {} +static inline bool is_bpf_image_address(unsigned long address) +{ + return false; +} #endif struct bpf_func_info_aux { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 32963b6d5a9c..b7c1660fb594 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) } } +static bool is_string_ptr(struct btf *btf, const struct btf_type *t) +{ + /* t comes in already as a pointer */ + t = btf_type_by_id(btf, t->type); + + /* allow const */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST) + t = btf_type_by_id(btf, t->type); + + /* char, signed char, unsigned char */ + return btf_type_is_int(t) && t->size == 1; +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, 
const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, */ return true; + if (is_string_ptr(btf, t)) + return true; + /* this is a pointer to another type */ info->reg_type = PTR_TO_BTF_ID; diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 204ee61a3904..b3e5b214fed8 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) noff = 0; } else { old = d->image + d->image_off; - noff = d->image_off ^ (PAGE_SIZE / 2); + noff = d->image_off ^ (BPF_IMAGE_SIZE / 2); } new = d->num_progs ? d->image + noff : NULL; @@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_jit_alloc_exec_page(); + d->image = bpf_image_alloc(); if (!d->image) goto out; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index eb64c245052b..6b264a92064b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/filter.h> #include <linux/ftrace.h> +#include <linux/rbtree_latch.h> /* dummy _ops. The verifier will operate on target program's ops. 
*/ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = { #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; +static struct latch_tree_root image_tree __cacheline_aligned; -/* serializes access to trampoline_table */ +/* serializes access to trampoline_table and image_tree */ static DEFINE_MUTEX(trampoline_mutex); -void *bpf_jit_alloc_exec_page(void) +static void *bpf_jit_alloc_exec_page(void) { void *image; @@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void) return image; } +static __always_inline bool image_tree_less(struct latch_tree_node *a, + struct latch_tree_node *b) +{ + struct bpf_image *ia = container_of(a, struct bpf_image, tnode); + struct bpf_image *ib = container_of(b, struct bpf_image, tnode); + + return ia < ib; +} + +static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n) +{ + void *image = container_of(n, struct bpf_image, tnode); + + if (addr < image) + return -1; + if (addr >= image + PAGE_SIZE) + return 1; + + return 0; +} + +static const struct latch_tree_ops image_tree_ops = { + .less = image_tree_less, + .comp = image_tree_comp, +}; + +static void *__bpf_image_alloc(bool lock) +{ + struct bpf_image *image; + + image = bpf_jit_alloc_exec_page(); + if (!image) + return NULL; + + if (lock) + mutex_lock(&trampoline_mutex); + latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops); + if (lock) + mutex_unlock(&trampoline_mutex); + return image->data; +} + +void *bpf_image_alloc(void) +{ + return __bpf_image_alloc(true); +} + +bool is_bpf_image_address(unsigned long addr) +{ + bool ret; + + rcu_read_lock(); + ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL; + rcu_read_unlock(); + + return ret; +} + struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { struct bpf_trampoline *tr; @@ -56,7 +116,7 @@ struct bpf_trampoline 
*bpf_trampoline_lookup(u64 key) goto out; /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ - image = bpf_jit_alloc_exec_page(); + image = __bpf_image_alloc(false); if (!image) { kfree(tr); tr = NULL; @@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) } /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into PAGE_SIZE / 2 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ #define BPF_MAX_TRAMP_PROGS 40 static int bpf_trampoline_update(struct bpf_trampoline *tr) { - void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; - void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2; + void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2; struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; @@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) */ synchronize_rcu_tasks(); - err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, + err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2, &tr->func.model, flags, fentry, fentry_cnt, fexit, fexit_cnt, @@ -284,6 +344,8 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { + struct bpf_image *image; + if (!tr) return; mutex_lock(&trampoline_mutex); @@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) goto out; if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; + image = container_of(tr->image, struct bpf_image, data); + latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops); /* wait for tasks to get out of trampoline before freeing it */ synchronize_rcu_tasks(); - bpf_jit_free_exec(tr->image); + bpf_jit_free_exec(image); hlist_del(&tr->hlist); kfree(tr); out: diff 
--git a/kernel/extable.c b/kernel/extable.c index f6920a11e28a..a0024f27d3a1 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr) * triggers a stack trace, or a WARN() that happens during * coming back from idle, or cpu on or offlining. * - * is_module_text_address() as well as the kprobe slots - * and is_bpf_text_address() require RCU to be watching. + * is_module_text_address() as well as the kprobe slots, + * is_bpf_text_address() and is_bpf_image_address require + * RCU to be watching. */ no_rcu = !rcu_is_watching(); @@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr) goto out; if (is_bpf_text_address(addr)) goto out; + if (is_bpf_image_address(addr)) + goto out; ret = 0; out: if (no_rcu) diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c new file mode 100644 index 000000000000..1235f3d1cc50 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include <sched.h> +#include <sys/prctl.h> +#include <test_progs.h> + +#define MAX_TRAMP_PROGS 40 + +struct inst { + struct bpf_object *obj; + struct bpf_link *link_fentry; + struct bpf_link *link_fexit; +}; + +static int test_task_rename(void) +{ + int fd, duration = 0, err; + char buf[] = "test_overhead"; + + fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(fd < 0, "open /proc", "err %d", errno)) + return -1; + err = write(fd, buf, sizeof(buf)); + if (err < 0) { + CHECK(err < 0, "task rename", "err %d", errno); + close(fd); + return -1; + } + close(fd); + return 0; +} + +static struct bpf_link *load(struct bpf_object *obj, const char *name) +{ + struct bpf_program *prog; + int duration = 0; + + prog = bpf_object__find_program_by_title(obj, name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) + return ERR_PTR(-EINVAL); + return 
bpf_program__attach_trace(prog); +} + +void test_trampoline_count(void) +{ + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; + const char *object = "test_trampoline_count.o"; + struct inst inst[MAX_TRAMP_PROGS] = { 0 }; + int err, i = 0, duration = 0; + struct bpf_object *obj; + struct bpf_link *link; + char comm[16] = {}; + + /* attach 'allowed' 40 trampoline programs */ + for (i = 0; i < MAX_TRAMP_PROGS; i++) { + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + inst[i].obj = obj; + + if (rand() % 2) { + link = load(obj, fentry_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fentry = link; + } else { + link = load(obj, fexit_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fexit = link; + } + } + + /* and try 1 extra.. 
*/ + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup_extra; + + /* ..that needs to fail */ + link = load(obj, fentry_name); + if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) { + bpf_link__destroy(link); + goto cleanup_extra; + } + + /* with E2BIG error */ + CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link)); + + /* and finaly execute the probe */ + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) + goto cleanup_extra; + CHECK_FAIL(test_task_rename()); + CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L)); + +cleanup_extra: + bpf_object__close(obj); +cleanup: + while (--i) { + bpf_link__destroy(inst[i].link_fentry); + bpf_link__destroy(inst[i].link_fexit); + bpf_object__close(inst[i].obj); + } +} diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c new file mode 100644 index 000000000000..e51e6e3a81c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdbool.h> +#include <stddef.h> +#include <linux/bpf.h> +#include "bpf_trace_helpers.h" + +struct task_struct; + +SEC("fentry/__set_task_comm") +int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +SEC("fexit/__set_task_comm") +int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; |