diff options
-rw-r--r-- | Documentation/sysctl/net.txt | 12 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 20 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 3 | ||||
-rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 26 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 23 | ||||
-rw-r--r-- | include/linux/bpf.h | 4 | ||||
-rw-r--r-- | include/linux/filter.h | 112 | ||||
-rw-r--r-- | kernel/bpf/arraymap.c | 10 | ||||
-rw-r--r-- | kernel/bpf/core.c | 235 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 8 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 2 | ||||
-rw-r--r-- | kernel/bpf/stackmap.c | 2 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 2 | ||||
-rw-r--r-- | kernel/extable.c | 9 | ||||
-rw-r--r-- | kernel/kallsyms.c | 61 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 6 | ||||
-rw-r--r-- | net/Kconfig | 3 | ||||
-rw-r--r-- | net/core/filter.c | 18 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 7 |
19 files changed, 453 insertions, 110 deletions
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index b80fbd4e5575..2ebabc93014a 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,6 +54,18 @@ Values : 1 - enable JIT hardening for unprivileged users only 2 - enable JIT hardening for all users +bpf_jit_kallsyms +---------------- + +When Berkeley Packet Filter Just in Time compiler is enabled, then compiled +images are unknown addresses to the kernel, meaning they neither show up in +traces nor in /proc/kallsyms. This enables export of these addresses, which +can be used for debugging/tracing. If bpf_jit_harden is enabled, this feature +is disabled. +Values : + 0 - disable JIT kallsyms export (default value) + 1 - enable JIT kallsyms export for privileged users only + dev_weight -------------- diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b2fc97a2c56c..05d12104d270 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -813,11 +813,6 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. */ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; @@ -915,18 +910,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *prog) -{ - unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!prog->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(prog); -} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 73a5cf18fd84..c34166ef76fc 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -961,8 +961,6 @@ common_load: return 0; } -void bpf_jit_compile(struct bpf_prog *fp) { } - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; @@ -1066,6 +1064,7 @@ out: return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. */ void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 167b31b186c1..f1d0e62ec1dd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1263,14 +1263,6 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) } /* - * Classic BPF function stub. BPF programs will be converted into - * eBPF and then bpf_int_jit_compile() will be called. - */ -void bpf_jit_compile(struct bpf_prog *fp) -{ -} - -/* * Compile eBPF program "fp" */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) @@ -1347,21 +1339,3 @@ out: tmp : orig_fp); return fp; } - -/* - * Free eBPF program - */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bb660e53cbd6..18a62e208826 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1067,13 +1067,13 @@ common_load: ilen = prog - temp; if (ilen > BPF_MAX_INSN_SIZE) { - pr_err("bpf_jit_compile fatal insn size error\n"); + pr_err("bpf_jit: fatal insn size error\n"); return -EFAULT; } if (image) { if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpf_jit_compile fatal error\n"); + pr_err("bpf_jit: fatal error\n"); return -EFAULT; } memcpy(image + proglen, temp, ilen); @@ -1085,10 +1085,6 @@ common_load: return proglen; } -void bpf_jit_compile(struct bpf_prog *prog) -{ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; @@ -1184,18 +1180,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 57d60dc5b600..909fc033173a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -8,10 +8,12 @@ #define _LINUX_BPF_H 1 #include <uapi/linux/bpf.h> + #include <linux/workqueue.h> #include <linux/file.h> #include <linux/percpu.h> #include <linux/err.h> +#include <linux/rbtree_latch.h> struct perf_event; struct bpf_map; @@ -177,6 +179,8 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + struct latch_tree_node ksym_tnode; + struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/filter.h b/include/linux/filter.h index e4eb2546339a..0c1cc9143cb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -54,6 +54,12 @@ struct bpf_prog_aux; #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +/* As per nm, we expose JITed images as text (code) section for + * kallsyms. That way, tools like perf can find it to match + * addresses. + */ +#define BPF_SYM_ELF_TYPE 't' + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { set_memory_rw((unsigned long)fp, fp->pages); } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ + set_memory_rw((unsigned long)hdr, hdr->pages); +} #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { @@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ +} #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr = real_start & PAGE_MASK; + + return (void *)addr; +} + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { @@ -607,6 +631,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, @@ -616,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; +extern int bpf_jit_kallsyms; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); @@ -625,7 +651,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); -void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); @@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return fp->jited && bpf_jit_is_ebpf(); +} + static inline bool bpf_jit_blinding_enabled(void) { /* These are the prerequisites, should someone ever have the @@ -668,15 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } -#else -static inline void bpf_jit_compile(struct bpf_prog *fp) + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + /* There are a couple of corner cases where kallsyms should + * not be enabled f.e. on hardening. + */ + if (bpf_jit_harden) + return false; + if (!bpf_jit_kallsyms) + return false; + if (bpf_jit_kallsyms == 1) + return true; + + return false; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); +bool is_bpf_text_address(unsigned long addr); +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym); + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) { + const char *ret = __bpf_address_lookup(addr, size, off, sym); + + if (ret && modname) + *modname = NULL; + return ret; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp); +void bpf_prog_kallsyms_del(struct bpf_prog *fp); + +#else /* CONFIG_BPF_JIT */ + +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return false; } static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + return false; +} + +static inline const char * +__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + return NULL; +} + +static inline bool is_bpf_text_address(unsigned long addr) +{ + return false; +} + +static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *sym) +{ + return -ERANGE; +} + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} + +static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ +} #endif /* CONFIG_BPF_JIT */ #define BPF_ANC BIT(15) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3d55d95dcf49..6b6f41f0b211 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -269,7 +269,7 @@ static const struct bpf_map_ops array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list array_type __read_mostly = { +static struct bpf_map_type_list array_type __ro_after_init = { .ops = &array_ops, .type = BPF_MAP_TYPE_ARRAY, }; @@ -283,7 +283,7 @@ static const struct bpf_map_ops percpu_array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list percpu_array_type __read_mostly = { +static struct bpf_map_type_list percpu_array_type __ro_after_init = { .ops = &percpu_array_ops, .type = BPF_MAP_TYPE_PERCPU_ARRAY, }; @@ -409,7 +409,7 @@ static const struct bpf_map_ops prog_array_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, }; -static struct bpf_map_type_list prog_array_type __read_mostly = { +static struct bpf_map_type_list prog_array_type __ro_after_init = { .ops = &prog_array_ops, .type = BPF_MAP_TYPE_PROG_ARRAY, }; @@ -522,7 +522,7 @@ static const struct bpf_map_ops perf_event_array_ops = { .map_release = perf_event_fd_array_release, }; -static struct bpf_map_type_list perf_event_array_type __read_mostly = { +static struct bpf_map_type_list perf_event_array_type __ro_after_init = { .ops = &perf_event_array_ops, .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; @@ -564,7 +564,7 @@ static const struct bpf_map_ops cgroup_array_ops = { .map_fd_put_ptr = cgroup_fd_array_put_ptr, }; -static struct bpf_map_type_list cgroup_array_type __read_mostly = { +static struct bpf_map_type_list cgroup_array_type __ro_after_init = { .ops = &cgroup_array_ops, .type = BPF_MAP_TYPE_CGROUP_ARRAY, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fddd76b1b627..f45827e205d3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -28,6 +28,9 @@ #include <linux/moduleloader.h> #include <linux/bpf.h> #include <linux/frame.h> +#include <linux/rbtree_latch.h> +#include <linux/kallsyms.h> +#include <linux/rcupdate.h> #include <asm/unaligned.h> @@ -95,6 +98,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->aux = aux; fp->aux->prog = fp; + INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); + return fp; } EXPORT_SYMBOL_GPL(bpf_prog_alloc); @@ -290,6 +295,206 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +static __always_inline void +bpf_get_prog_addr_region(const struct bpf_prog *prog, + unsigned long *symbol_start, + unsigned long *symbol_end) +{ + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); + unsigned long addr = (unsigned long)hdr; + + WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); + + *symbol_start = addr; + *symbol_end = addr + hdr->pages * PAGE_SIZE; +} + +static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + BUILD_BUG_ON(sizeof("bpf_prog_") + + sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN); + + sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); + sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); + *sym = 0; +} + +static __always_inline unsigned long +bpf_get_prog_addr_start(struct latch_tree_node *n) +{ + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + return symbol_start; +} + +static __always_inline bool bpf_tree_less(struct latch_tree_node *a, + struct latch_tree_node *b) +{ + return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); +} + +static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + if (val < symbol_start) + return -1; + if (val >= symbol_end) + return 1; + + return 0; +} + +static const struct latch_tree_ops bpf_tree_ops = { + .less = bpf_tree_less, + .comp = bpf_tree_comp, +}; + +static DEFINE_SPINLOCK(bpf_lock); +static LIST_HEAD(bpf_kallsyms); +static struct latch_tree_root bpf_tree __cacheline_aligned; + +int bpf_jit_kallsyms __read_mostly; + +static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) +{ + WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); + list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); + latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); +} + +static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) +{ + if (list_empty(&aux->ksym_lnode)) + return; + + latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); + list_del_rcu(&aux->ksym_lnode); +} + +static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) +{ + return fp->jited && !bpf_prog_was_classic(fp); +} + +static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym_lnode) || + fp->aux->ksym_lnode.prev == LIST_POISON2; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp) || + !capable(CAP_SYS_ADMIN)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_add(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_del(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) +{ + struct latch_tree_node *n; + + if (!bpf_jit_kallsyms_enabled()) + return NULL; + + n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); + return n ? + container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : + NULL; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog *prog; + char *ret = NULL; + + rcu_read_lock(); + prog = bpf_prog_kallsyms_find(addr); + if (prog) { + bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); + bpf_get_prog_name(prog, sym); + + ret = sym; + if (size) + *size = symbol_end - symbol_start; + if (off) + *off = addr - symbol_start; + } + rcu_read_unlock(); + + return ret; +} + +bool is_bpf_text_address(unsigned long addr) +{ + bool ret; + + rcu_read_lock(); + ret = bpf_prog_kallsyms_find(addr) != NULL; + rcu_read_unlock(); + + return ret; +} + +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog_aux *aux; + unsigned int it = 0; + int ret = -ERANGE; + + if (!bpf_jit_kallsyms_enabled()) + return ret; + + rcu_read_lock(); + list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { + if (it++ != symnum) + continue; + + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + bpf_get_prog_name(aux->prog, sym); + + *value = symbol_start; + *type = BPF_SYM_ELF_TYPE; + + ret = 0; + break; + } + rcu_read_unlock(); + + return ret; +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -326,6 +531,24 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) module_memfree(hdr); } +/* This symbol is only overridden by archs that have different + * requirements than the usual eBPF JITs, f.e. when they only + * implement cBPF JIT, do not set images read-only, etc. + */ +void __weak bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) { + struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + bpf_jit_binary_unlock_ro(hdr); + bpf_jit_binary_free(hdr); + + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); + } + + bpf_prog_unlock_free(fp); +} + int bpf_jit_harden __read_mostly; static int bpf_jit_blind_insn(const struct bpf_insn *from, @@ -1154,12 +1377,22 @@ const struct bpf_func_proto bpf_tail_call_proto = { .arg3_type = ARG_ANYTHING, }; -/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ +/* Stub for JITs that only support cBPF. eBPF programs are interpreted. + * It is encouraged to implement bpf_int_jit_compile() instead, so that + * eBPF and implicitly also cBPF can get JITed! + */ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) { return prog; } +/* Stub for JITs that support eBPF. All cBPF code gets transformed into + * eBPF by the kernel and is later compiled by bpf_int_jit_compile(). + */ +void __weak bpf_jit_compile(struct bpf_prog *prog) +{ +} + bool __weak bpf_helper_changes_pkt_data(void *func) { return false; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index a753bbe7df0a..3ea87fb19a94 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1023,7 +1023,7 @@ static const struct bpf_map_ops htab_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_type __read_mostly = { +static struct bpf_map_type_list htab_type __ro_after_init = { .ops = &htab_ops, .type = BPF_MAP_TYPE_HASH, }; @@ -1037,7 +1037,7 @@ static const struct bpf_map_ops htab_lru_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_type __read_mostly = { +static struct bpf_map_type_list htab_lru_type __ro_after_init = { .ops = &htab_lru_ops, .type = BPF_MAP_TYPE_LRU_HASH, }; @@ -1124,7 +1124,7 @@ static const struct bpf_map_ops htab_percpu_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_percpu_type __ro_after_init = { .ops = &htab_percpu_ops, .type = BPF_MAP_TYPE_PERCPU_HASH, }; @@ -1138,7 +1138,7 @@ static const struct bpf_map_ops htab_lru_percpu_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_percpu_type __read_mostly = { +static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = { .ops = &htab_lru_percpu_ops, .type = BPF_MAP_TYPE_LRU_PERCPU_HASH, }; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0f6a0bd279b..8bfe0afaee10 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -508,7 +508,7 @@ static const struct bpf_map_ops trie_ops = { .map_delete_elem = trie_delete_elem, }; -static struct bpf_map_type_list trie_type __read_mostly = { +static struct bpf_map_type_list trie_type __ro_after_init = { .ops = &trie_ops, .type = BPF_MAP_TYPE_LPM_TRIE, }; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index be8519148c25..22aa45cd0324 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -273,7 +273,7 @@ static const struct bpf_map_ops stack_map_ops = { .map_delete_elem = stack_map_delete_elem, }; -static struct bpf_map_type_list stack_map_type __read_mostly = { +static struct bpf_map_type_list stack_map_type __ro_after_init = { .ops = &stack_map_ops, .type = BPF_MAP_TYPE_STACK_TRACE, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f74ca17af64a..461eb1e66a0f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -707,6 +707,7 @@ void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { trace_bpf_prog_put_rcu(prog); + bpf_prog_kallsyms_del(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } } @@ -903,6 +904,7 @@ static int bpf_prog_load(union bpf_attr *attr) /* failed to allocate fd */ goto free_used_maps; + bpf_prog_kallsyms_add(prog); trace_bpf_prog_load(prog, err); return err; diff --git a/kernel/extable.c b/kernel/extable.c index e3beec4a2339..bd82117ad424 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/init.h> +#include <linux/filter.h> #include <asm/sections.h> #include <linux/uaccess.h> @@ -104,6 +105,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_ftrace_trampoline(addr)) return 1; + if (is_bpf_text_address(addr)) + return 1; /* * There might be init symbols in saved stacktraces. * Give those symbols a chance to be printed in @@ -123,7 +126,11 @@ int kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; - return is_ftrace_trampoline(addr); + if (is_ftrace_trampoline(addr)) + return 1; + if (is_bpf_text_address(addr)) + return 1; + return 0; } /* diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fafd1a3ef0da..6a3b249a2ae1 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -23,6 +23,7 @@ #include <linux/mm.h> #include <linux/ctype.h> #include <linux/slab.h> +#include <linux/filter.h> #include <linux/compiler.h> #include <asm/sections.h> @@ -300,10 +301,11 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { char namebuf[KSYM_NAME_LEN]; + if (is_ksym_addr(addr)) return !!get_symbol_pos(addr, symbolsize, offset); - - return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf); + return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) || + !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } /* @@ -318,6 +320,8 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf) { + const char *ret; + namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; @@ -333,9 +337,13 @@ const char *kallsyms_lookup(unsigned long addr, return namebuf; } - /* See if it's in a module. */ - return module_address_lookup(addr, symbolsize, offset, modname, - namebuf); + /* See if it's in a module or a BPF JITed image. */ + ret = module_address_lookup(addr, symbolsize, offset, + modname, namebuf); + if (!ret) + ret = bpf_address_lookup(addr, symbolsize, + offset, modname, namebuf); + return ret; } int lookup_symbol_name(unsigned long addr, char *symname) @@ -471,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol); /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ struct kallsym_iter { loff_t pos; + loff_t pos_mod_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; @@ -481,13 +490,27 @@ struct kallsym_iter { static int get_ksymbol_mod(struct kallsym_iter *iter) { - if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, - &iter->type, iter->name, iter->module_name, - &iter->exported) < 0) + int ret = module_get_kallsym(iter->pos - kallsyms_num_syms, + &iter->value, &iter->type, + iter->name, iter->module_name, + &iter->exported); + if (ret < 0) { + iter->pos_mod_end = iter->pos; return 0; + } + return 1; } +static int get_ksymbol_bpf(struct kallsym_iter *iter) +{ + iter->module_name[0] = '\0'; + iter->exported = 0; + return bpf_get_kallsym(iter->pos - iter->pos_mod_end, + &iter->value, &iter->type, + iter->name) < 0 ? 0 : 1; +} + /* Returns space to next name. */ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) { @@ -508,16 +531,30 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; + if (new_pos == 0) + iter->pos_mod_end = 0; +} + +static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) +{ + iter->pos = pos; + + if (iter->pos_mod_end > 0 && + iter->pos_mod_end < iter->pos) + return get_ksymbol_bpf(iter); + + if (!get_ksymbol_mod(iter)) + return get_ksymbol_bpf(iter); + + return 1; } /* Returns false if pos at or past end of file. */ static int update_iter(struct kallsym_iter *iter, loff_t pos) { /* Module symbols can be accessed randomly. */ - if (pos >= kallsyms_num_syms) { - iter->pos = pos; - return get_ksymbol_mod(iter); - } + if (pos >= kallsyms_num_syms) + return update_iter_mod(iter, pos); /* If we're not on the desired position, reset to new position. */ if (pos != iter->pos) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 424daa4586d1..cee9802cf3e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -506,7 +506,7 @@ static const struct bpf_verifier_ops kprobe_prog_ops = { .is_valid_access = kprobe_prog_is_valid_access, }; -static struct bpf_prog_type_list kprobe_tl = { +static struct bpf_prog_type_list kprobe_tl __ro_after_init = { .ops = &kprobe_prog_ops, .type = BPF_PROG_TYPE_KPROBE, }; @@ -589,7 +589,7 @@ static const struct bpf_verifier_ops tracepoint_prog_ops = { .is_valid_access = tp_prog_is_valid_access, }; -static struct bpf_prog_type_list tracepoint_tl = { +static struct bpf_prog_type_list tracepoint_tl __ro_after_init = { .ops = &tracepoint_prog_ops, .type = BPF_PROG_TYPE_TRACEPOINT, }; @@ -648,7 +648,7 @@ static const struct bpf_verifier_ops perf_event_prog_ops = { .convert_ctx_access = pe_prog_convert_ctx_access, }; -static struct bpf_prog_type_list perf_event_tl = { +static struct bpf_prog_type_list perf_event_tl __ro_after_init = { .ops = &perf_event_prog_ops, .type = BPF_PROG_TYPE_PERF_EVENT, }; diff --git a/net/Kconfig b/net/Kconfig index f19c0c3b9589..102f781a0131 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -297,7 +297,8 @@ config BPF_JIT Note, admin should enable this feature changing: /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) config NET_FLOW_LIMIT bool diff --git a/net/core/filter.c b/net/core/filter.c index 0b753cbb2536..e466e0040137 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3296,47 +3296,47 @@ static const struct bpf_verifier_ops cg_sock_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __read_mostly = { +static struct bpf_prog_type_list sk_filter_type __ro_after_init = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; -static struct bpf_prog_type_list sched_cls_type __read_mostly = { +static struct bpf_prog_type_list sched_cls_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; -static struct bpf_prog_type_list sched_act_type __read_mostly = { +static struct bpf_prog_type_list sched_act_type __ro_after_init = { .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -static struct bpf_prog_type_list xdp_type __read_mostly = { +static struct bpf_prog_type_list xdp_type __ro_after_init = { .ops = &xdp_ops, .type = BPF_PROG_TYPE_XDP, }; -static struct bpf_prog_type_list cg_skb_type __read_mostly = { +static struct bpf_prog_type_list cg_skb_type __ro_after_init = { .ops = &cg_skb_ops, .type = BPF_PROG_TYPE_CGROUP_SKB, }; -static struct bpf_prog_type_list lwt_in_type __read_mostly = { +static struct bpf_prog_type_list lwt_in_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_IN, }; -static struct bpf_prog_type_list lwt_out_type __read_mostly = { +static struct bpf_prog_type_list lwt_out_type __ro_after_init = { .ops = &lwt_inout_ops, .type = BPF_PROG_TYPE_LWT_OUT, }; -static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { +static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { .ops = &lwt_xmit_ops, .type = BPF_PROG_TYPE_LWT_XMIT, }; -static struct bpf_prog_type_list cg_sock_type __read_mostly = { +static struct bpf_prog_type_list cg_sock_type __ro_after_init = { .ops = &cg_sock_ops, .type = BPF_PROG_TYPE_CGROUP_SOCK }; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa72eb0399c..4ead336e14ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -334,6 +334,13 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dointvec, }, + { + .procname = "bpf_jit_kallsyms", + .data = &bpf_jit_kallsyms, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, # endif #endif { |