diff options
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r-- | tools/lib/bpf/.gitignore | 1 | ||||
-rw-r--r-- | tools/lib/bpf/Makefile | 21 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 38 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 9 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 126 | ||||
-rw-r--r-- | tools/lib/bpf/btf.h | 3 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 654 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 6 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 7 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.pc.template | 12 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_probes.c | 76 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_util.h | 30 | ||||
-rw-r--r-- | tools/lib/bpf/xsk.c | 193 | ||||
-rw-r--r-- | tools/lib/bpf/xsk.h | 22 |
14 files changed, 979 insertions, 219 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index fecb78afea3f..d9e9dec04605 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,4 +1,5 @@ libbpf_version.h +libbpf.pc FEATURE-DUMP.libbpf test_libbpf libbpf.so.* diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 8e7c56e9590f..f91639bf5650 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 2 +BPF_EXTRAVERSION = 3 MAKEFLAGS += --no-print-directory @@ -90,6 +90,7 @@ LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) LIB_FILE = libbpf.a libbpf.so* +PC_FILE = libbpf.pc # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -134,13 +135,14 @@ VERSION_SCRIPT := libbpf.map LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_TARGET) +CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -187,6 +189,12 @@ $(OUTPUT)libbpf.a: $(BPF_IN) $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ +$(OUTPUT)libbpf.pc: + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ + < libbpf.pc.template > $@ + check: check_abi check_abi: $(OUTPUT)libbpf.so @@ -222,9 +230,14 @@ install_headers: $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); -install: install_lib +install_pkgconfig: $(PC_FILE) + $(call QUIET_INSTALL, $(PC_FILE)) \ + $(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644) + +install: install_lib install_pkgconfig ### Cleaning rules @@ -234,7 +247,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9cd015574e83..c4a48086dc9a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # define __NR_bpf 349 # elif defined(__s390__) # define __NR_bpf 351 +# elif defined(__arc__) +# define __NR_bpf 280 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif @@ -79,7 +81,6 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -89,8 +90,9 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.value_size = create_attr->value_size; attr.max_entries = create_attr->max_entries; attr.map_flags = create_attr->map_flags; - memcpy(attr.map_name, create_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (create_attr->name) + memcpy(attr.map_name, create_attr->name, + min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = create_attr->numa_node; attr.btf_fd = create_attr->btf_fd; attr.btf_key_type_id = create_attr->btf_key_type_id; @@ -155,7 +157,6 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -166,7 +167,9 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; - memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (name) + memcpy(attr.map_name, name, + min(strlen(name), BPF_OBJ_NAME_LEN - 1)); if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; @@ -216,18 +219,15 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, void *finfo = NULL, *linfo = NULL; union bpf_attr attr; __u32 log_level; - __u32 name_len; int fd; if (!load_attr || !log_buf != !log_buf_sz) return -EINVAL; log_level = load_attr->log_level; - if (log_level > 2 || (log_level && !log_buf)) + if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) return -EINVAL; - name_len = load_attr->name ? strlen(load_attr->name) : 0; - memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; @@ -253,8 +253,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); - memcpy(attr.prog_name, load_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (load_attr->name) + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) @@ -429,6 +430,16 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); } +int bpf_map_freeze(int fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + + return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; @@ -545,10 +556,15 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) attr.test.data_out = ptr_to_u64(test_attr->data_out); attr.test.data_size_in = test_attr->data_size_in; attr.test.data_size_out = test_attr->data_size_out; + attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); + attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); + attr.test.ctx_size_in = test_attr->ctx_size_in; + attr.test.ctx_size_out = test_attr->ctx_size_out; attr.test.repeat = test_attr->repeat; ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); test_attr->data_size_out = attr.test.data_size_out; + test_attr->ctx_size_out = attr.test.ctx_size_out; test_attr->retval = attr.test.retval; test_attr->duration = attr.test.duration; return ret; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6ffdd79bea89..9593fec75652 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -26,6 +26,7 @@ #include <linux/bpf.h> #include <stdbool.h> #include <stddef.h> +#include <stdint.h> #ifdef __cplusplus extern "C" { @@ -92,7 +93,7 @@ struct bpf_load_program_attr { #define MAPS_RELAX_COMPAT 0x01 /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE (256 * 1024) +#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz); @@ -117,6 +118,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_map_freeze(int fd); LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, @@ -135,6 +137,11 @@ struct bpf_prog_test_run_attr { * out: length of data_out */ __u32 retval; /* out: return code of the BPF program */ __u32 duration; /* out: average per repetition in ns */ + const void *ctx_in; /* optional */ + __u32 ctx_size_in; + void *ctx_out; /* optional */ + __u32 ctx_size_out; /* in: max length of ctx_out + * out: length of cxt_out */ }; LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index cf119c9b6f27..75eaf10b9e1a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -24,6 +24,8 @@ ((k) == BTF_KIND_CONST) || \ ((k) == BTF_KIND_RESTRICT)) +#define IS_VAR(k) ((k) == BTF_KIND_VAR) + static struct btf_type btf_void; struct btf { @@ -212,6 +214,10 @@ static int btf_type_size(struct btf_type *t) return base_size + vlen * sizeof(struct btf_member); case BTF_KIND_FUNC_PROTO: return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); default: pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); return -EINVAL; @@ -283,6 +289,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_DATASEC: size = t->size; goto done; case BTF_KIND_PTR: @@ -292,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -326,7 +334,8 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); while (depth < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t) && - IS_MODIFIER(BTF_INFO_KIND(t->info))) { + (IS_MODIFIER(BTF_INFO_KIND(t->info)) || + IS_VAR(BTF_INFO_KIND(t->info)))) { type_id = t->type; t = btf__type_by_id(btf, type_id); depth++; @@ -408,6 +417,92 @@ done: return btf; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + ret = bpf_object__section_size(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1); + i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = (struct btf_var *)(t_var + 1); + + if (BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = bpf_object__variable_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", name); + return -ENOENT; + } + + vsi->offset = off; + } + + qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i; + + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = btf->types[i]; + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. + */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return err; +} + int btf__load(struct btf *btf) { __u32 log_buf_size = BPF_LOG_BUF_SIZE; @@ -1259,8 +1354,16 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, } /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) - d->map[i] = BTF_UNPROCESSED_ID; + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = d->btf->types[i]; + __u16 kind = BTF_INFO_KIND(t->info); + + /* VAR and DATASEC are never deduped and are self-canonical */ + if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC) + d->map[i] = i; + else + d->map[i] = BTF_UNPROCESSED_ID; + } d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); if (!d->hypot_map) { @@ -1851,6 +1954,8 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_UNION: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: return 0; case BTF_KIND_INT: @@ -2604,6 +2709,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_VAR: r = btf_dedup_remap_type_id(d, t->type); if (r < 0) return r; @@ -2658,6 +2764,20 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) break; } + case BTF_KIND_DATASEC: { + struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, var->type); + if (r < 0) + return r; + var->type = r; + var++; + } + break; + } + default: return -EINVAL; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 28a1e1e59861..c7b399e81fce 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -21,6 +21,8 @@ struct btf; struct btf_ext; struct btf_type; +struct bpf_object; + /* * The .BTF.ext ELF section layout defined as * struct btf_ext_header @@ -57,6 +59,7 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 11c25d9ea431..11a65db4b93f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7,6 +7,7 @@ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015 Huawei Inc. * Copyright (C) 2017 Nicira, Inc. + * Copyright (C) 2019 Isovalent, Inc. */ #ifndef _GNU_SOURCE @@ -52,6 +53,11 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +/* vsprintf() in __base_pr() uses nonliteral format string. It may break + * compilation if user enables corresponding warning. Disable it explicitly. + */ +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + #define __printf(a, b) __attribute__((format(printf, a, b))) static int __base_pr(enum libbpf_print_level level, const char *format, @@ -120,6 +126,8 @@ static inline __u64 ptr_to_u64(const void *ptr) struct bpf_capabilities { /* v4.14: kernel support for program & map names. */ __u32 name:1; + /* v5.2: kernel support for global data sections. */ + __u32 global_data:1; }; /* @@ -144,6 +152,7 @@ struct bpf_program { enum { RELO_LD64, RELO_CALL, + RELO_DATA, } type; int insn_idx; union { @@ -152,6 +161,7 @@ struct bpf_program { }; } *reloc_desc; int nr_reloc; + int log_level; struct { int nr; @@ -176,6 +186,19 @@ struct bpf_program { __u32 line_info_cnt; }; +enum libbpf_map_type { + LIBBPF_MAP_UNSPEC, + LIBBPF_MAP_DATA, + LIBBPF_MAP_BSS, + LIBBPF_MAP_RODATA, +}; + +static const char * const libbpf_type_to_btf_name[] = { + [LIBBPF_MAP_DATA] = ".data", + [LIBBPF_MAP_BSS] = ".bss", + [LIBBPF_MAP_RODATA] = ".rodata", +}; + struct bpf_map { int fd; char *name; @@ -187,11 +210,18 @@ struct bpf_map { __u32 btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; + enum libbpf_map_type libbpf_type; +}; + +struct bpf_secdata { + void *rodata; + void *data; }; static LIST_HEAD(bpf_objects_list); struct bpf_object { + char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; @@ -199,6 +229,7 @@ struct bpf_object { size_t nr_programs; struct bpf_map *maps; size_t nr_maps; + struct bpf_secdata sections; bool loaded; bool has_pseudo_calls; @@ -214,6 +245,9 @@ struct bpf_object { Elf *elf; GElf_Ehdr ehdr; Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; size_t strtabidx; struct { GElf_Shdr shdr; @@ -222,6 +256,9 @@ struct bpf_object { int nr_reloc; int maps_shndx; int text_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -443,6 +480,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz) { struct bpf_object *obj; + char *end; obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); if (!obj) { @@ -451,8 +489,14 @@ static struct bpf_object *bpf_object__new(const char *path, } strcpy(obj->path, path); - obj->efile.fd = -1; + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + obj->efile.fd = -1; /* * Caller of this function should also calls * bpf_object__elf_finish() after data collection to return @@ -462,6 +506,9 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; + obj->efile.data_shndx = -1; + obj->efile.rodata_shndx = -1; + obj->efile.bss_shndx = -1; obj->loaded = false; @@ -480,6 +527,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; + obj->efile.data = NULL; + obj->efile.rodata = NULL; + obj->efile.bss = NULL; zfree(&obj->efile.reloc); obj->efile.nr_reloc = 0; @@ -621,27 +671,182 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } +static int bpf_object_search_section_size(const struct bpf_object *obj, + const char *name, size_t *d_size) +{ + const GElf_Ehdr *ep = &obj->efile.ehdr; + Elf *elf = obj->efile.elf; + Elf_Scn *scn = NULL; + int idx = 0; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + const char *sec_name; + Elf_Data *data; + GElf_Shdr sh; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); + return -EIO; + } + + sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!sec_name) { + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); + return -EIO; + } + + if (strcmp(name, sec_name)) + continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + return -EIO; + } + + *d_size = data->d_size; + return 0; + } + + return -ENOENT; +} + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size) +{ + int ret = -ENOENT; + size_t d_size; + + *size = 0; + if (!name) { + return -EINVAL; + } else if (!strcmp(name, ".data")) { + if (obj->efile.data) + *size = obj->efile.data->d_size; + } else if (!strcmp(name, ".bss")) { + if (obj->efile.bss) + *size = obj->efile.bss->d_size; + } else if (!strcmp(name, ".rodata")) { + if (obj->efile.rodata) + *size = obj->efile.rodata->d_size; + } else { + ret = bpf_object_search_section_size(obj, name, &d_size); + if (!ret) + *size = d_size; + } + + return *size ? 0 : ret; +} + +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off) +{ + Elf_Data *symbols = obj->efile.symbols; + const char *sname; + size_t si; + + if (!name || !off) + return -EINVAL; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + continue; + + sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!sname) { + pr_warning("failed to get sym name string for var %s\n", + name); + return -EIO; + } + if (strcmp(name, sname) == 0) { + *off = sym.st_value; + return 0; + } + } + + return -ENOENT; +} + +static bool bpf_object__has_maps(const struct bpf_object *obj) +{ + return obj->efile.maps_shndx >= 0 || + obj->efile.data_shndx >= 0 || + obj->efile.rodata_shndx >= 0 || + obj->efile.bss_shndx >= 0; +} + +static int +bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, + enum libbpf_map_type type, Elf_Data *data, + void **data_buff) +{ + struct bpf_map_def *def = &map->def; + char map_name[BPF_OBJ_NAME_LEN]; + + map->libbpf_type = type; + map->offset = ~(typeof(map->offset))0; + snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, + libbpf_type_to_btf_name[type]); + map->name = strdup(map_name); + if (!map->name) { + pr_warning("failed to alloc map name\n"); + return -ENOMEM; + } + + def->type = BPF_MAP_TYPE_ARRAY; + def->key_size = sizeof(int); + def->value_size = data->d_size; + def->max_entries = 1; + def->map_flags = type == LIBBPF_MAP_RODATA ? + BPF_F_RDONLY_PROG : 0; + if (data_buff) { + *data_buff = malloc(data->d_size); + if (!*data_buff) { + zfree(&map->name); + pr_warning("failed to alloc map content buffer\n"); + return -ENOMEM; + } + memcpy(*data_buff, data->d_buf, data->d_size); + } + + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); + return 0; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { + int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; bool strict = !(flags & MAPS_RELAX_COMPAT); - int i, map_idx, map_def_sz, nr_maps = 0; - Elf_Scn *scn; - Elf_Data *data = NULL; Elf_Data *symbols = obj->efile.symbols; + Elf_Data *data = NULL; + int ret = 0; - if (obj->efile.maps_shndx < 0) - return -EINVAL; if (!symbols) return -EINVAL; + nr_syms = symbols->d_size / sizeof(GElf_Sym); - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); - if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); - return -EINVAL; + if (obj->efile.maps_shndx >= 0) { + Elf_Scn *scn = elf_getscn(obj->efile.elf, + obj->efile.maps_shndx); + + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warning("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; + } } /* @@ -651,7 +856,16 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) * * TODO: Detect array of map and report error. */ - for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + if (obj->caps.global_data) { + if (obj->efile.data_shndx >= 0) + nr_maps_glob++; + if (obj->efile.rodata_shndx >= 0) + nr_maps_glob++; + if (obj->efile.bss_shndx >= 0) + nr_maps_glob++; + } + + for (i = 0; data && i < nr_syms; i++) { GElf_Sym sym; if (!gelf_getsym(symbols, i, &sym)) @@ -661,22 +875,24 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) nr_maps++; } - /* Alloc obj->maps and fill nr_maps. */ - pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, - nr_maps, data->d_size); - - if (!nr_maps) + if (!nr_maps && !nr_maps_glob) return 0; /* Assume equally sized map definitions */ - map_def_sz = data->d_size / nr_maps; - if (!data->d_size || (data->d_size % nr_maps) != 0) { - pr_warning("unable to determine map definition size " - "section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); - return -EINVAL; + if (data) { + pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, + nr_maps, data->d_size); + + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } } + nr_maps += nr_maps_glob; obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { pr_warning("alloc maps for object failed\n"); @@ -697,7 +913,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) /* * Fill obj->maps using data in "maps" section. */ - for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + for (i = 0, map_idx = 0; data && i < nr_syms; i++) { GElf_Sym sym; const char *map_name; struct bpf_map_def *def; @@ -710,6 +926,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); + + obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; obj->maps[map_idx].offset = sym.st_value; if (sym.st_value + map_def_sz > data->d_size) { pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", @@ -758,8 +976,31 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_idx++; } - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); - return 0; + if (!obj->caps.global_data) + goto finalize; + + /* + * Populate rest of obj->maps with libbpf internal maps. + */ + if (obj->efile.data_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_DATA, + obj->efile.data, + &obj->sections.data); + if (!ret && obj->efile.rodata_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_RODATA, + obj->efile.rodata, + &obj->sections.rodata); + if (!ret && obj->efile.bss_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_BSS, + obj->efile.bss, NULL); +finalize: + if (!ret) + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), + compare_bpf_map); + return ret; } static bool section_have_execinstr(struct bpf_object *obj, int idx) @@ -785,6 +1026,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Data *btf_ext_data = NULL; + Elf_Data *btf_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -828,32 +1070,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); - if (strcmp(name, "license") == 0) + if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); - else if (strcmp(name, "version") == 0) + } else if (strcmp(name, "version") == 0) { err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); - else if (strcmp(name, "maps") == 0) + } else if (strcmp(name, "maps") == 0) { obj->efile.maps_shndx = idx; - else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_ELF_SEC, PTR_ERR(obj->btf)); - obj->btf = NULL; - continue; - } - err = btf__load(obj->btf); - if (err) { - pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n", - BTF_ELF_SEC, err); - btf__free(obj->btf); - obj->btf = NULL; - err = 0; - } + } else if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { @@ -865,20 +1093,28 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.symbols = data; obj->efile.strtabidx = sh.sh_link; } - } else if ((sh.sh_type == SHT_PROGBITS) && - (sh.sh_flags & SHF_EXECINSTR) && - (data->d_size > 0)) { - if (strcmp(name, ".text") == 0) - obj->efile.text_shndx = idx; - err = bpf_object__add_program(obj, data->d_buf, - data->d_size, name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - - pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, cp); + } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh.sh_flags & SHF_EXECINSTR) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, name, idx); + if (err) { + char errmsg[STRERR_BUFSIZE]; + char *cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); + + pr_warning("failed to alloc program %s (%s): %s", + name, obj->path, cp); + } + } else if (strcmp(name, ".data") == 0) { + obj->efile.data = data; + obj->efile.data_shndx = idx; + } else if (strcmp(name, ".rodata") == 0) { + obj->efile.rodata = data; + obj->efile.rodata_shndx = idx; + } else { + pr_debug("skip section(%d) %s\n", idx, name); } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -906,6 +1142,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + obj->efile.bss = data; + obj->efile.bss_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -917,6 +1156,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_data) { + obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_ELF_SEC, PTR_ERR(obj->btf)); + obj->btf = NULL; + } else { + err = btf__finalize_data(obj, obj->btf); + if (!err) + err = btf__load(obj->btf); + if (err) { + pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; + } + } + } if (btf_ext_data) { if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", @@ -932,7 +1190,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) } } } - if (obj->efile.maps_shndx >= 0) { + if (bpf_object__has_maps(obj)) { err = bpf_object__init_maps(obj, flags); if (err) goto out; @@ -968,13 +1226,46 @@ bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) return NULL; } +static bool bpf_object__shndx_is_data(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.data_shndx || + shndx == obj->efile.bss_shndx || + shndx == obj->efile.rodata_shndx; +} + +static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.maps_shndx; +} + +static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.text_shndx || + bpf_object__shndx_is_maps(obj, shndx) || + bpf_object__shndx_is_data(obj, shndx); +} + +static enum libbpf_map_type +bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) +{ + if (shndx == obj->efile.data_shndx) + return LIBBPF_MAP_DATA; + else if (shndx == obj->efile.bss_shndx) + return LIBBPF_MAP_BSS; + else if (shndx == obj->efile.rodata_shndx) + return LIBBPF_MAP_RODATA; + else + return LIBBPF_MAP_UNSPEC; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; - int text_shndx = obj->efile.text_shndx; - int maps_shndx = obj->efile.maps_shndx; struct bpf_map *maps = obj->maps; size_t nr_maps = obj->nr_maps; int i, nrels; @@ -994,7 +1285,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GElf_Sym sym; GElf_Rel rel; unsigned int insn_idx; + unsigned int shdr_idx; struct bpf_insn *insns = prog->insns; + enum libbpf_map_type type; + const char *name; size_t map_idx; if (!gelf_getrel(data, i, &rel)) { @@ -1009,13 +1303,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - pr_debug("relo for %lld value %lld name %d\n", + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + + pr_debug("relo for %lld value %lld name %d (\'%s\')\n", (long long) (rel.r_info >> 32), - (long long) sym.st_value, sym.st_name); + (long long) sym.st_value, sym.st_name, name); - if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { - pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", - prog->section_name, sym.st_shndx); + shdr_idx = sym.st_shndx; + if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { + pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", + prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } @@ -1040,24 +1339,45 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__RELOC; } - /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].offset == sym.st_value) { - pr_debug("relocation: find map %zd (%s) for insn %u\n", - map_idx, maps[map_idx].name, insn_idx); - break; + if (bpf_object__shndx_is_maps(obj, shdr_idx) || + bpf_object__shndx_is_data(obj, shdr_idx)) { + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + if (type != LIBBPF_MAP_UNSPEC) { + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { + pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", + name, insn_idx, insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } + if (!obj->caps.global_data) { + pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } } - } - if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d large than %d\n", - (int)map_idx, (int)nr_maps - 1); - return -LIBBPF_ERRNO__RELOC; - } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].libbpf_type != type) + continue; + if (type != LIBBPF_MAP_UNSPEC || + (type == LIBBPF_MAP_UNSPEC && + maps[map_idx].offset == sym.st_value)) { + pr_debug("relocation: find map %zd (%s) for insn %u\n", + map_idx, maps[map_idx].name, insn_idx); + break; + } + } - prog->reloc_desc[i].type = RELO_LD64; - prog->reloc_desc[i].insn_idx = insn_idx; - prog->reloc_desc[i].map_idx = map_idx; + if (map_idx >= nr_maps) { + pr_warning("bpf relocation: map_idx %d large than %d\n", + (int)map_idx, (int)nr_maps - 1); + return -LIBBPF_ERRNO__RELOC; + } + + prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ? + RELO_DATA : RELO_LD64; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].map_idx = map_idx; + } } return 0; } @@ -1065,18 +1385,27 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { struct bpf_map_def *def = &map->def; - __u32 key_type_id, value_type_id; + __u32 key_type_id = 0, value_type_id = 0; int ret; - ret = btf__get_map_kv_tids(btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - if (ret) + if (!bpf_map__is_internal(map)) { + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + } else { + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + ret = btf__find_by_name(btf, + libbpf_type_to_btf_name[map->libbpf_type]); + } + if (ret < 0) return ret; map->btf_key_type_id = key_type_id; - map->btf_value_type_id = value_type_id; - + map->btf_value_type_id = bpf_map__is_internal(map) ? + ret : value_type_id; return 0; } @@ -1182,9 +1511,95 @@ bpf_object__probe_name(struct bpf_object *obj) } static int +bpf_object__probe_global_data(struct bpf_object *obj) +{ + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); + if (map < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, errno); + return -errno; + } + + insns[0].imm = map; + + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + if (ret >= 0) { + obj->caps.global_data = 1; + close(ret); + } + + close(map); + return 0; +} + +static int bpf_object__probe_caps(struct bpf_object *obj) { - return bpf_object__probe_name(obj); + int (*probe_fn[])(struct bpf_object *obj) = { + bpf_object__probe_name, + bpf_object__probe_global_data, + }; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { + ret = probe_fn[i](obj); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, zero = 0; + __u8 *data; + + /* Nothing to do here since kernel already zero-initializes .bss map. */ + if (map->libbpf_type == LIBBPF_MAP_BSS) + return 0; + + data = map->libbpf_type == LIBBPF_MAP_DATA ? + obj->sections.data : obj->sections.rodata; + + err = bpf_map_update_elem(map->fd, &zero, data, 0); + /* Freeze .rodata map as read-only from syscall side. */ + if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + err = bpf_map_freeze(map->fd); + if (err) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error freezing map(%s) as read-only: %s\n", + map->name, cp); + err = 0; + } + } + return err; } static int @@ -1244,6 +1659,7 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; +err_out: cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", map->name, cp); @@ -1251,6 +1667,15 @@ bpf_object__create_maps(struct bpf_object *obj) zclose(obj->maps[j].fd); return err; } + + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(*pfd); + goto err_out; + } + } + pr_debug("create map %s: fd=%d\n", map->name, *pfd); } @@ -1405,21 +1830,29 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return 0; for (i = 0; i < prog->nr_reloc; i++) { - if (prog->reloc_desc[i].type == RELO_LD64) { + if (prog->reloc_desc[i].type == RELO_LD64 || + prog->reloc_desc[i].type == RELO_DATA) { + bool relo_data = prog->reloc_desc[i].type == RELO_DATA; struct bpf_insn *insns = prog->insns; int insn_idx, map_idx; insn_idx = prog->reloc_desc[i].insn_idx; map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { + if (insn_idx + 1 >= (int)prog->insns_cnt) { pr_warning("relocation out of range: '%s'\n", prog->section_name); return -LIBBPF_ERRNO__RELOC; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + + if (!relo_data) { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + } else { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE; + insns[insn_idx + 1].imm = insns[insn_idx].imm; + } insns[insn_idx].imm = obj->maps[map_idx].fd; - } else { + } else if (prog->reloc_desc[i].type == RELO_CALL) { err = bpf_program__reloc_text(prog, obj, &prog->reloc_desc[i]); if (err) @@ -1494,6 +1927,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; + int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; int ret; @@ -1514,21 +1948,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info = prog->line_info; load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; + load_attr.log_level = prog->log_level; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; - log_buf = malloc(BPF_LOG_BUF_SIZE); +retry_load: + log_buf = malloc(log_buf_size); if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); if (ret >= 0) { + if (load_attr.log_level) + pr_debug("verifier log:\n%s", log_buf); *pfd = ret; ret = 0; goto out; } + if (errno == ENOSPC) { + log_buf_size <<= 1; + free(log_buf); + goto retry_load; + } ret = -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("load bpf program failed: %s\n", cp); @@ -1693,7 +2136,9 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; case BPF_PROG_TYPE_KPROBE: default: @@ -1729,6 +2174,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); @@ -1822,7 +2268,6 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; - CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); @@ -2303,6 +2748,9 @@ void bpf_object__close(struct bpf_object *obj) obj->maps[i].priv = NULL; obj->maps[i].clear_priv = NULL; } + + zfree(&obj->sections.rodata); + zfree(&obj->sections.data); zfree(&obj->maps); obj->nr_maps = 0; @@ -2631,6 +3079,8 @@ static const struct { BPF_CGROUP_UDP4_SENDMSG), BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG), + BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_CGROUP_SYSCTL), }; #undef BPF_PROG_SEC_IMPL @@ -2780,6 +3230,11 @@ bool bpf_map__is_offload_neutral(struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; } +bool bpf_map__is_internal(struct bpf_map *map) +{ + return map->libbpf_type != LIBBPF_MAP_UNSPEC; +} + void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { map->map_ifindex = ifindex; @@ -2938,6 +3393,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, expected_attach_type); + prog->log_level = attr->log_level; if (!first_prog) first_prog = prog; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c70785cc8ef5..c5ff00515ce7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -75,6 +75,10 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); @@ -301,6 +305,7 @@ LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); +LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); @@ -314,6 +319,7 @@ struct bpf_prog_load_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; int ifindex; + int log_level; }; LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f3ce50500cf2..673001787cba 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -157,3 +157,10 @@ LIBBPF_0.0.2 { bpf_program__bpil_addr_to_offs; bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; + +LIBBPF_0.0.3 { + global: + bpf_map__is_internal; + bpf_map_freeze; + btf__finalize_data; +} LIBBPF_0.0.2; diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template new file mode 100644 index 000000000000..ac17fcef2108 --- /dev/null +++ b/tools/lib/bpf/libbpf.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libbpf +Description: BPF library +Version: @VERSION@ +Libs: -L${libdir} -lbpf +Requires.private: libelf +Cflags: -I${includedir} diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8c3a1c04dcb2..a2c64a9ce1a6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -9,6 +9,7 @@ #include <net/if.h> #include <sys/utsname.h> +#include <linux/btf.h> #include <linux/filter.h> #include <linux/kernel.h> @@ -93,10 +94,12 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SYSCTL: default: break; } @@ -129,11 +132,65 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) return errno != EINVAL && errno != EOPNOTSUPP; } +static int load_btf(void) +{ +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) + + const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; + /* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ + __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + }; + struct btf_header btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + __u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)]; + + memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr)); + memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types)); + memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); + + return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); +} + bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) { int key_size, value_size, max_entries, map_flags; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; struct bpf_create_map_attr attr = {}; - int fd = -1, fd_inner; + int fd = -1, btf_fd = -1, fd_inner; key_size = sizeof(__u32); value_size = sizeof(__u32); @@ -159,6 +216,16 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_STACK: key_size = 0; break; + case BPF_MAP_TYPE_SK_STORAGE: + btf_key_type_id = 1; + btf_value_type_id = 3; + value_size = 8; + max_entries = 0; + map_flags = BPF_F_NO_PREALLOC; + btf_fd = load_btf(); + if (btf_fd < 0) + return false; + break; case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: @@ -204,11 +271,18 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) attr.max_entries = max_entries; attr.map_flags = map_flags; attr.map_ifindex = ifindex; + if (btf_fd >= 0) { + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_key_type_id; + attr.btf_value_type_id = btf_value_type_id; + } fd = bpf_create_map_xattr(&attr); } if (fd >= 0) close(fd); + if (btf_fd >= 0) + close(btf_fd); return fd >= 0; } diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 81ecda0cb9c9..da94c4cb2e4d 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -23,6 +23,36 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/* Use these barrier functions instead of smp_[rw]mb() when they are + * used in a libbpf header file. That way they can be built into the + * application that uses libbpf. + */ +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_rmb() asm volatile("" : : : "memory") +# define libbpf_smp_wmb() asm volatile("" : : : "memory") +# define libbpf_smp_mb() \ + asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") +/* Hinders stores to be observed before older loads. */ +# define libbpf_smp_rwmb() asm volatile("" : : : "memory") +#elif defined(__aarch64__) +# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#elif defined(__arm__) +/* These are only valid for armv7 and above */ +# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() +#else +/* Architecture missing native barrier functions. */ +# define libbpf_smp_rmb() __sync_synchronize() +# define libbpf_smp_wmb() __sync_synchronize() +# define libbpf_smp_mb() __sync_synchronize() +# define libbpf_smp_rwmb() __sync_synchronize() +#endif + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 8d0078b65486..a3d1a302bc9c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, return 0; out_mmap: - munmap(umem->fill, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); out_socket: close(umem->fd); out_umem_alloc: @@ -259,7 +258,8 @@ out_umem_alloc: static int xsk_load_xdp_prog(struct xsk_socket *xsk) { - char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static const int log_buf_size = 16 * 1024; + char log_buf[log_buf_size]; int err, prog_fd; /* This is the C-program: @@ -308,10 +308,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, - "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, - BPF_LOG_BUF_SIZE); + "LGPL-2.1 or BSD-2-Clause", 0, log_buf, + log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", bpf_log_buf); + pr_warning("BPF log buffer:\n%s", log_buf); return prog_fd; } @@ -387,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk) { close(xsk->qidconf_map_fd); close(xsk->xsks_map_fd); + xsk->qidconf_map_fd = -1; + xsk->xsks_map_fd = -1; } -static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, - int xsks_value) +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) { - bool qidconf_map_updated = false, xsks_map_updated = false; + __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); + __u32 map_len = sizeof(struct bpf_map_info); struct bpf_prog_info prog_info = {}; - __u32 prog_len = sizeof(prog_info); struct bpf_map_info map_info; - __u32 map_len = sizeof(map_info); - __u32 *map_ids; - int reset_value = 0; - __u32 num_maps; - unsigned int i; - int err; + int fd, err; err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); if (err) @@ -422,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, goto out_map_ids; for (i = 0; i < prog_info.nr_map_ids; i++) { - int fd; + if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1) + break; fd = bpf_map_get_fd_by_id(map_ids[i]); - if (fd < 0) { - err = -errno; - goto out_maps; - } + if (fd < 0) + continue; err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) - goto out_maps; + if (err) { + close(fd); + continue; + } if (!strcmp(map_info.name, "qidconf_map")) { - err = bpf_map_update_elem(fd, &xsk->queue_id, - &qidconf_value, 0); - if (err) - goto out_maps; - qidconf_map_updated = true; xsk->qidconf_map_fd = fd; - } else if (!strcmp(map_info.name, "xsks_map")) { - err = bpf_map_update_elem(fd, &xsk->queue_id, - &xsks_value, 0); - if (err) - goto out_maps; - xsks_map_updated = true; + continue; + } + + if (!strcmp(map_info.name, "xsks_map")) { xsk->xsks_map_fd = fd; + continue; } - if (qidconf_map_updated && xsks_map_updated) - break; + close(fd); } - if (!(qidconf_map_updated && xsks_map_updated)) { + err = 0; + if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) { err = -ENOENT; - goto out_maps; + xsk_delete_bpf_maps(xsk); } - err = 0; - goto out_success; - -out_maps: - if (qidconf_map_updated) - (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, - &reset_value, 0); - if (xsks_map_updated) - (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, - &reset_value, 0); -out_success: - if (qidconf_map_updated) - close(xsk->qidconf_map_fd); - if (xsks_map_updated) - close(xsk->xsks_map_fd); out_map_ids: free(map_ids); return err; } +static void xsk_clear_bpf_maps(struct xsk_socket *xsk) +{ + int qid = false; + + bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); + bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); +} + +static int xsk_set_bpf_maps(struct xsk_socket *xsk) +{ + int qid = true, fd = xsk->fd, err; + + err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); + if (err) + goto out; + + err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0); + if (err) + goto out; + + return 0; +out: + xsk_clear_bpf_maps(xsk); + return err; +} + static int xsk_setup_xdp_prog(struct xsk_socket *xsk) { - bool prog_attached = false; __u32 prog_id = 0; int err; @@ -491,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) return err; if (!prog_id) { - prog_attached = true; err = xsk_create_bpf_maps(xsk); if (err) return err; @@ -501,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) goto out_maps; } else { xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto out_load; } - err = xsk_update_bpf_maps(xsk, true, xsk->fd); + err = xsk_set_bpf_maps(xsk); if (err) goto out_load; return 0; out_load: - if (prog_attached) - close(xsk->prog_fd); + close(xsk->prog_fd); out_maps: - if (prog_attached) - xsk_delete_bpf_maps(xsk); + xsk_delete_bpf_maps(xsk); return err; } @@ -523,11 +524,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *usr_config) { + void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; socklen_t optlen; - void *map; int err; if (!umem || !xsk_ptr || !rx || !tx) @@ -593,40 +594,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } if (rx) { - map = xsk_mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); - if (map == MAP_FAILED) { + rx_map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (rx_map == MAP_FAILED) { err = -errno; goto out_socket; } rx->mask = xsk->config.rx_size - 1; rx->size = xsk->config.rx_size; - rx->producer = map + off.rx.producer; - rx->consumer = map + off.rx.consumer; - rx->ring = map + off.rx.desc; + rx->producer = rx_map + off.rx.producer; + rx->consumer = rx_map + off.rx.consumer; + rx->ring = rx_map + off.rx.desc; } xsk->rx = rx; if (tx) { - map = xsk_mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); - if (map == MAP_FAILED) { + tx_map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (tx_map == MAP_FAILED) { err = -errno; goto out_mmap_rx; } tx->mask = xsk->config.tx_size - 1; tx->size = xsk->config.tx_size; - tx->producer = map + off.tx.producer; - tx->consumer = map + off.tx.consumer; - tx->ring = map + off.tx.desc; + tx->producer = tx_map + off.tx.producer; + tx->consumer = tx_map + off.tx.consumer; + tx->ring = tx_map + off.tx.desc; tx->cached_cons = xsk->config.tx_size; } xsk->tx = tx; @@ -642,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, goto out_mmap_tx; } + xsk->qidconf_map_fd = -1; + xsk->xsks_map_fd = -1; + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); if (err) @@ -653,13 +657,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, out_mmap_tx: if (tx) - munmap(xsk->tx, - off.tx.desc + + munmap(tx_map, off.tx.desc + xsk->config.tx_size * sizeof(struct xdp_desc)); out_mmap_rx: if (rx) - munmap(xsk->rx, - off.rx.desc + + munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); out_socket: if (--umem->refcount) @@ -684,9 +686,9 @@ int xsk_umem__delete(struct xsk_umem *umem) optlen = sizeof(off); err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { - munmap(umem->fill->ring, + munmap(umem->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp->ring, + munmap(umem->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * sizeof(__u64)); } @@ -698,6 +700,7 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { + size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; socklen_t optlen; int err; @@ -705,19 +708,21 @@ void xsk_socket__delete(struct xsk_socket *xsk) if (!xsk) return; - (void)xsk_update_bpf_maps(xsk, 0, 0); + xsk_clear_bpf_maps(xsk); + xsk_delete_bpf_maps(xsk); optlen = sizeof(off); err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { - if (xsk->rx) - munmap(xsk->rx->ring, - off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc)); - if (xsk->tx) - munmap(xsk->tx->ring, - off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc)); + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); + } + } xsk->umem->refcount--; diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index a497f00e2962..82ea71a0f3ec 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -16,6 +16,7 @@ #include <linux/if_xdp.h> #include "libbpf.h" +#include "libbpf_util.h" #ifdef __cplusplus extern "C" { @@ -36,6 +37,10 @@ struct name { \ DEFINE_XSK_RING(xsk_ring_prod); DEFINE_XSK_RING(xsk_ring_cons); +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + struct xsk_umem; struct xsk_socket; @@ -105,7 +110,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, size_t nb, __u32 *idx) { - if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + if (xsk_prod_nb_free(prod, nb) < nb) return 0; *idx = prod->cached_prod; @@ -116,10 +121,10 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) { - /* Make sure everything has been written to the ring before signalling - * this to the kernel. + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. */ - smp_wmb(); + libbpf_smp_wmb(); *prod->producer += nb; } @@ -129,11 +134,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, { size_t entries = xsk_cons_nb_avail(cons, nb); - if (likely(entries > 0)) { + if (entries > 0) { /* Make sure we do not speculatively read the data before * we have received the packet buffers from the ring. */ - smp_rmb(); + libbpf_smp_rmb(); *idx = cons->cached_cons; cons->cached_cons += entries; @@ -144,6 +149,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) { + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. + */ + libbpf_smp_rwmb(); + *cons->consumer += nb; } |