diff options
Diffstat (limited to 'tools/lib')
-rw-r--r-- | tools/lib/bpf/Build | 2 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_gen_internal.h | 41 | ||||
-rw-r--r-- | tools/lib/bpf/gen_loader.c | 729 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 427 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 67 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 7 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_internal.h | 2 | ||||
-rw-r--r-- | tools/lib/bpf/linker.c | 18 | ||||
-rw-r--r-- | tools/lib/bpf/netlink.c | 568 | ||||
-rw-r--r-- | tools/lib/bpf/nlattr.h | 48 | ||||
-rw-r--r-- | tools/lib/bpf/skel_internal.h | 123 |
11 files changed, 1861 insertions, 171 deletions
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 9b057cc7650a..430f6874fa41 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h new file mode 100644 index 000000000000..615400391e57 --- /dev/null +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2021 Facebook */ +#ifndef __BPF_GEN_INTERNAL_H +#define __BPF_GEN_INTERNAL_H + +struct ksym_relo_desc { + const char *name; + int kind; + int insn_idx; +}; + +struct bpf_gen { + struct gen_loader_opts *opts; + void *data_start; + void *data_cur; + void *insn_start; + void *insn_cur; + ssize_t cleanup_label; + __u32 nr_progs; + __u32 nr_maps; + int log_level; + int error; + struct ksym_relo_desc *relos; + int relo_cnt; + char attach_target[128]; + int attach_kind; +}; + +void bpf_gen__init(struct bpf_gen *gen, int log_level); +int bpf_gen__finish(struct bpf_gen *gen); +void bpf_gen__free(struct bpf_gen *gen); +void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +struct bpf_prog_load_params; +void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); +void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); +void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); +void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); + +#endif diff --git 
a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c new file mode 100644 index 000000000000..8df718a6b142 --- /dev/null +++ b/tools/lib/bpf/gen_loader.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/filter.h> +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" +#include "hashmap.h" +#include "bpf_gen_internal.h" +#include "skel_internal.h" + +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 + +/* The following structure describes the stack layout of the loader program. + * In addition R6 contains the pointer to context. + * R7 contains the result of the last sys_bpf command (typically error or FD). + * R9 contains the result of the last sys_close command. + * + * Naming convention: + * ctx - bpf program context + * stack - bpf program stack + * blob - bpf_attr-s, strings, insns, map data. + * All the bytes that loader prog will use for read/write. 
+ */ +struct loader_stack { + __u32 btf_fd; + __u32 map_fd[MAX_USED_MAPS]; + __u32 prog_fd[MAX_USED_PROGS]; + __u32 inner_map_fd; +}; + +#define stack_off(field) \ + (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field)) + +#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) + +static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) +{ + size_t off = gen->insn_cur - gen->insn_start; + void *insn_start; + + if (gen->error) + return gen->error; + if (size > INT32_MAX || off + size > INT32_MAX) { + gen->error = -ERANGE; + return -ERANGE; + } + insn_start = realloc(gen->insn_start, off + size); + if (!insn_start) { + gen->error = -ENOMEM; + free(gen->insn_start); + gen->insn_start = NULL; + return -ENOMEM; + } + gen->insn_start = insn_start; + gen->insn_cur = insn_start + off; + return 0; +} + +static int realloc_data_buf(struct bpf_gen *gen, __u32 size) +{ + size_t off = gen->data_cur - gen->data_start; + void *data_start; + + if (gen->error) + return gen->error; + if (size > INT32_MAX || off + size > INT32_MAX) { + gen->error = -ERANGE; + return -ERANGE; + } + data_start = realloc(gen->data_start, off + size); + if (!data_start) { + gen->error = -ENOMEM; + free(gen->data_start); + gen->data_start = NULL; + return -ENOMEM; + } + gen->data_start = data_start; + gen->data_cur = data_start + off; + return 0; +} + +static void emit(struct bpf_gen *gen, struct bpf_insn insn) +{ + if (realloc_insn_buf(gen, sizeof(insn))) + return; + memcpy(gen->insn_cur, &insn, sizeof(insn)); + gen->insn_cur += sizeof(insn); +} + +static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2) +{ + emit(gen, insn1); + emit(gen, insn2); +} + +void bpf_gen__init(struct bpf_gen *gen, int log_level) +{ + size_t stack_sz = sizeof(struct loader_stack); + int i; + + gen->log_level = log_level; + /* save ctx pointer into R6 */ + emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); + + /* bzero stack */ + emit(gen, 
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10)); + emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + + /* jump over cleanup code */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, + /* size of cleanup code below */ + (stack_sz / 4) * 3 + 2)); + + /* remember the label where all error branches will jump to */ + gen->cleanup_label = gen->insn_cur - gen->insn_start; + /* emit cleanup code: close all temp FDs */ + for (i = 0; i < stack_sz; i += 4) { + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); + emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); + } + /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ + emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + emit(gen, BPF_EXIT_INSN()); +} + +static int add_data(struct bpf_gen *gen, const void *data, __u32 size) +{ + void *prev; + + if (realloc_data_buf(gen, size)) + return 0; + prev = gen->data_cur; + memcpy(gen->data_cur, data, size); + gen->data_cur += size; + return prev - gen->data_start; +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + +/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */ +static void emit_rel_store(struct bpf_gen *gen, int off, int data) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, data)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); +} + +/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ +static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +{ + emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); + emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); 
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, + bool check_non_zero) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off)); + if (check_non_zero) + /* If value in ctx is zero don't update the blob. + * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c + */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); +} + +static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size) +{ + emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, attr)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf)); + /* remember the result in R7 */ + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); +} + +static bool is_simm16(__s64 value) +{ + return value == (__s64)(__s16)value; +} + +static void emit_check_err(struct bpf_gen *gen) +{ + __s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1; + + /* R7 
contains result of last sys_bpf command. + * if (R7 < 0) goto cleanup; + */ + if (is_simm16(off)) { + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off)); + } else { + gen->error = -ERANGE; + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); + } +} + +/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */ +static void emit_debug(struct bpf_gen *gen, int reg1, int reg2, + const char *fmt, va_list args) +{ + char buf[1024]; + int addr, len, ret; + + if (!gen->log_level) + return; + ret = vsnprintf(buf, sizeof(buf), fmt, args); + if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0) + /* The special case to accommodate common debug_ret(): + * to avoid specifying BPF_REG_7 and adding " r=%%d" to + * prints explicitly. + */ + strcat(buf, " r=%d"); + len = strlen(buf) + 1; + addr = add_data(gen, buf, len); + + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, addr)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + if (reg1 >= 0) + emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1)); + if (reg2 >= 0) + emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk)); +} + +static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + emit_debug(gen, reg1, reg2, fmt, args); + va_end(args); +} + +static void debug_ret(struct bpf_gen *gen, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + emit_debug(gen, BPF_REG_7, -1, fmt, args); + va_end(args); +} + +static void __emit_sys_close(struct bpf_gen *gen) +{ + emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, + /* 2 is the number of the following insns + * * 6 is additional insns in debug_regs + */ + 2 + (gen->log_level ? 
6 : 0))); + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); + debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d"); +} + +static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off) +{ + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off)); + __emit_sys_close(gen); +} + +static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0)); + __emit_sys_close(gen); +} + +int bpf_gen__finish(struct bpf_gen *gen) +{ + int i; + + emit_sys_close_stack(gen, stack_off(btf_fd)); + for (i = 0; i < gen->nr_progs; i++) + move_stack2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * gen->nr_maps + + sizeof(struct bpf_prog_desc) * i + + offsetof(struct bpf_prog_desc, prog_fd), 4, + stack_off(prog_fd[i])); + for (i = 0; i < gen->nr_maps; i++) + move_stack2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + stack_off(map_fd[i])); + emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(gen, BPF_EXIT_INSN()); + pr_debug("gen: finish %d\n", gen->error); + if (!gen->error) { + struct gen_loader_opts *opts = gen->opts; + + opts->insns = gen->insn_start; + opts->insns_sz = gen->insn_cur - gen->insn_start; + opts->data = gen->data_start; + opts->data_sz = gen->data_cur - gen->data_start; + } + return gen->error; +} + +void bpf_gen__free(struct bpf_gen *gen) +{ + if (!gen) + return; + free(gen->data_start); + free(gen->insn_start); + free(gen); +} + +void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, + __u32 btf_raw_size) +{ + int attr_size = offsetofend(union bpf_attr, btf_log_level); + int btf_data, btf_load_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: load_btf: size %d\n", btf_raw_size); + btf_data = add_data(gen, 
btf_raw_data, btf_raw_size); + + attr.btf_size = btf_raw_size; + btf_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with user provided log details */ + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, + offsetof(struct bpf_loader_ctx, log_level), false); + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4, + offsetof(struct bpf_loader_ctx, log_size), false); + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8, + offsetof(struct bpf_loader_ctx, log_buf), false); + /* populate union bpf_attr with a pointer to the BTF data */ + emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data); + /* emit BTF_LOAD command */ + emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size); + debug_ret(gen, "btf_load size %d", btf_raw_size); + emit_check_err(gen); + /* remember btf_fd in the stack, if successful */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd))); +} + +void bpf_gen__map_create(struct bpf_gen *gen, + struct bpf_create_map_attr *map_attr, int map_idx) +{ + int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); + bool close_inner_map_fd = false; + int map_create_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + attr.map_type = map_attr->map_type; + attr.key_size = map_attr->key_size; + attr.value_size = map_attr->value_size; + attr.map_flags = map_attr->map_flags; + memcpy(attr.map_name, map_attr->name, + min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = map_attr->numa_node; + attr.map_ifindex = map_attr->map_ifindex; + attr.max_entries = map_attr->max_entries; + switch (attr.map_type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKMAP: + case 
BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_RINGBUF: + break; + default: + attr.btf_key_type_id = map_attr->btf_key_type_id; + attr.btf_value_type_id = map_attr->btf_value_type_id; + } + + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", + attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); + + map_create_attr = add_data(gen, &attr, attr_size); + if (attr.btf_value_type_id) + /* populate union bpf_attr with btf_fd saved in the stack earlier */ + move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, + stack_off(btf_fd)); + switch (attr.map_type) { + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, + stack_off(inner_map_fd)); + close_inner_map_fd = true; + break; + default: + break; + } + /* conditionally update max_entries */ + if (map_idx >= 0) + move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * map_idx + + offsetof(struct bpf_map_desc, max_entries), + true /* check that max_entries != 0 */); + /* emit MAP_CREATE command */ + emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); + debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", + attr.map_name, map_idx, map_attr->map_type, attr.value_size, + attr.btf_value_type_id); + emit_check_err(gen); + /* remember map_fd in the stack, if successful */ + if (map_idx < 0) { + /* This bpf_gen__map_create() function is called with map_idx >= 0 + * for all maps that libbpf loading logic tracks. + * It's called with -1 to create an inner map. 
+		 */
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(inner_map_fd)));
+	} else if (map_idx != gen->nr_maps) {
+		gen->error = -EDOM; /* internal bug */
+		return;
+	} else {
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(map_fd[map_idx])));
+		gen->nr_maps++;
+	}
+	if (close_inner_map_fd)
+		emit_sys_close_stack(gen, stack_off(inner_map_fd));
+}
+
+/* Record the BTF attach target that a later bpf_gen__prog_load() resolves.
+ *
+ * btf_get_kernel_prefix_kind() supplies the name prefix and the BTF kind for
+ * the given attach type. The kind is stored in gen->attach_kind and the
+ * string "<prefix><attach_name>" in gen->attach_target.
+ *
+ * On failure gen->error is set to -ENOSPC: the composed name does not fit
+ * into gen->attach_target, or snprintf() reported an error.
+ */
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+				   enum bpf_attach_type type)
+{
+	const char *prefix;
+	int kind, ret;
+
+	btf_get_kernel_prefix_kind(type, &prefix, &kind);
+	gen->attach_kind = kind;
+	ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+		       prefix, attach_name);
+	/* snprintf() returns the length the full string would have had, not
+	 * counting the terminating NUL. Any value >= the buffer size therefore
+	 * means the name was truncated; a negative value is an output error.
+	 */
+	if (ret < 0 || (size_t)ret >= sizeof(gen->attach_target))
+		gen->error = -ENOSPC;
+}
+
+/* Emit instructions that look up gen->attach_target by name and kind.
+ *
+ * The name is appended to the blob and passed, together with its length
+ * (including the NUL), gen->attach_kind and a zero fourth argument, to the
+ * btf_find_by_name_kind helper. The helper result is copied into R7 and
+ * checked with emit_check_err().
+ */
+static void emit_find_attach_target(struct bpf_gen *gen)
+{
+	int name, len = strlen(gen->attach_target) + 1;
+
+	pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind);
+	name = add_data(gen, gen->attach_target, len);
+
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, name));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "find_by_name_kind(%s,%d)",
+		  gen->attach_target, gen->attach_kind);
+	emit_check_err(gen);
+	/* if successful, btf_id is in lower 32-bit of R7 and
+	 * btf_obj_fd is in upper 32-bit
+	 */
+}
+
+/* Append one ksym relocation descriptor to gen->relos.
+ *
+ * The array is grown by one element with libbpf_reallocarray(). The name
+ * pointer is stored as-is (the string is not copied). If the reallocation
+ * fails, gen->error is set to -ENOMEM and the existing array is left
+ * untouched.
+ */
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
+			    int insn_idx)
+{
+	struct ksym_relo_desc *relo;
+
+	relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo));
+	if (!relo) {
+		gen->error = -ENOMEM;
+		return;
+	}
+	gen->relos = relo;
+	relo += gen->relo_cnt;
+	relo->name = name;
+	relo->kind = kind;
+	relo->insn_idx = insn_idx;
+	gen->relo_cnt++;
+}
+
+static void
emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int name, insn, len = strlen(relo->name) + 1; + + pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); + name = add_data(gen, relo->name, len); + + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); + emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); + emit_check_err(gen); + /* store btf_id into insn[insn_idx].imm */ + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + + offsetof(struct bpf_insn, imm); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, insn)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); + if (relo->kind == BTF_KIND_VAR) { + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + sizeof(struct bpf_insn))); + } +} + +static void emit_relos(struct bpf_gen *gen, int insns) +{ + int i; + + for (i = 0; i < gen->relo_cnt; i++) + emit_relo(gen, gen->relos + i, insns); +} + +static void cleanup_relos(struct bpf_gen *gen, int insns) +{ + int i, insn; + + for (i = 0; i < gen->relo_cnt; i++) { + if (gen->relos[i].kind != BTF_KIND_VAR) + continue; + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = insns + + sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } + if (gen->relo_cnt) { + free(gen->relos); + gen->relo_cnt = 0; + gen->relos = NULL; + } +} + +void bpf_gen__prog_load(struct bpf_gen *gen, + struct bpf_prog_load_params *load_attr, int prog_idx) +{ + int attr_size = offsetofend(union bpf_attr, fd_array); + int prog_load_attr, 
license, insns, func_info, line_info; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: prog_load: type %d insns_cnt %zd\n", + load_attr->prog_type, load_attr->insn_cnt); + /* add license string to blob of bytes */ + license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); + /* add insns to blob of bytes */ + insns = add_data(gen, load_attr->insns, + load_attr->insn_cnt * sizeof(struct bpf_insn)); + + attr.prog_type = load_attr->prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; + attr.attach_btf_id = load_attr->attach_btf_id; + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = 0; + attr.insn_cnt = (__u32)load_attr->insn_cnt; + attr.prog_flags = load_attr->prog_flags; + + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + func_info = add_data(gen, load_attr->func_info, + attr.func_info_cnt * attr.func_info_rec_size); + + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + line_info = add_data(gen, load_attr->line_info, + attr.line_info_cnt * attr.line_info_rec_size); + + memcpy(attr.prog_name, load_attr->name, + min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + prog_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with a pointer to license */ + emit_rel_store(gen, attr_field(prog_load_attr, license), license); + + /* populate union bpf_attr with a pointer to instructions */ + emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); + + /* populate union bpf_attr with a pointer to func_info */ + emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); + + /* populate union bpf_attr with a pointer to line_info */ + emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); + + /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ + emit_rel_store_sp(gen, 
attr_field(prog_load_attr, fd_array), + stack_off(map_fd[0])); + + /* populate union bpf_attr with user provided log details */ + move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, + offsetof(struct bpf_loader_ctx, log_level), false); + move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4, + offsetof(struct bpf_loader_ctx, log_size), false); + move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8, + offsetof(struct bpf_loader_ctx, log_buf), false); + /* populate union bpf_attr with btf_fd saved in the stack earlier */ + move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4, + stack_off(btf_fd)); + if (gen->attach_kind) { + emit_find_attach_target(gen); + /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, prog_load_attr)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + offsetof(union bpf_attr, attach_btf_id))); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + offsetof(union bpf_attr, attach_btf_obj_fd))); + } + emit_relos(gen, insns); + /* emit PROG_LOAD command */ + emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); + debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); + /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ + cleanup_relos(gen, insns); + if (gen->attach_kind) + emit_sys_close_blob(gen, + attr_field(prog_load_attr, attach_btf_obj_fd)); + emit_check_err(gen); + /* remember prog_fd in the stack, if successful */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, + stack_off(prog_fd[gen->nr_progs]))); + gen->nr_progs++; +} + +void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, + __u32 value_size) +{ + int attr_size = offsetofend(union bpf_attr, flags); + int map_update_attr, value, key; + union bpf_attr attr; + int zero = 0; + + memset(&attr, 0, attr_size); + 
pr_debug("gen: map_update_elem: idx %d\n", map_idx); + + value = add_data(gen, pvalue, value_size); + key = add_data(gen, &zero, sizeof(zero)); + + /* if (map_desc[map_idx].initial_value) + * copy_from_user(value, initial_value, value_size); + */ + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * map_idx + + offsetof(struct bpf_map_desc, initial_value))); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, value)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + + map_update_attr = add_data(gen, &attr, attr_size); + move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, + stack_off(map_fd[map_idx])); + emit_rel_store(gen, attr_field(map_update_attr, key), key); + emit_rel_store(gen, attr_field(map_update_attr, value), value); + /* emit MAP_UPDATE_ELEM command */ + emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); + debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size); + emit_check_err(gen); +} + +void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) +{ + int attr_size = offsetofend(union bpf_attr, map_fd); + int map_freeze_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: map_freeze: idx %d\n", map_idx); + map_freeze_attr = add_data(gen, &attr, attr_size); + move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + stack_off(map_fd[map_idx])); + /* emit MAP_FREEZE command */ + emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); + debug_ret(gen, "map_freeze"); + emit_check_err(gen); +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c41d9b2b59ac..69cd1a835ebd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -54,6 +54,7 @@ #include "str_error.h" #include "libbpf_internal.h" #include "hashmap.h" +#include "bpf_gen_internal.h" #ifndef 
BPF_FS_MAGIC #define BPF_FS_MAGIC 0xcafe4a11 @@ -178,7 +179,7 @@ enum kern_feature_id { __FEAT_CNT, }; -static bool kernel_supports(enum kern_feature_id feat_id); +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); enum reloc_type { RELO_LD64, @@ -432,6 +433,8 @@ struct bpf_object { bool loaded; bool has_subcalls; + struct bpf_gen *gen_loader; + /* * Information when doing elf related work. Only valid if fd * is valid. @@ -677,6 +680,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } + if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); + return -ENOTSUP; + } + pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); @@ -700,13 +708,14 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; - /* if function is a global/weak symbol, but has hidden - * visibility (STV_HIDDEN), mark its BTF FUNC as static to - * enable more permissive BPF verification mode with more - * outside context available to BPF verifier + /* if function is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC + * as static to enable more permissive BPF verification mode + * with more outside context available to BPF verifier */ if (GELF_ST_BIND(sym.st_info) != STB_LOCAL - && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN) + && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN + || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -1794,7 +1803,6 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (!symbols) return -EINVAL; - scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); data = elf_sec_data(obj, scn); 
if (!scn || !data) { @@ -1854,6 +1862,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -LIBBPF_ERRNO__FORMAT; } + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION + || GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); + return -ENOTSUP; + } + map->libbpf_type = LIBBPF_MAP_UNSPEC; map->sec_idx = sym.st_shndx; map->sec_offset = sym.st_value; @@ -2261,6 +2275,16 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def pr_debug("map '%s': found inner map definition.\n", map->name); } +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: return "global"; + case BTF_VAR_GLOBAL_EXTERN: return "extern"; + default: return "unknown"; + } +} + static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_type *sec, int var_idx, int sec_idx, @@ -2293,10 +2317,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map_name, btf_kind_str(var)); return -EINVAL; } - if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && - var_extra->linkage != BTF_VAR_STATIC) { - pr_warn("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); + if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { + pr_warn("map '%s': unsupported map linkage %s.\n", + map_name, btf_var_linkage_str(var_extra->linkage)); return -EOPNOTSUPP; } @@ -2443,20 +2466,20 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static bool btf_needs_sanitization(struct bpf_object *obj) { - bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); - bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); - bool has_float = kernel_supports(FEAT_BTF_FLOAT); - bool has_func = kernel_supports(FEAT_BTF_FUNC); + bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); + 
bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); return !has_func || !has_datasec || !has_func_global || !has_float; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { - bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); - bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); - bool has_float = kernel_supports(FEAT_BTF_FLOAT); - bool has_func = kernel_supports(FEAT_BTF_FUNC); + bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); + bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@ -2637,7 +2660,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) int err; /* btf_vmlinux could be loaded earlier */ - if (obj->btf_vmlinux) + if (obj->btf_vmlinux || obj->gen_loader) return 0; if (!force && !obj_needs_vmlinux_btf(obj)) @@ -2662,7 +2685,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!obj->btf) return 0; - if (!kernel_supports(FEAT_BTF)) { + if (!kernel_supports(obj, FEAT_BTF)) { if (kernel_needs_btf(obj)) { err = -EOPNOTSUPP; goto report; @@ -2719,7 +2742,20 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) bpf_object__sanitize_btf(obj, kern_btf); } - err = btf__load(kern_btf); + if (obj->gen_loader) { + __u32 raw_size = 0; + const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); + + if (!raw_data) + return -ENOMEM; + bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); + /* Pretend to have valid FD to pass various fd >= 0 checks. + * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 
+ */ + btf__set_fd(kern_btf, 0); + } else { + err = btf__load(kern_btf); + } if (sanitize) { if (!err) { /* move fd to libbpf's BTF */ @@ -4293,11 +4329,17 @@ static struct kern_feature_desc { }, }; -static bool kernel_supports(enum kern_feature_id feat_id) +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; int ret; + if (obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. + */ + return true; + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { ret = feat->probe(); if (ret > 0) { @@ -4380,6 +4422,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + if (obj->gen_loader) { + bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, + map->mmaped, map->def.value_size); + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) + bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); + return 0; + } err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; @@ -4405,14 +4454,14 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) +static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { struct bpf_create_map_attr create_attr; struct bpf_map_def *def = &map->def; memset(&create_attr, 0, sizeof(create_attr)); - if (kernel_supports(FEAT_PROG_NAME)) + if (kernel_supports(obj, FEAT_PROG_NAME)) create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; @@ -4453,7 +4502,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) if (map->inner_map) { int err; - err = bpf_object__create_map(obj, map->inner_map); + err = 
bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", map->name, err); @@ -4465,7 +4514,15 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.inner_map_fd = map->inner_map_fd; } - map->fd = bpf_create_map_xattr(&create_attr); + if (obj->gen_loader) { + bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); + /* Pretend to have valid FD to pass various fd >= 0 checks. + * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + */ + map->fd = 0; + } else { + map->fd = bpf_create_map_xattr(&create_attr); + } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -4486,6 +4543,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return -errno; if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { + if (obj->gen_loader) + map->inner_map->fd = -1; bpf_map__destroy(map->inner_map); zfree(&map->inner_map); } @@ -4493,11 +4552,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return 0; } -static int init_map_slots(struct bpf_map *map) +static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) { const struct bpf_map *targ_map; unsigned int i; - int fd, err; + int fd, err = 0; for (i = 0; i < map->init_slots_sz; i++) { if (!map->init_slots[i]) @@ -4505,7 +4564,13 @@ static int init_map_slots(struct bpf_map *map) targ_map = map->init_slots[i]; fd = bpf_map__fd(targ_map); - err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (obj->gen_loader) { + pr_warn("// TODO map_update_elem: idx %ld key %d value==map_idx %ld\n", + map - obj->maps, i, targ_map - obj->maps); + return -ENOTSUP; + } else { + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + } if (err) { err = -errno; pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", @@ -4547,7 +4612,7 
@@ bpf_object__create_maps(struct bpf_object *obj) pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { - err = bpf_object__create_map(obj, map); + err = bpf_object__create_map(obj, map, false); if (err) goto err_out; @@ -4563,7 +4628,7 @@ bpf_object__create_maps(struct bpf_object *obj) } if (map->init_slots_sz) { - err = init_map_slots(map); + err = init_map_slots(obj, map); if (err < 0) { zclose(map->fd); goto err_out; @@ -4973,11 +5038,14 @@ static int load_module_btfs(struct bpf_object *obj) if (obj->btf_modules_loaded) return 0; + if (obj->gen_loader) + return 0; + /* don't do this again, even if we find no module BTFs */ obj->btf_modules_loaded = true; /* kernel too old to support module BTFs */ - if (!kernel_supports(FEAT_MODULE_BTF)) + if (!kernel_supports(obj, FEAT_MODULE_BTF)) return 0; while (true) { @@ -6120,6 +6188,12 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (str_is_empty(spec_str)) return -EINVAL; + if (prog->obj->gen_loader) { + pr_warn("// TODO core_relo: prog %ld insn[%d] %s %s kind %d\n", + prog - prog->obj->programs, relo->insn_off / 8, + local_name, spec_str, relo->kind); + return -ENOTSUP; + } err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", @@ -6371,19 +6445,34 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) switch (relo->type) { case RELO_LD64: - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX; + insn[0].imm = relo->map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + insn[0].imm = obj->maps[relo->map_idx].fd; + } break; case RELO_DATA: - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[1].imm = insn[0].imm + relo->sym_off; - insn[0].imm = obj->maps[relo->map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; + 
insn[0].imm = relo->map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[relo->map_idx].fd; + } break; case RELO_EXTERN_VAR: ext = &obj->externs[relo->sym_off]; if (ext->type == EXT_KCFG) { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; + insn[0].imm = obj->kconfig_map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + } insn[1].imm = ext->kcfg.data_off; } else /* EXT_KSYM */ { if (ext->ksym.type_id) { /* typed ksyms */ @@ -6402,11 +6491,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].imm = ext->ksym.kernel_btf_id; break; case RELO_SUBPROG_ADDR: - insn[0].src_reg = BPF_PSEUDO_FUNC; - /* will be handled as a follow up pass */ + if (insn[0].src_reg != BPF_PSEUDO_FUNC) { + pr_warn("prog '%s': relo #%d: bad insn\n", + prog->name, i); + return -EINVAL; + } + /* handled already */ break; case RELO_CALL: - /* will be handled as a follow up pass */ + /* handled already */ break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", @@ -6497,7 +6590,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't * supprot func/line info */ - if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC)) + if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; /* only attempt func info relocation if main program's func_info @@ -6575,6 +6668,30 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); } +static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; + struct reloc_desc *relos; + int i; + + if (main_prog == subprog) + return 0; + relos = 
libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); + if (!relos) + return -ENOMEM; + memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, + sizeof(*relos) * subprog->nr_reloc); + + for (i = main_prog->nr_reloc; i < new_cnt; i++) + relos[i].insn_idx += subprog->sub_insn_off; + /* After insn_idx adjustment the 'relos' array is still sorted + * by insn_idx and doesn't break bsearch. + */ + main_prog->reloc_desc = relos; + main_prog->nr_reloc = new_cnt; + return 0; +} + static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) @@ -6595,6 +6712,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, continue; relo = find_prog_insn_relo(prog, insn_idx); + if (relo && relo->type == RELO_EXTERN_FUNC) + /* kfunc relocations will be handled later + * in bpf_object__relocate_data() + */ + continue; if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", prog->name, insn_idx, relo->type); @@ -6669,6 +6791,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", main_prog->name, subprog->insns_cnt, subprog->name); + /* The subprog insns are now appended. Append its relos too. 
*/ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; err = bpf_object__reloc_code(obj, main_prog, subprog); if (err) return err; @@ -6798,11 +6924,25 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) return 0; } +static void +bpf_object__free_relocs(struct bpf_object *obj) +{ + struct bpf_program *prog; + int i; + + /* free up relocation descriptors */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + } +} + static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; - size_t i; + size_t i, j; int err; if (obj->btf_ext) { @@ -6813,23 +6953,32 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - /* relocate data references first for all programs and sub-programs, - * as they don't change relative to code locations, so subsequent - * subprogram processing won't need to re-calculate any of them + + /* Before relocating calls pre-process relocations and mark + * few ld_imm64 instructions that points to subprogs. + * Otherwise bpf_object__reloc_code() later would have to consider + * all ld_imm64 insns as relocation candidates. That would + * reduce relocation speed, since amount of find_prog_insn_relo() + * would increase and most of them will fail to find a relo. 
*/ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - err = bpf_object__relocate_data(obj, prog); - if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); - return err; + for (j = 0; j < prog->nr_reloc; j++) { + struct reloc_desc *relo = &prog->reloc_desc[j]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + + /* mark the insn, so it's recognized by insn_is_pseudo_func() */ + if (relo->type == RELO_SUBPROG_ADDR) + insn[0].src_reg = BPF_PSEUDO_FUNC; } } - /* now relocate subprogram calls and append used subprograms to main + + /* relocate subprogram calls and append used subprograms to main * programs; each copy of subprogram code needs to be relocated * differently for each main program, because its code location might - * have changed + * have changed. + * Append subprog relos to main programs to allow data relos to be + * processed after text is completely relocated. */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; @@ -6846,12 +6995,20 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - /* free up relocation descriptors */ + /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - zfree(&prog->reloc_desc); - prog->nr_reloc = 0; + if (prog_is_subprog(obj, prog)) + continue; + err = bpf_object__relocate_data(obj, prog); + if (err) { + pr_warn("prog '%s': failed to relocate data references: %d\n", + prog->name, err); + return err; + } } + if (!obj->gen_loader) + bpf_object__free_relocs(obj); return 0; } @@ -7040,6 +7197,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program enum bpf_func_id func_id; int i; + if (obj->gen_loader) + return 0; + for (i = 0; i < prog->insns_cnt; i++, insn++) { if (!insn_is_helper_call(insn, &func_id)) continue; @@ -7051,12 +7211,12 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program switch 
(func_id) { case BPF_FUNC_probe_read_kernel: case BPF_FUNC_probe_read_user: - if (!kernel_supports(FEAT_PROBE_READ_KERN)) + if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) insn->imm = BPF_FUNC_probe_read; break; case BPF_FUNC_probe_read_kernel_str: case BPF_FUNC_probe_read_user_str: - if (!kernel_supports(FEAT_PROBE_READ_KERN)) + if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) insn->imm = BPF_FUNC_probe_read_str; break; default: @@ -7091,12 +7251,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_type = prog->type; /* old kernels might not support specifying expected_attach_type */ - if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def && + if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def && prog->sec_def->is_exp_attach_type_optional) load_attr.expected_attach_type = 0; else load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(FEAT_PROG_NAME)) + if (kernel_supports(prog->obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; @@ -7112,7 +7272,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) { + if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -7124,6 +7284,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + if (prog->obj->gen_loader) { + bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, + prog - prog->obj->programs); + *pfd = -1; + return 0; + } retry_load: if (log_buf_size) { log_buf = malloc(log_buf_size); @@ -7142,7 +7308,7 @@ retry_load: pr_debug("verifier 
log:\n%s", log_buf); if (prog->obj->rodata_map_idx >= 0 && - kernel_supports(FEAT_PROG_BIND_MAP)) { + kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { struct bpf_map *rodata_map = &prog->obj->maps[prog->obj->rodata_map_idx]; @@ -7201,6 +7367,38 @@ out: return ret; } +static int bpf_program__record_externs(struct bpf_program *prog) +{ + struct bpf_object *obj = prog->obj; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + struct reloc_desc *relo = &prog->reloc_desc[i]; + struct extern_desc *ext = &obj->externs[relo->sym_off]; + + switch (relo->type) { + case RELO_EXTERN_VAR: + if (ext->type != EXT_KSYM) + continue; + if (!ext->ksym.type_id) { + pr_warn("typeless ksym %s is not supported yet\n", + ext->name); + return -ENOTSUP; + } + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, + relo->insn_idx); + break; + case RELO_EXTERN_FUNC: + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, + relo->insn_idx); + break; + default: + continue; + } + } + return 0; +} + static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) @@ -7246,6 +7444,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) pr_warn("prog '%s': inconsistent nr(%d) != 1\n", prog->name, prog->instances.nr); } + if (prog->obj->gen_loader) + bpf_program__record_externs(prog); err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_ver, &fd); if (!err) @@ -7322,6 +7522,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } + if (obj->gen_loader) + bpf_object__free_relocs(obj); return 0; } @@ -7500,11 +7702,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!kernel_supports(FEAT_GLOBAL_DATA)) { + if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { pr_warn("kernel doesn't support global 
data\n"); return -ENOTSUP; } - if (!kernel_supports(FEAT_ARRAY_MMAP)) + if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -7702,6 +7904,12 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) if (ext->type != EXT_KSYM || !ext->ksym.type_id) continue; + if (obj->gen_loader) { + ext->is_set = true; + ext->ksym.kernel_btf_obj_fd = 0; + ext->ksym.kernel_btf_id = 0; + continue; + } t = btf__type_by_id(obj->btf, ext->btf_id); if (btf_is_var(t)) err = bpf_object__resolve_ksym_var_btf_id(obj, ext); @@ -7816,6 +8024,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return -EINVAL; } + if (obj->gen_loader) + bpf_gen__init(obj->gen_loader, attr->log_level); + err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); @@ -7826,6 +8037,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__relocate(obj, attr->target_btf_path); err = err ? : bpf_object__load_progs(obj, attr->log_level); + if (obj->gen_loader) { + /* reset FDs */ + btf__set_fd(obj->btf, -1); + for (i = 0; i < obj->nr_maps; i++) + obj->maps[i].fd = -1; + if (!err) + err = bpf_gen__finish(obj->gen_loader); + } + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -8451,6 +8671,7 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); @@ -8541,6 +8762,22 @@ void *bpf_object__priv(const struct bpf_object *obj) return obj ? 
obj->priv : ERR_PTR(-EINVAL); } +int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) +{ + struct bpf_gen *gen; + + if (!opts) + return -EFAULT; + if (!OPTS_VALID(opts, gen_loader_opts)) + return -EINVAL; + gen = calloc(sizeof(*gen), 1); + if (!gen) + return -ENOMEM; + gen->opts = opts; + obj->gen_loader = gen; + return 0; +} + static struct bpf_program * __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, bool forward) @@ -8887,6 +9124,8 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, .attach_fn = attach_iter), + SEC_DEF("syscall", SYSCALL, + .is_sleepable = true), BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, @@ -9176,6 +9415,28 @@ invalid_prog: #define BTF_ITER_PREFIX "bpf_iter_" #define BTF_MAX_NAME_SIZE 128 +void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, + const char **prefix, int *kind) +{ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + *prefix = BTF_TRACE_PREFIX; + *kind = BTF_KIND_TYPEDEF; + break; + case BPF_LSM_MAC: + *prefix = BTF_LSM_PREFIX; + *kind = BTF_KIND_FUNC; + break; + case BPF_TRACE_ITER: + *prefix = BTF_ITER_PREFIX; + *kind = BTF_KIND_FUNC; + break; + default: + *prefix = ""; + *kind = BTF_KIND_FUNC; + } +} + static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind) { @@ -9196,21 +9457,11 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, static inline int find_attach_btf_id(struct btf *btf, const char *name, enum bpf_attach_type attach_type) { - int err; + const char *prefix; + int kind; - if (attach_type == BPF_TRACE_RAW_TP) - err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, - BTF_KIND_TYPEDEF); - else if (attach_type == BPF_LSM_MAC) - err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name, - BTF_KIND_FUNC); - else if (attach_type == 
BPF_TRACE_ITER) - err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name, - BTF_KIND_FUNC); - else - err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); - - return err; + btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); + return find_btf_by_prefix_kind(btf, prefix, name, kind); } int libbpf_find_vmlinux_btf_id(const char *name, @@ -9309,7 +9560,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, __u32 attach_prog_fd = prog->attach_prog_fd; const char *name = prog->sec_name, *attach_name; const struct bpf_sec_def *sec = NULL; - int i, err; + int i, err = 0; if (!name) return -EINVAL; @@ -9344,7 +9595,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, } /* kernel/module BTF ID */ - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + if (prog->obj->gen_loader) { + bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); + *btf_obj_fd = 0; + *btf_type_id = 1; + } else { + err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + } if (err) { pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); return err; @@ -9501,6 +9758,14 @@ int bpf_map__set_initial_value(struct bpf_map *map, return 0; } +const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +{ + if (!map->mmaped) + return NULL; + *psize = map->def.value_size; + return map->mmaped; +} + bool bpf_map__is_offload_neutral(const struct bpf_map *map) { return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bec4e6a6e31d..d98523558f39 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -471,6 +471,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); 
+LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); @@ -498,6 +499,7 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); +/* XDP related API */ struct xdp_link_info { __u32 prog_id; __u32 drv_prog_id; @@ -520,6 +522,49 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +/* TC related API */ +enum bpf_tc_attach_point { + BPF_TC_INGRESS = 1 << 0, + BPF_TC_EGRESS = 1 << 1, + BPF_TC_CUSTOM = 1 << 2, +}; + +#define BPF_TC_PARENT(a, b) \ + ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU)) + +enum bpf_tc_flags { + BPF_TC_F_REPLACE = 1 << 0, +}; + +struct bpf_tc_hook { + size_t sz; + int ifindex; + enum bpf_tc_attach_point attach_point; + __u32 parent; + size_t :0; +}; +#define bpf_tc_hook__last_field parent + +struct bpf_tc_opts { + size_t sz; + int prog_fd; + __u32 flags; + __u32 prog_id; + __u32 handle; + __u32 priority; + size_t :0; +}; +#define bpf_tc_opts__last_field priority + +LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook); +LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook); +LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook, + struct bpf_tc_opts *opts); +LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts); +LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, + struct bpf_tc_opts *opts); + /* Ring buffer APIs */ struct ring_buffer; @@ -756,6 +801,18 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void 
bpf_object__detach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); +struct gen_loader_opts { + size_t sz; /* size of this struct, for forward/backward compatibility */ + const char *data; + const char *insns; + __u32 data_sz; + __u32 insns_sz; +}; + +#define gen_loader_opts__last_field insns_sz +LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj, + struct gen_loader_opts *opts); + enum libbpf_tristate { TRI_NO = 0, TRI_YES = 1, @@ -768,10 +825,18 @@ struct bpf_linker_opts { }; #define bpf_linker_opts__last_field sz +struct bpf_linker_file_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; +}; +#define bpf_linker_file_opts__last_field sz + struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); -LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename); +LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, + const char *filename, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b9b29baf1df8..2abef6f17c06 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -359,6 +359,13 @@ LIBBPF_0.4.0 { bpf_linker__finalize; bpf_linker__free; bpf_linker__new; + bpf_map__initial_value; bpf_map__inner_map; + bpf_object__gen_loader; bpf_object__set_kversion; + bpf_tc_attach; + bpf_tc_detach; + bpf_tc_hook_create; + bpf_tc_hook_destroy; + bpf_tc_query; } LIBBPF_0.3.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index acbcf6c7bdf8..a2cc297edb99 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -263,6 +263,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, int
bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); +void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, + const char **prefix, int *kind); struct btf_ext_info { /* diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9de084b1c699..b594a88620ce 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -158,7 +158,9 @@ struct bpf_linker { static int init_output_elf(struct bpf_linker *linker, const char *file); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj); +static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts, + struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec); @@ -435,15 +437,19 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename) +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) { struct src_obj obj = {}; int err = 0; + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return -EINVAL; + if (!linker->elf) return -EINVAL; - err = err ?: linker_load_obj_file(linker, filename, &obj); + err = err ?: linker_load_obj_file(linker, filename, opts, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -529,7 +535,9 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj) +static int 
linker_load_obj_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts, + struct src_obj *obj) { #if __BYTE_ORDER == __LITTLE_ENDIAN const int host_endianness = ELFDATA2LSB; @@ -1780,7 +1788,7 @@ static void sym_update_visibility(Elf64_Sym *sym, int sym_vis) /* libelf doesn't provide setters for ST_VISIBILITY, * but it is stored in the lower 2 bits of st_other */ - sym->st_other &= 0x03; + sym->st_other &= ~0x03; sym->st_other |= sym_vis; } diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index d2cb28e9ef52..47444588e0d2 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -4,7 +4,10 @@ #include <stdlib.h> #include <memory.h> #include <unistd.h> +#include <arpa/inet.h> #include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> #include <linux/rtnetlink.h> #include <sys/socket.h> #include <errno.h> @@ -73,9 +76,20 @@ cleanup: return ret; } -static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, - __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, - void *cookie) +static void libbpf_netlink_close(int sock) +{ + close(sock); +} + +enum { + NL_CONT, + NL_NEXT, + NL_DONE, +}; + +static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, + void *cookie) { bool multipart = true; struct nlmsgerr *err; @@ -84,6 +98,7 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, int len, ret; while (multipart) { +start: multipart = false; len = recv(sock, buf, sizeof(buf), 0); if (len < 0) { @@ -121,8 +136,16 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, } if (_fn) { ret = _fn(nh, fn, cookie); - if (ret) + switch (ret) { + case NL_CONT: + break; + case NL_NEXT: + goto start; + case NL_DONE: + return 0; + default: return ret; + } } } } @@ -131,72 +154,72 @@ done: return ret; } +static int libbpf_netlink_send_recv(struct nlmsghdr *nh, + __dump_nlmsg_t parse_msg, + libbpf_dump_nlmsg_t parse_attr, + void *cookie) +{ + 
__u32 nl_pid = 0; + int sock, ret; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + nh->nlmsg_pid = 0; + nh->nlmsg_seq = time(NULL); + + if (send(sock, nh, nh->nlmsg_len, 0) < 0) { + ret = -errno; + goto out; + } + + ret = libbpf_netlink_recv(sock, nl_pid, nh->nlmsg_seq, + parse_msg, parse_attr, cookie); +out: + libbpf_netlink_close(sock); + return ret; +} + static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, __u32 flags) { - int sock, seq = 0, ret; - struct nlattr *nla, *nla_xdp; + struct nlattr *nla; + int ret; struct { struct nlmsghdr nh; struct ifinfomsg ifinfo; char attrbuf[64]; } req; - __u32 nl_pid = 0; - - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - - /* started nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ + req.ifinfo.ifi_index = ifindex; + + nla = nlattr_begin_nested(&req.nh, sizeof(req), IFLA_XDP); + if (!nla) + return -EMSGSIZE; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - 
nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FLAGS, &flags, + sizeof(flags)); + if (ret < 0) + return ret; } - if (flags & XDP_FLAGS_REPLACE) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd); - memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd)); - nla->nla_len += nla_xdp->nla_len; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_EXPECTED_FD, + &old_fd, sizeof(old_fd)); + if (ret < 0) + return ret; } + nlattr_end_nested(&req.nh, nla); - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; - } - ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); - -cleanup: - close(sock); - return ret; + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); } int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, @@ -212,9 +235,7 @@ int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, flags |= XDP_FLAGS_REPLACE; } - return __bpf_set_link_xdp_fd_replace(ifindex, fd, - old_fd, - flags); + return __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags); } int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) @@ -231,6 +252,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) return -LIBBPF_ERRNO__NLPARSE; @@ -282,16 +304,21 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -static int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); - int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, 
size_t info_size, __u32 flags) { struct xdp_id_md xdp_id = {}; - int sock, ret; - __u32 nl_pid = 0; __u32 mask; + int ret; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + } req = { + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nh.nlmsg_type = RTM_GETLINK, + .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; if (flags & ~XDP_FLAGS_MASK || !info_size) return -EINVAL; @@ -302,14 +329,11 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, if (flags && flags & mask) return -EINVAL; - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; - xdp_id.ifindex = ifindex; xdp_id.flags = flags; - ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); + ret = libbpf_netlink_send_recv(&req.nh, __dump_link_nlmsg, + get_xdp_info, &xdp_id); if (!ret) { size_t sz = min(info_size, sizeof(xdp_id.info)); @@ -317,7 +341,6 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, memset((void *) info + sz, 0, info_size - sz); } - close(sock); return ret; } @@ -349,24 +372,403 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) return ret; } -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t, + size_t maxsz); + +static int clsact_config(struct nlmsghdr *nh, struct tcmsg *t, size_t maxsz) { + t->tcm_parent = TC_H_CLSACT; + t->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); + + return nlattr_add(nh, maxsz, TCA_KIND, "clsact", sizeof("clsact")); +} + +static int attach_point_to_config(struct bpf_tc_hook *hook, + qdisc_config_t *config) +{ + switch (OPTS_GET(hook, attach_point, 0)) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + case BPF_TC_INGRESS | BPF_TC_EGRESS: + if (OPTS_GET(hook, parent, 0)) + return -EINVAL; + *config = &clsact_config; + return 0; + case BPF_TC_CUSTOM: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int 
tc_get_tcm_parent(enum bpf_tc_attach_point attach_point, + __u32 *parent) +{ + switch (attach_point) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + if (*parent) + return -EINVAL; + *parent = TC_H_MAKE(TC_H_CLSACT, + attach_point == BPF_TC_INGRESS ? + TC_H_MIN_INGRESS : TC_H_MIN_EGRESS); + break; + case BPF_TC_CUSTOM: + if (!*parent) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} + +static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) +{ + qdisc_config_t config; + int ret; struct { - struct nlmsghdr nlh; - struct ifinfomsg ifm; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlh.nlmsg_type = RTM_GETLINK, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .ifm.ifi_family = AF_PACKET, - }; - int seq = time(NULL); + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + ret = attach_point_to_config(hook, &config); + if (ret < 0) + return ret; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; + req.nh.nlmsg_type = cmd; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); + + ret = config(&req.nh, &req.tc, sizeof(req)); + if (ret < 0) + return ret; + + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); +} + +static int tc_qdisc_create_excl(struct bpf_tc_hook *hook) +{ + return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE); +} + +static int tc_qdisc_delete(struct bpf_tc_hook *hook) +{ + return tc_qdisc_modify(hook, RTM_DELQDISC, 0); +} + +int bpf_tc_hook_create(struct bpf_tc_hook *hook) +{ + if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || + OPTS_GET(hook, ifindex, 0) <= 0) + return -EINVAL; + + return tc_qdisc_create_excl(hook); +} - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) +static int __bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts, + const bool flush); + +int 
bpf_tc_hook_destroy(struct bpf_tc_hook *hook) +{ + if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || + OPTS_GET(hook, ifindex, 0) <= 0) + return -EINVAL; + + switch (OPTS_GET(hook, attach_point, 0)) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + return __bpf_tc_detach(hook, NULL, true); + case BPF_TC_INGRESS | BPF_TC_EGRESS: + return tc_qdisc_delete(hook); + case BPF_TC_CUSTOM: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +struct bpf_cb_ctx { + struct bpf_tc_opts *opts; + bool processed; +}; + +static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb, + bool unicast) +{ + struct nlattr *tbb[TCA_BPF_MAX + 1]; + struct bpf_cb_ctx *info = cookie; + + if (!info || !info->opts) + return -EINVAL; + if (unicast && info->processed) + return -EINVAL; + if (!tb[TCA_OPTIONS]) + return NL_CONT; + + libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL); + if (!tbb[TCA_BPF_ID]) + return -EINVAL; + + OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID])); + OPTS_SET(info->opts, handle, tc->tcm_handle); + OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16); + + info->processed = true; + return unicast ? 
NL_NEXT : NL_DONE; +} + +static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct tcmsg *tc = NLMSG_DATA(nh); + struct nlattr *tb[TCA_MAX + 1]; + + libbpf_nla_parse(tb, TCA_MAX, + (struct nlattr *)((char *)tc + NLMSG_ALIGN(sizeof(*tc))), + NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL); + if (!tb[TCA_KIND]) + return NL_CONT; + return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO); +} + +static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char name[256]; + int len, ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (ret < 0) + return ret; + + ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; + len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id); + if (len < 0) return -errno; + if (len >= sizeof(name)) + return -ENAMETOOLONG; + return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1); +} + +int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) +{ + __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct bpf_cb_ctx info = {}; + struct nlattr *nla; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + if (!hook || !opts || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return -EINVAL; + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags = OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || !prog_fd || prog_id) + return -EINVAL; + if (priority > UINT16_MAX) + return -EINVAL; + if (flags & ~BPF_TC_F_REPLACE) + return -EINVAL; + + flags = (flags & BPF_TC_F_REPLACE) ? 
NLM_F_REPLACE : NLM_F_EXCL; + protocol = ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | + NLM_F_ECHO | flags; + req.nh.nlmsg_type = RTM_NEWTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return ret; + req.tc.tcm_parent = parent; + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + return ret; + nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS); + if (!nla) + return -EMSGSIZE; + ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd); + if (ret < 0) + return ret; + bpf_flags = TCA_BPF_FLAG_ACT_DIRECT; + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags, + sizeof(bpf_flags)); + if (ret < 0) + return ret; + nlattr_end_nested(&req.nh, nla); + + info.opts = opts; + + ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info); + if (ret < 0) + return ret; + if (!info.processed) + return -ENOENT; + return ret; +} + +static int __bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts, + const bool flush) +{ + __u32 protocol = 0, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; - return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, - dump_link_nlmsg, cookie); + if (!hook || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return -EINVAL; + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags 
= OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || flags || prog_fd || prog_id) + return -EINVAL; + if (priority > UINT16_MAX) + return -EINVAL; + if (flags & ~BPF_TC_F_REPLACE) + return -EINVAL; + if (!flush) { + if (!handle || !priority) + return -EINVAL; + protocol = ETH_P_ALL; + } else { + if (handle || priority) + return -EINVAL; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_DELTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + if (!flush) { + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + } + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return ret; + req.tc.tcm_parent = parent; + + if (!flush) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, + "bpf", sizeof("bpf")); + if (ret < 0) + return ret; + } + + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); +} + +int bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts) +{ + return !opts ? 
-EINVAL : __bpf_tc_detach(hook, opts, false); +} + +int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) +{ + __u32 protocol, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct bpf_cb_ctx info = {}; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + if (!hook || !opts || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return -EINVAL; + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags = OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || flags || prog_fd || prog_id || + !handle || !priority) + return -EINVAL; + if (priority > UINT16_MAX) + return -EINVAL; + + protocol = ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_GETTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return ret; + req.tc.tcm_parent = parent; + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + return ret; + + info.opts = opts; + + ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info); + if (ret < 0) + return ret; + if (!info.processed) + return -ENOENT; + return ret; } diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 6cc3ac91690f..3c780ab6d022 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -10,7 +10,10 @@ #define __LIBBPF_NLATTR_H #include <stdint.h> +#include <string.h> +#include <errno.h> #include <linux/netlink.h> + /* avoid multiple definition 
of netlink features */ #define __LINUX_NETLINK_H @@ -103,4 +106,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); +static inline struct nlattr *nla_data(struct nlattr *nla) +{ + return (struct nlattr *)((char *)nla + NLA_HDRLEN); +} + +static inline struct nlattr *nh_tail(struct nlmsghdr *nh) +{ + return (struct nlattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len)); +} + +static inline int nlattr_add(struct nlmsghdr *nh, size_t maxsz, int type, + const void *data, int len) +{ + struct nlattr *nla; + + if (NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > maxsz) + return -EMSGSIZE; + if (!!data != !!len) + return -EINVAL; + + nla = nh_tail(nh); + nla->nla_type = type; + nla->nla_len = NLA_HDRLEN + len; + if (data) + memcpy(nla_data(nla), data, len); + nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(nla->nla_len); + return 0; +} + +static inline struct nlattr *nlattr_begin_nested(struct nlmsghdr *nh, + size_t maxsz, int type) +{ + struct nlattr *tail; + + tail = nh_tail(nh); + if (nlattr_add(nh, maxsz, type | NLA_F_NESTED, NULL, 0)) + return NULL; + return tail; +} + +static inline void nlattr_end_nested(struct nlmsghdr *nh, struct nlattr *tail) +{ + tail->nla_len = (char *)nh_tail(nh) - (char *)tail; +} + #endif /* __LIBBPF_NLATTR_H */ diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h new file mode 100644 index 000000000000..b22b50c1b173 --- /dev/null +++ b/tools/lib/bpf/skel_internal.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2021 Facebook */ +#ifndef __SKEL_INTERNAL_H +#define __SKEL_INTERNAL_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> + +/* This file is a base header for auto-generated *.lskel.h files. + * Its contents will change and may become part of auto-generation in the future. 
+ * + * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent + * and will change from one version of libbpf to another and features + * requested during loader program generation. + */ +struct bpf_map_desc { + union { + /* input for the loader prog */ + struct { + __aligned_u64 initial_value; + __u32 max_entries; + }; + /* output of the loader prog */ + struct { + int map_fd; + }; + }; +}; +struct bpf_prog_desc { + int prog_fd; +}; + +struct bpf_loader_ctx { + size_t sz; + __u32 log_level; + __u32 log_size; + __u64 log_buf; +}; + +struct bpf_load_and_run_opts { + struct bpf_loader_ctx *ctx; + const void *data; + const void *insns; + __u32 data_sz; + __u32 insns_sz; + const char *errstr; +}; + +static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +static inline int skel_closenz(int fd) +{ + if (fd > 0) + return close(fd); + return -EINVAL; +} + +static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) +{ + int map_fd = -1, prog_fd = -1, key = 0, err; + union bpf_attr attr; + + map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, + opts->data_sz, 1, 0); + if (map_fd < 0) { + opts->errstr = "failed to create loader map"; + err = -errno; + goto out; + } + + err = bpf_map_update_elem(map_fd, &key, opts->data, 0); + if (err < 0) { + opts->errstr = "failed to update loader map"; + err = -errno; + goto out; + } + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SYSCALL; + attr.insns = (long) opts->insns; + attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + attr.license = (long) "Dual BSD/GPL"; + memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); + attr.fd_array = (long) &map_fd; + attr.log_level = opts->ctx->log_level; + attr.log_size = opts->ctx->log_size; + attr.log_buf = opts->ctx->log_buf; + attr.prog_flags = BPF_F_SLEEPABLE; + prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + 
if (prog_fd < 0) { + opts->errstr = "failed to load loader prog"; + err = -errno; + goto out; + } + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.ctx_in = (long) opts->ctx; + attr.test.ctx_size_in = opts->ctx->sz; + err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); + if (err < 0 || (int)attr.test.retval < 0) { + opts->errstr = "failed to execute loader prog"; + if (err < 0) + err = -errno; + else + err = (int)attr.test.retval; + goto out; + } + err = 0; +out: + if (map_fd >= 0) + close(map_fd); + if (prog_fd >= 0) + close(prog_fd); + return err; +} + +#endif |