author    Jakub Kicinski <kuba@kernel.org>  2020-10-13 01:16:50 +0200
committer Jakub Kicinski <kuba@kernel.org>  2020-10-13 01:16:50 +0200
commit    ccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (patch)
tree      3028901b29bf4ab04cd50d61da4fecdb20b182b6 /tools
parent    Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next (diff)
parent    Merge branch 'bpf, sockmap: allow verdict only sk_skb progs' (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2020-10-12
The main changes are:
1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong.
2) libbpf relocation support for different size load/store, from Andrii.
3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel (an inner-map definition sketch follows the diffstat below).
4) BPF support for per-cpu variables, from Hao (a usage sketch follows the commit message).
5) sockmap improvements, from John.
====================
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
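
To make item 4 of the pull request above concrete, here is a minimal BPF-side sketch of reading a kernel per-cpu variable through a typed ksym and the new bpf_per_cpu_ptr()/bpf_this_cpu_ptr() helpers. It is modeled on the ksyms_btf selftest added further down in this pull; the vmlinux.h header, the raw_tp section name, the output variable names and the choice of runqueues as the target symbol are illustrative assumptions, not part of this commit.

// SPDX-License-Identifier: GPL-2.0
/* Sketch: typed ksym pointing at the kernel's per-cpu runqueues.
 * Assumes a vmlinux.h generated from kernel BTF and a kernel/libbpf
 * that already carry this series (BPF_PSEUDO_BTF_ID support). */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern const struct rq runqueues __ksym; /* name/type must match vmlinux BTF */

__u32 cpu0_rq_cpu = 0;  /* rq->cpu of CPU 0's runqueue */
__u32 this_rq_cpu = 0;  /* rq->cpu of the current CPU's runqueue */

SEC("raw_tp/sys_enter")
int dump_rq(const void *ctx)
{
        struct rq *rq;

        /* bpf_per_cpu_ptr() may return NULL (e.g. cpu >= nr_cpu_ids),
         * so the verifier requires the NULL check before dereferencing. */
        rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
        if (rq)
                cpu0_rq_cpu = rq->cpu;

        /* bpf_this_cpu_ptr() never returns NULL. */
        rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
        this_rq_cpu = rq->cpu;

        return 0;
}

char LICENSE[] SEC("license") = "GPL";
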
Diffstat (limited to 'tools')
38 files changed, 2997 insertions, 367 deletions
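
Before the diff itself, a second sketch for item 3 above: a declarative array-of-maps whose inner maps may have different max_entries, relying on the new BPF_F_INNER_MAP flag. It loosely follows the updated btf_map_in_map selftest in this pull; map names are illustrative, the flag placement on the inner array maps mirrors the selftest pattern, and the UAPI linux/bpf.h must be recent enough to define the flag.

/* Sketch: inner array maps of different sizes behind one outer map,
 * made legal by declaring the inner maps with BPF_F_INNER_MAP. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct inner_map_dyn {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(map_flags, BPF_F_INNER_MAP); /* opt out of fixed max_entries */
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, int);
} inner_small SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(map_flags, BPF_F_INNER_MAP);
        __uint(max_entries, 16); /* different size, still accepted as inner map */
        __type(key, int);
        __type(value, int);
} inner_large SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
        __uint(max_entries, 2);
        __uint(key_size, sizeof(int));
        __uint(value_size, sizeof(int));
        __array(values, struct inner_map_dyn); /* "template" for inner maps */
} outer_arr_dyn SEC(".maps") = {
        /* slot 0 is pre-populated by libbpf (see init_map_slots() below);
         * inner_large can be swapped in at runtime via bpf_map_update_elem(). */
        .values = { [0] = &inner_small },
};

char LICENSE[] SEC("license") = "GPL";
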
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4f556cfcbfbe..bf5a99d803e4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -356,18 +356,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -417,6 +435,9 @@ enum { /* Share perf_event among processes */ BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ @@ -1680,7 +1701,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return @@ -2235,7 +2256,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The @@ -3661,10 +3682,59 @@ union bpf_attr { * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. + * populates L2 addresses as well, meaning, internally, the helper + * performs a FIB lookup based on the skb's networking header to + * get the address of the next hop and then relies on the neighbor + * lookup for the L2 address of the nexthop. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. 
+ * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. @@ -3823,6 +3893,9 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3df1f4db9f3a..313034117070 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -390,6 +390,12 @@ struct extern_desc { } kcfg; struct { unsigned long long addr; + + /* target btf_id of the corresponding kernel var. */ + int vmlinux_btf_id; + + /* local btf_id of the ksym extern's type. 
*/ + __u32 type_id; } ksym; }; }; @@ -2522,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) { bool need_vmlinux_btf = false; struct bpf_program *prog; - int err; + int i, err; /* CO-RE relocations need kernel BTF */ if (obj->btf_ext && obj->btf_ext->core_relo_info.len) need_vmlinux_btf = true; + /* Support for typed ksyms needs kernel BTF */ + for (i = 0; i < obj->nr_extern; i++) { + const struct extern_desc *ext; + + ext = &obj->externs[i]; + if (ext->type == EXT_KSYM && ext->ksym.type_id) { + need_vmlinux_btf = true; + break; + } + } + bpf_object__for_each_program(prog, obj) { if (!prog->load) continue; @@ -3156,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - const struct btf_type *vt; - ksym_sec = sec; ext->type = EXT_KSYM; - - vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); - if (!btf_is_void(vt)) { - pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); - return -ENOTSUP; - } + skip_mods_and_typedefs(obj->btf, t->type, + &ext->ksym.type_id); } else { pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; @@ -4192,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return 0; } +static int init_map_slots(struct bpf_map *map) +{ + const struct bpf_map *targ_map; + unsigned int i; + int fd, err; + + for (i = 0; i < map->init_slots_sz; i++) { + if (!map->init_slots[i]) + continue; + + targ_map = map->init_slots[i]; + fd = bpf_map__fd(targ_map); + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", + map->name, i, targ_map->name, + fd, err); + return err; + } + pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", + map->name, i, targ_map->name, fd); + } + + zfree(&map->init_slots); + map->init_slots_sz = 0; + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -4215,47 +4256,29 @@ bpf_object__create_maps(struct bpf_object *obj) if (map->fd >= 0) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); - continue; - } - - err = bpf_object__create_map(obj, map); - if (err) - goto err_out; - - pr_debug("map '%s': created successfully, fd=%d\n", map->name, - map->fd); - - if (bpf_map__is_internal(map)) { - err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); + } else { + err = bpf_object__create_map(obj, map); + if (err) goto err_out; - } - } - if (map->init_slots_sz) { - for (j = 0; j < map->init_slots_sz; j++) { - const struct bpf_map *targ_map; - int fd; + pr_debug("map '%s': created successfully, fd=%d\n", + map->name, map->fd); - if (!map->init_slots[j]) - continue; + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(map->fd); + goto err_out; + } + } - targ_map = map->init_slots[j]; - fd = bpf_map__fd(targ_map); - err = bpf_map_update_elem(map->fd, &j, &fd, 0); - if (err) { - err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, j, targ_map->name, - fd, err); + if (map->init_slots_sz) { + err = init_map_slots(map); + if (err < 0) { + zclose(map->fd); goto err_out; } - pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", - map->name, j, targ_map->name, fd); } - zfree(&map->init_slots); - map->init_slots_sz = 0; } if (map->pin_path && !map->pinned) { @@ -5017,16 +5040,19 @@ static int 
bpf_core_spec_match(struct bpf_core_spec *local_spec, static int bpf_core_calc_field_relo(const struct bpf_program *prog, const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u32 *val, __u32 *field_sz, __u32 *type_id, + bool *validate) { const struct bpf_core_accessor *acc; const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; const struct btf_member *m; const struct btf_type *mt; bool bitfield; __s64 sz; + *field_sz = 0; + if (relo->kind == BPF_FIELD_EXISTS) { *val = spec ? 1 : 0; return 0; @@ -5042,6 +5068,12 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, if (!acc->name) { if (relo->kind == BPF_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; + /* remember field size for load/store mem size */ + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; + *type_id = acc->type_id; } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) @@ -5058,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, } m = btf_members(t) + acc->idx; - mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); + mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); bit_off = spec->bit_offset; bit_sz = btf_member_bitfield_size(t, acc->idx); @@ -5078,7 +5110,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, byte_off = bit_off / 8 / byte_sz * byte_sz; } } else { - sz = btf__resolve_size(spec->btf, m->type); + sz = btf__resolve_size(spec->btf, field_type_id); if (sz < 0) return -EINVAL; byte_sz = sz; @@ -5096,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, switch (relo->kind) { case BPF_FIELD_BYTE_OFFSET: *val = byte_off; + if (!bitfield) { + *field_sz = byte_sz; + *type_id = field_type_id; + } break; case BPF_FIELD_BYTE_SIZE: *val = byte_sz; @@ -5196,6 +5232,19 @@ struct bpf_core_relo_res bool poison; /* some relocations can't be validated against orig_val */ bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + <off>) = rY + * rX = *(T *)(rY + <off>), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; }; /* Calculate original and target relocation values, given local and target @@ -5217,10 +5266,56 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, res->new_val = 0; res->poison = false; res->validate = true; + res->fail_memsz_adjust = false; + res->orig_sz = res->new_sz = 0; + res->orig_type_id = res->new_type_id = 0; if (core_relo_is_field_based(relo->kind)) { - err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate); - err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL); + err = bpf_core_calc_field_relo(prog, relo, local_spec, + &res->orig_val, &res->orig_sz, + &res->orig_type_id, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, + &res->new_val, &res->new_sz, + &res->new_type_id, NULL); + if (err) + goto done; + /* Validate if it's safe to adjust load/store memory size. 
+ * Adjustments are performed only if original and new memory + * sizes differ. + */ + res->fail_memsz_adjust = false; + if (res->orig_sz != res->new_sz) { + const struct btf_type *orig_t, *new_t; + + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + + /* There are two use cases in which it's safe to + * adjust load/store's mem size: + * - reading a 32-bit kernel pointer, while on BPF + * size pointers are always 64-bit; in this case + * it's safe to "downsize" instruction size due to + * pointer being treated as unsigned integer with + * zero-extended upper 32-bits; + * - reading unsigned integers, again due to + * zero-extension is preserving the value correctly. + * + * In all other cases it's incorrect to attempt to + * load/store field because read value will be + * incorrect, so we poison relocated instruction. + */ + if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) + goto done; + if (btf_is_int(orig_t) && btf_is_int(new_t) && + btf_int_encoding(orig_t) != BTF_INT_SIGNED && + btf_int_encoding(new_t) != BTF_INT_SIGNED) + goto done; + + /* mark as invalid mem size adjustment, but this will + * only be checked for LDX/STX/ST insns + */ + res->fail_memsz_adjust = true; + } } else if (core_relo_is_type_based(relo->kind)) { err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); @@ -5229,6 +5324,7 @@ static int bpf_core_calc_relo(const struct bpf_program *prog, err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); } +done: if (err == -EUCLEAN) { /* EUCLEAN is used to signal instruction poisoning request */ res->poison = true; @@ -5268,6 +5364,28 @@ static bool is_ldimm64(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_DW: return 8; + case BPF_W: return 4; + case BPF_H: return 2; + case BPF_B: return 1; + default: return -1; + } +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + /* * Patch relocatable BPF instruction. * @@ -5277,10 +5395,13 @@ static bool is_ldimm64(struct bpf_insn *insn) * spec, and is checked before patching instruction. If actual insn->imm value * is wrong, bail out with error. * - * Currently three kinds of BPF instructions are supported: + * Currently supported classes of BPF instruction are: * 1. rX = <imm> (assignment with immediate operand); * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. rX = <imm64> (load with 64-bit immediate value). + * 3. rX = <imm64> (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. 
*/ static int bpf_core_patch_insn(struct bpf_program *prog, const struct bpf_core_relo *relo, @@ -5304,6 +5425,7 @@ static int bpf_core_patch_insn(struct bpf_program *prog, class = BPF_CLASS(insn->code); if (res->poison) { +poison: /* poison second part of ldimm64 to avoid confusing error from * verifier about "unknown opcode 00" */ @@ -5346,10 +5468,39 @@ static int bpf_core_patch_insn(struct bpf_program *prog, prog->name, relo_idx, insn_idx, new_val); return -ERANGE; } + if (res->fail_memsz_adjust) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " + "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", + prog->name, relo_idx, insn_idx); + goto poison; + } + orig_val = insn->off; insn->off = new_val; pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", prog->name, relo_idx, insn_idx, orig_val, new_val); + + if (res->new_sz != res->orig_sz) { + int insn_bytes_sz, insn_bpf_sz; + + insn_bytes_sz = insn_bpf_size_to_bytes(insn); + if (insn_bytes_sz != res->orig_sz) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", + prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); + return -EINVAL; + } + + insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); + if (insn_bpf_sz < 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", + prog->name, relo_idx, insn_idx, res->new_sz); + return -EINVAL; + } + + insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", + prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); + } break; case BPF_LD: { __u64 imm; @@ -5691,7 +5842,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) return 0; if (targ_btf_path) - targ_btf = btf__parse_elf(targ_btf_path, NULL); + targ_btf = btf__parse(targ_btf_path, NULL); else targ_btf = obj->btf_vmlinux; if (IS_ERR_OR_NULL(targ_btf)) { @@ -5742,6 +5893,11 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = -EINVAL; goto out; } + /* no need to apply CO-RE relocation if the program is + * not going to be loaded + */ + if (!prog->load) + continue; err = bpf_core_apply_relo(prog, rec, i, obj->btf, targ_btf, cand_cache); @@ -5800,8 +5956,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; insn[1].imm = ext->kcfg.data_off; } else /* EXT_KSYM */ { - insn[0].imm = (__u32)ext->ksym.addr; - insn[1].imm = ext->ksym.addr >> 32; + if (ext->ksym.type_id) { /* typed ksyms */ + insn[0].src_reg = BPF_PSEUDO_BTF_ID; + insn[0].imm = ext->ksym.vmlinux_btf_id; + } else { /* typeless ksyms */ + insn[0].imm = (__u32)ext->ksym.addr; + insn[1].imm = ext->ksym.addr >> 32; + } } relo->processed = true; break; @@ -6933,10 +7094,72 @@ out: return err; } +static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) +{ + struct extern_desc *ext; + int i, id; + + for (i = 0; i < obj->nr_extern; i++) { + const struct btf_type *targ_var, *targ_type; + __u32 targ_type_id, local_type_id; + const char *targ_var_name; + int ret; + + ext = &obj->externs[i]; + if (ext->type != EXT_KSYM || !ext->ksym.type_id) + continue; + + id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, + BTF_KIND_VAR); + if (id <= 0) { + pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", + ext->name); + return -ESRCH; + } + + /* find 
local type_id */ + local_type_id = ext->ksym.type_id; + + /* find target type_id */ + targ_var = btf__type_by_id(obj->btf_vmlinux, id); + targ_var_name = btf__name_by_offset(obj->btf_vmlinux, + targ_var->name_off); + targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, + targ_var->type, + &targ_type_id); + + ret = bpf_core_types_are_compat(obj->btf, local_type_id, + obj->btf_vmlinux, targ_type_id); + if (ret <= 0) { + const struct btf_type *local_type; + const char *targ_name, *local_name; + + local_type = btf__type_by_id(obj->btf, local_type_id); + local_name = btf__name_by_offset(obj->btf, + local_type->name_off); + targ_name = btf__name_by_offset(obj->btf_vmlinux, + targ_type->name_off); + + pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", + ext->name, local_type_id, + btf_kind_str(local_type), local_name, targ_type_id, + btf_kind_str(targ_type), targ_name); + return -EINVAL; + } + + ext->is_set = true; + ext->ksym.vmlinux_btf_id = id; + pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", + ext->name, id, btf_kind_str(targ_var), targ_var_name); + } + return 0; +} + static int bpf_object__resolve_externs(struct bpf_object *obj, const char *extra_kconfig) { bool need_config = false, need_kallsyms = false; + bool need_vmlinux_btf = false; struct extern_desc *ext; void *kcfg_data = NULL; int err, i; @@ -6967,7 +7190,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; } else if (ext->type == EXT_KSYM) { - need_kallsyms = true; + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); return -EINVAL; @@ -6996,6 +7222,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return -EINVAL; } + if (need_vmlinux_btf) { + err = bpf_object__resolve_ksyms_btf_id(obj); + if (err) + return -EINVAL; + } for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; @@ -7028,10 +7259,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } err = bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); err = err ? 
: bpf_object__relocate(obj, attr->target_btf_path); @@ -10353,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog, btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); else - btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, - attach_func_name, - prog->expected_attach_type); + btf_id = libbpf_find_vmlinux_btf_id(attach_func_name, + prog->expected_attach_type); if (btf_id < 0) return btf_id; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 30b4ca5d2ac7..e3c98c007825 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -705,7 +705,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, struct xsk_ctx *ctx; int err, ifindex; - if (!umem || !xsk_ptr || !(rx || tx) || !fill || !comp) + if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; xsk = calloc(1, sizeof(*xsk)); @@ -735,6 +735,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, ctx = xsk_get_ctx(umem, ifindex, queue_id); if (!ctx) { + if (!fill || !comp) { + err = -EFAULT; + goto out_socket; + } + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, fill, comp); if (!ctx) { diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 66acfcf15ff2..ac9eda830187 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -7,6 +7,44 @@ General instructions on running selftests can be found in Additional information about selftest failures are documented here. +profiler[23] test failures with clang/llvm <12.0.0 +================================================== + +With clang/llvm <12.0.0, the profiler[23] test may fail. +The symptom looks like + +.. code-block:: c + + // r9 is a pointer to map_value + // r7 is a scalar + 17: bf 96 00 00 00 00 00 00 r6 = r9 + 18: 0f 76 00 00 00 00 00 00 r6 += r7 + math between map_value pointer and register with unbounded min value is not allowed + + // the instructions below will not be seen in the verifier log + 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1 + 20: bf 96 00 00 00 00 00 00 r6 = r9 + // r6 is used here + +The verifier will reject such code with above error. +At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and +the insn 20 undoes map_value addition. It is currently impossible for the +verifier to understand such speculative pointer arithmetic. +Hence + https://reviews.llvm.org/D85570 +addresses it on the compiler side. It was committed on llvm 12. + +The corresponding C code +.. 
code-block:: c + + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, ...); + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); // workaround + payload += filepart_length; + } + } + bpf_iter test failures with clang/llvm 10.0.0 ============================================= diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index c548aded6585..52414058a627 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, @@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, }, }, @@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. 
*/ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 540fea4c91a5..76ebe4c250f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -55,10 +55,10 @@ static int kern_sync_rcu(void) static void test_lookup_update(void) { - int err, key = 0, val, i; + int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; + int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd; struct test_btf_map_in_map *skel; - int outer_arr_fd, outer_hash_fd; - int fd, map1_fd, map2_fd, map1_id, map2_id; + int err, key = 0, val, i, fd; skel = test_btf_map_in_map__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) @@ -70,32 +70,45 @@ static void test_lookup_update(void) map1_fd = bpf_map__fd(skel->maps.inner_map1); map2_fd = bpf_map__fd(skel->maps.inner_map2); + map3_fd = bpf_map__fd(skel->maps.inner_map3); + map4_fd = bpf_map__fd(skel->maps.inner_map4); + map5_fd = bpf_map__fd(skel->maps.inner_map5); + outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn); outer_arr_fd = bpf_map__fd(skel->maps.outer_arr); outer_hash_fd = bpf_map__fd(skel->maps.outer_hash); - /* inner1 = input, inner2 = input + 1 */ - map1_fd = bpf_map__fd(skel->maps.inner_map1); + /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0); - map2_fd = bpf_map__fd(skel->maps.inner_map2); bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0); skel->bss->input = 1; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + bpf_map_lookup_elem(map3_fd, &key, &val); + CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3); - /* inner1 = input + 1, inner2 = input */ + /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0); bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0); skel->bss->input = 3; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + bpf_map_lookup_elem(map4_fd, &key, &val); + CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5); + + /* inner5 = input + 2 */ + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0); + skel->bss->input = 5; + usleep(1); + bpf_map_lookup_elem(map5_fd, &key, &val); + CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7); for (i = 0; i < 5; i++) { val = i % 2 ? map1_fd : map2_fd; @@ -106,7 +119,13 @@ static void test_lookup_update(void) } err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0); if (CHECK_FAIL(err)) { - printf("failed to update hash_of_maps on iter #%d\n", i); + printf("failed to update array_of_maps on iter #%d\n", i); + goto cleanup; + } + val = i % 2 ? 
map4_fd : map5_fd; + err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update array_of_maps (dyn) on iter #%d\n", i); goto cleanup; } } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c new file mode 100644 index 000000000000..981c251453d9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <bpf/btf.h> + +/* real layout and sizes according to test's (32-bit) BTF + * needs to be defined before skeleton is included */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; +}; + +#include "test_core_autosize.skel.h" + +static int duration = 0; + +static struct { + unsigned long long ptr_samesized; + unsigned long long val1_samesized; + unsigned long long val2_samesized; + unsigned long long val3_samesized; + unsigned long long val4_samesized; + struct test_struct___real output_samesized; + + unsigned long long ptr_downsized; + unsigned long long val1_downsized; + unsigned long long val2_downsized; + unsigned long long val3_downsized; + unsigned long long val4_downsized; + struct test_struct___real output_downsized; + + unsigned long long ptr_probed; + unsigned long long val1_probed; + unsigned long long val2_probed; + unsigned long long val3_probed; + unsigned long long val4_probed; + + unsigned long long ptr_signed; + unsigned long long val1_signed; + unsigned long long val2_signed; + unsigned long long val3_signed; + unsigned long long val4_signed; + struct test_struct___real output_signed; +} out; + +void test_core_autosize(void) +{ + char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; + int err, fd = -1, zero = 0; + int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + struct test_core_autosize* skel = NULL; + struct bpf_object_load_attr load_attr = {}; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct btf *btf = NULL; + size_t written; + const void *raw_data; + __u32 raw_sz; + FILE *f = NULL; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + /* Emit the following struct with 32-bit pointer size: + * + * struct test_struct { + * void *ptr; + * unsigned long val2; + * unsigned long long val1; + * unsigned short val3; + * unsigned char val4; + * char: 8; + * }; + * + * This struct is going to be used as the "kernel BTF" for this test. + * It's equivalent memory-layout-wise to test_struct__real above. 
+ */ + + /* force 32-bit pointer size */ + btf__set_pointer_size(btf, 4); + + char_id = btf__add_int(btf, "unsigned char", 1, 0); + ASSERT_EQ(char_id, 1, "char_id"); + short_id = btf__add_int(btf, "unsigned short", 2, 0); + ASSERT_EQ(short_id, 2, "short_id"); + /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */ + int_id = btf__add_int(btf, "long unsigned int", 4, 0); + ASSERT_EQ(int_id, 3, "int_id"); + long_long_id = btf__add_int(btf, "unsigned long long", 8, 0); + ASSERT_EQ(long_long_id, 4, "long_long_id"); + void_ptr_id = btf__add_ptr(btf, 0); + ASSERT_EQ(void_ptr_id, 5, "void_ptr_id"); + + id = btf__add_struct(btf, "test_struct", 20 /* bytes */); + ASSERT_EQ(id, 6, "struct_id"); + err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0); + err = err ?: btf__add_field(btf, "val2", int_id, 32, 0); + err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0); + err = err ?: btf__add_field(btf, "val3", short_id, 128, 0); + err = err ?: btf__add_field(btf, "val4", char_id, 144, 0); + ASSERT_OK(err, "struct_fields"); + + fd = mkstemp(btf_file); + if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd)) + goto cleanup; + f = fdopen(fd, "w"); + if (!ASSERT_OK_PTR(f, "btf_fdopen")) + goto cleanup; + + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data")) + goto cleanup; + written = fwrite(raw_data, 1, raw_sz, f); + if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno)) + goto cleanup; + fflush(f); + fclose(f); + f = NULL; + close(fd); + fd = -1; + + /* open and load BPF program with custom BTF as the kernel BTF */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* disable handle_signed() for now */ + prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + bpf_program__set_autoload(prog, false); + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_OK(err, "prog_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_samesize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_downsize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_probed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_probed = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach")) + goto cleanup; + + usleep(1); + + bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) + goto cleanup; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + if (!ASSERT_OK(err, "bss_lookup")) + goto cleanup; + + ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized"); + ASSERT_EQ(out.output_samesized.ptr, 0x01020304, 
"ptr_samesized"); + ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized"); + + ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized"); + ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized"); + + ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed"); + ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed"); + ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed"); + ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed"); + ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed"); + + test_core_autosize__destroy(skel); + skel = NULL; + + /* now re-load with handle_signed() enabled, it should fail loading */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_ERR(err, "bad_prog_load")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + if (fd >= 0) + close(fd); + remove(btf_file); + btf__free(btf); + test_core_autosize__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index b771804b2342..b295969b263b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -7,40 +7,28 @@ static int duration; -static __u64 kallsyms_find(const char *sym) -{ - char type, name[500]; - __u64 addr, res = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) - return 0; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { - if (strcmp(name, sym) == 0) { - res = addr; - goto out; - } - } - - CHECK(false, "not_found", "symbol %s not found\n", sym); -out: - fclose(f); - return res; -} - void test_ksyms(void) { - __u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start"); - __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); const char *btf_path = "/sys/kernel/btf/vmlinux"; struct test_ksyms *skel; struct test_ksyms__data *data; + __u64 link_fops_addr, per_cpu_start_addr; struct stat st; __u64 btf_size; int err; + err = kallsyms_find("bpf_link_fops", &link_fops_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n")) + return; + + err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n")) + return; + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) return; btf_size = st.st_size; diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c new 
file mode 100644 index 000000000000..28e26bd3e0ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "test_ksyms_btf.skel.h" + +static int duration; + +void test_ksyms_btf(void) +{ + __u64 runqueues_addr, bpf_prog_active_addr; + __u32 this_rq_cpu; + int this_bpf_prog_active; + struct test_ksyms_btf *skel = NULL; + struct test_ksyms_btf__data *data; + struct btf *btf; + int percpu_datasec; + int err; + + err = kallsyms_find("runqueues", &runqueues_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n")) + return; + + err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n")) + return; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + goto cleanup; + } + + skel = test_ksyms_btf__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + goto cleanup; + + err = test_ksyms_btf__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__runqueues_addr, + (unsigned long long)runqueues_addr); + CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__bpf_prog_active_addr, + (unsigned long long)bpf_prog_active_addr); + + CHECK(data->out__rq_cpu == -1, "rq_cpu", + "got %u, exp != -1\n", data->out__rq_cpu); + CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active", + "got %d, exp >= 0\n", data->out__bpf_prog_active); + CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu", + "got %u, exp 0\n", data->out__cpu_0_rq_cpu); + + this_rq_cpu = data->out__this_rq_cpu; + CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu", + "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu); + + this_bpf_prog_active = data->out__this_bpf_prog_active; + CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active", + "got %d, exp %d\n", this_bpf_prog_active, + data->out__bpf_prog_active); + +cleanup: + btf__free(btf); + test_ksyms_btf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index 041952524c55..fcf54b3a1dd0 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -37,7 +37,7 @@ void test_pinning(void) struct stat statbuf = {}; struct bpf_object *obj; struct bpf_map *map; - int err; + int err, map_fd; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .pin_root_path = custpath, ); @@ -213,6 +213,53 @@ void test_pinning(void) if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) goto out; + /* remove the custom pin path to re-test it with 
reuse fd below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test pinning at custom path with reuse fd */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), + sizeof(__u64), 1, 0); + if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) + goto out; + + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto close_map_fd; + + err = bpf_map__reuse_fd(map, map_fd); + if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto close_map_fd; + +close_map_fd: + close(map_fd); out: unlink(pinpath); unlink(nopinpath); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 4c4224e3e10a..85f73261fab0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -198,7 +198,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, len, src_fd, iter_fd, duration = 0; - union bpf_iter_link_info linfo = {0}; + union bpf_iter_link_info linfo = {}; __u32 i, num_sockets, num_elems; struct bpf_iter_sockmap *skel; __s64 *sock_fd = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 24ba0d21b641..c86e67214a9e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -264,9 +264,19 @@ static int check_error_linum(const struct sk_fds *sk_fds) static void check_hdr_and_close_fds(struct sk_fds *sk_fds) { + const __u32 expected_inherit_cb_flags = + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_STATE_CB_FLAG; + if (sk_fds_shutdown(sk_fds)) goto check_linum; + if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags, + "Unexpected inherit_cb_flags", "0x%x != 0x%x\n", + skel->bss->inherit_cb_flags, expected_inherit_cb_flags)) + goto check_linum; + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, "passive_hdr_stg")) goto check_linum; @@ -321,6 +331,8 @@ static void reset_test(void) memset(&skel->bss->active_estab_in, 0, optsize); memset(&skel->bss->active_fin_in, 0, optsize); + skel->bss->inherit_cb_flags = 0; + skel->data->test_kind = TCPOPT_EXP; skel->data->test_magic = 0xeB9F; diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c new file mode 100644 index 000000000000..4ca275101ee0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -0,0 +1,72 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include "progs/profiler.h" +#include "profiler1.skel.h" +#include "profiler2.skel.h" +#include "profiler3.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + struct bpf_prog_test_run_attr test_attr = {}; + __u64 args[] = {1, 2, 3}; + __u32 duration = 0; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + if (CHECK(err || test_attr.retval, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, test_attr.retval, duration)) + return -1; + return 0; +} + +void test_test_profiler(void) +{ + struct profiler1 *profiler1_skel = NULL; + struct profiler2 *profiler2_skel = NULL; + struct profiler3 *profiler3_skel = NULL; + __u32 duration = 0; + int err; + + profiler1_skel = profiler1__open_and_load(); + if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n")) + goto cleanup; + + err = profiler1__attach(profiler1_skel); + if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler2_skel = profiler2__open_and_load(); + if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n")) + goto cleanup; + + err = profiler2__attach(profiler2_skel); + if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler3_skel = profiler3__open_and_load(); + if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n")) + goto cleanup; + + err = profiler3__attach(profiler3_skel); + if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; +cleanup: + profiler1__destroy(profiler1_skel); + profiler2__destroy(profiler2_skel); + profiler3__destroy(profiler3_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index a1f06424cf83..0281095de266 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -25,7 +25,7 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; + __u32 duration = 0, retval, size; int err, i; __u64 bytes = 0, pkts = 0; char buf[128]; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index b1b2773c0b9d..a943d394fd3a 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -23,6 +23,10 @@ #define TCP_CA_NAME_MAX 16 #endif +#ifndef TCP_NOTSENT_LOWAT +#define TCP_NOTSENT_LOWAT 25 +#endif + #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif @@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx) +{ + int lowat = 65535; + + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat))) + return 1; + } + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) 
{ @@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (set_keepalive(ctx)) return 0; + if (set_notsent_lowat(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h new file mode 100644 index 000000000000..3bac4fdd4bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#pragma once + +#define TASK_COMM_LEN 16 +#define MAX_ANCESTORS 4 +#define MAX_PATH 256 +#define KILL_TARGET_LEN 64 +#define CTL_MAXNAME 10 +#define MAX_ARGS_LEN 4096 +#define MAX_FILENAME_LEN 512 +#define MAX_ENVIRON_LEN 8192 +#define MAX_PATH_DEPTH 32 +#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH) +#define MAX_CGROUPS_PATH_DEPTH 8 + +#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN + +#define MAX_CGROUP_PAYLOAD_LEN \ + (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH)) + +#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + +#define MAX_SYSCTL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH) + +#define MAX_KILL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \ + KILL_TARGET_LEN) + +#define MAX_EXEC_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \ + MAX_ARGS_LEN + MAX_ENVIRON_LEN) + +#define MAX_FILEMOD_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \ + MAX_FILEPATH_LENGTH) + +enum data_type { + INVALID_EVENT, + EXEC_EVENT, + FORK_EVENT, + KILL_EVENT, + SYSCTL_EVENT, + FILEMOD_EVENT, + MAX_DATA_TYPE_EVENT +}; + +enum filemod_type { + FMOD_OPEN, + FMOD_LINK, + FMOD_SYMLINK, +}; + +struct ancestors_data_t { + pid_t ancestor_pids[MAX_ANCESTORS]; + uint32_t ancestor_exec_ids[MAX_ANCESTORS]; + uint64_t ancestor_start_times[MAX_ANCESTORS]; + uint32_t num_ancestors; +}; + +struct var_metadata_t { + enum data_type type; + pid_t pid; + uint32_t exec_id; + uid_t uid; + gid_t gid; + uint64_t start_time; + uint32_t cpu_id; + uint64_t bpf_stats_num_perf_events; + uint64_t bpf_stats_start_ktime_ns; + uint8_t comm_length; +}; + +struct cgroup_data_t { + ino_t cgroup_root_inode; + ino_t cgroup_proc_inode; + uint64_t cgroup_root_mtime; + uint64_t cgroup_proc_mtime; + uint16_t cgroup_root_length; + uint16_t cgroup_proc_length; + uint16_t cgroup_full_length; + int cgroup_full_path_root_pos; +}; + +struct var_sysctl_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + uint8_t sysctl_val_length; + uint16_t sysctl_path_length; + char payload[MAX_SYSCTL_PAYLOAD_LEN]; +}; + +struct var_kill_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + pid_t kill_target_pid; + int kill_sig; + uint32_t kill_count; + uint64_t last_kill_time; + uint8_t kill_target_name_length; + uint8_t kill_target_cgroup_proc_length; + char payload[MAX_KILL_PAYLOAD_LEN]; + size_t payload_length; +}; + +struct var_exec_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + pid_t parent_pid; + uint32_t parent_exec_id; + uid_t parent_uid; + uint64_t parent_start_time; + uint16_t bin_path_length; + uint16_t cmdline_length; + uint16_t environment_length; + char payload[MAX_EXEC_PAYLOAD_LEN]; +}; + +struct var_fork_data_t { + struct 
var_metadata_t meta; + pid_t parent_pid; + uint32_t parent_exec_id; + uint64_t parent_start_time; + char payload[MAX_METADATA_PAYLOAD_LEN]; +}; + +struct var_filemod_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + enum filemod_type fmod_type; + unsigned int dst_flags; + uint32_t src_device_id; + uint32_t dst_device_id; + ino_t src_inode; + ino_t dst_inode; + uint16_t src_filepath_length; + uint16_t dst_filepath_length; + char payload[MAX_FILEMOD_PAYLOAD_LEN]; +}; + +struct profiler_config_struct { + bool fetch_cgroups_from_bpf; + ino_t cgroup_fs_inode; + ino_t cgroup_login_session_inode; + uint64_t kill_signals_mask; + ino_t inode_filter; + uint32_t stale_info_secs; + bool use_variable_buffers; + bool read_environ_from_exec; + bool enable_cgroup_v1_resolver; +}; + +struct bpf_func_stats_data { + uint64_t time_elapsed_ns; + uint64_t num_executions; + uint64_t num_perf_events; +}; + +struct bpf_func_stats_ctx { + uint64_t start_time_ns; + struct bpf_func_stats_data* bpf_func_stats_data_val; +}; + +enum bpf_function_id { + profiler_bpf_proc_sys_write, + profiler_bpf_sched_process_exec, + profiler_bpf_sched_process_exit, + profiler_bpf_sys_enter_kill, + profiler_bpf_do_filp_open_ret, + profiler_bpf_sched_process_fork, + profiler_bpf_vfs_link, + profiler_bpf_vfs_symlink, + profiler_bpf_max_function_id +}; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h new file mode 100644 index 000000000000..00578311a423 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -0,0 +1,969 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "profiler.h" + +#ifndef NULL +#define NULL 0 +#endif + +#define O_WRONLY 00000001 +#define O_RDWR 00000002 +#define O_DIRECTORY 00200000 +#define __O_TMPFILE 020000000 +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#define MAX_ERRNO 4095 +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 +#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) +#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) +#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO + +#define KILL_DATA_ARRAY_SIZE 8 + +struct var_kill_data_arr_t { + struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; +}; + +union any_profiler_data_t { + struct var_exec_data_t var_exec; + struct var_kill_data_t var_kill; + struct var_sysctl_data_t var_sysctl; + struct var_filemod_data_t var_filemod; + struct var_fork_data_t var_fork; + struct var_kill_data_arr_t var_kill_data_arr; +}; + +volatile struct profiler_config_struct bpf_config = {}; + +#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) +#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) +#define CGROUP_LOGIN_SESSION_INODE \ + (bpf_config.cgroup_login_session_inode) +#define KILL_SIGNALS (bpf_config.kill_signals_mask) +#define STALE_INFO (bpf_config.stale_info_secs) +#define INODE_FILTER (bpf_config.inode_filter) +#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) 
+#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) + +struct kernfs_iattrs___52 { + struct iattr ia_iattr; +}; + +struct kernfs_node___52 { + union /* kernfs_node_id */ { + struct { + u32 ino; + u32 generation; + }; + u64 id; + } id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, union any_profiler_data_t); +} data_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, KILL_DATA_ARRAY_SIZE); + __type(key, u32); + __type(value, struct var_kill_data_arr_t); +} var_tpid_to_data SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, profiler_bpf_max_function_id); + __type(key, u32); + __type(value, struct bpf_func_stats_data); +} bpf_func_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} allowed_devices SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_file_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_directory_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} disallowed_exec_inodes SEC(".maps"); + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + +static INLINE bool IS_ERR(const void* ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static INLINE u32 get_userspace_pid() +{ + return bpf_get_current_pid_tgid() >> 32; +} + +static INLINE bool is_init_process(u32 tgid) +{ + return tgid == 1 || tgid == 0; +} + +static INLINE unsigned long +probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) +{ + len = len < max ? 
len : max; + if (len > 1) { + if (bpf_probe_read(dst, len, src)) + return 0; + } else if (len == 1) { + if (bpf_probe_read(dst, 1, src)) + return 0; + } + return len; +} + +static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, + int spid) +{ +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == spid) + return i; + return -1; +} + +static INLINE void populate_ancestors(struct task_struct* task, + struct ancestors_data_t* ancestors_data) +{ + struct task_struct* parent = task; + u32 num_ancestors, ppid; + + ancestors_data->num_ancestors = 0; +#ifdef UNROLL +#pragma unroll +#endif + for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { + parent = BPF_CORE_READ(parent, real_parent); + if (parent == NULL) + break; + ppid = BPF_CORE_READ(parent, tgid); + if (is_init_process(ppid)) + break; + ancestors_data->ancestor_pids[num_ancestors] = ppid; + ancestors_data->ancestor_exec_ids[num_ancestors] = + BPF_CORE_READ(parent, self_exec_id); + ancestors_data->ancestor_start_times[num_ancestors] = + BPF_CORE_READ(parent, start_time); + ancestors_data->num_ancestors = num_ancestors; + } +} + +static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, + struct kernfs_node* cgroup_root_node, + void* payload, + int* root_pos) +{ + void* payload_start = payload; + size_t filepart_length; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name)); + if (!cgroup_node) + return payload; + if (cgroup_node == cgroup_root_node) + *root_pos = payload - payload_start; + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); + payload += filepart_length; + } + cgroup_node = BPF_CORE_READ(cgroup_node, parent); + } + return payload; +} + +static ino_t get_inode_from_kernfs(struct kernfs_node* node) +{ + struct kernfs_node___52* node52 = (void*)node; + + if (bpf_core_field_exists(node52->id.ino)) { + barrier_var(node52); + return BPF_CORE_READ(node52, id.ino); + } else { + barrier_var(node); + return (u64)BPF_CORE_READ(node, id); + } +} + +int pids_cgrp_id = 1; + +static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, + struct task_struct* task, + void* payload) +{ + struct kernfs_node* root_kernfs = + BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + + if (ENABLE_CGROUP_V1_RESOLVER) { +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys_state* subsys = + BPF_CORE_READ(task, cgroups, subsys[i]); + if (subsys != NULL) { + int subsys_id = BPF_CORE_READ(subsys, ss, id); + if (subsys_id == pids_cgrp_id) { + proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); + root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); + break; + } + } + } + } + + cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); + cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); + + if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); + } else { + struct kernfs_iattrs___52* root_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); + cgroup_data->cgroup_root_mtime = + 
BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); + + struct kernfs_iattrs___52* proc_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); + } + + cgroup_data->cgroup_root_length = 0; + cgroup_data->cgroup_proc_length = 0; + cgroup_data->cgroup_full_length = 0; + + size_t cgroup_root_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); + barrier_var(cgroup_root_length); + if (cgroup_root_length <= MAX_PATH) { + barrier_var(cgroup_root_length); + cgroup_data->cgroup_root_length = cgroup_root_length; + payload += cgroup_root_length; + } + + size_t cgroup_proc_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= MAX_PATH) { + barrier_var(cgroup_proc_length); + cgroup_data->cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + if (FETCH_CGROUPS_FROM_BPF) { + cgroup_data->cgroup_full_path_root_pos = -1; + void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, + &cgroup_data->cgroup_full_path_root_pos); + cgroup_data->cgroup_full_length = payload_end_pos - payload; + payload = payload_end_pos; + } + + return (void*)payload; +} + +static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, + struct task_struct* task, + u32 pid, void* payload) +{ + u64 uid_gid = bpf_get_current_uid_gid(); + + metadata->uid = (u32)uid_gid; + metadata->gid = uid_gid >> 32; + metadata->pid = pid; + metadata->exec_id = BPF_CORE_READ(task, self_exec_id); + metadata->start_time = BPF_CORE_READ(task, start_time); + metadata->comm_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + metadata->comm_length = comm_length; + payload += comm_length; + } + + return (void*)payload; +} + +static INLINE struct var_kill_data_t* +get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) +{ + int zero = 0; + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (kill_data == NULL) + return NULL; + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload); + payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); + size_t payload_length = payload - (void*)kill_data->payload; + kill_data->payload_length = payload_length; + populate_ancestors(task, &kill_data->ancestors_info); + kill_data->meta.type = KILL_EVENT; + kill_data->kill_target_pid = tpid; + kill_data->kill_sig = sig; + kill_data->kill_count = 1; + kill_data->last_kill_time = bpf_ktime_get_ns(); + return kill_data; +} + +static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) +{ + if ((KILL_SIGNALS & (1ULL << sig)) == 0) + return 0; + + u32 spid = get_userspace_pid(); + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + + if (arr_struct == NULL) { + struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); + int zero = 0; + + if (kill_data == NULL) + return 0; + arr_struct = bpf_map_lookup_elem(&data_heap, &zero); + if (arr_struct == NULL) + return 0; + bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data); + } else { + int index = get_var_spid_index(arr_struct, spid); + + if (index == -1) 
{ + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == 0) { + bpf_probe_read(&arr_struct->array[i], + sizeof(arr_struct->array[i]), kill_data); + bpf_map_update_elem(&var_tpid_to_data, &tpid, + arr_struct, 0); + + return 0; + } + return 0; + } + + struct var_kill_data_t* kill_data = &arr_struct->array[index]; + + u64 delta_sec = + (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; + + if (delta_sec < STALE_INFO) { + kill_data->kill_count++; + kill_data->last_kill_time = bpf_ktime_get_ns(); + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } else { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } + } + bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); + return 0; +} + +static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, + enum bpf_function_id func_id) +{ + int func_id_key = func_id; + + bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); + bpf_stat_ctx->bpf_func_stats_data_val = + bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; +} + +static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += + bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; +} + +static INLINE void +bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, + struct var_metadata_t* meta) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) { + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; + meta->bpf_stats_num_perf_events = + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; + } + meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; + meta->cpu_id = bpf_get_smp_processor_id(); +} + +static INLINE size_t +read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) +{ + size_t length = 0; + size_t filepart_length; + struct dentry* parent_dentry; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp_dentry, d_name.name)); + barrier_var(filepart_length); + if (filepart_length > MAX_PATH) + break; + barrier_var(filepart_length); + payload += filepart_length; + length += filepart_length; + + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + + return length; +} + +static INLINE bool +is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) +{ + struct dentry* parent_dentry; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); + bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); + + if (allowed_dir != NULL) + return true; + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + return false; +} + +static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, + u32* device_id, + u64* 
file_ino) +{ + u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); + *device_id = dev_id; + bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); + + if (allowed_device == NULL) + return false; + + u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); + *file_ino = ino; + bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); + + if (allowed_file == NULL) + if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) + return false; + return true; +} + +SEC("kprobe/proc_sys_write") +ssize_t BPF_KPROBE(kprobe__proc_sys_write, + struct file* filp, const char* buf, + size_t count, loff_t* ppos) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); + + u32 pid = get_userspace_pid(); + int zero = 0; + struct var_sysctl_data_t* sysctl_data = + bpf_map_lookup_elem(&data_heap, &zero); + if (!sysctl_data) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + sysctl_data->meta.type = SYSCTL_EVENT; + void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); + payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); + + populate_ancestors(task, &sysctl_data->ancestors_info); + + sysctl_data->sysctl_val_length = 0; + sysctl_data->sysctl_path_length = 0; + + size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf); + barrier_var(sysctl_val_length); + if (sysctl_val_length <= CTL_MAXNAME) { + barrier_var(sysctl_val_length); + sysctl_data->sysctl_val_length = sysctl_val_length; + payload += sysctl_val_length; + } + + size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp, f_path.dentry, d_name.name)); + barrier_var(sysctl_path_length); + if (sysctl_path_length <= MAX_PATH) { + barrier_var(sysctl_path_length); + sysctl_data->sysctl_path_length = sysctl_path_length; + payload += sysctl_path_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); + unsigned long data_len = payload - (void*)sysctl_data; + data_len = data_len > sizeof(struct var_sysctl_data_t) + ? 
sizeof(struct var_sysctl_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_kill") +int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + + bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); + int pid = ctx->args[0]; + int sig = ctx->args[1]; + int ret = trace_var_sys_kill(ctx, pid, sig); + bpf_stats_exit(&stats_ctx); + return ret; +}; + +SEC("raw_tracepoint/sched_process_exit") +int raw_tracepoint__sched_process_exit(void* ctx) +{ + int zero = 0; + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); + + u32 tpid = get_userspace_pid(); + + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (arr_struct == NULL || kill_data == NULL) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { + struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; + + if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data); + void* payload = kill_data->payload; + size_t offset = kill_data->payload_length; + if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + return 0; + payload += offset; + + kill_data->kill_target_name_length = 0; + kill_data->kill_target_cgroup_proc_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + kill_data->kill_target_name_length = comm_length; + payload += comm_length; + } + + size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN, + BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= KILL_TARGET_LEN) { + barrier_var(cgroup_proc_length); + kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); + unsigned long data_len = (void*)payload - (void*)kill_data; + data_len = data_len > sizeof(struct var_kill_data_t) + ? 
sizeof(struct var_kill_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); + } + } + bpf_map_delete_elem(&var_tpid_to_data, &tpid); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_exec") +int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); + + struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; + u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); + + bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); + if (should_filter_binprm != NULL) + goto out; + + int zero = 0; + struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!proc_exec_data) + goto out; + + if (INODE_FILTER && inode != INODE_FILTER) + return 0; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + proc_exec_data->meta.type = EXEC_EVENT; + proc_exec_data->bin_path_length = 0; + proc_exec_data->cmdline_length = 0; + proc_exec_data->environment_length = 0; + void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, + proc_exec_data->payload); + payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); + + struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); + proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); + proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); + proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); + proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); + + const char* filename = BPF_CORE_READ(bprm, filename); + size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename); + barrier_var(bin_path_length); + if (bin_path_length <= MAX_FILENAME_LEN) { + barrier_var(bin_path_length); + proc_exec_data->bin_path_length = bin_path_length; + payload += bin_path_length; + } + + void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); + void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); + unsigned int cmdline_length = probe_read_lim(payload, arg_start, + arg_end - arg_start, MAX_ARGS_LEN); + + if (cmdline_length <= MAX_ARGS_LEN) { + barrier_var(cmdline_length); + proc_exec_data->cmdline_length = cmdline_length; + payload += cmdline_length; + } + + if (READ_ENVIRON_FROM_EXEC) { + void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); + void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); + unsigned long env_len = probe_read_lim(payload, env_start, + env_end - env_start, MAX_ENVIRON_LEN); + if (cmdline_length <= MAX_ENVIRON_LEN) { + proc_exec_data->environment_length = env_len; + payload += env_len; + } + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); + unsigned long data_len = payload - (void*)proc_exec_data; + data_len = data_len > sizeof(struct var_exec_data_t) + ? 
sizeof(struct var_exec_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kretprobe/do_filp_open") +int kprobe_ret__do_filp_open(struct pt_regs* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); + + struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); + + if (filp == NULL || IS_ERR(filp)) + goto out; + unsigned int flags = BPF_CORE_READ(filp, f_flags); + if ((flags & (O_RDWR | O_WRONLY)) == 0) + goto out; + if ((flags & O_TMPFILE) > 0) + goto out; + struct inode* file_inode = BPF_CORE_READ(filp, f_inode); + umode_t mode = BPF_CORE_READ(file_inode, i_mode); + if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode)) + goto out; + + struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); + u32 device_id = 0; + u64 file_ino = 0; + if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_OPEN; + filemod_data->dst_flags = flags; + filemod_data->src_inode = 0; + filemod_data->dst_inode = file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? 
sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_link") +int BPF_KPROBE(kprobe__vfs_link, + struct dentry* old_dentry, struct inode* dir, + struct dentry* new_dentry, struct inode** delegated_inode) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); + + u32 src_device_id = 0; + u64 src_file_ino = 0; + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && + !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_LINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = src_file_ino; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = src_device_id; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + + len = read_absolute_file_path_from_dentry(new_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? 
sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_symlink") +int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, + const char* oldname) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); + + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_SYMLINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = 0; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + len = read_absolute_file_path_from_dentry(dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_fork") +int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); + + int zero = 0; + struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!fork_data) + goto out; + + struct task_struct* parent = (struct task_struct*)ctx->args[0]; + struct task_struct* child = (struct task_struct*)ctx->args[1]; + fork_data->meta.type = FORK_EVENT; + + void* payload = populate_var_metadata(&fork_data->meta, child, + BPF_CORE_READ(child, pid), fork_data->payload); + fork_data->parent_pid = BPF_CORE_READ(parent, pid); + fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); + fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); + + unsigned long data_len = payload - (void*)fork_data; + data_len = data_len > sizeof(*fork_data) ? 
sizeof(*fork_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c new file mode 100644 index 000000000000..4df9088bfc00 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#define UNROLL +#define INLINE __always_inline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c new file mode 100644 index 000000000000..0f32a3cbf556 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler2.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +/* undef #define UNROLL */ +#define INLINE /**/ +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c new file mode 100644 index 000000000000..6249fc31ccb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler3.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +#define UNROLL +#define INLINE __noinline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index 193fe0198b21..c1e0c8c7c55f 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -41,6 +41,43 @@ struct outer_arr { .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, }; +struct inner_map_sz3 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 3); + __type(key, int); + __type(value, int); +} inner_map3 SEC(".maps"), + inner_map4 SEC(".maps"); + +struct inner_map_sz4 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map5 SEC(".maps"); + +struct outer_arr_dyn { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr_dyn SEC(".maps") = { + .values = { + [0] = (void *)&inner_map3, + [1] = (void *)&inner_map4, + [2] = (void *)&inner_map5, + }, +}; + struct outer_hash { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 5); @@ -101,6 +138,12 @@ int handle__sys_enter(void *ctx) val = input + 1; bpf_map_update_elem(inner_map, &key, &val, 0); + inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); + if (!inner_map) + return 1; + val = input + 2; + bpf_map_update_elem(inner_map, &key, &val, 0); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c new file mode 100644 index 000000000000..44f5aa2e8956 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <linux/bpf.h> +#include <stdint.h> 
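/* Background note on the "___" struct-name suffixes used below (an editorial
 * sketch of the libbpf CO-RE convention, not new functionality in this
 * patch): when matching a program's BTF against the target BTF, libbpf
 * ignores everything from the triple underscore onward, so several local
 * "flavors" of one target struct can coexist and be probed at runtime, e.g.:
 *
 *	struct task_struct___old {
 *		long state;
 *	} __attribute__((preserve_access_index));
 *
 *	// at runtime, use whichever layout the target BTF actually has
 *	struct task_struct___old *t = (void *)bpf_get_current_task();
 *	long state = -1;
 *
 *	if (bpf_core_field_exists(t->state))
 *		bpf_core_read(&state, sizeof(state), &t->state);
 */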
+#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +/* fields of exactly the same size */ +struct test_struct___samesize { + void *ptr; + unsigned long long val1; + unsigned int val2; + unsigned short val3; + unsigned char val4; +} __attribute((preserve_access_index)); + +/* unsigned fields that have to be downsized by libbpf */ +struct test_struct___downsize { + void *ptr; + unsigned long val1; + unsigned long val2; + unsigned long val3; + unsigned long val4; + /* total sz: 40 */ +} __attribute__((preserve_access_index)); + +/* fields with signed integers of wrong size, should be rejected */ +struct test_struct___signed { + void *ptr; + long val1; + long val2; + long val3; + long val4; +} __attribute((preserve_access_index)); + +/* real layout and sizes according to test's (32-bit) BTF */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; + /* total sz: 20 */ +}; + +struct test_struct___real input = { + .ptr = 0x01020304, + .val1 = 0x1020304050607080, + .val2 = 0x0a0b0c0d, + .val3 = 0xfeed, + .val4 = 0xb9, + ._pad = 0xff, /* make sure no accidental zeros are present */ +}; + +unsigned long long ptr_samesized = 0; +unsigned long long val1_samesized = 0; +unsigned long long val2_samesized = 0; +unsigned long long val3_samesized = 0; +unsigned long long val4_samesized = 0; +struct test_struct___real output_samesized = {}; + +unsigned long long ptr_downsized = 0; +unsigned long long val1_downsized = 0; +unsigned long long val2_downsized = 0; +unsigned long long val3_downsized = 0; +unsigned long long val4_downsized = 0; +struct test_struct___real output_downsized = {}; + +unsigned long long ptr_probed = 0; +unsigned long long val1_probed = 0; +unsigned long long val2_probed = 0; +unsigned long long val3_probed = 0; +unsigned long long val4_probed = 0; + +unsigned long long ptr_signed = 0; +unsigned long long val1_signed = 0; +unsigned long long val2_signed = 0; +unsigned long long val3_signed = 0; +unsigned long long val4_signed = 0; +struct test_struct___real output_signed = {}; + +SEC("raw_tp/sys_exit") +int handle_samesize(void *ctx) +{ + struct test_struct___samesize *in = (void *)&input; + struct test_struct___samesize *out = (void *)&output_samesized; + + ptr_samesized = (unsigned long long)in->ptr; + val1_samesized = in->val1; + val2_samesized = in->val2; + val3_samesized = in->val3; + val4_samesized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handle_downsize(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + struct test_struct___downsize *out = (void *)&output_downsized; + + ptr_downsized = (unsigned long long)in->ptr; + val1_downsized = in->val1; + val2_downsized = in->val2; + val3_downsized = in->val3; + val4_downsized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_probed(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + __u64 tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr); + ptr_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1); + val1_probed = tmp; + + tmp = 0; + 
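/* Note: bpf_core_field_size() is a CO-RE relocation, not a runtime call;
 * libbpf patches the field's actual byte size from the target BTF into the
 * instruction stream at load time, so each probed read here copies exactly
 * as many bytes as the target layout defines.
 */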
bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2); + val2_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3); + val3_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4); + val4_probed = tmp; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_signed(void *ctx) +{ + struct test_struct___signed *in = (void *)&input; + struct test_struct___signed *out = (void *)&output_signed; + + val2_signed = in->val2; + val3_signed = in->val3; + val4_signed = in->val4; + + out->val2= in->val2; + out->val3= in->val3; + out->val4= in->val4; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c new file mode 100644 index 000000000000..bb8ea9270f29 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +__u64 out__runqueues_addr = -1; +__u64 out__bpf_prog_active_addr = -1; + +__u32 out__rq_cpu = -1; /* percpu struct fields */ +int out__bpf_prog_active = -1; /* percpu int */ + +__u32 out__this_rq_cpu = -1; +int out__this_bpf_prog_active = -1; + +__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */ + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + out__runqueues_addr = (__u64)&runqueues; + out__bpf_prog_active_addr = (__u64)&bpf_prog_active; + + cpu = bpf_get_smp_processor_id(); + + /* test bpf_per_cpu_ptr() */ + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + if (rq) + out__rq_cpu = rq->cpu; + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) + out__bpf_prog_active = *active; + + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); + if (rq) /* should always be valid, but we can't spare the check. 
*/ + out__cpu_0_rq_cpu = rq->cpu; + + /* test bpf_this_cpu_ptr */ + rq = (struct rq *)bpf_this_cpu_ptr(&runqueues); + out__this_rq_cpu = rq->cpu; + active = (int *)bpf_this_cpu_ptr(&bpf_prog_active); + out__this_bpf_prog_active = *active; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 3a216d1d0226..72ec0178f653 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -304,10 +304,10 @@ int misc_estab(struct bpf_sock_ops *skops) passive_lport_n = __bpf_htons(passive_lport_h); bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, &true_val, sizeof(true_val)); - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_TCP_CONNECT_CB: - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 3dca4c2e2418..1858435de7aa 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb) } -SEC("sk_skb3") -int bpf_prog3(struct __sk_buff *skb) +static inline void bpf_write_pass(struct __sk_buff *skb, int offset) { - const int one = 1; - int err, *f, ret = SK_PASS; + int err = bpf_skb_pull_data(skb, 6 + offset); void *data_end; char *c; - err = bpf_skb_pull_data(skb, 19); if (err) - goto tls_out; + return; c = (char *)(long)skb->data; data_end = (void *)(long)skb->data_end; - if (c + 18 < data_end) - memcpy(&c[13], "PASS", 4); + if (c + 5 + offset < data_end) + memcpy(c + offset, "PASS", 4); +} + +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + int err, *f, ret = SK_PASS; + const int one = 1; + f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) { __u64 flags = 0; ret = 0; flags = *f; + + err = bpf_skb_adjust_room(skb, -13, 0, 0); + if (err) + return SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 0); #ifdef SOCKMAP return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags); #else return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) ret = SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 13); tls_out: return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 889a72c3024f..fe182616b112 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -13,17 +13,10 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#ifndef barrier_data -# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory") -#endif - #ifndef ctx_ptr # define ctx_ptr(field) (void *)(long)(field) #endif -#define dst_to_src_tmp 0xeeddddeeU -#define src_to_dst_tmp 0xeeffffeeU - #define ip4_src 0xac100164 /* 172.16.1.100 */ #define ip4_dst 0xac100264 /* 172.16.2.100 */ @@ -39,6 +32,18 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, 
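/* Editorial sketch: this legacy bpf_map_def map is meant to be filled from
 * the outside before traffic runs (the accompanying test_tc_redirect.sh does
 * it with "bpftool map update", keys 0/1 corresponding to dev_src/dev_dst).
 * A libbpf userspace equivalent, assuming map_fd was obtained from the
 * loaded object and the veth names from that script, might look like:
 *
 *	#include <net/if.h>
 *	#include <bpf/bpf.h>
 *
 *	int key = dev_src;
 *	int ifindex = if_nametoindex("veth_src_fwd");
 *
 *	if (!ifindex || bpf_map_update_elem(map_fd, &key, &ifindex, BPF_ANY))
 *		return -1;
 */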
+}; + static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) { @@ -73,7 +78,14 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -SEC("chk_neigh") int tc_chk(struct __sk_buff *skb) +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -87,7 +99,6 @@ SEC("chk_neigh") int tc_chk(struct __sk_buff *skb) SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) { - int idx = dst_to_src_tmp; __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -103,19 +114,15 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (!redirect) return TC_ACT_OK; - barrier_data(&idx); - idx = bpf_ntohl(idx); - __builtin_memset(&zero, 0, sizeof(zero)); if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(idx, 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); } SEC("src_ingress") int tc_src(struct __sk_buff *skb) { - int idx = src_to_dst_tmp; __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -131,14 +138,11 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (!redirect) return TC_ACT_OK; - barrier_data(&idx); - idx = bpf_ntohl(idx); - __builtin_memset(&zero, 0, sizeof(zero)); if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(idx, 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c new file mode 100644 index 000000000000..fc84a7685aa2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> + +#include <bpf/bpf_helpers.h> + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? 
*ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + return TC_ACT_SHOT; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 9197a23df3da..678bd0fad29e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -21,6 +21,7 @@ __u8 test_kind = TCPOPT_EXP; __u16 test_magic = 0xeB9F; +__u32 inherit_cb_flags = 0; struct bpf_test_option passive_synack_out = {}; struct bpf_test_option passive_fin_out = {}; @@ -467,6 +468,8 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) struct tcphdr *th; int err; + inherit_cb_flags = skops->bpf_sock_ops_cb_flags; + err = load_option(skops, &passive_estab_in, true); if (err == -ENOENT) { /* saved_syn is not found. It was in syncookie mode. @@ -600,10 +603,10 @@ int estab(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_TCP_LISTEN_CB: bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, &true_val, sizeof(true_val)); - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); break; case BPF_SOCK_OPS_TCP_CONNECT_CB: - set_hdr_cb_flags(skops); + set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 9b6fb00dc7a0..0fa1e421c3d7 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir; int ktls; int peek_flag; int skb_use_parser; +int txmsg_omit_skb_parser; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -111,6 +112,7 @@ static const struct option long_options[] = { {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1}, {"whitelist", required_argument, NULL, 'n' }, {"blacklist", required_argument, NULL, 'b' }, {0, 0, NULL, 0 } @@ -175,6 +177,7 @@ static void test_reset(void) txmsg_apply = txmsg_cork = 0; txmsg_ingress = txmsg_redir_skb = 0; txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; + txmsg_omit_skb_parser = 0; skb_use_parser = 0; } @@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) return -EIO; - if (txmsg_ktls_skb_redir) { - if (memcmp(&d[13], "PASS", 4) != 0) { - fprintf(stderr, - "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); - return -EIO; - } - d[13] = 0; - d[14] = 1; - d[15] = 2; - d[16] = 3; - j = 13; - } else if (txmsg_ktls_skb) { - if (memcmp(d, "PASS", 4) != 0) { - fprintf(stderr, - "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; - } - d[0] = 0; - d[1] = 1; - d[2] = 2; - d[3] = 3; + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != 
\"PASS\"\n", + i, 0, d[0], d[1], d[2], d[3]); + return -EIO; } + j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { @@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) goto run; /* Attach programs to sockmap */ - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[1], map_fd[0], @@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[2], map_fd[8], @@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) txmsg_ktls_skb_drop = 0; txmsg_ktls_skb_redir = 1; test_exec(cgrp, opt); + txmsg_ktls_skb_redir = 0; + + /* Tests that omit skb_parser */ + txmsg_omit_skb_parser = 1; + ktls = 0; + txmsg_ktls_skb = 0; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); + txmsg_ktls_skb_drop = 0; + + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); + + ktls = 1; + test_exec(cgrp, opt); + txmsg_omit_skb_parser = 0; opt->data_test = data; ktls = k; } - /* Test cork with hung data. This tests poor usage patterns where * cork can leave data on the ring if user program is buggy and * doesn't flush them somehow. They do take some time however diff --git a/tools/testing/selftests/bpf/test_tc_neigh.sh b/tools/testing/selftests/bpf/test_tc_neigh.sh deleted file mode 100755 index 31d8c3df8b24..000000000000 --- a/tools/testing/selftests/bpf/test_tc_neigh.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into bpf_redirect_peer() to perform the -# neigh addr population and redirect; it also installs a dropper prog on the -# egress side to drop skbs if neigh addrs were not populated. 
- -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that nc, dd, ping, ping6 and timeout are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -command -v ping6 >/dev/null 2>&1 || \ - { echo >&2 "ping6 is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -trap cleanup EXIT - -set -e - -ip netns add "${NS_SRC}" -ip netns add "${NS_FWD}" -ip netns add "${NS_DST}" - -ip link add veth_src type veth peer name veth_src_fwd -ip link add veth_dst type veth peer name veth_dst_fwd - -ip link set veth_src netns ${NS_SRC} -ip link set veth_src_fwd netns ${NS_FWD} - -ip link set veth_dst netns ${NS_DST} -ip link set veth_dst_fwd netns ${NS_FWD} - -ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src -ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - -# The fwd netns automatically get a v6 LL address / routes, but also needs v4 -# one in order to start ARP probing. IP4_NET route is added to the endpoints -# so that the ARP processing will reply. 
- -ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd -ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - -ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad -ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - -ip -netns ${NS_SRC} link set dev veth_src up -ip -netns ${NS_FWD} link set dev veth_src_fwd up - -ip -netns ${NS_DST} link set dev veth_dst up -ip -netns ${NS_FWD} link set dev veth_dst_fwd up - -ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global -ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global -ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - -ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global -ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - -ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global -ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global -ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - -ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global -ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - -fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) -fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - -ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src -ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - -ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src -ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst - -veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}') -veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}') - -xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o -xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o - -ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact -ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress -ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh - -ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact -ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress -ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh - -rm -f test_tc_neigh.x.o test_tc_neigh.y.o - -ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" -ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - -set +e - -TEST="TCPv4 connectivity test" -ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="TCPv6 connectivity test" -ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="ICMPv4 connectivity test" -ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} -if [ $? 
-ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" - -TEST="ICMPv6 connectivity test" -ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} -if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 -fi -echo -e "${TEST}: ${GREEN}PASS${NC}" diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh new file mode 100755 index 000000000000..6d7482562140 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link +# between src and dst. The netns fwd has veth links to each src and dst. The +# client is in src and server in dst. The test installs a TC BPF program to each +# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the +# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace +# switch from ingress side; it also installs a checker prog on the egress side +# to drop unexpected traffic. + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that needed tools are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "dd is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } +command -v ping >/dev/null 2>&1 || \ + { echo >&2 "ping is not available"; exit 1; } +command -v ping6 >/dev/null 2>&1 || \ + { echo >&2 "ping6 is not available"; exit 1; } +command -v perl >/dev/null 2>&1 || \ + { echo >&2 "perl is not available"; exit 1; } +command -v jq >/dev/null 2>&1 || \ + { echo >&2 "jq is not available"; exit 1; } +command -v bpftool >/dev/null 2>&1 || \ + { echo >&2 "bpftool is not available"; exit 1; } + +readonly GREEN='\033[0;92m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly PING_ARG="-c 3 -w 10 -q" + +readonly TIMEOUT=10 + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP4_SRC="172.16.1.100" +readonly IP4_DST="172.16.2.100" + +readonly IP6_SRC="::1:dead:beef:cafe" +readonly IP6_DST="::2:dead:beef:cafe" + +readonly IP4_SLL="169.254.0.1" +readonly IP4_DLL="169.254.0.2" +readonly IP4_NET="169.254.0.0" + +netns_cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_FWD} + ip netns del ${NS_DST} +} + +netns_setup() +{ + ip netns add "${NS_SRC}" + ip netns add "${NS_FWD}" + ip netns add "${NS_DST}" + + ip link add veth_src type veth peer name veth_src_fwd + ip link add veth_dst type veth peer name veth_dst_fwd + + ip link set veth_src netns ${NS_SRC} + ip link set veth_src_fwd netns ${NS_FWD} + + ip link set veth_dst netns ${NS_DST} + ip link set veth_dst_fwd netns ${NS_FWD} + + ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src + ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst + + # The fwd netns automatically get a v6 LL address / routes, but also + # needs v4 one in order to start ARP probing. IP4_NET route is added + # to the endpoints so that the ARP processing will reply. 
+ + ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd + ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd + + ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad + ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad + + ip -netns ${NS_SRC} link set dev veth_src up + ip -netns ${NS_FWD} link set dev veth_src_fwd up + + ip -netns ${NS_DST} link set dev veth_dst up + ip -netns ${NS_FWD} link set dev veth_dst_fwd up + + ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global + ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global + + ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global + + ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global + ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global + + ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global + + fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) + fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) + + ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst + + ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst +} + +netns_test_connectivity() +{ + set +e + + ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" + ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" + + TEST="TCPv4 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="TCPv6 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv4 connectivity test" + ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv6 connectivity test" + ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} + if [ $? 
-ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + set -e +} + +hex_mem_str() +{ + perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 +} + +netns_setup_bpf() +{ + local obj=$1 + + ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress + + ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress + + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) + veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) + + progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') + for prog in $progs; do + map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') + if [ ! -z "$map" ]; then + bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) + bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) + fi + done +} + +trap netns_cleanup EXIT +set -e + +netns_setup +netns_setup_bpf test_tc_neigh.o +netns_test_connectivity +netns_cleanup +netns_setup +netns_setup_bpf test_tc_peer.o +netns_test_connectivity diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h index 78a8cf9eab42..6118e3ab61fc 100644 --- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -110,12 +110,13 @@ static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); } -static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops) +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra) { bpf_sock_ops_cb_flags_set(skops, skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | - BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + extra); } static inline void clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4d0e913bbb22..1bbd1d9830c8 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -90,6 +90,33 @@ long ksym_get_addr(const char *name) return 0; } +/* open kallsyms and read symbol addresses on the fly. Without caching all symbols, + * this is faster than load + find. 
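Since the comment only hints at the intended use, here is a minimal, hypothetical usage sketch for the helper defined just below; the caller name is invented for illustration and error handling is reduced to a return-code check.

/* Hypothetical caller of kallsyms_find() from trace_helpers.h: resolve one
 * kernel symbol address without loading the whole symbol table first.
 */
#include <stdio.h>
#include "trace_helpers.h"

int print_sym_addr(const char *sym)
{
	unsigned long long addr;
	int err;

	err = kallsyms_find(sym, &addr);	/* 0 on success, negative otherwise */
	if (err)
		return err;
	printf("%s is at 0x%llx\n", sym, addr);
	return 0;
}

Compared with load_kallsyms() followed by ksym_get_addr(), this avoids building the in-memory symbol table when only a single address is needed, which is the speed-up the comment refers to.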
+ */ +int kallsyms_find(const char *sym, unsigned long long *addr) +{ + char type, name[500]; + unsigned long long value; + int err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -EINVAL; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + *addr = value; + goto out; + } + } + err = -ENOENT; + +out: + fclose(f); + return err; +} + void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 25ef597dd03f..f62fdef9e589 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,6 +12,10 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); + +/* open kallsyms and find addresses on the fly, faster than load + search. */ +int kallsyms_find(const char *sym, unsigned long long *addr); + void read_trace_pipe(void); #endif diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c index b8d18642653a..de84f0d57082 100644 --- a/tools/testing/selftests/bpf/verifier/basic.c +++ b/tools/testing/selftests/bpf/verifier/basic.c @@ -2,7 +2,7 @@ "empty prog", .insns = { }, - .errstr = "unknown opcode 00", + .errstr = "last insn is not an exit or jmp", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 2c5fbe7bcd27..ae72536603fe 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -529,7 +529,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index 3856dba733e9..f9297900cea6 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -51,14 +51,6 @@ .result = REJECT, }, { - "test5 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, -}, -{ "test6 ld_imm64", .insns = { BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c new file mode 100644 index 000000000000..ac71b824f97a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -0,0 +1,243 @@ +{ + "regalloc basic", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = 
BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=1", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + 
BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc three regs", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc after call", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + 
BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, |
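To close, a note on what the regalloc cases above exercise. Each test looks up a 48-byte hash map value, obtains an unknown scalar from bpf_get_prandom_u32(), copies it into a second register (or spills it to the stack, or passes it to a callee), and then bounds it with a pair of signed jumps before using the sum as an offset into the value. The verifier has to recognise that the copies carry the same unknown value so the bounds from both jumps combine. A plain-C rendering of the "regalloc basic" arithmetic, with the bound reasoning spelled out in comments, is sketched below; it is illustrative only and not part of the selftests.

/* Plain-C rendering of the "regalloc basic" pattern (illustrative only).
 * val points at a 48-byte map value; x stands for the unknown scalar.
 */
#include <stdint.h>

uint64_t regalloc_basic(char *val /* 48 bytes */, int64_t x)
{
	int64_t a = x, b = x;	/* two registers holding the same unknown value */

	if (a > 20)		/* JSGT 20 not taken: a is at most 20 */
		return 0;
	if (b < 0)		/* JSLT 0 not taken: b is at least 0 */
		return 0;
	/* Because a and b are the same value, both end up in [0, 20], so the
	 * offset a + b is at most 40 and the 8-byte load covers bytes 40..47,
	 * still inside the 48-byte value: ACCEPT.
	 */
	return *(uint64_t *)(val + a + b);
}

The "negative" variants raise the upper bound just enough that the worst-case offset runs up to or past the end of the 48-byte value, which is exactly the invalid map-value access named in their expected errstr strings.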