author | David S. Miller <davem@davemloft.net> | 2019-02-02 05:12:18 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-02-02 05:12:18 +0100 |
commit | beb73559bf57b0151dba0c27c4f866599f57bb0b (patch) | |
tree | f6d2fd8d8409add55ee3ae57d39e793f971b1b88 | |
parent | Merge branch 'devlink-add-device-driver-information-API' (diff) | |
parent | Merge branch 'shifts-cleanup' (diff) | |
download | linux-beb73559bf57b0151dba0c27c4f866599f57bb0b.tar.xz linux-beb73559bf57b0151dba0c27c4f866599f57bb0b.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2019-02-01
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) introduce bpf_spin_lock, from Alexei.
2) convert xdp samples to libbpf, from Maciej.
3) skip verifier tests for unsupported program/map types, from Stanislav.
4) powerpc64 JIT support for BTF line info, from Sandipan.
5) assorted fixes, from Valdis, Jesper, Jiong.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
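For readers skimming the diff below, here is a minimal sketch of the program side of change 1. Everything in it is illustrative (the map name, the `cgroup/skb` attach point, the helper declarations) rather than taken from this series, and it assumes uapi headers synced to this tree. The one hard requirement visible in the verifier changes: the map must be created with BTF describing the value type (`btf_value_type_id`), otherwise `btf_find_spin_lock()` cannot locate the lock and the helpers are rejected.

```c
/* Hypothetical BPF-C program using the new bpf_spin_lock helpers.
 * A sketch only; names are illustrative and error paths are trimmed.
 */
#include <linux/bpf.h>

#define SEC(NAME) __attribute__((section(NAME), used))

/* Declare the helpers by their BPF_FUNC_* ids (added to the uapi below). */
static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
	(void *) BPF_FUNC_spin_lock;
static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
	(void *) BPF_FUNC_spin_unlock;
static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
	(void *) BPF_FUNC_map_lookup_elem;

struct val {
	int cnt;
	struct bpf_spin_lock lock;	/* at most one per value, 4-byte aligned */
};

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

/* hash, array and cgroup local storage are the only map types allowed;
 * the loader must additionally attach BTF for struct val to this map.
 */
struct bpf_map_def SEC("maps") vals = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(struct val),
	.max_entries	= 1,
};

SEC("cgroup/skb")
int count_pkts(struct __sk_buff *skb)
{
	struct val *v;
	int key = 0;

	v = bpf_map_lookup_elem(&vals, &key);
	if (!v)
		return 1;
	/* Verifier rules added in this series: one lock held at a time,
	 * no function calls and no BPF_LD_ABS/IND inside the critical
	 * section, and the lock must be released before program exit.
	 */
	bpf_spin_lock(&v->lock);
	v->cnt++;
	bpf_spin_unlock(&v->lock);
	return 1;
}

char _license[] SEC("license") = "GPL";
```

Note also that tracing program types and socket filters cannot use these maps at all; `check_map_prog_compatibility()` below rejects the combination at load time.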
50 files changed, 2197 insertions, 262 deletions
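The same series exposes the lock to user space through the new `BPF_F_LOCK` flag on the map-element commands. The sketch below goes through the raw bpf(2) syscall so it depends only on the uapi header; `map_fd` is assumed to refer to a map whose value embeds a `struct bpf_spin_lock` (the kernel returns -EINVAL otherwise), and the wrapper names are invented for illustration.

```c
/* Userspace sketch of BPF_F_LOCK lookup/update via the raw syscall. */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

/* Copy the element out under its spin lock; the lock field in the
 * returned copy is zeroed (check_and_init_map_lock() in the diff). */
static int lookup_locked(int map_fd, const void *key, void *value)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (unsigned long) key;
	attr.value = (unsigned long) value;
	attr.flags = BPF_F_LOCK;
	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}

/* Update everything but the lock word, in place, under the lock. */
static int update_locked(int map_fd, const void *key, const void *value)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (unsigned long) key;
	attr.value = (unsigned long) value;
	attr.flags = BPF_F_LOCK;	/* can be OR-ed with BPF_EXIST */
	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
```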
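Change 2 (the xdp samples conversion to libbpf) repeats one attach/detach pattern across every converted sample below: remember the loaded program's id and, on exit, detach only if the program currently on the interface is still ours. Condensed here for reference, with error handling and ifindex setup trimmed; all of the libbpf calls (`bpf_prog_load_xattr()`, `bpf_obj_get_info_by_fd()`, `bpf_get_link_xdp_id()`) are the ones the samples themselves use.

```c
#include <linux/bpf.h>
#include <linux/if_link.h>	/* XDP_FLAGS_* */
#include <bpf/bpf.h>
#include "bpf/libbpf.h"

static int ifindex;		/* assumed to be resolved elsewhere */
static __u32 prog_id;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;

static int attach(const char *file)
{
	struct bpf_prog_load_attr attr = {
		.prog_type = BPF_PROG_TYPE_XDP,
		.file = file,
	};
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);
	struct bpf_object *obj;
	int prog_fd;

	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd))
		return -1;
	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0)
		return -1;
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		return -1;
	prog_id = info.id;	/* remembered for the exit path */
	return 0;
}

static void detach(void)
{
	__u32 curr = 0;

	if (bpf_get_link_xdp_id(ifindex, &curr, xdp_flags))
		return;
	if (curr == prog_id)	/* don't remove someone else's program */
		bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
}
```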
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 15bba765fa79..4194d3cfb60c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1185,6 +1185,7 @@ skip_codegen_passes: bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: kfree(addrs); kfree(jit_data); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index aa3a2098a583..4d9d3806908e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1967,6 +1967,9 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, @@ -2079,6 +2082,9 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2180,6 +2186,9 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) */ static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { + if (!shift_amt) + return 0; + if (shift_amt < 32) { emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, shift_amt); @@ -2388,10 +2397,13 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) { - /* Set signedness bit (MSB of result). */ - emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, reg_imm(0)); - emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), - SHF_SC_R_SHF, shift_amt); + if (shift_amt) { + /* Set signedness bit (MSB of result). 
*/ + emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + } wrp_immed(nfp_prog, reg_both(dst + 1), 0); return 0; @@ -2429,18 +2441,75 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return __ashr_imm(nfp_prog, dst, insn->imm); } +static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF, shift_amt); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + +static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __shr_imm(nfp_prog, dst, insn->imm); +} + +static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shr_imm(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_SHF); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + +static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt) + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + return 0; +} + static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - if (!insn->imm) - return 1; /* TODO: zero shift means indirect */ + return __shl_imm(nfp_prog, dst, insn->imm); +} - emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), - reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), - SHF_SC_L_SHF, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); +static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + dst = insn->dst_reg * 2; + umin = meta->umin_src; + umax = meta->umax_src; + if (umin == umax) + return __shl_imm(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + shl_reg64_lt32_low(nfp_prog, dst, src); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); return 0; } @@ -3350,7 +3419,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_DIV | BPF_X] = div_reg, [BPF_ALU | BPF_DIV | BPF_K] = div_imm, [BPF_ALU | BPF_NEG] = neg_reg, + [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, + [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 588dd5f0bd85..695b2a880d9a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -78,7 +78,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr 
__user *uattr); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0394f1f9213b..bd169a7bcc93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -72,14 +72,15 @@ struct bpf_map { u32 value_size; u32 max_entries; u32 map_flags; - u32 pages; + int spin_lock_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; + u32 pages; bool unpriv_array; - /* 55 bytes hole */ + /* 51 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -91,6 +92,36 @@ struct bpf_map { char name[BPF_OBJ_NAME_LEN]; }; +static inline bool map_value_has_spin_lock(const struct bpf_map *map) +{ + return map->spin_lock_off >= 0; +} + +static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +{ + if (likely(!map_value_has_spin_lock(map))) + return; + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; +} + +/* copy everything but bpf_spin_lock */ +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + if (unlikely(map_value_has_spin_lock(map))) { + u32 off = map->spin_lock_off; + + memcpy(dst, src, off); + memcpy(dst + off + sizeof(struct bpf_spin_lock), + src + off + sizeof(struct bpf_spin_lock), + map->value_size - off - sizeof(struct bpf_spin_lock)); + } else { + memcpy(dst, src, map->value_size); + } +} +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src); + struct bpf_offload_dev; struct bpf_offloaded_map; @@ -162,6 +193,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ }; /* type of values returned from helper functions */ @@ -879,7 +911,8 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; extern const struct bpf_func_proto bpf_sk_redirect_map_proto; - +extern const struct bpf_func_proto bpf_spin_lock_proto; +extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; /* Shared helpers among cBPF and eBPF. 
*/ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 44d9ab4809bd..08bf2f1fe553 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -6,9 +6,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +#ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +#endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0620e418dde5..69f7a3449eda 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -148,6 +148,7 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; u32 curframe; + u32 active_spin_lock; bool speculative; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 12502e25e767..455d31b55828 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -50,6 +50,7 @@ u32 btf_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); diff --git a/include/linux/filter.h b/include/linux/filter.h index e4b473f85b46..7317376734f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -880,7 +880,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); - +u64 bpf_jit_alloc_exec_limit(void); +void *bpf_jit_alloc_exec(unsigned long size); +void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); int bpf_jit_get_func_addr(const struct bpf_prog *prog, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 60b99b730a41..1777fa0c61e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) @@ -2422,7 +2423,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3059,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 84d882f3e299..fbba478ae522 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS def_bool y if ARCH_USE_QUEUED_SPINLOCKS depends on SMP +config BPF_ARCH_SPINLOCK + bool + config ARCH_USE_QUEUED_RWLOCKS bool diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 
25632a75d630..c72e0d8e1e65 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; + char *val; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -262,17 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (unlikely(map_flags == BPF_NOEXIST)) + if (unlikely(map_flags & BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + if (unlikely((map_flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) + return -EINVAL; + + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); - else - memcpy(array->value + - array->elem_size * (index & array->index_mask), - value, map->value_size); + } else { + val = array->value + + array->elem_size * (index & array->index_mask); + if (map_flags & BPF_F_LOCK) + copy_map_value_locked(map, val, value, false); + else + copy_map_value(map, val, value); + } return 0; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3d661f0606fe..7019c1f05cab 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + static bool btf_type_is_array(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; @@ -2045,6 +2050,43 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + const struct btf_member *member; + u32 i, off = -ENOENT; + + if (!__btf_type_is_struct(t)) + return -EINVAL; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + if (!__btf_type_is_struct(member_type)) + continue; + if (member_type->size != sizeof(struct bpf_spin_lock)) + continue; + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), + "bpf_spin_lock")) + continue; + if (off != -ENOENT) + /* only one 'struct bpf_spin_lock' is allowed */ + return -E2BIG; + off = btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; + off /= 8; + if (off % __alignof__(struct bpf_spin_lock)) + /* valid struct bpf_spin_lock will be 4 byte aligned */ + return -EINVAL; + } + return off; +} + static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d78cfec5807d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -230,6 +230,7 @@ cleanup: * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach * @type: Type of attach operation + * @flags: Option flags * * Must be called with cgroup_mutex held. 
*/ @@ -363,7 +364,7 @@ cleanup: * Must be called with cgroup_mutex held. */ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 unused_flags) + enum bpf_attach_type type) { struct list_head *progs = &cgrp->bpf.progs[type]; enum bpf_cgroup_storage_type stype; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a7bcb23bee84..ef88b167959d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1263,8 +1263,9 @@ bool bpf_opcode_in_insntable(u8 code) #ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context - * @ctx: is the data we are operating on + * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions + * @stack: is the eBPF storage stack * * Decode and execute eBPF instructions. */ @@ -2001,6 +2002,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_spin_lock_proto __weak; +const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..937776531998 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) BITS_PER_LONG == 64; } -static u32 htab_size_value(const struct bpf_htab *htab, bool percpu) -{ - u32 size = htab->map.value_size; - - if (percpu || fd_htab_map_needs_adjust(htab)) - size = round_up(size, 8); - return size; -} - static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, struct htab_elem *old_elem) { - u32 size = htab_size_value(htab, percpu); + u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-ENOMEM); goto dec_count; } + check_and_init_map_lock(&htab->map, + l_new->key + round_up(key_size, 8)); } memcpy(l_new->key, key, key_size); if (percpu) { + size = round_up(size, 8); if (prealloc) { pptr = htab_elem_get_ptr(l_new, key_size); } else { @@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); - } else { + } else if (fd_htab_map_needs_adjust(htab)) { + size = round_up(size, 8); memcpy(l_new->key + round_up(key_size, 8), value, size); + } else { + copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); } l_new->hash = hash; @@ -805,11 +804,11 @@ dec_count: static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (l_old && map_flags == BPF_NOEXIST) + if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) /* elem already exists */ return -EEXIST; - if (!l_old && map_flags == BPF_EXIST) + if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) /* elem doesn't exist, cannot update it */ return -ENOENT; @@ -828,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, u32 key_size, hash; int ret; - if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > 
BPF_EXIST)) /* unknown flags */ return -EINVAL; @@ -841,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, b = __select_bucket(htab, hash); head = &b->head; + if (unlikely(map_flags & BPF_F_LOCK)) { + if (unlikely(!map_value_has_spin_lock(map))) + return -EINVAL; + /* find an element without taking the bucket lock */ + l_old = lookup_nulls_elem_raw(head, hash, key, key_size, + htab->n_buckets); + ret = check_flags(htab, l_old, map_flags); + if (ret) + return ret; + if (l_old) { + /* grab the element lock and update value in place */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + return 0; + } + /* fall through, grab the bucket lock and lookup again. + * 99.9% chance that the element won't be found, + * but second lookup under lock has to be done. + */ + } + /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags); @@ -850,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; + if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { + /* first lookup without the bucket lock didn't find the element, + * but second lookup with the bucket lock found it. + * This case is highly unlikely, but has to be dealt with: + * grab the element lock in addition to the bucket lock + * and update element in place + */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + ret = 0; + goto err; + } + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, l_old); if (IS_ERR(l_new)) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a74972b07e74..a411fc17d265 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -221,6 +221,102 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg2_type = ARG_CONST_SIZE, }; +#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + union { + __u32 val; + arch_spinlock_t lock; + } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; + + compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); + BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); + BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); + arch_spin_lock(l); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + arch_spinlock_t *l = (void *)lock; + + arch_spin_unlock(l); +} + +#else + +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); + do { + atomic_cond_read_relaxed(l, !VAL); + } while (atomic_xchg(l, 1)); +} + +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +{ + atomic_t *l = (void *)lock; + + atomic_set_release(l, 0); +} + +#endif + +static DEFINE_PER_CPU(unsigned long, irqsave_flags); + +notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + local_irq_save(flags); + __bpf_spin_lock(lock); + __this_cpu_write(irqsave_flags, flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_lock_proto = { + .func = bpf_spin_lock, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + unsigned long flags; + + flags = __this_cpu_read(irqsave_flags); + __bpf_spin_unlock(lock); + local_irq_restore(flags); + return 0; +} + +const struct bpf_func_proto bpf_spin_unlock_proto = { + .func = bpf_spin_unlock, + 
.gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_SPIN_LOCK, +}; + +void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, + bool lock_src) +{ + struct bpf_spin_lock *lock; + + if (lock_src) + lock = src + map->spin_lock_off; + else + lock = dst + map->spin_lock_off; + preempt_disable(); + ____bpf_spin_lock(lock); + copy_map_value(map, dst, src); + ____bpf_spin_unlock(lock); + preempt_enable(); +} + #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 07a34ef562a0..6b572e2de7fb 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags != BPF_ANY && flags != BPF_EXIST) + if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) + return -EINVAL; + + if (unlikely(flags & BPF_NOEXIST)) + return -EINVAL; + + if (unlikely((flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map))) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, @@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, if (!storage) return -ENOENT; + if (flags & BPF_F_LOCK) { + copy_map_value_locked(map, storage->buf->data, value, false); + return 0; + } + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + map->value_size, __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, @@ -147,6 +159,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, return -ENOMEM; memcpy(&new->data[0], value, map->value_size); + check_and_init_map_lock(map, new->data); new = xchg(&storage->buf, new); kfree_rcu(new, rcu); @@ -483,6 +496,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, storage->buf = kmalloc_node(size, flags, map->numa_node); if (!storage->buf) goto enomem; + check_and_init_map_lock(map, storage->buf->data); } else { storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); if (!storage->percpu_buf) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 52378d3e34b3..583346a0ab29 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } + if (map_value_has_spin_lock(inner_map)) { + fdput(f); + return ERR_PTR(-ENOTSUPP); + } + inner_map_meta_size = sizeof(*inner_map_meta); /* In some cases verifier needs to access beyond just base map. 
*/ if (inner_map->ops == &array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..0834958f1dc4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; @@ -478,6 +478,22 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf, if (!value_type || value_size != map->value_size) return -EINVAL; + map->spin_lock_off = btf_find_spin_lock(btf, value_type); + + if (map_value_has_spin_lock(map)) { + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY && + map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + return -ENOTSUPP; + if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > + map->value_size) { + WARN_ONCE(1, + "verifier bug spin_lock_off %d value_size %d\n", + map->spin_lock_off, map->value_size); + return -EFAULT; + } + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type); @@ -542,6 +558,8 @@ static int map_create(union bpf_attr *attr) map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; + } else { + map->spin_lock_off = -EINVAL; } err = security_bpf_map_alloc(map); @@ -664,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags static int map_lookup_elem(union bpf_attr *attr) { @@ -680,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr) if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL; + if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@ -690,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@ -740,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr) err = -ENOENT; } else { err = 0; - memcpy(value, ptr, value_size); + if (attr->flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); } rcu_read_unlock(); } @@ -800,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8c1c21cd50b4..b63bc77af2d1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -213,6 +213,7 @@ struct bpf_call_arg_meta { s64 msize_smax_value; u64 msize_umax_value; int ptr_id; + int func_id; }; static DEFINE_MUTEX(bpf_verifier_lock); @@ -351,6 +352,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg) return type_is_refcounted(reg->type); } +static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) +{ + return reg->type == PTR_TO_MAP_VALUE && + 
map_value_has_spin_lock(reg->map_ptr); +} + static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) { return type_is_refcounted_or_null(reg->type); @@ -712,6 +719,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, } dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; + dst_state->active_spin_lock = src->active_spin_lock; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1483,6 +1491,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, if (err) verbose(env, "R%d max value is outside of the array range\n", regno); + + if (map_value_has_spin_lock(reg->map_ptr)) { + u32 lock = reg->map_ptr->spin_lock_off; + + /* if any part of struct bpf_spin_lock can be touched by + * load/store reject this program. + * To check that [x1, x2) overlaps with [y1, y2) + * it is sufficient to check x1 < y2 && y1 < x2. + */ + if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && + lock < reg->umax_value + off + size) { + verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); + return -EACCES; + } + } return err; } @@ -2192,6 +2215,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +/* Implementation details: + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL + * Two bpf_map_lookups (even with the same key) will have different reg->id. + * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after + * value_or_null->value transition, since the verifier only cares about + * the range of access to valid map value pointer and doesn't care about actual + * address of the map element. + * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps + * reg->id > 0 after value_or_null->value transition. By doing so + * two bpf_map_lookups will be considered two different pointers that + * point to different bpf_spin_locks. + * The verifier allows taking only one bpf_spin_lock at a time to avoid + * dead-locks. + * Since only one bpf_spin_lock is allowed the checks are simpler than + * reg_is_refcounted() logic. The verifier needs to remember only + * one spin_lock instead of array of acquired_refs. + * cur_state->active_spin_lock remembers which map value element got locked + * and clears it after bpf_spin_unlock. + */ +static int process_spin_lock(struct bpf_verifier_env *env, int regno, + bool is_lock) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_verifier_state *cur = env->cur_state; + bool is_const = tnum_is_const(reg->var_off); + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "R%d is not a pointer to map_value\n", regno); + return -EINVAL; + } + if (!is_const) { + verbose(env, + "R%d doesn't have constant offset. 
bpf_spin_lock has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map->btf) { + verbose(env, + "map '%s' has to have BTF in order to use bpf_spin_lock\n", + map->name); + return -EINVAL; + } + if (!map_value_has_spin_lock(map)) { + if (map->spin_lock_off == -E2BIG) + verbose(env, + "map '%s' has more than one 'struct bpf_spin_lock'\n", + map->name); + else if (map->spin_lock_off == -ENOENT) + verbose(env, + "map '%s' doesn't have 'struct bpf_spin_lock'\n", + map->name); + else + verbose(env, + "map '%s' is not a struct type or bpf_spin_lock is mangled\n", + map->name); + return -EINVAL; + } + if (map->spin_lock_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", + val + reg->off); + return -EINVAL; + } + if (is_lock) { + if (cur->active_spin_lock) { + verbose(env, + "Locking two bpf_spin_locks are not allowed\n"); + return -EINVAL; + } + cur->active_spin_lock = reg->id; + } else { + if (!cur->active_spin_lock) { + verbose(env, "bpf_spin_unlock without taking a lock\n"); + return -EINVAL; + } + if (cur->active_spin_lock != reg->id) { + verbose(env, "bpf_spin_unlock of different lock\n"); + return -EINVAL; + } + cur->active_spin_lock = 0; + } + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return type == ARG_PTR_TO_MEM || @@ -2268,6 +2376,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return -EFAULT; } meta->ptr_id = reg->id; + } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + if (meta->func_id == BPF_FUNC_spin_lock) { + if (process_spin_lock(env, regno, true)) + return -EACCES; + } else if (meta->func_id == BPF_FUNC_spin_unlock) { + if (process_spin_lock(env, regno, false)) + return -EACCES; + } else { + verbose(env, "verifier internal error\n"); + return -EFAULT; + } } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2887,6 +3006,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } + meta.func_id = func_id; /* check args */ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) @@ -2969,6 +3089,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].map_ptr = meta.map_ptr; if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + if (map_value_has_spin_lock(meta.map_ptr)) + regs[BPF_REG_0].id = ++env->id_gen; } else { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; @@ -4473,7 +4595,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; } - if (is_null || !reg_is_refcounted(reg)) { + if (is_null || !(reg_is_refcounted(reg) || + reg_may_point_to_spin_lock(reg))) { /* We don't need id from this point onwards anymore, * thus we should better reset it, so that state * pruning has chances to take effect. 
@@ -4871,6 +4994,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } + if (env->cur_state->active_spin_lock) { + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); + return -EINVAL; + } + if (regs[BPF_REG_6].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -5607,8 +5735,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. - * We don't care about the 'id' value, because nothing - * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL) + * 'id' is not compared, since it's only used for maps with + * bpf_spin_lock inside map element and in such cases if + * the rest of the prog is valid for one map element then + * it's valid for all map elements regardless of the key + * used in bpf_map_lookup() */ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && @@ -5811,6 +5942,9 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; + if (old->active_spin_lock != cur->active_spin_lock) + return false; + /* for states to be equal callsites have to be the same * and all frame states need to be equivalent */ @@ -6229,6 +6363,12 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock && + (insn->src_reg == BPF_PSEUDO_CALL || + insn->imm != BPF_FUNC_spin_unlock)) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else @@ -6259,6 +6399,11 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } + if (env->cur_state->active_spin_lock) { + verbose(env, "bpf_spin_unlock is missing\n"); + return -EINVAL; + } + if (state->curframe) { /* exit from nested function */ env->prev_insn_idx = env->insn_idx; @@ -6356,6 +6501,19 @@ static int check_map_prealloc(struct bpf_map *map) !(map->map_flags & BPF_F_NO_PREALLOC); } +static bool is_tracing_prog_type(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return true; + default: + return false; + } +} + static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog) @@ -6378,6 +6536,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if ((is_tracing_prog_type(prog->type) || + prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && + map_value_has_spin_lock(map)) { + verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } + if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..9f617605dacb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5996,7 +5996,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, type, flags); + ret = __cgroup_bpf_detach(cgrp, prog, type); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/net/core/dev.c b/net/core/dev.c index 
8e276e0192a1..bfa4be42afff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7983,8 +7983,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { + NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); return -EOPNOTSUPP; + } if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) bpf_op = generic_xdp_install; if (bpf_op == bpf_chk) @@ -7992,11 +7994,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, if (fd >= 0) { if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) + __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) { + NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; + } if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) + __dev_xdp_query(dev, bpf_op, query)) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; + } prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, bpf_op == ops->ndo_bpf); diff --git a/net/core/filter.c b/net/core/filter.c index 41984ad4b9b4..3a49f68eda10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5314,10 +5314,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db1a91dfa702..a0ef7eddd0b3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -87,18 +87,18 @@ test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o -xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o +xdp_router_ipv4-objs := xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS) sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o +xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect-objs := bpf_load.o xdp_redirect_user.o -xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o +xdp_redirect-objs := xdp_redirect_user.o +xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8bfda95c77ad..6a64e93365e1 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -22,11 +22,23 @@ #include "bpf/libbpf.h" static int 
ifindex; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); exit(0); } @@ -63,7 +75,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFACE\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -73,11 +86,14 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; - const char *optstr = "SN"; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const char *optstr = "FSN"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -87,6 +103,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; @@ -135,6 +154,13 @@ int main(int argc, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(map_fd, 2); return 0; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 3042ce37dae8..07e1b9269e49 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -24,12 +24,25 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -60,6 +73,7 @@ static void usage(const char *cmd) printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F force loading prog\n"); printf(" -h Display this help\n"); } @@ -70,12 +84,15 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; unsigned char opt_flags[256] = {}; + const char *optstr = "i:T:SNFh"; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; - const char *optstr = "i:T:SNh"; int i, prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; for (i = 0; i < strlen(optstr); i++) if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') @@ -96,6 +113,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; @@ -142,9 
+162,15 @@ int main(int argc, char **argv) return 1; } - poll_stats(map_fd, kill_after_s); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + poll_stats(map_fd, kill_after_s); + int_exit(0); return 0; } diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 2d23054aaccf..586b294d72d3 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -24,20 +24,26 @@ static const char *__doc__ = /* How many xdp_progs are defined in _kern.c */ #define MAX_PROG 6 -/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead - * use bpf/libbpf.h), but cannot as (currently) needed for XDP - * attaching to a device via bpf_set_link_xdp_fd() - */ #include <bpf/bpf.h> -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "bpf_util.h" static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; - -static __u32 xdp_flags; +static __u32 prog_id; + +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int cpu_map_fd; +static int rx_cnt_map_fd; +static int redirect_err_cnt_map_fd; +static int cpumap_enqueue_cnt_map_fd; +static int cpumap_kthread_cnt_map_fd; +static int cpus_available_map_fd; +static int cpus_count_map_fd; +static int cpus_iterator_map_fd; +static int exception_cnt_map_fd; /* Exit return codes */ #define EXIT_OK 0 @@ -51,27 +57,50 @@ static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"dev", required_argument, NULL, 'd' }, {"skb-mode", no_argument, NULL, 'S' }, - {"debug", no_argument, NULL, 'D' }, {"sec", required_argument, NULL, 's' }, - {"prognum", required_argument, NULL, 'p' }, + {"progname", required_argument, NULL, 'p' }, {"qsize", required_argument, NULL, 'q' }, {"cpu", required_argument, NULL, 'c' }, {"stress-mode", no_argument, NULL, 'x' }, {"no-separators", no_argument, NULL, 'z' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } -static void usage(char *argv[]) +static void print_avail_progs(struct bpf_object *obj) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (bpf_program__is_xdp(pos)) + printf(" %s\n", bpf_program__title(pos, false)); + } +} + +static void usage(char *argv[], struct bpf_object *obj) { int i; @@ -89,6 +118,8 @@ static void usage(char *argv[]) long_options[i].val); printf("\n"); } + printf("\n Programs to be used for --progname:\n"); + print_avail_progs(obj); printf("\n"); } @@ -263,7 +294,7 @@ static __u64 calc_errs_pps(struct datarec *r, static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - int prog_num) + char *prog_name) { unsigned int nr_cpus = 
bpf_num_possible_cpus(); double pps = 0, drop = 0, err = 0; @@ -273,7 +304,7 @@ static void stats_print(struct stats_record *stats_rec, int i; /* Header */ - printf("Running XDP/eBPF prog_num:%d\n", prog_num); + printf("Running XDP/eBPF prog_name:%s\n", prog_name); printf("%-15s %-7s %-14s %-11s %-9s\n", "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); @@ -424,20 +455,20 @@ static void stats_collect(struct stats_record *rec) { int fd, i; - fd = map_fd[1]; /* map: rx_cnt */ + fd = rx_cnt_map_fd; map_collect_percpu(fd, 0, &rec->rx_cnt); - fd = map_fd[2]; /* map: redirect_err_cnt */ + fd = redirect_err_cnt_map_fd; map_collect_percpu(fd, 1, &rec->redir_err); - fd = map_fd[3]; /* map: cpumap_enqueue_cnt */ + fd = cpumap_enqueue_cnt_map_fd; for (i = 0; i < MAX_CPUS; i++) map_collect_percpu(fd, i, &rec->enq[i]); - fd = map_fd[4]; /* map: cpumap_kthread_cnt */ + fd = cpumap_kthread_cnt_map_fd; map_collect_percpu(fd, 0, &rec->kthread); - fd = map_fd[8]; /* map: exception_cnt */ + fd = exception_cnt_map_fd; map_collect_percpu(fd, 0, &rec->exception); } @@ -462,7 +493,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0); + ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); if (ret) { fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); exit(EXIT_FAIL_BPF); @@ -471,23 +502,22 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Inform bpf_prog's that a new CPU is available to select * from via some control maps. */ - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0); if (ret) { fprintf(stderr, "Add to avail CPUs failed\n"); exit(EXIT_FAIL_BPF); } /* When not replacing/updating existing entry, bump the count */ - /* map_fd[6] = cpus_count */ - ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count); + ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count); if (ret) { fprintf(stderr, "Failed reading curr cpus_count\n"); exit(EXIT_FAIL_BPF); } if (new) { curr_cpus_count++; - ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0); + ret = bpf_map_update_elem(cpus_count_map_fd, &key, + &curr_cpus_count, 0); if (ret) { fprintf(stderr, "Failed write curr cpus_count\n"); exit(EXIT_FAIL_BPF); @@ -510,8 +540,8 @@ static void mark_cpus_unavailable(void) int ret, i; for (i = 0; i < MAX_CPUS; i++) { - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &i, + &invalid_cpu, 0); if (ret) { fprintf(stderr, "Failed marking CPU unavailable\n"); exit(EXIT_FAIL_BPF); @@ -531,7 +561,7 @@ static void stress_cpumap(void) create_cpu_entry(1, 16000, 0, false); } -static void stats_poll(int interval, bool use_separators, int prog_num, +static void stats_poll(int interval, bool use_separators, char *prog_name, bool stress_mode) { struct stats_record *record, *prev; @@ -547,7 +577,7 @@ static void stats_poll(int interval, bool use_separators, int prog_num, while (1) { swap(&prev, &record); stats_collect(record); - stats_print(record, prev, prog_num); + stats_print(record, prev, prog_name); sleep(interval); if (stress_mode) stress_cpumap(); @@ -557,20 +587,55 @@ static void stats_poll(int interval, bool use_separators, int prog_num, free_stats_record(prev); } +static int init_map_fds(struct bpf_object 
*obj) +{ + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); + rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + redirect_err_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + cpumap_enqueue_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); + cpumap_kthread_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + cpus_available_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_available"); + cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); + cpus_iterator_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); + + if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || + redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || + cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 || + cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 || + exception_cnt_map_fd < 0) + return -ENOENT; + + return 0; +} + int main(int argc, char **argv) { struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); bool use_separators = true; bool stress_mode = false; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; - bool debug = false; int added_cpus = 0; int longindex = 0; int interval = 2; - int prog_num = 5; int add_cpu = -1; + int opt, err; + int prog_fd; __u32 qsize; - int opt; /* Notice: choosing he queue size is very important with the * ixgbe driver, because it's driver page recycling trick is @@ -581,26 +646,29 @@ int main(int argc, char **argv) qsize = 128+64; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } - if (load_bpf_file(filename)) { - fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return EXIT_FAIL; - } - if (!prog_fd[0]) { - fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); + if (prog_fd < 0) { + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", + strerror(errno)); + return EXIT_FAIL; + } + if (init_map_fds(obj) < 0) { + fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; } - mark_cpus_unavailable(); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -624,9 +692,6 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; - case 'D': - debug = true; - break; case 'x': stress_mode = true; break; @@ -635,13 +700,7 @@ int main(int argc, char **argv) break; case 'p': /* Selecting eBPF prog to load */ - prog_num = atoi(optarg); - if (prog_num < 0 || prog_num >= MAX_PROG) { - fprintf(stderr, - "--prognum too large err(%d):%s\n", - errno, strerror(errno)); - goto error; - } + prog_name = optarg; break; case 'c': /* Add multiple CPUs */ @@ -658,24 +717,27 @@ int main(int argc, char **argv) case 'q': qsize = atoi(optarg); break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } } /* Required option */ if (ifindex == -1) { fprintf(stderr, "ERR: 
required option --dev missing\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } /* Required option */ if (add_cpu == -1) { fprintf(stderr, "ERR: required option --cpu missing\n"); fprintf(stderr, " Specify multiple --cpu option to add more\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } @@ -683,16 +745,30 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) { + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); + return EXIT_FAIL; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "bpf_program__fd failed\n"); + return EXIT_FAIL; + } + + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } - if (debug) { - printf("Debug-mode reading trace pipe (fix #define DEBUG)\n"); - read_trace_pipe(); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; } + prog_id = info.id; - stats_poll(interval, use_separators, prog_num, stress_mode); + stats_poll(interval, use_separators, prog_name, stress_mode); return EXIT_OK; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 4445e76854b5..327226be5a06 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -22,21 +22,48 @@ #include <libgen.h> #include <sys/resource.h> -#include "bpf_load.h" #include "bpf_util.h" #include <bpf/bpf.h> +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 dummy_prog_id; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -53,7 +80,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -69,16 +96,26 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - const char 
*optstr = "SN"; - char filename[256]; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int prog_fd, dummy_prog_fd; + const char *optstr = "FSN"; + struct bpf_object *obj; int ret, opt, key = 0; + char filename[256]; + int tx_port_map_fd; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -88,6 +125,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; @@ -109,37 +149,65 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", - map_fd[0], map_fd[1], map_fd[2]); - /* populate virtual to physical port map */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 81a69e36cb78..a5d8ad3129ed 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -22,21 +22,48 @@ #include <libgen.h> #include <sys/resource.h> -#include "bpf_load.h" #include "bpf_util.h" #include <bpf/bpf.h> +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 
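/* Both redirect samples now follow the same teardown discipline: remember
 * the id of the program you attached, and at exit detach only if that id
 * is still the one on the link, so a program attached later by another
 * tool is left alone. The logic, sketched as a reusable helper (the
 * helper name is illustrative only):
 *
 *	static void detach_if_ours(int ifindex, __u32 our_id, __u32 flags)
 *	{
 *		__u32 curr = 0;
 *
 *		if (bpf_get_link_xdp_id(ifindex, &curr, flags))
 *			return;		// query failed, leave the link alone
 *		if (curr && curr == our_id)
 *			bpf_set_link_xdp_fd(ifindex, -1, flags);
 *	}
 */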
dummy_prog_id; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -53,7 +80,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -69,7 +96,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -77,9 +105,18 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - const char *optstr = "SN"; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + int prog_fd, tx_port_map_fd, opt; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const char *optstr = "FSN"; + struct bpf_object *obj; char filename[256]; - int ret, opt, key = 0; + int dummy_prog_fd; + int ret, key = 0; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -89,6 +126,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; @@ -110,34 +150,65 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if 
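/* As the in-code comment above notes, bpf_prog_load_xattr() only returns
 * the fd of the first program in the object, so the second (dummy)
 * program has to be found by walking the object. The walk in isolation:
 *
 *	struct bpf_program *first, *second;
 *
 *	first = bpf_program__next(NULL, obj);	// NULL yields the first prog
 *	second = bpf_program__next(first, obj);
 *	if (!first || !second)
 *		return -1;	// object didn't contain both programs
 *	dummy_prog_fd = bpf_program__fd(second);
 */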
(bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); /* bpf redirect port */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; } diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index b2b4dfa776c8..79fe7bc26ab4 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -15,7 +15,6 @@ #include <string.h> #include <sys/socket.h> #include <unistd.h> -#include "bpf_load.h" #include <bpf/bpf.h> #include <arpa/inet.h> #include <fcntl.h> @@ -25,32 +24,52 @@ #include <sys/ioctl.h> #include <sys/syscall.h> #include "bpf_util.h" +#include "bpf/libbpf.h" +#include <sys/resource.h> +#include <libgen.h> -int sock, sock_arp, flags = 0; +int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int total_ifindex; -int *ifindex_list; +static int *ifindex_list; +static __u32 *prog_id_list; char buf[8192]; +static int lpm_map_fd; +static int rxcnt_map_fd; +static int arp_table_map_fd; +static int exact_match_map_fd; +static int tx_port_map_fd; static int get_route_table(int rtm_family); static void int_exit(int sig) { + __u32 prog_id = 0; int i = 0; - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + for (i = 0; i < total_ifindex; i++) { + if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { + printf("bpf_get_link_xdp_id on iface %d failed\n", + ifindex_list[i]); + exit(1); + } + if (prog_id_list[i] == prog_id) + bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + else if (!prog_id) + printf("couldn't find a prog id on iface %d\n", + ifindex_list[i]); + else + printf("program on iface %d changed, not removing\n", + ifindex_list[i]); + prog_id = 0; + } exit(0); } static void close_and_exit(int sig) { - int i = 0; - close(sock); close(sock_arp); - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); + int_exit(0); } /* Get the mac address of the interface given interface name */ @@ -179,14 +198,10 @@ static void read_route(struct nlmsghdr *nh, int nll) route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); route.iface_name = if_indextoname(route.iface, route.iface_name); route.mac = getmac(route.iface_name); - if (route.mac == -1) { - int i = 0; - - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); - } - assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); + if (route.mac == -1) + int_exit(0); + assert(bpf_map_update_elem(tx_port_map_fd, + &route.iface, &route.iface, 0) == 0); if
(rtm_family == AF_INET) { struct trie_value { __u8 prefix[4]; @@ -207,11 +222,16 @@ static void read_route(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = 0; if (route.dst_len == 32) { if (nh->nlmsg_type == RTM_DELROUTE) { - assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); + assert(bpf_map_delete_elem(exact_match_map_fd, + &route.dst) == 0); } else { - if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) + if (bpf_map_lookup_elem(arp_table_map_fd, + &route.dst, + &direct_entry.arp.mac) == 0) direct_entry.arp.dst = route.dst; - assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + assert(bpf_map_update_elem(exact_match_map_fd, + &route.dst, + &direct_entry, 0) == 0); } } for (i = 0; i < 4; i++) @@ -225,7 +245,7 @@ static void read_route(struct nlmsghdr *nh, int nll) route.gw, route.dst_len, route.metric, route.iface_name); - if (bpf_map_lookup_elem(map_fd[0], prefix_key, + if (bpf_map_lookup_elem(lpm_map_fd, prefix_key, prefix_value) < 0) { for (i = 0; i < 4; i++) prefix_value->prefix[i] = prefix_key->data[i]; @@ -234,7 +254,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem(map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0 ) == 0); @@ -247,7 +267,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_key->data[2], prefix_key->data[3], prefix_key->prefixlen); - assert(bpf_map_delete_elem(map_fd[0], + assert(bpf_map_delete_elem(lpm_map_fd, prefix_key ) == 0); /* Rereading the route table to check if @@ -275,8 +295,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->ifindex = route.iface; prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem( - map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0) == 0); @@ -401,7 +420,8 @@ static void read_arp(struct nlmsghdr *nh, int nll) arp_entry.mac = atol(mac); printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); if (ndm_family == AF_INET) { - if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, + if (bpf_map_lookup_elem(exact_match_map_fd, + &arp_entry.dst, &direct_entry) == 0) { if (nh->nlmsg_type == RTM_DELNEIGH) { direct_entry.arp.dst = 0; @@ -410,16 +430,17 @@ static void read_arp(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = arp_entry.dst; direct_entry.arp.mac = arp_entry.mac; } - assert(bpf_map_update_elem(map_fd[3], + assert(bpf_map_update_elem(exact_match_map_fd, &arp_entry.dst, &direct_entry, 0 ) == 0); memset(&direct_entry, 0, sizeof(direct_entry)); } if (nh->nlmsg_type == RTM_DELNEIGH) { - assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); + assert(bpf_map_delete_elem(arp_table_map_fd, + &arp_entry.dst) == 0); } else if (nh->nlmsg_type == RTM_NEWNEIGH) { - assert(bpf_map_update_elem(map_fd[2], + assert(bpf_map_update_elem(arp_table_map_fd, &arp_entry.dst, &arp_entry.mac, 0 ) == 0); @@ -553,7 +574,8 @@ static int monitor_route(void) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, + &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) @@ -594,36 +616,87 @@ cleanup: return ret; } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] interface name list\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int ac, char **argv) { 
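/* xdp_router_ipv4 is the heaviest conversion in the set: five maps that
 * used to be reached as map_fd[0..4] by load order are now resolved by
 * name, which keeps the pairing with the _kern.c file explicit instead of
 * positional. The lookup pattern, using two of this sample's real map
 * names:
 *
 *	int lpm = bpf_object__find_map_fd_by_name(obj, "lpm_map");
 *	int tx  = bpf_object__find_map_fd_by_name(obj, "tx_port");
 *
 *	if (lpm < 0 || tx < 0)
 *		return -1;	// a renamed map now fails loudly at startup
 */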
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const char *optstr = "SF"; + struct bpf_object *obj; char filename[256]; char **ifname_list; - int i = 1; + int prog_fd, opt; + int err, i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (ac < 2) { - printf("usage: %s [-S] Interface name list\n", argv[0]); - return 1; + prog_load_attr.file = filename; + + total_ifindex = ac - 1; + ifname_list = (argv + 1); + + while ((opt = getopt(ac, argv, optstr)) != -1) { + switch (opt) { + case 'S': + flags |= XDP_FLAGS_SKB_MODE; + total_ifindex--; + ifname_list++; + break; + case 'F': + flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + total_ifindex--; + ifname_list++; + break; + default: + usage(basename(argv[0])); + return 1; + } } - if (!strcmp(argv[1], "-S")) { - flags = XDP_FLAGS_SKB_MODE; - total_ifindex = ac - 2; - ifname_list = (argv + 2); - } else { - flags = 0; - total_ifindex = ac - 1; - ifname_list = (argv + 1); + + if (optind == ac) { + usage(basename(argv[0])); + return 1; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + printf("\n**************loading bpf file*********************\n\n\n"); - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + if (!prog_fd) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); + + lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table"); + exact_match_map_fd = bpf_object__find_map_fd_by_name(obj, + "exact_match"); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 || + exact_match_map_fd < 0 || tx_port_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + return 1; + } + + ifindex_list = (int *)calloc(total_ifindex, sizeof(int *)); for (i = 0; i < total_ifindex; i++) { ifindex_list[i] = if_nametoindex(ifname_list[i]); if (!ifindex_list[i]) { @@ -632,8 +705,9 @@ int main(int ac, char **argv) return 1; } } + prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); for (i = 0; i < total_ifindex; i++) { - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { printf("link set xdp fd failed\n"); int recovery_index = i; @@ -642,6 +716,13 @@ int main(int ac, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id_list[i] = info.id; + memset(&info, 0, sizeof(info)); printf("Attached to %d\n", ifindex_list[i]); } signal(SIGINT, int_exit); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index ef26f882f92f..1210f3b170f0 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -29,8 +29,9 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; +static __u32 prog_id; -static __u32 xdp_flags; +static __u32 xdp_flags = 
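/* Note the setrlimit() call the router's main() gained above (the other
 * converted samples gain it as well): BPF map and program memory is
 * charged against RLIMIT_MEMLOCK, and the default limit is small enough
 * that loading can fail with EPERM. The usual preamble:
 *
 *	#include <sys/resource.h>
 *
 *	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 *		perror("setrlimit(RLIMIT_MEMLOCK)");
 *		return 1;
 *	}
 */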
XDP_FLAGS_UPDATE_IF_NOEXIST; static struct bpf_map *stats_global_map; static struct bpf_map *rx_queue_index_map; @@ -52,16 +53,30 @@ static const struct option long_options[] = { {"action", required_argument, NULL, 'a' }, {"readmem", no_argument, NULL, 'r' }, {"swapmac", no_argument, NULL, 'm' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } @@ -446,6 +461,8 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 }; @@ -487,7 +504,7 @@ int main(int argc, char **argv) } /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -524,6 +541,9 @@ int main(int argc, char **argv) case 'm': cfg_options |= SWAP_MAC; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: @@ -576,6 +596,13 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + stats_poll(interval, action, cfg_options); return EXIT_OK; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 8dd87c1eb560..dc66345a929a 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -12,6 +12,9 @@ #include <signal.h> #include <libbpf.h> #include <bpf/bpf.h> +#include <sys/resource.h> +#include <libgen.h> +#include <linux/if_link.h> #include "perf-sys.h" #include "trace_helpers.h" @@ -20,25 +23,50 @@ static int pmu_fds[MAX_CPUS], if_idx; static struct perf_event_mmap_page *headers[MAX_CPUS]; static char *if_name; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static int do_attach(int idx, int fd, const char *name) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int err; - err = bpf_set_link_xdp_fd(idx, fd, 0); - if (err < 0) + err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); + if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); + return err; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; return err; } static int do_detach(int idx, const char *name) { - int err; + __u32 curr_prog_id = 0; + int err = 0; - err = bpf_set_link_xdp_fd(idx, -1, 0); - if (err < 0) - printf("ERROR: failed to detach program from %s\n", name); + err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0); + if (err) { + 
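/* Every sample in this series flips its default to
 * XDP_FLAGS_UPDATE_IF_NOEXIST, so attaching over an already-attached
 * program now fails (the kernel returns -EBUSY) instead of silently
 * replacing it, and the new -F option clears the flag to restore the old
 * replace behaviour:
 *
 *	__u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;	// default: don't clobber
 *
 *	case 'F':
 *		xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
 *		break;
 */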
printf("bpf_get_link_xdp_id failed\n"); + return err; + } + if (prog_id == curr_prog_id) { + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach prog from %s\n", name); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a %s\n", name); + } else { + printf("program on interface changed, not removing\n"); + } return err; } @@ -97,20 +125,47 @@ static void sig_handler(int signo) exit(0); } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] <ifname|ifindex>\n\n" + "OPTS:\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + const char *optstr = "F"; + int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; - int prog_fd, map_fd; char filename[256]; int ret, err, i; int numcpus; - if (argc < 2) { - printf("Usage: %s <ifname>\n", argv[0]); + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); + return 1; + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } @@ -136,16 +191,16 @@ int main(int argc, char **argv) } map_fd = bpf_map__fd(map); - if_idx = if_nametoindex(argv[1]); + if_idx = if_nametoindex(argv[optind]); if (!if_idx) - if_idx = strtoul(argv[1], NULL, 0); + if_idx = strtoul(argv[optind], NULL, 0); if (!if_idx) { fprintf(stderr, "Invalid ifname\n"); return 1; } - if_name = argv[1]; - err = do_attach(if_idx, prog_fd, argv[1]); + if_name = argv[optind]; + err = do_attach(if_idx, prog_fd, if_name); if (err) return err; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index a4ccc33adac0..4a1511eb7812 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -17,7 +17,7 @@ #include <netinet/ether.h> #include <unistd.h> #include <time.h> -#include "bpf_load.h" +#include "bpf/libbpf.h" #include <bpf/bpf.h> #include "bpf_util.h" #include "xdp_tx_iptunnel_common.h" @@ -25,12 +25,26 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags = 0; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int rxcnt_map_fd; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -53,7 +67,8 @@ static void poll_stats(unsigned int kill_after_s) for (proto = 0; proto < nr_protos; proto++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto, + values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[proto][i]); @@ -81,6 +96,7 @@ static void usage(const char *cmd) printf(" -P <IP-Protocol> Default is TCP\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F Force loading the XDP prog\n"); printf(" -h Display this help\n"); } @@ 
-138,16 +154,22 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int min_port = 0, max_port = 0, vip2tnl_map_fd; + const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:SNh"; - int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; struct vip vip = {}; char filename[256]; - int opt; - int i; + int opt, prog_fd; + int i, err; tnl.family = AF_UNSPEC; vip.protocol = IPPROTO_TCP; @@ -211,6 +233,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; @@ -232,33 +257,47 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } - if (!prog_fd[0]) { + if (!prog_fd) { printf("load_bpf_file: %s\n", strerror(errno)); return 1; } + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl"); + if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + return 1; + } + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); while (min_port <= max_port) { vip.dport = htons(min_port++); - if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl, + BPF_NOEXIST)) { perror("bpf_map_update_elem(&vip2tnl)"); return 1; } } - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(kill_after_s); bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 57ecadc58403..f73055e0191f 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,7 +68,7 @@ enum benchmark_type { }; static enum benchmark_type opt_bench = BENCH_RXDROP; -static u32 opt_xdp_flags; +static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; @@ -76,6 +76,7 @@ static int opt_poll; static int opt_shared_packet_buffer; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static __u32 prog_id; struct xdp_umem_uqueue { u32 cached_prod; @@ -631,9 +632,20 @@ static void *poller(void *arg) static void int_exit(int sig) { + __u32 curr_prog_id = 0; + (void)sig; dump_stats(); - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAILURE); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program 
on interface changed, not removing\n"); exit(EXIT_SUCCESS); } @@ -682,7 +694,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, &option_index); if (c == -1) break; @@ -725,6 +737,9 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'F': + opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); } @@ -904,6 +919,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; int prog_fd, qidconf_map, xsks_map; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct bpf_object *obj; char xdp_filename[256]; struct bpf_map *map; @@ -950,6 +967,13 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; + ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 60b99b730a41..1777fa0c61e4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -267,6 +267,7 @@ enum bpf_attach_type { #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ /* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) @@ -2422,7 +2423,9 @@ union bpf_attr { FN(map_peek_elem), \ FN(msg_push_data), \ FN(msg_pop_data), \ - FN(rc_pointer_rel), + FN(rc_pointer_rel), \ + FN(spin_lock), \ + FN(spin_unlock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3056,4 +3059,7 @@ struct bpf_line_info { __u32 line_col; }; +struct bpf_spin_lock { + __u32 val; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 88cbd110ae58..3defad77dc7a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -368,6 +368,19 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } +int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + attr.flags = flags; + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8f09de482839..ed09eed2dc3b 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -110,6 +110,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, + __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2ccde17957e6..03bc01ca2577 100644 --- 
a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2884,6 +2884,12 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) return NULL; } +int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) +{ + return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); +} + struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 62ae6cb93da1..43c77e98df6f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -264,6 +264,9 @@ struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); +LIBBPF_API int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); + /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. @@ -314,6 +317,7 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 266bc95d0142..62c680fb13d1 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -130,4 +130,7 @@ LIBBPF_0.0.2 { bpf_probe_helper; bpf_probe_map_type; bpf_probe_prog_type; + bpf_map_lookup_elem_flags; + bpf_object__find_map_fd_by_name; + bpf_get_link_xdp_id; } LIBBPF_0.0.1; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0ce67aea8f3b..ce3ec81b71c0 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -21,6 +21,12 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_id_md { + int ifindex; + __u32 flags; + __u32 id; +}; + int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; @@ -196,6 +202,85 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } +static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) +{ + if (mode != XDP_ATTACHED_MULTI) + return IFLA_XDP_PROG_ID; + if (flags & XDP_FLAGS_DRV_MODE) + return IFLA_XDP_DRV_PROG_ID; + if (flags & XDP_FLAGS_HW_MODE) + return IFLA_XDP_HW_PROG_ID; + if (flags & XDP_FLAGS_SKB_MODE) + return IFLA_XDP_SKB_PROG_ID; + + return IFLA_XDP_UNSPEC; +} + +static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; + struct xdp_id_md *xdp_id = cookie; + struct ifinfomsg *ifinfo = msg; + unsigned char mode, xdp_attr; + int ret; + + if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) + return 0; + + if (!tb[IFLA_XDP]) + return 0; + + ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); + if (ret) + return ret; + + if (!xdp_tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); + if (!xdp_attr || !xdp_tb[xdp_attr]) + return 0; + + xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_id_md xdp_id = {}; + int sock, ret; + __u32 nl_pid; + __u32 mask; + + if (flags & ~XDP_FLAGS_MASK) + return -EINVAL; + + /* Check whether the single {HW,DRV,SKB} mode is set */ + flags &= (XDP_FLAGS_SKB_MODE | 
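/* The check being built here is the classic power-of-two test: after
 * masking down to the three mode bits, "flags & (flags - 1)" clears the
 * lowest set bit, so it is nonzero exactly when two or more modes were
 * requested at once. Worked through with the real flag values
 * (SKB_MODE=2, DRV_MODE=4, HW_MODE=8):
 *
 *	SKB|DRV: x = 0b0110, x - 1 = 0b0101, x & (x - 1) = 0b0100  -> -EINVAL
 *	DRV:     x = 0b0100, x - 1 = 0b0011, x & (x - 1) = 0       -> ok
 */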
XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); + mask = flags - 1; + if (flags && flags & mask) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + xdp_id.ifindex = ifindex; + xdp_id.flags = flags; + + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); + if (!ret) + *prog_id = xdp_id.id; + + close(sock); + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8993e9c8f410..383d2ff13fc7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ BPF_OBJ_FILES = \ sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o + xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o # Objects are built with default compilation flags and with sub-register # code-gen enabled. @@ -216,7 +216,8 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -$(OUTPUT)/test_verifier: $(OUTPUT)/verifier/tests.h +VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h +$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H) $(OUTPUT)/test_verifier: CFLAGS += -I$(OUTPUT) VERIFIER_TEST_FILES := $(wildcard verifier/*.c) @@ -227,6 +228,7 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES) ls *.c 2> /dev/null | \ sed -e 's@\(.*\)@#include \"\1\"@'; \ echo '#endif' \ - ) > $(OUTPUT)/verifier/tests.h) + ) > $(VERIFIER_TESTS_H)) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ + $(VERIFIER_TESTS_H) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6c77cf7bedce..6a0ce0f055c5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -172,6 +172,10 @@ static int (*bpf_skb_vlan_pop)(void *ctx) = (void *) BPF_FUNC_skb_vlan_pop; static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = (void *) BPF_FUNC_rc_pointer_rel; +static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = + (void *) BPF_FUNC_spin_lock; +static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = + (void *) BPF_FUNC_spin_unlock; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/test_map_lock.c b/tools/testing/selftests/bpf/test_map_lock.c new file mode 100644 index 000000000000..af8cc68ed2f9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_lock.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +#define VAR_NUM 16 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem); + +struct array_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct array_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem); + +SEC("map_lock_demo") +int 
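/* The program that follows shows the whole bpf_spin_lock contract from
 * the BPF side: the lock lives inside the map value, is taken around a
 * short critical section of plain loads and stores, and must be released
 * on every path; helper calls are not allowed while it is held. In
 * outline:
 *
 *	struct hmap_elem *val = bpf_map_lookup_elem(&hash_map, &key);
 *
 *	if (!val)
 *		return 0;
 *	bpf_spin_lock(&val->lock);
 *	val->var[0] = rnd;		// plain memory accesses only
 *	bpf_spin_unlock(&val->lock);	// every path must unlock
 */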
bpf_map_lock_test(struct __sk_buff *skb) +{ + struct hmap_elem zero = {}, *val; + int rnd = bpf_get_prandom_u32(); + int key = 0, err = 1, i; + struct array_elem *q; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + goto err; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + for (i = 0; i < VAR_NUM; i++) + val->var[i] = rnd; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array */ + q = bpf_map_lookup_elem(&array_map, &key); + if (!q) + goto err; + bpf_spin_lock(&q->lock); + for (i = 0; i < VAR_NUM; i++) + q->var[i] = rnd; + bpf_spin_unlock(&q->lock); + err = 0; +err: + return err; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 1dfef77cff6f..e7798dd97f4b 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -32,6 +32,8 @@ #define ENOTSUPP 524 #endif +static int skips; + static int map_flags; #define CHECK(condition, tag, format...) ({ \ @@ -724,6 +726,15 @@ static void test_sockmap(int tasks, void *data) sizeof(key), sizeof(value), 6, 0); if (fd < 0) { + if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { + printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", + __func__); + skips++; + for (i = 0; i < 6; i++) + close(sfd[i]); + return; + } + printf("Failed to create sockmap %i\n", fd); goto out_sockmap; } @@ -1701,6 +1712,6 @@ int main(void) map_flags = BPF_F_NO_PREALLOC; run_all_tests(); - printf("test_maps: OK\n"); + printf("test_maps: OK, %d SKIPPED\n", skips); return 0; } diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d8940b8b2f8d..a08d026ac396 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -28,7 +28,7 @@ typedef __u16 __sum16; #include <sys/wait.h> #include <sys/types.h> #include <fcntl.h> - +#include <pthread.h> #include <linux/bpf.h> #include <linux/err.h> #include <bpf/bpf.h> @@ -1985,6 +1985,119 @@ static void test_flow_dissector(void) bpf_object__close(obj); } +static void *test_spin_lock(void *arg) +{ + __u32 duration, retval; + int err, prog_fd = *(u32 *) arg; + + err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + pthread_exit(arg); +} + +static void test_spinlock(void) +{ + const char *file = "./test_spin_lock.o"; + pthread_t thread_id[4]; + struct bpf_object *obj; + int prog_fd; + int err = 0, i; + void *ret; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (err) { + printf("test_spin_lock:bpf_prog_load errno %d\n", errno); + goto close_prog; + } + for (i = 0; i < 4; i++) + assert(pthread_create(&thread_id[i], NULL, + &test_spin_lock, &prog_fd) == 0); + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&prog_fd); + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void *parallel_map_access(void *arg) +{ + int err, map_fd = *(u32 *) arg; + int vars[17], i, j, rnd, key = 0; + + for (i = 0; i < 10000; i++) { + err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK); + if (err) { + printf("lookup failed\n"); + error_cnt++; + goto out; + } + if (vars[0] != 0) { + printf("lookup #%d var[0]=%d\n", i, vars[0]); + error_cnt++; + goto out; + } + rnd = vars[1]; + for (j = 2; j < 17; j++) { + if (vars[j] == 
rnd) + continue; + printf("lookup #%d var[1]=%d var[%d]=%d\n", + i, rnd, j, vars[j]); + error_cnt++; + goto out; + } + } +out: + pthread_exit(arg); +} + +static void test_map_lock(void) +{ + const char *file = "./test_map_lock.o"; + int prog_fd, map_fd[2], vars[17] = {}; + pthread_t thread_id[6]; + struct bpf_object *obj; + int err = 0, key = 0, i; + void *ret; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (err) { + printf("test_map_lock:bpf_prog_load errno %d\n", errno); + goto close_prog; + } + map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); + if (map_fd[0] < 0) + goto close_prog; + map_fd[1] = bpf_find_map(__func__, obj, "array_map"); + if (map_fd[1] < 0) + goto close_prog; + + bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK); + + for (i = 0; i < 4; i++) + assert(pthread_create(&thread_id[i], NULL, + &test_spin_lock, &prog_fd) == 0); + for (i = 4; i < 6; i++) + assert(pthread_create(&thread_id[i], NULL, + ¶llel_map_access, &map_fd[i - 4]) == 0); + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&prog_fd); + for (i = 4; i < 6; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && + ret == (void *)&map_fd[i - 4]); + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + int main(void) { srand(time(NULL)); @@ -2013,6 +2126,8 @@ int main(void) test_queue_stack_map(QUEUE); test_queue_stack_map(STACK); test_flow_dissector(); + test_spinlock(); + test_map_lock(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_spin_lock.c b/tools/testing/selftests/bpf/test_spin_lock.c new file mode 100644 index 000000000000..40f904312090 --- /dev/null +++ b/tools/testing/selftests/bpf/test_spin_lock.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +struct hmap_elem { + volatile int cnt; + struct bpf_spin_lock lock; + int test_padding; +}; + +struct bpf_map_def SEC("maps") hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem); + + +struct cls_elem { + struct bpf_spin_lock lock; + volatile int cnt; +}; + +struct bpf_map_def SEC("maps") cls_map = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct cls_elem), +}; + +BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key, + struct cls_elem); + +struct bpf_vqueue { + struct bpf_spin_lock lock; + /* 4 byte hole */ + unsigned long long lasttime; + int credit; + unsigned int rate; +}; + +struct bpf_map_def SEC("maps") vqueue = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_vqueue), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue); +#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) + +SEC("spin_lock_demo") +int bpf_sping_lock_test(struct __sk_buff *skb) +{ + volatile int credit = 0, max_credit = 100, pkt_len = 64; + struct hmap_elem zero = {}, *val; + unsigned long long curtime; + struct bpf_vqueue *q; + struct cls_elem *cls; + int key = 0; + int err = 0; + + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + bpf_map_update_elem(&hmap, &key, &zero, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + err = 1; + 
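/* parallel_map_access() above is the user-space half of the lock story:
 * the new BPF_F_LOCK flag makes lookup copy the value out under the
 * element's lock (and update copy it in the same way), so a reader never
 * sees a half-written value even while BPF programs are storing. Against
 * the hash_map element used here (lock word followed by 16 ints):
 *
 *	int vars[17] = {};
 *
 *	bpf_map_update_elem(map_fd, &key, vars, BPF_F_LOCK);
 *	if (!bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK))
 *		// vars[0], the lock word, reads back as 0 here, and
 *		// vars[1..16] form one consistent snapshot
 */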
goto err; + } + } + /* spin_lock in hash map run time test */ + bpf_spin_lock(&val->lock); + if (val->cnt) + val->cnt--; + else + val->cnt++; + if (val->cnt != 0 && val->cnt != 1) + err = 1; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array. virtual queue demo */ + q = bpf_map_lookup_elem(&vqueue, &key); + if (!q) + goto err; + curtime = bpf_ktime_get_ns(); + bpf_spin_lock(&q->lock); + q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate); + q->lasttime = curtime; + if (q->credit > max_credit) + q->credit = max_credit; + q->credit -= pkt_len; + credit = q->credit; + bpf_spin_unlock(&q->lock); + + /* spin_lock in cgroup local storage */ + cls = bpf_get_local_storage(&cls_map, 0); + bpf_spin_lock(&cls->lock); + cls->cnt++; + bpf_spin_unlock(&cls->lock); + +err: + return err; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c5e22422a852..477a9dcf9fff 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -32,8 +32,10 @@ #include <linux/bpf_perf_event.h> #include <linux/bpf.h> #include <linux/if_ether.h> +#include <linux/btf.h> #include <bpf/bpf.h> +#include <bpf/libbpf.h> #ifdef HAVE_GENHDR # include "autoconf.h" @@ -49,7 +51,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 13 +#define MAX_NR_MAPS 14 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -59,6 +61,7 @@ #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; +static int skips; struct bpf_test { const char *descr; @@ -76,6 +79,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; int fixup_cgroup_storage[MAX_FIXUPS]; int fixup_percpu_cgroup_storage[MAX_FIXUPS]; + int fixup_map_spin_lock[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval, retval_unpriv, insn_processed; @@ -263,6 +267,16 @@ static int probe_filter_length(const struct bpf_insn *fp) return len + 1; } +static bool skip_unsupported_map(enum bpf_map_type map_type) +{ + if (!bpf_probe_map_type(map_type, 0)) { + printf("SKIP (unsupported map type %d)\n", map_type); + skips++; + return true; + } + return false; +} + static int create_map(uint32_t type, uint32_t size_key, uint32_t size_value, uint32_t max_elem) { @@ -270,8 +284,11 @@ static int create_map(uint32_t type, uint32_t size_key, fd = bpf_create_map(type, size_key, size_value, max_elem, type == BPF_MAP_TYPE_HASH ? 
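/* Both test_maps and test_verifier adopt the same convention in this
 * patch: when a map or program type is missing from the running kernel,
 * the test is counted as a SKIP rather than a failure, using the libbpf
 * probes to tell the two cases apart. The shape of the check:
 *
 *	if (fd < 0) {
 *		if (!bpf_probe_map_type(type, 0)) {
 *			printf("SKIP (unsupported map type %d)\n", type);
 *			skips++;
 *			return -1;
 *		}
 *		printf("Failed to create map: %s\n", strerror(errno));
 *	}
 */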
BPF_F_NO_PREALLOC : 0); - if (fd < 0) + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; printf("Failed to create hash map '%s'!\n", strerror(errno)); + } return fd; } @@ -321,6 +338,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), sizeof(int), max_elem, 0); if (mfd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY)) + return -1; printf("Failed to create prog array '%s'!\n", strerror(errno)); return -1; } @@ -351,15 +370,20 @@ static int create_map_in_map(void) inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(int), 1, 0); if (inner_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY)) + return -1; printf("Failed to create array '%s'!\n", strerror(errno)); return inner_map_fd; } outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); - if (outer_map_fd < 0) + if (outer_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS)) + return -1; printf("Failed to create array of maps '%s'!\n", strerror(errno)); + } close(inner_map_fd); @@ -374,10 +398,105 @@ static int create_cgroup_storage(bool percpu) fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), TEST_DATA_LEN, 0, 0); - if (fd < 0) + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; printf("Failed to create cgroup storage '%s'!\n", strerror(errno)); + } + + return fd; +} + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) + +struct btf_raw_data { + __u32 raw_types[64]; + const char *str_sec; + __u32 str_sec_size; +}; + +/* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; +static __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ +}; + +static int load_btf(void) +{ + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + void *ptr, *raw_btf; + int btf_fd; + + ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)); + + memcpy(ptr, &hdr, sizeof(hdr)); + ptr += sizeof(hdr); + memcpy(ptr, btf_raw_types, hdr.type_len); + ptr += hdr.type_len; + memcpy(ptr, btf_str_sec, hdr.str_len); + ptr += hdr.str_len; + + btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0); + free(raw_btf); + if (btf_fd < 0) + return -1; + return btf_fd; +} + +static int create_map_spin_lock(void) +{ + struct bpf_create_map_attr attr = { + .name = 
"test_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 8, + .max_entries = 1, + .btf_key_type_id = 1, + .btf_value_type_id = 3, + }; + int fd, btf_fd; + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + if (fd < 0) + printf("Failed to create map with spin_lock\n"); return fd; } @@ -399,6 +518,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_in_map = test->fixup_map_in_map; int *fixup_cgroup_storage = test->fixup_cgroup_storage; int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage; + int *fixup_map_spin_lock = test->fixup_map_spin_lock; if (test->fill_helper) test->fill_helper(test); @@ -515,6 +635,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_stacktrace++; } while (*fixup_map_stacktrace); } + if (*fixup_map_spin_lock) { + map_fds[13] = create_map_spin_lock(); + do { + prog[*fixup_map_spin_lock].imm = map_fds[13]; + fixup_map_spin_lock++; + } while (*fixup_map_spin_lock); + } } static int set_admin(bool admin) @@ -580,6 +707,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int fixup_skips; __u32 pflags; int i, err; @@ -588,7 +716,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + fixup_skips = skips; do_test_fixup(test, prog_type, prog, map_fds); + /* If there were some map skips during fixup due to missing bpf + * features, skip this test. + */ + if (fixup_skips != skips) + return; prog_len = probe_filter_length(prog); pflags = 0; @@ -598,6 +732,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, pflags |= BPF_F_ANY_ALIGNMENT; fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); + if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + printf("SKIP (unsupported program type %d)\n", prog_type); + skips++; + goto close_fds; + } expected_ret = unpriv && test->result_unpriv != UNDEF ? 
test->result_unpriv : test->result; @@ -751,7 +890,7 @@ static bool test_as_unpriv(struct bpf_test *test) static int do_test(bool unpriv, unsigned int from, unsigned int to) { - int i, passes = 0, errors = 0, skips = 0; + int i, passes = 0, errors = 0; for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c new file mode 100644 index 000000000000..d829eef372a4 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -0,0 +1,331 @@ +{ + "spin_lock: test1 success", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test2 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "cannot be accessed directly", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test3 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .result = REJECT, + .errstr = "cannot be accessed directly", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "spin_lock: test4 direct ld/st", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, 
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 3),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "cannot be accessed directly",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test5 call within a locked region",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "calls are not allowed",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test6 missing unlock",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "unlock is missing",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test7 unlock without lock",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "without taking a lock",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test8 double lock",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "calls are not allowed",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test9 different lock",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3, 11 },
+	.result = REJECT,
+	.errstr = "unlock of different lock",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"spin_lock: test10 lock in subprog without unlock",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.result = REJECT,
+	.errstr = "unlock is missing",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
"spin_lock: test11 ld_abs under lock", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock), + BPF_LD_ABS(BPF_B, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 4 }, + .result = REJECT, + .errstr = "inside bpf_spin_lock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index dca58cf1a4ab..3e046695fad7 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -76,6 +76,7 @@ .errstr_unpriv = "unknown func bpf_trace_printk#6", .result_unpriv = REJECT, .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { "unpriv: pass pointer to helper function", |