diff options
Diffstat (limited to 'tools')
164 files changed, 13419 insertions, 1024 deletions
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index b13926432b84..8d6e8901ed2b 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,7 +1,9 @@ *.d +/_bpftool /bpftool bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf +profiler.skel.h diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 4d08f35034a2..b04156cfd7a3 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -19,19 +19,24 @@ SYNOPSIS FEATURE COMMANDS ================ -| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]] +| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]] | **bpftool** **feature help** | | *COMPONENT* := { **kernel** | **dev** *NAME* } DESCRIPTION =========== - **bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]] + **bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]] Probe the running kernel and dump a number of eBPF-related parameters, such as availability of the **bpf()** system call, JIT status, eBPF program types availability, eBPF helper functions availability, and more. + By default, bpftool **does not run probes** for + **bpf_probe_write_user**\ () and **bpf_trace_printk**\() + helpers which print warnings to kernel logs. To enable them + and run all probes, the **full** keyword should be used. + If the **macros** keyword (but not the **-j** option) is passed, a subset of the output is dumped as a list of **#define** macros that are ready to be included in a C @@ -44,16 +49,12 @@ DESCRIPTION Keyword **kernel** can be omitted. If no probe target is specified, probing the kernel is the default behaviour. - Note that when probed, some eBPF helpers (e.g. - **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may - print warnings to kernel logs. - - **bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]] + **bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]] Probe network device for supported eBPF features and dump results to the console. - The two keywords **macros** and **prefix** have the same - role as when probing the kernel. + The keywords **full**, **macros** and **prefix** have the + same role as when probing the kernel. **bpftool feature help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 64ddf8a4c518..9f19404f470e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -30,6 +30,7 @@ PROG COMMANDS | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** | **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] +| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -42,11 +43,15 @@ PROG COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** +| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **struct_ops** | **fentry** | **fexit** | **freplace** | } | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** | } +| *METRIC* := { +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** +| } DESCRIPTION @@ -188,6 +193,12 @@ DESCRIPTION not all of them can take the **ctx_in**/**ctx_out** arguments. bpftool does not perform checks on program types. + **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs* + Profile *METRICs* for bpf program *PROG* for *DURATION* + seconds or until user hits Ctrl-C. *DURATION* is optional. + If *DURATION* is not specified, the profiling will run up to + UINT_MAX seconds. + **bpftool prog help** Print short help message. @@ -310,6 +321,15 @@ EXAMPLES **# rm /sys/fs/bpf/xdp1** +| +| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses** + +:: + 51397 run_cnt + 40176203 cycles (83.05%) + 42518139 instructions # 1.06 insns per cycle (83.39%) + 123 llc_misses # 2.89 LLC misses per million insns (83.15%) + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst new file mode 100644 index 000000000000..f045cc89dd6d --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -0,0 +1,116 @@ +================== +bpftool-struct_ops +================== +------------------------------------------------------------------------------- +tool to register/unregister/introspect BPF struct_ops +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** | **dump** | **register** | **unregister** | **help** } + +STRUCT_OPS COMMANDS +=================== + +| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*] +| **bpftool** **struct_ops register** *OBJ* +| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP* +| **bpftool** **struct_ops help** +| +| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* } +| *OBJ* := /a/file/of/bpf_struct_ops.o + + +DESCRIPTION +=========== + **bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*] + Show brief information about the struct_ops in the system. + If *STRUCT_OPS_MAP* is specified, it shows information only + for the given struct_ops. Otherwise, it lists all struct_ops + currently existing in the system. + + Output will start with struct_ops map ID, followed by its map + name and its struct_ops's kernel type. + + **bpftool struct_ops dump** [*STRUCT_OPS_MAP*] + Dump details information about the struct_ops in the system. + If *STRUCT_OPS_MAP* is specified, it dumps information only + for the given struct_ops. Otherwise, it dumps all struct_ops + currently existing in the system. + + **bpftool struct_ops register** *OBJ* + Register bpf struct_ops from *OBJ*. All struct_ops under + the ELF section ".struct_ops" will be registered to + its kernel subsystem. + + **bpftool struct_ops unregister** *STRUCT_OPS_MAP* + Unregister the *STRUCT_OPS_MAP* from the kernel subsystem. + + **bpftool struct_ops help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -d, --debug + Print all logs available, even debug-level information. This + includes logs from libbpf as well as from the verifier, when + attempting to load programs. + +EXAMPLES +======== +**# bpftool struct_ops show** + +:: + + 100: dctcp tcp_congestion_ops + 105: cubic tcp_congestion_ops + +**# bpftool struct_ops unregister id 105** + +:: + + Unregistered tcp_congestion_ops cubic id 105 + +**# bpftool struct_ops register bpf_cubic.o** + +:: + + Registered tcp_congestion_ops cubic id 110 + + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) + **bpftool-gen**\ (8) + diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c4e810335810..f584d1fdfc64 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -59,10 +59,12 @@ LIBS = $(LIBBPF) -lelf -lz INSTALL ?= install RM ?= rm -f +CLANG ?= clang FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib -FEATURE_DISPLAY = libbfd disassembler-four-args zlib +FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \ + clang-bpf-global-var +FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -110,14 +112,39 @@ SRCS += $(BFD_SRCS) endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o + +ifeq ($(feature-clang-bpf-global-var),1) + __OBJS = $(OBJS) +else + __OBJS = $(_OBJS) +endif + +$(OUTPUT)_prog.o: prog.c + $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< + +$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) + +skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) + $(QUIET_CLANG)$(CLANG) \ + -I$(srctree)/tools/include/uapi/ \ + -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -g -O2 -target bpf -c $< -o $@ + +profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o + $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ + +$(OUTPUT)prog.o: prog.c profiler.skel.h + $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) +$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -125,6 +152,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 754d8395e451..45ee99b159e2 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -337,6 +337,7 @@ _bpftool() local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' + local METRIC_TYPE='cycles instructions l1d_loads llc_misses' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -388,7 +389,7 @@ _bpftool() _bpftool_get_prog_ids ;; name) - _bpftool_get_map_names + _bpftool_get_prog_names ;; pinned) _filedir @@ -469,7 +470,8 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt" -- \ + cgroup/setsockopt struct_ops \ + fentry fexit freplace" -- \ "$cur" ) ) return 0 ;; @@ -497,9 +499,51 @@ _bpftool() tracelog) return 0 ;; + profile) + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + name) + _bpftool_get_prog_names + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) ) + return 0 + ;; + 6) + case $prev in + duration) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) ) + return 0 + ;; + esac + ;; run) - if [[ ${#words[@]} -lt 5 ]]; then - _filedir + if [[ ${#words[@]} -eq 4 ]]; then + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 fi case $prev in @@ -507,6 +551,10 @@ _bpftool() _bpftool_get_prog_ids return 0 ;; + name) + _bpftool_get_prog_names + return 0 + ;; data_in|data_out|ctx_in|ctx_out) _filedir return 0 @@ -524,7 +572,35 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach \ - load loadall show list tracelog run' -- "$cur" ) ) + load loadall show list tracelog run profile' -- "$cur" ) ) + ;; + esac + ;; + struct_ops) + local STRUCT_OPS_TYPE='id name' + case $command in + show|list|dump|unregister) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$STRUCT_OPS_TYPE" -- "$cur" ) ) + ;; + id) + _bpftool_get_map_ids_for_type struct_ops + ;; + name) + _bpftool_get_map_names_for_type struct_ops + ;; + esac + return 0 + ;; + register) + _filedir + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'register unregister show list dump help' \ + -- "$cur" ) ) ;; esac ;; @@ -712,11 +788,17 @@ _bpftool() esac ;; pin) - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) - else - _filedir - fi + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + ;; + id) + _bpftool_get_map_ids + ;; + name) + _bpftool_get_map_names + ;; + esac return 0 ;; event_pipe) @@ -843,7 +925,7 @@ _bpftool() case $command in skeleton) _filedir - ;; + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) @@ -943,6 +1025,9 @@ _bpftool() id) _bpftool_get_prog_ids ;; + name) + _bpftool_get_prog_names + ;; pinned) _filedir ;; @@ -983,11 +1068,12 @@ _bpftool() probe) [[ $prev == "prefix" ]] && return 0 if _bpftool_search_list 'macros'; then - COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) ) + _bpftool_once_attr 'prefix' else COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) ) fi _bpftool_one_of_list 'kernel dev' + _bpftool_once_attr 'full' return 0 ;; *) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index b3745ed711ba..bcaf55b59498 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -389,6 +389,9 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef __VMLINUX_H__\n"); + printf("#define __VMLINUX_H__\n"); + printf("\n"); printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); @@ -412,6 +415,8 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute pop\n"); printf("#endif\n"); + printf("\n"); + printf("#endif /* __VMLINUX_H__ */\n"); done: btf_dump__free(d); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 01cc52b834fa..497807bec675 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -4,11 +4,13 @@ #include <ctype.h> #include <stdio.h> /* for (FILE *) used by json_writer */ #include <string.h> +#include <unistd.h> #include <asm/byteorder.h> #include <linux/bitops.h> #include <linux/btf.h> #include <linux/err.h> #include <bpf/btf.h> +#include <bpf/bpf.h> #include "json_writer.h" #include "main.h" @@ -22,13 +24,102 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, __u8 bit_offset, const void *data); -static void btf_dumper_ptr(const void *data, json_writer_t *jw, - bool is_plain_text) +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size); + +static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, + const struct btf_type *func_proto, + __u32 prog_id) { - if (is_plain_text) - jsonw_printf(jw, "%p", *(void **)data); + struct bpf_prog_info_linear *prog_info = NULL; + const struct btf_type *func_type; + const char *prog_name = NULL; + struct bpf_func_info *finfo; + struct btf *prog_btf = NULL; + struct bpf_prog_info *info; + int prog_fd, func_sig_len; + char prog_str[1024]; + + /* Get the ptr's func_proto */ + func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, + sizeof(prog_str)); + if (func_sig_len == -1) + return -1; + + if (!prog_id) + goto print; + + /* Get the bpf_prog's name. Obtain from func_info. */ + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd == -1) + goto print; + + prog_info = bpf_program__get_prog_info_linear(prog_fd, + 1UL << BPF_PROG_INFO_FUNC_INFO); + close(prog_fd); + if (IS_ERR(prog_info)) { + prog_info = NULL; + goto print; + } + info = &prog_info->info; + + if (!info->btf_id || !info->nr_func_info || + btf__get_from_id(info->btf_id, &prog_btf)) + goto print; + finfo = (struct bpf_func_info *)info->func_info; + func_type = btf__type_by_id(prog_btf, finfo->type_id); + if (!func_type || !btf_is_func(func_type)) + goto print; + + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); + +print: + if (!prog_id) + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, " 0"); + else if (prog_name) + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, + " %s/prog_id:%u", prog_name, prog_id); else - jsonw_printf(jw, "%lu", *(unsigned long *)data); + snprintf(&prog_str[func_sig_len], + sizeof(prog_str) - func_sig_len, + " <unknown_prog_name>/prog_id:%u", prog_id); + + prog_str[sizeof(prog_str) - 1] = '\0'; + jsonw_string(d->jw, prog_str); + btf__free(prog_btf); + free(prog_info); + return 0; +} + +static void btf_dumper_ptr(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) +{ + unsigned long value = *(unsigned long *)data; + const struct btf_type *ptr_type; + __s32 ptr_type_id; + + if (!d->prog_id_as_func_ptr || value > UINT32_MAX) + goto print_ptr_value; + + ptr_type_id = btf__resolve_type(d->btf, t->type); + if (ptr_type_id < 0) + goto print_ptr_value; + ptr_type = btf__type_by_id(d->btf, ptr_type_id); + if (!ptr_type || !btf_is_func_proto(ptr_type)) + goto print_ptr_value; + + if (!dump_prog_id_as_func_ptr(d, ptr_type, value)) + return; + +print_ptr_value: + if (d->is_plain_text) + jsonw_printf(d->jw, "%p", (void *)value); + else + jsonw_printf(d->jw, "%lu", value); } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, @@ -43,9 +134,78 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, return btf_dumper_do_type(d, actual_type_id, bit_offset, data); } -static void btf_dumper_enum(const void *data, json_writer_t *jw) +static int btf_dumper_enum(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) +{ + const struct btf_enum *enums = btf_enum(t); + __s64 value; + __u16 i; + + switch (t->size) { + case 8: + value = *(__s64 *)data; + break; + case 4: + value = *(__s32 *)data; + break; + case 2: + value = *(__s16 *)data; + break; + case 1: + value = *(__s8 *)data; + break; + default: + return -EINVAL; + } + + for (i = 0; i < btf_vlen(t); i++) { + if (value == enums[i].val) { + jsonw_string(d->jw, + btf__name_by_offset(d->btf, + enums[i].name_off)); + return 0; + } + } + + jsonw_int(d->jw, value); + return 0; +} + +static bool is_str_array(const struct btf *btf, const struct btf_array *arr, + const char *s) { - jsonw_printf(jw, "%d", *(int *)data); + const struct btf_type *elem_type; + const char *end_s; + + if (!arr->nelems) + return false; + + elem_type = btf__type_by_id(btf, arr->type); + /* Not skipping typedef. typedef to char does not count as + * a string now. + */ + while (elem_type && btf_is_mod(elem_type)) + elem_type = btf__type_by_id(btf, elem_type->type); + + if (!elem_type || !btf_is_int(elem_type) || elem_type->size != 1) + return false; + + if (btf_int_encoding(elem_type) != BTF_INT_CHAR && + strcmp("char", btf__name_by_offset(btf, elem_type->name_off))) + return false; + + end_s = s + arr->nelems; + while (s < end_s) { + if (!*s) + return true; + if (*s <= 0x1f || *s >= 0x7f) + return false; + s++; + } + + /* '\0' is not found */ + return false; } static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, @@ -57,6 +217,11 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, int ret = 0; __u32 i; + if (is_str_array(d->btf, arr, data)) { + jsonw_string(d->jw, data); + return 0; + } + elem_size = btf__resolve_size(d->btf, arr->type); if (elem_size < 0) return elem_size; @@ -366,10 +531,9 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_ARRAY: return btf_dumper_array(d, type_id, data); case BTF_KIND_ENUM: - btf_dumper_enum(data, d->jw); - return 0; + return btf_dumper_enum(d, t, data); case BTF_KIND_PTR: - btf_dumper_ptr(data, d->jw, d->is_plain_text); + btf_dumper_ptr(d, t, data); return 0; case BTF_KIND_UNKN: jsonw_printf(d->jw, "(unknown)"); @@ -414,10 +578,6 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, return -1; \ } while (0) -static int btf_dump_func(const struct btf *btf, char *func_sig, - const struct btf_type *func_proto, - const struct btf_type *func, int pos, int size); - static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, int pos, int size) { @@ -526,8 +686,15 @@ static int btf_dump_func(const struct btf *btf, char *func_sig, BTF_PRINT_ARG(", "); if (arg->type) { BTF_PRINT_TYPE(arg->type); - BTF_PRINT_ARG("%s", - btf__name_by_offset(btf, arg->name_off)); + if (arg->name_off) + BTF_PRINT_ARG("%s", + btf__name_by_offset(btf, arg->name_off)); + else if (pos && func_sig[pos - 1] == ' ') + /* Remove unnecessary space for + * FUNC_PROTO that does not have + * arg->name_off + */ + func_sig[--pos] = '\0'; } else { BTF_PRINT_ARG("..."); } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b75b8ec5469c..f2223dbdfb0a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name) return err; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) { - unsigned int id; - char *endptr; int err; int fd; - if (argc < 3) { - p_err("too few arguments, id ID and FILE path is required"); - return -1; - } else if (argc > 3) { - p_err("too many arguments"); - return -1; - } - - if (!is_prefix(*argv, "id")) { - p_err("expected 'id' got %s", *argv); - return -1; - } - NEXT_ARG(); - - id = strtoul(*argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", *argv); - return -1; - } - NEXT_ARG(); - - fd = get_fd_by_id(id); - if (fd < 0) { - p_err("can't open object by id (%u): %s", id, strerror(errno)); - return -1; - } + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; err = do_pin_fd(fd, *argv); @@ -597,3 +572,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what) return 0; } + +int __printf(2, 0) +print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 941873d778d8..88718ee6a438 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -112,18 +112,12 @@ print_start_section(const char *json_title, const char *plain_title, } } -static void -print_end_then_start_section(const char *json_title, const char *plain_title, - const char *define_comment, - const char *define_prefix) +static void print_end_section(void) { if (json_output) jsonw_end_object(json_wtr); else printf("\n"); - - print_start_section(json_title, plain_title, define_comment, - define_prefix); } /* Probing functions */ @@ -520,13 +514,38 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, } static void +probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, + const char *define_prefix, unsigned int id, + const char *ptype_name, __u32 ifindex) +{ + bool res; + + if (!supported_type) + res = false; + else + res = bpf_probe_helper(id, prog_type, ifindex); + + if (json_output) { + if (res) + jsonw_string(json_wtr, helper_name[id]); + } else if (define_prefix) { + printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n", + define_prefix, ptype_name, helper_name[id], + res ? "1" : "0"); + } else { + if (res) + printf("\n\t- %s", helper_name[id]); + } +} + +static void probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, - const char *define_prefix, __u32 ifindex) + const char *define_prefix, bool full_mode, + __u32 ifindex) { const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; - bool res; if (ifindex) /* Only test helpers for offload-able program types */ @@ -548,21 +567,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } for (id = 1; id < ARRAY_SIZE(helper_name); id++) { - if (!supported_type) - res = false; - else - res = bpf_probe_helper(id, prog_type, ifindex); - - if (json_output) { - if (res) - jsonw_string(json_wtr, helper_name[id]); - } else if (define_prefix) { - printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n", - define_prefix, ptype_name, helper_name[id], - res ? "1" : "0"); - } else { - if (res) - printf("\n\t- %s", helper_name[id]); + /* Skip helper functions which emit dmesg messages when not in + * the full mode. + */ + switch (id) { + case BPF_FUNC_trace_printk: + case BPF_FUNC_probe_write_user: + if (!full_mode) + continue; + /* fallthrough */ + default: + probe_helper_for_progtype(prog_type, supported_type, + define_prefix, id, ptype_name, + ifindex); } } @@ -584,13 +601,132 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex) res, define_prefix); } +static void +section_system_config(enum probe_component target, const char *define_prefix) +{ + switch (target) { + case COMPONENT_KERNEL: + case COMPONENT_UNSPEC: + if (define_prefix) + break; + + print_start_section("system_config", + "Scanning system configuration...", + NULL, /* define_comment never used here */ + NULL); /* define_prefix always NULL here */ + if (check_procfs()) { + probe_unprivileged_disabled(); + probe_jit_enable(); + probe_jit_harden(); + probe_jit_kallsyms(); + probe_jit_limit(); + } else { + p_info("/* procfs not mounted, skipping related probes */"); + } + probe_kernel_image_config(); + print_end_section(); + break; + default: + break; + } +} + +static bool section_syscall_config(const char *define_prefix) +{ + bool res; + + print_start_section("syscall_config", + "Scanning system call availability...", + "/*** System call availability ***/", + define_prefix); + res = probe_bpf_syscall(define_prefix); + print_end_section(); + + return res; +} + +static void +section_program_types(bool *supported_types, const char *define_prefix, + __u32 ifindex) +{ + unsigned int i; + + print_start_section("program_types", + "Scanning eBPF program types...", + "/*** eBPF program types ***/", + define_prefix); + + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_prog_type(i, supported_types, define_prefix, ifindex); + + print_end_section(); +} + +static void section_map_types(const char *define_prefix, __u32 ifindex) +{ + unsigned int i; + + print_start_section("map_types", + "Scanning eBPF map types...", + "/*** eBPF map types ***/", + define_prefix); + + for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) + probe_map_type(i, define_prefix, ifindex); + + print_end_section(); +} + +static void +section_helpers(bool *supported_types, const char *define_prefix, + bool full_mode, __u32 ifindex) +{ + unsigned int i; + + print_start_section("helpers", + "Scanning eBPF helper functions...", + "/*** eBPF helper functions ***/", + define_prefix); + + if (define_prefix) + printf("/*\n" + " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n" + " * to determine if <helper_name> is available for <prog_type_name>,\n" + " * e.g.\n" + " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n" + " * // do stuff with this helper\n" + " * #elif\n" + " * // use a workaround\n" + " * #endif\n" + " */\n" + "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n" + " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", + define_prefix, define_prefix, define_prefix, + define_prefix); + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_helpers_for_progtype(i, supported_types[i], + define_prefix, full_mode, ifindex); + + print_end_section(); +} + +static void section_misc(const char *define_prefix, __u32 ifindex) +{ + print_start_section("misc", + "Scanning miscellaneous eBPF features...", + "/*** eBPF misc features ***/", + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); + print_end_section(); +} + static int do_probe(int argc, char **argv) { enum probe_component target = COMPONENT_UNSPEC; const char *define_prefix = NULL; bool supported_types[128] = {}; + bool full_mode = false; __u32 ifindex = 0; - unsigned int i; char *ifname; /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN). @@ -629,6 +765,9 @@ static int do_probe(int argc, char **argv) strerror(errno)); return -1; } + } else if (is_prefix(*argv, "full")) { + full_mode = true; + NEXT_ARG(); } else if (is_prefix(*argv, "macros") && !define_prefix) { define_prefix = ""; NEXT_ARG(); @@ -658,97 +797,19 @@ static int do_probe(int argc, char **argv) jsonw_start_object(json_wtr); } - switch (target) { - case COMPONENT_KERNEL: - case COMPONENT_UNSPEC: - if (define_prefix) - break; - - print_start_section("system_config", - "Scanning system configuration...", - NULL, /* define_comment never used here */ - NULL); /* define_prefix always NULL here */ - if (check_procfs()) { - probe_unprivileged_disabled(); - probe_jit_enable(); - probe_jit_harden(); - probe_jit_kallsyms(); - probe_jit_limit(); - } else { - p_info("/* procfs not mounted, skipping related probes */"); - } - probe_kernel_image_config(); - if (json_output) - jsonw_end_object(json_wtr); - else - printf("\n"); - break; - default: - break; - } - - print_start_section("syscall_config", - "Scanning system call availability...", - "/*** System call availability ***/", - define_prefix); - - if (!probe_bpf_syscall(define_prefix)) + section_system_config(target, define_prefix); + if (!section_syscall_config(define_prefix)) /* bpf() syscall unavailable, don't probe other BPF features */ goto exit_close_json; - - print_end_then_start_section("program_types", - "Scanning eBPF program types...", - "/*** eBPF program types ***/", - define_prefix); - - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) - probe_prog_type(i, supported_types, define_prefix, ifindex); - - print_end_then_start_section("map_types", - "Scanning eBPF map types...", - "/*** eBPF map types ***/", - define_prefix); - - for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) - probe_map_type(i, define_prefix, ifindex); - - print_end_then_start_section("helpers", - "Scanning eBPF helper functions...", - "/*** eBPF helper functions ***/", - define_prefix); - - if (define_prefix) - printf("/*\n" - " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n" - " * to determine if <helper_name> is available for <prog_type_name>,\n" - " * e.g.\n" - " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n" - " * // do stuff with this helper\n" - " * #elif\n" - " * // use a workaround\n" - " * #endif\n" - " */\n" - "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n" - " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", - define_prefix, define_prefix, define_prefix, - define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) - probe_helpers_for_progtype(i, supported_types[i], - define_prefix, ifindex); - - print_end_then_start_section("misc", - "Scanning miscellaneous eBPF features...", - "/*** eBPF misc features ***/", - define_prefix); - probe_large_insn_limit(define_prefix, ifindex); + section_program_types(supported_types, define_prefix, ifindex); + section_map_types(define_prefix, ifindex); + section_helpers(supported_types, define_prefix, full_mode, ifindex); + section_misc(define_prefix, ifindex); exit_close_json: - if (json_output) { - /* End current "section" of probes */ - jsonw_end_object(json_wtr); + if (json_output) /* End root object */ jsonw_end_object(json_wtr); - } return 0; } @@ -761,7 +822,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n" + "Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n" " %s %s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 6d41bbfc6459..466c269eabdd 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -58,7 +58,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -79,13 +79,6 @@ static int do_version(int argc, char **argv) return 0; } -static int __printf(2, 0) -print_all_levels(__maybe_unused enum libbpf_print_level level, - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} - int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)) { @@ -228,6 +221,7 @@ static const struct cmd cmds[] = { { "feature", do_feature }, { "btf", do_btf }, { "gen", do_gen }, + { "struct_ops", do_struct_ops }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 4e75b58d3989..86f14ce26fd7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -14,6 +14,8 @@ #include <linux/hashtable.h> #include <tools/libc_compat.h> +#include <bpf/libbpf.h> + #include "json_writer.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -76,6 +78,9 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", }; extern const char * const map_type_name[]; @@ -143,7 +148,7 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path, bool quiet); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int mount_bpffs_for_pin(const char *name); -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); @@ -156,6 +161,7 @@ int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); int do_btf(int argc, char **argv); int do_gen(int argc, char **argv); +int do_struct_ops(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); @@ -200,6 +206,7 @@ struct btf_dumper { const struct btf *btf; json_writer_t *jw; bool is_plain_text; + bool prog_id_as_func_ptr; }; /* btf_dumper_type - print data along with type information @@ -226,4 +233,7 @@ struct tcmsg; int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, const char *devname, int ifindex); + +int print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e6c85680b34d..693a632f6813 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_map_get_fd_by_id); + err = do_pin_any(argc, argv, map_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index b352ab041160..f6a5974a7b0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -4,6 +4,7 @@ #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> +#include <signal.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -11,10 +12,13 @@ #include <time.h> #include <unistd.h> #include <net/if.h> +#include <sys/ioctl.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <linux/err.h> +#include <linux/perf_event.h> #include <linux/sizes.h> #include <bpf/bpf.h> @@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id); + err = do_pin_any(argc, argv, prog_parse_fd); if (!err && json_output) jsonw_null(json_wtr); return err; @@ -1243,6 +1247,25 @@ free_data_in: return err; } +static int +get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + libbpf_print_fn_t print_backup; + int ret; + + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + if (!ret) + return ret; + + /* libbpf_prog_type_by_name() failed, let's re-run with debug level */ + print_backup = libbpf_set_print(print_all_levels); + ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type); + libbpf_set_print(print_backup); + + return ret; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) strcat(type, *argv); strcat(type, "/"); - err = libbpf_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); free(type); if (err < 0) goto err_free_reuse_maps; @@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (prog_type == BPF_PROG_TYPE_UNSPEC) { const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &prog_type, - &expected_attach_type); + err = get_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); if (err < 0) goto err_close_obj; } @@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv) return load_with_options(argc, argv, false); } +#ifdef BPFTOOL_WITHOUT_SKELETONS + +static int do_profile(int argc, char **argv) +{ + p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0"); + return 0; +} + +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "profiler.skel.h" + +struct profile_metric { + const char *name; + struct bpf_perf_event_value val; + struct perf_event_attr attr; + bool selected; + + /* calculate ratios like instructions per cycle */ + const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */ + const char *ratio_desc; + const float ratio_mul; +} metrics[] = { + { + .name = "cycles", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .exclude_user = 1, + }, + }, + { + .name = "instructions", + .attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .exclude_user = 1, + }, + .ratio_metric = 1, + .ratio_desc = "insns per cycle", + .ratio_mul = 1.0, + }, + { + .name = "l1d_loads", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .exclude_user = 1, + }, + }, + { + .name = "llc_misses", + .attr = { + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .exclude_user = 1 + }, + .ratio_metric = 2, + .ratio_desc = "LLC misses per million insns", + .ratio_mul = 1e6, + }, +}; + +static __u64 profile_total_count; + +#define MAX_NUM_PROFILE_METRICS 4 + +static int profile_parse_metrics(int argc, char **argv) +{ + unsigned int metric_cnt; + int selected_cnt = 0; + unsigned int i; + + metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); + + while (argc > 0) { + for (i = 0; i < metric_cnt; i++) { + if (is_prefix(argv[0], metrics[i].name)) { + if (!metrics[i].selected) + selected_cnt++; + metrics[i].selected = true; + break; + } + } + if (i == metric_cnt) { + p_err("unknown metric %s", argv[0]); + return -1; + } + NEXT_ARG(); + } + if (selected_cnt > MAX_NUM_PROFILE_METRICS) { + p_err("too many (%d) metrics, please specify no more than %d metrics at at time", + selected_cnt, MAX_NUM_PROFILE_METRICS); + return -1; + } + return selected_cnt; +} + +static void profile_read_values(struct profiler_bpf *obj) +{ + __u32 m, cpu, num_cpu = obj->rodata->num_cpu; + int reading_map_fd, count_map_fd; + __u64 counts[num_cpu]; + __u32 key = 0; + int err; + + reading_map_fd = bpf_map__fd(obj->maps.accum_readings); + count_map_fd = bpf_map__fd(obj->maps.counts); + if (reading_map_fd < 0 || count_map_fd < 0) { + p_err("failed to get fd for map"); + return; + } + + err = bpf_map_lookup_elem(count_map_fd, &key, counts); + if (err) { + p_err("failed to read count_map: %s", strerror(errno)); + return; + } + + profile_total_count = 0; + for (cpu = 0; cpu < num_cpu; cpu++) + profile_total_count += counts[cpu]; + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value values[num_cpu]; + + if (!metrics[m].selected) + continue; + + err = bpf_map_lookup_elem(reading_map_fd, &key, values); + if (err) { + p_err("failed to read reading_map: %s", + strerror(errno)); + return; + } + for (cpu = 0; cpu < num_cpu; cpu++) { + metrics[m].val.counter += values[cpu].counter; + metrics[m].val.enabled += values[cpu].enabled; + metrics[m].val.running += values[cpu].running; + } + key++; + } +} + +static void profile_print_readings_json(void) +{ + __u32 m; + + jsonw_start_array(json_wtr); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "metric", metrics[m].name); + jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count); + jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter); + jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled); + jsonw_lluint_field(json_wtr, "running", metrics[m].val.running); + + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + +static void profile_print_readings_plain(void) +{ + __u32 m; + + printf("\n%18llu %-20s\n", profile_total_count, "run_cnt"); + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + struct bpf_perf_event_value *val = &metrics[m].val; + int r; + + if (!metrics[m].selected) + continue; + printf("%18llu %-20s", val->counter, metrics[m].name); + + r = metrics[m].ratio_metric - 1; + if (r >= 0 && metrics[r].selected && + metrics[r].val.counter > 0) { + printf("# %8.2f %-30s", + val->counter * metrics[m].ratio_mul / + metrics[r].val.counter, + metrics[m].ratio_desc); + } else { + printf("%-41s", ""); + } + + if (val->enabled > val->running) + printf("(%4.2f%%)", + val->running * 100.0 / val->enabled); + printf("\n"); + } +} + +static void profile_print_readings(void) +{ + if (json_output) + profile_print_readings_json(); + else + profile_print_readings_plain(); +} + +static char *profile_target_name(int tgt_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; + const struct btf_type *t; + char *name = NULL; + struct btf *btf; + + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("failed to get info_linear for prog FD %d", tgt_fd); + return NULL; + } + + if (info_linear->info.btf_id == 0 || + btf__get_from_id(info_linear->info.btf_id, &btf)) { + p_err("prog FD %d doesn't have valid btf", tgt_fd); + goto out; + } + + func_info = (struct bpf_func_info *)(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); + if (!t) { + p_err("btf %d doesn't have type %d", + info_linear->info.btf_id, func_info[0].type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); +out: + free(info_linear); + return name; +} + +static struct profiler_bpf *profile_obj; +static int profile_tgt_fd = -1; +static char *profile_tgt_name; +static int *profile_perf_events; +static int profile_perf_event_cnt; + +static void profile_close_perf_events(struct profiler_bpf *obj) +{ + int i; + + for (i = profile_perf_event_cnt - 1; i >= 0; i--) + close(profile_perf_events[i]); + + free(profile_perf_events); + profile_perf_event_cnt = 0; +} + +static int profile_open_perf_events(struct profiler_bpf *obj) +{ + unsigned int cpu, m; + int map_fd, pmu_fd; + + profile_perf_events = calloc( + sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + if (!profile_perf_events) { + p_err("failed to allocate memory for perf_event array: %s", + strerror(errno)); + return -1; + } + map_fd = bpf_map__fd(obj->maps.events); + if (map_fd < 0) { + p_err("failed to get fd for events map"); + return -1; + } + + for (m = 0; m < ARRAY_SIZE(metrics); m++) { + if (!metrics[m].selected) + continue; + for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { + pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, + -1/*pid*/, cpu, -1/*group_fd*/, 0); + if (pmu_fd < 0 || + bpf_map_update_elem(map_fd, &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + p_err("failed to create event %s on cpu %d", + metrics[m].name, cpu); + return -1; + } + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + } + } + return 0; +} + +static void profile_print_and_cleanup(void) +{ + profile_close_perf_events(profile_obj); + profile_read_values(profile_obj); + profile_print_readings(); + profiler_bpf__destroy(profile_obj); + + close(profile_tgt_fd); + free(profile_tgt_name); +} + +static void int_exit(int signo) +{ + profile_print_and_cleanup(); + exit(0); +} + +static int do_profile(int argc, char **argv) +{ + int num_metric, num_cpu, err = -1; + struct bpf_program *prog; + unsigned long duration; + char *endptr; + + /* we at least need two args for the prog and one metric */ + if (!REQ_ARGS(3)) + return -EINVAL; + + /* parse target fd */ + profile_tgt_fd = prog_parse_fd(&argc, &argv); + if (profile_tgt_fd < 0) { + p_err("failed to parse fd"); + return -1; + } + + /* parse profiling optional duration */ + if (argc > 2 && is_prefix(argv[0], "duration")) { + NEXT_ARG(); + duration = strtoul(*argv, &endptr, 0); + if (*endptr) + usage(); + NEXT_ARG(); + } else { + duration = UINT_MAX; + } + + num_metric = profile_parse_metrics(argc, argv); + if (num_metric <= 0) + goto out; + + num_cpu = libbpf_num_possible_cpus(); + if (num_cpu <= 0) { + p_err("failed to identify number of CPUs"); + goto out; + } + + profile_obj = profiler_bpf__open(); + if (!profile_obj) { + p_err("failed to open and/or load BPF object"); + goto out; + } + + profile_obj->rodata->num_cpu = num_cpu; + profile_obj->rodata->num_metric = num_metric; + + /* adjust map sizes */ + bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); + bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); + bpf_map__resize(profile_obj->maps.accum_readings, num_metric); + bpf_map__resize(profile_obj->maps.counts, 1); + + /* change target name */ + profile_tgt_name = profile_target_name(profile_tgt_fd); + if (!profile_tgt_name) + goto out; + + bpf_object__for_each_program(prog, profile_obj->obj) { + err = bpf_program__set_attach_target(prog, profile_tgt_fd, + profile_tgt_name); + if (err) { + p_err("failed to set attach target\n"); + goto out; + } + } + + set_max_rlimit(); + err = profiler_bpf__load(profile_obj); + if (err) { + p_err("failed to load profile_obj"); + goto out; + } + + err = profile_open_perf_events(profile_obj); + if (err) + goto out; + + err = profiler_bpf__attach(profile_obj); + if (err) { + p_err("failed to attach profile_obj"); + goto out; + } + signal(SIGINT, int_exit); + + sleep(duration); + profile_print_and_cleanup(); + return 0; + +out: + profile_close_perf_events(profile_obj); + if (profile_obj) + profiler_bpf__destroy(profile_obj); + close(profile_tgt_fd); + free(profile_tgt_name); + return err; +} + +#endif /* BPFTOOL_WITHOUT_SKELETONS */ + static int do_help(int argc, char **argv) { if (json_output) { @@ -1560,6 +1999,7 @@ static int do_help(int argc, char **argv) " [data_out FILE [data_size_out L]] \\\n" " [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n" " [repeat N]\n" + " %s %s profile PROG [duration DURATION] METRICs\n" " %s %s tracelog\n" " %s %s help\n" "\n" @@ -1573,16 +2013,17 @@ static int do_help(int argc, char **argv) " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n" - " cgroup/recvmsg6 | cgroup/getsockopt |\n" - " cgroup/setsockopt }\n" + " cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n" + " struct_ops | fentry | fexit | freplace }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" + " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1599,6 +2040,7 @@ static const struct cmd cmds[] = { { "detach", do_detach }, { "tracelog", do_tracelog }, { "run", do_run }, + { "profile", do_profile }, { 0 } }; diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c new file mode 100644 index 000000000000..20034c12f7c5 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include "profiler.h" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* map of perf event fds, num_cpu * num_metric entries */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +/* readings at fentry */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} fentry_readings SEC(".maps"); + +/* accumulated readings */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} accum_readings SEC(".maps"); + +/* sample counts, one per cpu */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u64)); +} counts SEC(".maps"); + +const volatile __u32 num_cpu = 1; +const volatile __u32 num_metric = 1; +#define MAX_NUM_MATRICS 4 + +SEC("fentry/XXX") +int BPF_PROG(fentry_XXX) +{ + struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS]; + u32 key = bpf_get_smp_processor_id(); + u32 i; + + /* look up before reading, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + u32 flag = i; + + ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); + if (!ptrs[i]) + return 0; + } + + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + struct bpf_perf_event_value reading; + int err; + + err = bpf_perf_event_read_value(&events, key, &reading, + sizeof(reading)); + if (err) + return 0; + *(ptrs[i]) = reading; + key += num_cpu; + } + + return 0; +} + +static inline void +fexit_update_maps(u32 id, struct bpf_perf_event_value *after) +{ + struct bpf_perf_event_value *before, diff, *accum; + + before = bpf_map_lookup_elem(&fentry_readings, &id); + /* only account samples with a valid fentry_reading */ + if (before && before->counter) { + struct bpf_perf_event_value *accum; + + diff.counter = after->counter - before->counter; + diff.enabled = after->enabled - before->enabled; + diff.running = after->running - before->running; + + accum = bpf_map_lookup_elem(&accum_readings, &id); + if (accum) { + accum->counter += diff.counter; + accum->enabled += diff.enabled; + accum->running += diff.running; + } + } +} + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; + u32 cpu = bpf_get_smp_processor_id(); + u32 i, one = 1, zero = 0; + int err; + u64 *count; + + /* read all events before updating the maps, to reduce error */ + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, + readings + i, sizeof(*readings)); + if (err) + return 0; + } + count = bpf_map_lookup_elem(&counts, &zero); + if (count) { + *count += 1; + for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + fexit_update_maps(i, &readings[i]); + } + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h new file mode 100644 index 000000000000..1f767e9510f7 --- /dev/null +++ b/tools/bpf/bpftool/skeleton/profiler.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __PROFILER_H +#define __PROFILER_H + +/* useful typedefs from vmlinux.h */ + +typedef signed char __s8; +typedef unsigned char __u8; +typedef short int __s16; +typedef short unsigned int __u16; +typedef int __s32; +typedef unsigned int __u32; +typedef long long int __s64; +typedef long long unsigned int __u64; + +typedef __s8 s8; +typedef __u8 u8; +typedef __s16 s16; +typedef __u16 u16; +typedef __s32 s32; +typedef __u32 u32; +typedef __s64 s64; +typedef __u64 u64; + +enum { + false = 0, + true = 1, +}; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif + +typedef __u16 __bitwise__ __le16; +typedef __u16 __bitwise__ __be16; +typedef __u32 __bitwise__ __le32; +typedef __u32 __bitwise__ __be32; +typedef __u64 __bitwise__ __le64; +typedef __u64 __bitwise__ __be64; + +typedef __u16 __bitwise__ __sum16; +typedef __u32 __bitwise__ __wsum; + +#endif /* __PROFILER_H */ diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c new file mode 100644 index 000000000000..2a7befbd11ad --- /dev/null +++ b/tools/bpf/bpftool/struct_ops.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook */ + +#include <errno.h> +#include <stdio.h> +#include <unistd.h> + +#include <linux/err.h> + +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> + +#include "json_writer.h" +#include "main.h" + +#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" + +static const struct btf_type *map_info_type; +static __u32 map_info_alloc_len; +static struct btf *btf_vmlinux; +static __s32 map_info_type_id; + +struct res { + unsigned int nr_maps; + unsigned int nr_errs; +}; + +static const struct btf *get_btf_vmlinux(void) +{ + if (btf_vmlinux) + return btf_vmlinux; + + btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(btf_vmlinux)) + p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); + + return btf_vmlinux; +} + +static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) +{ + const struct btf *kern_btf; + const struct btf_type *t; + const char *st_ops_name; + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) + return "<btf_vmlinux_not_found>"; + + t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); + st_ops_name = btf__name_by_offset(kern_btf, t->name_off); + st_ops_name += strlen(STRUCT_OPS_VALUE_PREFIX); + + return st_ops_name; +} + +static __s32 get_map_info_type_id(void) +{ + const struct btf *kern_btf; + + if (map_info_type_id) + return map_info_type_id; + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) { + map_info_type_id = PTR_ERR(kern_btf); + return map_info_type_id; + } + + map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info", + BTF_KIND_STRUCT); + if (map_info_type_id < 0) { + p_err("can't find bpf_map_info from btf_vmlinux"); + return map_info_type_id; + } + map_info_type = btf__type_by_id(kern_btf, map_info_type_id); + + /* Ensure map_info_alloc() has at least what the bpftool needs */ + map_info_alloc_len = map_info_type->size; + if (map_info_alloc_len < sizeof(struct bpf_map_info)) + map_info_alloc_len = sizeof(struct bpf_map_info); + + return map_info_type_id; +} + +/* If the subcmd needs to print out the bpf_map_info, + * it should always call map_info_alloc to allocate + * a bpf_map_info object instead of allocating it + * on the stack. + * + * map_info_alloc() will take the running kernel's btf + * into account. i.e. it will consider the + * sizeof(struct bpf_map_info) of the running kernel. + * + * It will enable the "struct_ops" cmd to print the latest + * "struct bpf_map_info". + * + * [ Recall that "struct_ops" requires the kernel's btf to + * be available ] + */ +static struct bpf_map_info *map_info_alloc(__u32 *alloc_len) +{ + struct bpf_map_info *info; + + if (get_map_info_type_id() < 0) + return NULL; + + info = calloc(1, map_info_alloc_len); + if (!info) + p_err("mem alloc failed"); + else + *alloc_len = map_info_alloc_len; + + return info; +} + +/* It iterates all struct_ops maps of the system. + * It returns the fd in "*res_fd" and map_info in "*info". + * In the very first iteration, info->id should be 0. + * An optional map "*name" filter can be specified. + * The filter can be made more flexible in the future. + * e.g. filter by kernel-struct-ops-name, regex-name, glob-name, ...etc. + * + * Return value: + * 1: A struct_ops map found. It is returned in "*res_fd" and "*info". + * The caller can continue to call get_next in the future. + * 0: No struct_ops map is returned. + * All struct_ops map has been found. + * -1: Error and the caller should abort the iteration. + */ +static int get_next_struct_ops_map(const char *name, int *res_fd, + struct bpf_map_info *info, __u32 info_len) +{ + __u32 id = info->id; + int err, fd; + + while (true) { + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + return 0; + p_err("can't get next map: %s", strerror(errno)); + return -1; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, info, &info_len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + close(fd); + return -1; + } + + if (info->type == BPF_MAP_TYPE_STRUCT_OPS && + (!name || !strcmp(name, info->name))) { + *res_fd = fd; + return 1; + } + close(fd); + } +} + +static int cmd_retval(const struct res *res, bool must_have_one_map) +{ + if (res->nr_errs || (!res->nr_maps && must_have_one_map)) + return -1; + + return 0; +} + +/* "data" is the work_func private storage */ +typedef int (*work_func)(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr); + +/* Find all struct_ops map in the system. + * Filter out by "name" (if specified). + * Then call "func(fd, info, data, wtr)" on each struct_ops map found. + */ +static struct res do_search(const char *name, work_func func, void *data, + struct json_writer *wtr) +{ + struct bpf_map_info *info; + struct res res = {}; + __u32 info_len; + int fd, err; + + info = map_info_alloc(&info_len); + if (!info) { + res.nr_errs++; + return res; + } + + if (wtr) + jsonw_start_array(wtr); + while ((err = get_next_struct_ops_map(name, &fd, info, info_len)) == 1) { + res.nr_maps++; + err = func(fd, info, data, wtr); + if (err) + res.nr_errs++; + close(fd); + } + if (wtr) + jsonw_end_array(wtr); + + if (err) + res.nr_errs++; + + if (!wtr && name && !res.nr_errs && !res.nr_maps) + /* It is not printing empty []. + * Thus, needs to specifically say nothing found + * for "name" here. + */ + p_err("no struct_ops found for %s", name); + else if (!wtr && json_output && !res.nr_errs) + /* The "func()" above is not writing any json (i.e. !wtr + * test here). + * + * However, "-j" is enabled and there is no errs here, + * so call json_null() as the current convention of + * other cmds. + */ + jsonw_null(json_wtr); + + free(info); + return res; +} + +static struct res do_one_id(const char *id_str, work_func func, void *data, + struct json_writer *wtr) +{ + struct bpf_map_info *info; + struct res res = {}; + unsigned long id; + __u32 info_len; + char *endptr; + int fd; + + id = strtoul(id_str, &endptr, 0); + if (*endptr || !id || id > UINT32_MAX) { + p_err("invalid id %s", id_str); + res.nr_errs++; + return res; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd == -1) { + p_err("can't get map by id (%lu): %s", id, strerror(errno)); + res.nr_errs++; + return res; + } + + info = map_info_alloc(&info_len); + if (!info) { + res.nr_errs++; + goto done; + } + + if (bpf_obj_get_info_by_fd(fd, info, &info_len)) { + p_err("can't get map info: %s", strerror(errno)); + res.nr_errs++; + goto done; + } + + if (info->type != BPF_MAP_TYPE_STRUCT_OPS) { + p_err("%s id %u is not a struct_ops map", info->name, info->id); + res.nr_errs++; + goto done; + } + + res.nr_maps++; + + if (func(fd, info, data, wtr)) + res.nr_errs++; + else if (!wtr && json_output) + /* The "func()" above is not writing any json (i.e. !wtr + * test here). + * + * However, "-j" is enabled and there is no errs here, + * so call json_null() as the current convention of + * other cmds. + */ + jsonw_null(json_wtr); + +done: + free(info); + close(fd); + + return res; +} + +static struct res do_work_on_struct_ops(const char *search_type, + const char *search_term, + work_func func, void *data, + struct json_writer *wtr) +{ + if (search_type) { + if (is_prefix(search_type, "id")) + return do_one_id(search_term, func, data, wtr); + else if (!is_prefix(search_type, "name")) + usage(); + } + + return do_search(search_term, func, data, wtr); +} + +static int __do_show(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + if (wtr) { + jsonw_start_object(wtr); + jsonw_uint_field(wtr, "id", info->id); + jsonw_string_field(wtr, "name", info->name); + jsonw_string_field(wtr, "kernel_struct_ops", + get_kern_struct_ops_name(info)); + jsonw_end_object(wtr); + } else { + printf("%u: %-15s %-32s\n", info->id, info->name, + get_kern_struct_ops_name(info)); + } + + return 0; +} + +static int do_show(int argc, char **argv) +{ + const char *search_type = NULL, *search_term = NULL; + struct res res; + + if (argc && argc != 2) + usage(); + + if (argc == 2) { + search_type = GET_ARG(); + search_term = GET_ARG(); + } + + res = do_work_on_struct_ops(search_type, search_term, __do_show, + NULL, json_wtr); + + return cmd_retval(&res, !!search_term); +} + +static int __do_dump(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + struct btf_dumper *d = (struct btf_dumper *)data; + const struct btf_type *struct_ops_type; + const struct btf *kern_btf = d->btf; + const char *struct_ops_name; + int zero = 0; + void *value; + + /* note: d->jw == wtr */ + + kern_btf = d->btf; + + /* The kernel supporting BPF_MAP_TYPE_STRUCT_OPS must have + * btf_vmlinux_value_type_id. + */ + struct_ops_type = btf__type_by_id(kern_btf, + info->btf_vmlinux_value_type_id); + struct_ops_name = btf__name_by_offset(kern_btf, + struct_ops_type->name_off); + value = calloc(1, info->value_size); + if (!value) { + p_err("mem alloc failed"); + return -1; + } + + if (bpf_map_lookup_elem(fd, &zero, value)) { + p_err("can't lookup struct_ops map %s id %u", + info->name, info->id); + free(value); + return -1; + } + + jsonw_start_object(wtr); + jsonw_name(wtr, "bpf_map_info"); + btf_dumper_type(d, map_info_type_id, (void *)info); + jsonw_end_object(wtr); + + jsonw_start_object(wtr); + jsonw_name(wtr, struct_ops_name); + btf_dumper_type(d, info->btf_vmlinux_value_type_id, value); + jsonw_end_object(wtr); + + free(value); + + return 0; +} + +static int do_dump(int argc, char **argv) +{ + const char *search_type = NULL, *search_term = NULL; + json_writer_t *wtr = json_wtr; + const struct btf *kern_btf; + struct btf_dumper d = {}; + struct res res; + + if (argc && argc != 2) + usage(); + + if (argc == 2) { + search_type = GET_ARG(); + search_term = GET_ARG(); + } + + kern_btf = get_btf_vmlinux(); + if (IS_ERR(kern_btf)) + return -1; + + if (!json_output) { + wtr = jsonw_new(stdout); + if (!wtr) { + p_err("can't create json writer"); + return -1; + } + jsonw_pretty(wtr, true); + } + + d.btf = kern_btf; + d.jw = wtr; + d.is_plain_text = !json_output; + d.prog_id_as_func_ptr = true; + + res = do_work_on_struct_ops(search_type, search_term, __do_dump, &d, + wtr); + + if (!json_output) + jsonw_destroy(&wtr); + + return cmd_retval(&res, !!search_term); +} + +static int __do_unregister(int fd, const struct bpf_map_info *info, void *data, + struct json_writer *wtr) +{ + int zero = 0; + + if (bpf_map_delete_elem(fd, &zero)) { + p_err("can't unload %s %s id %u: %s", + get_kern_struct_ops_name(info), info->name, + info->id, strerror(errno)); + return -1; + } + + p_info("Unregistered %s %s id %u", + get_kern_struct_ops_name(info), info->name, + info->id); + + return 0; +} + +static int do_unregister(int argc, char **argv) +{ + const char *search_type, *search_term; + struct res res; + + if (argc != 2) + usage(); + + search_type = GET_ARG(); + search_term = GET_ARG(); + + res = do_work_on_struct_ops(search_type, search_term, + __do_unregister, NULL, NULL); + + return cmd_retval(&res, true); +} + +static int do_register(int argc, char **argv) +{ + const struct bpf_map_def *def; + struct bpf_map_info info = {}; + __u32 info_len = sizeof(info); + int nr_errs = 0, nr_maps = 0; + struct bpf_object *obj; + struct bpf_link *link; + struct bpf_map *map; + const char *file; + + if (argc != 1) + usage(); + + file = GET_ARG(); + + obj = bpf_object__open(file); + if (IS_ERR_OR_NULL(obj)) + return -1; + + set_max_rlimit(); + + if (bpf_object__load(obj)) { + bpf_object__close(obj); + return -1; + } + + bpf_object__for_each_map(map, obj) { + def = bpf_map__def(map); + if (def->type != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + link = bpf_map__attach_struct_ops(map); + if (IS_ERR(link)) { + p_err("can't register struct_ops %s: %s", + bpf_map__name(map), + strerror(-PTR_ERR(link))); + nr_errs++; + continue; + } + nr_maps++; + + bpf_link__disconnect(link); + bpf_link__destroy(link); + + if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, + &info_len)) + p_info("Registered %s %s id %u", + get_kern_struct_ops_name(&info), + bpf_map__name(map), + info.id); + else + /* Not p_err. The struct_ops was attached + * successfully. + */ + p_info("Registered %s but can't find id: %s", + bpf_map__name(map), strerror(errno)); + } + + bpf_object__close(obj); + + if (nr_errs) + return -1; + + if (!nr_maps) { + p_err("no struct_ops found in %s", file); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n" + " %s %s dump [STRUCT_OPS_MAP]\n" + " %s %s register OBJ\n" + " %s %s unregister STRUCT_OPS_MAP\n" + " %s %s help\n" + "\n" + " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n" + " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "register", do_register }, + { "unregister", do_unregister }, + { "dump", do_dump }, + { "help", do_help }, + { 0 } +}; + +int do_struct_ops(int argc, char **argv) +{ + int err; + + err = cmd_select(cmds, argc, argv, do_help); + + btf__free(btf_vmlinux); + return err; +} diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 48a39f72fadf..1f18a409f044 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -5,9 +5,7 @@ #include "runqslower.h" #define TASK_RUNNING 0 - -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +#define BPF_F_CURRENT_CPU 0xffffffffULL const volatile __u64 min_us = 0; const volatile pid_t targ_pid = 0; diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 7ac0d8088565..ab8e89a7009c 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -67,7 +67,8 @@ FILES= \ test-llvm.bin \ test-llvm-version.bin \ test-libaio.bin \ - test-libzstd.bin + test-libzstd.bin \ + test-clang-bpf-global-var.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -75,6 +76,7 @@ CC ?= $(CROSS_COMPILE)gcc CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config +CLANG ?= clang all: $(FILES) @@ -321,6 +323,11 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd +$(OUTPUT)test-clang-bpf-global-var.bin: + $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ + grep BTF_KIND_VAR + + ############################### clean: diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c new file mode 100644 index 000000000000..221f1481d52e --- /dev/null +++ b/tools/build/feature/test-clang-bpf-global-var.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +volatile int global_value_for_test = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 22f235260a3a..2e29a671d67e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -111,6 +111,8 @@ enum bpf_cmd { BPF_MAP_LOOKUP_AND_DELETE_BATCH, BPF_MAP_UPDATE_BATCH, BPF_MAP_DELETE_BATCH, + BPF_LINK_CREATE, + BPF_LINK_UPDATE, }; enum bpf_map_type { @@ -181,6 +183,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_TRACING, BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, + BPF_PROG_TYPE_LSM, }; enum bpf_attach_type { @@ -210,6 +213,8 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, + BPF_LSM_MAC, __MAX_BPF_ATTACH_TYPE }; @@ -325,44 +330,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +398,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -534,7 +543,7 @@ union bpf_attr { __u32 prog_cnt; } query; - struct { + struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ __u64 name; __u32 prog_fd; } raw_tracepoint; @@ -562,6 +571,24 @@ union bpf_attr { __u64 probe_offset; /* output: probe_offset */ __u64 probe_addr; /* output: probe_addr */ } task_fd_query; + + struct { /* struct used by BPF_LINK_CREATE command */ + __u32 prog_fd; /* eBPF program to attach */ + __u32 target_fd; /* object to attach to */ + __u32 attach_type; /* attach type */ + __u32 flags; /* extra flags */ + } link_create; + + struct { /* struct used by BPF_LINK_UPDATE command */ + __u32 link_fd; /* link fd */ + /* new program fd to update link with */ + __u32 new_prog_fd; + __u32 flags; /* extra flags */ + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags */ + __u32 old_prog_fd; + } link_update; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -2890,6 +2917,114 @@ union bpf_attr { * Obtain the 64bit jiffies * Return * The 64 bit jiffies + * + * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * Description + * For an eBPF program attached to a perf event, retrieve the + * branch records (struct perf_branch_entry) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size + * *size* bytes. + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to + * instead return the number of bytes required to store all the + * branch entries. If this flag is set, *buf* may be NULL. + * + * **-EINVAL** if arguments invalid or **size** not a multiple + * of sizeof(struct perf_branch_entry). + * + * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_get_netns_cookie(void *ctx) + * Description + * Retrieve the cookie (generated by the kernel) of the network + * namespace the input *ctx* is associated with. The network + * namespace cookie remains stable for its lifetime and provides + * a global identifier that can be assumed unique. If *ctx* is + * NULL, then the helper returns the cookie for the initial + * network namespace. The cookie itself is very similar to that + * of bpf_get_socket_cookie() helper, but for network namespaces + * instead of sockets. + * Return + * A 8-byte long opaque number. + * + * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of the cgroup associated + * with the current task at the *ancestor_level*. The root cgroup + * is at *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with the current task, then return value will be the + * same as that of **bpf_get_current_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with the current task. + * + * The format of returned id and helper limitations are same as in + * **bpf_get_current_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * Description + * Assign the *sk* to the *skb*. When combined with appropriate + * routing configuration to receive the packet towards the socket, + * will cause *skb* to be delivered to the specified socket. + * Subsequent redirection of *skb* via **bpf_redirect**\ (), + * **bpf_clone_redirect**\ () or other methods outside of BPF may + * interfere with successful delivery to the socket. + * + * This operation is only valid from TC ingress path. + * + * The *flags* argument must be zero. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EINVAL** Unsupported flags specified. + * * **-ENOENT** Socket is unavailable for assignment. + * * **-ENETUNREACH** Socket is unreachable (wrong netns). + * * **-EOPNOTSUPP** Unsupported operation, for example a + * call from outside of TC ingress. + * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3010,7 +3145,13 @@ union bpf_attr { FN(probe_read_kernel_str), \ FN(tcp_send_ack), \ FN(send_signal_thread), \ - FN(jiffies64), + FN(jiffies64), \ + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), \ + FN(get_netns_cookie), \ + FN(get_current_ancestor_cgroup_id), \ + FN(sk_assign), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3025,69 +3166,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; + +/* BPF_FUNC_read_branch_records flags. */ +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3153,6 +3325,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { @@ -3505,13 +3678,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3590,8 +3764,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3599,12 +3775,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3620,8 +3800,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3693,9 +3875,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; @@ -3761,4 +3945,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 024af2d1d0af..ca6665ea758a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -463,6 +463,7 @@ enum { IFLA_MACSEC_REPLAY_PROTECT, IFLA_MACSEC_VALIDATION, IFLA_MACSEC_PAD, + IFLA_MACSEC_OFFLOAD, __IFLA_MACSEC_MAX, }; @@ -489,6 +490,7 @@ enum macsec_validation_type { enum macsec_offload { MACSEC_OFFLOAD_OFF = 0, MACSEC_OFFLOAD_PHY = 1, + MACSEC_OFFLOAD_MAC = 2, __MACSEC_OFFLOAD_END, MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, }; @@ -960,11 +962,12 @@ enum { #define XDP_FLAGS_SKB_MODE (1U << 1) #define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_REPLACE (1U << 4) #define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE | \ XDP_FLAGS_HW_MODE) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_MODES) + XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) /* These are stored into IFLA_XDP_ATTACHED on dump. */ enum { @@ -984,6 +987,7 @@ enum { IFLA_XDP_DRV_PROG_ID, IFLA_XDP_SKB_PROG_ID, IFLA_XDP_HW_PROG_ID, + IFLA_XDP_EXPECTED_FD, __IFLA_XDP_MAX, }; diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..91fa51a9c31d 100644 --- a/tools/testing/selftests/bpf/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c6dafe563176..5cc1b0785d18 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -235,7 +235,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) { + if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS || + attr.prog_type == BPF_PROG_TYPE_LSM) { attr.attach_btf_id = load_attr->attach_btf_id; } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || attr.prog_type == BPF_PROG_TYPE_EXT) { @@ -584,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type, + const struct bpf_link_create_opts *opts) +{ + union bpf_attr attr; + + if (!OPTS_VALID(opts, bpf_link_create_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.link_create.prog_fd = prog_fd; + attr.link_create.target_fd = target_fd; + attr.link_create.attach_type = attach_type; + + return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); +} + +int bpf_link_update(int link_fd, int new_prog_fd, + const struct bpf_link_update_opts *opts) +{ + union bpf_attr attr; + + if (!OPTS_VALID(opts, bpf_link_update_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.link_update.link_fd = link_fd; + attr.link_update.new_prog_fd = new_prog_fd; + attr.link_update.flags = OPTS_GET(opts, flags, 0); + attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + + return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); +} + int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b976e77316cc..46d47afdd887 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +struct bpf_link_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ +}; +#define bpf_link_create_opts__last_field sz + +LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type, + const struct bpf_link_create_opts *opts); + +struct bpf_link_update_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; /* extra flags */ + __u32 old_prog_fd; /* expected old program FD */ +}; +#define bpf_link_update_opts__last_field old_prog_fd + +LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd, + const struct bpf_link_update_opts *opts); + struct bpf_prog_test_run_attr { int prog_fd; int repeat; diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index b0dafe8b4ebc..f3f3c3fb98cb 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -49,7 +49,8 @@ #if defined(bpf_target_x86) -#ifdef __KERNEL__ +#if defined(__KERNEL__) || defined(__VMLINUX_H__) + #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) #define PT_REGS_PARM3(x) ((x)->dx) @@ -60,7 +61,20 @@ #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) + #else + #ifdef __i386__ /* i386 kernel is built with -mregparm=3 */ #define PT_REGS_PARM1(x) ((x)->eax) @@ -73,7 +87,20 @@ #define PT_REGS_RC(x) ((x)->eax) #define PT_REGS_SP(x) ((x)->esp) #define PT_REGS_IP(x) ((x)->eip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) +#define PT_REGS_PARM4_CORE(x) 0 +#define PT_REGS_PARM5_CORE(x) 0 +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) + #else + #define PT_REGS_PARM1(x) ((x)->rdi) #define PT_REGS_PARM2(x) ((x)->rsi) #define PT_REGS_PARM3(x) ((x)->rdx) @@ -84,6 +111,18 @@ #define PT_REGS_RC(x) ((x)->rax) #define PT_REGS_SP(x) ((x)->rsp) #define PT_REGS_IP(x) ((x)->rip) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) + #endif #endif @@ -104,6 +143,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr) + #elif defined(bpf_target_arm) #define PT_REGS_PARM1(x) ((x)->uregs[0]) @@ -117,6 +167,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->uregs[13]) #define PT_REGS_IP(x) ((x)->uregs[12]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) + #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ @@ -134,6 +195,17 @@ struct pt_regs; #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) + #elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) @@ -147,6 +219,17 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) + #elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) @@ -158,6 +241,15 @@ struct pt_regs; #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) + #elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) @@ -169,11 +261,22 @@ struct pt_regs; #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) + /* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) #else #define PT_REGS_IP(x) ((x)->pc) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif #endif @@ -192,4 +295,122 @@ struct pt_regs; (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif +#define ___bpf_concat(a, b) a ## b +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_empty(...) \ + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + +/* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and + * similar kinds of BPF programs, that accept input arguments as a single + * pointer to untyped u64 array, where each u64 can actually be a typed + * pointer or integer of different size. Instead of requring user to write + * manual casts and work with array elements by index, BPF_PROG macro + * allows user to declare a list of named and typed input arguments in the + * same syntax as for normal C function. All the casting is hidden and + * performed transparently, while user code can just assume working with + * function arguments of specified type and name. + * + * Original raw context argument is preserved as well as 'ctx' argument. + * This is useful when using BPF helpers that expect original context + * as one of the parameters (e.g., for bpf_perf_event_output()). + */ +#define BPF_PROG(name, args...) \ +name(unsigned long long *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_ctx_cast(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args) + +struct pt_regs; + +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) \ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific + * low-level way of getting kprobe input arguments from struct pt_regs, and + * provides a familiar typed and named function arguments syntax and + * semantics of accessing kprobe input paremeters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + */ +#define BPF_KPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) \ + ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional + * return value (in addition to `struct pt_regs *ctx`), but no input + * arguments, because they will be clobbered by the time probed function + * returns. + */ +#define BPF_KRETPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kretprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) + #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3d1c25fc97ae..bfef3d606b54 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -657,22 +657,32 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) int btf__load(struct btf *btf) { - __u32 log_buf_size = BPF_LOG_BUF_SIZE; + __u32 log_buf_size = 0; char *log_buf = NULL; int err = 0; if (btf->fd >= 0) return -EEXIST; - log_buf = malloc(log_buf_size); - if (!log_buf) - return -ENOMEM; +retry_load: + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; - *log_buf = 0; + *log_buf = 0; + } btf->fd = bpf_load_btf(btf->data, btf->data_size, log_buf, log_buf_size, false); if (btf->fd < 0) { + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + free(log_buf); + goto retry_load; + } + err = -errno; pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (*log_buf) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index bd09ed1710f1..0c28ee82834b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, /* enumerators share namespace with typedef idents */ dup_cnt = btf_dump_name_dups(d, d->ident_names, name); if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %d,", + btf_dump_printf(d, "\n%s%s___%zu = %u,", pfx(lvl + 1), name, dup_cnt, - (__s32)v->val); + (__u32)v->val); } else { - btf_dump_printf(d, "\n%s%s = %d,", + btf_dump_printf(d, "\n%s%s = %u,", pfx(lvl + 1), name, - (__s32)v->val); + (__u32)v->val); } } btf_dump_printf(d, "\n%s}", pfx(lvl)); @@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) return -EINVAL; - fname = OPTS_GET(opts, field_name, NULL); + fname = OPTS_GET(opts, field_name, ""); lvl = OPTS_GET(opts, indent_level, 0); btf_dump_emit_type_decl(d, id, fname, lvl); return 0; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7469c7dcc15e..ff9174282a8c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1845,7 +1845,6 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) * type definition, while using only sizeof(void *) space in ELF data section. */ static bool get_map_field_int(const char *map_name, const struct btf *btf, - const struct btf_type *def, const struct btf_member *m, __u32 *res) { const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); @@ -1972,19 +1971,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return -EINVAL; } if (strcmp(name, "type") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.type)) return -EINVAL; pr_debug("map '%s': found type = %u.\n", map_name, map->def.type); } else if (strcmp(name, "max_entries") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.max_entries)) return -EINVAL; pr_debug("map '%s': found max_entries = %u.\n", map_name, map->def.max_entries); } else if (strcmp(name, "map_flags") == 0) { - if (!get_map_field_int(map_name, obj->btf, def, m, + if (!get_map_field_int(map_name, obj->btf, m, &map->def.map_flags)) return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", @@ -1992,8 +1991,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } else if (strcmp(name, "key_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) + if (!get_map_field_int(map_name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found key_size = %u.\n", map_name, sz); @@ -2035,8 +2033,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } else if (strcmp(name, "value_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, def, m, - &sz)) + if (!get_map_field_int(map_name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found value_size = %u.\n", map_name, sz); @@ -2079,8 +2076,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, __u32 val; int err; - if (!get_map_field_int(map_name, obj->btf, def, m, - &val)) + if (!get_map_field_int(map_name, obj->btf, m, &val)) return -EINVAL; pr_debug("map '%s': found pinning = %u.\n", map_name, val); @@ -2284,11 +2280,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) } } -static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) +static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || - obj->nr_extern > 0; + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; +} + +static bool kernel_needs_btf(const struct bpf_object *obj) +{ + return obj->efile.st_ops_shndx >= 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -2324,7 +2325,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err && bpf_object__is_btf_mandatory(obj)) { + if (err && libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return err; } @@ -2348,7 +2349,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) btf_ext__free(obj->btf_ext); obj->btf_ext = NULL; - if (bpf_object__is_btf_mandatory(obj)) { + if (libbpf_needs_btf(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); return -ENOENT; } @@ -2357,7 +2358,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) { - if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || + prog->type == BPF_PROG_TYPE_LSM) return true; /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs @@ -2412,7 +2414,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) obj->btf_ext = NULL; } - if (bpf_object__is_btf_mandatory(obj)) + if (kernel_needs_btf(obj)) return err; } return 0; @@ -3868,6 +3870,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (str_is_empty(targ_name)) continue; + t = skip_mods_and_typedefs(targ_btf, i, NULL); + if (!btf_is_composite(t) && !btf_is_array(t)) + continue; + targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; @@ -4846,8 +4852,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; - int log_buf_size = BPF_LOG_BUF_SIZE; - char *log_buf; + size_t log_buf_size = 0; + char *log_buf = NULL; int btf_fd, ret; if (!insns || !insns_cnt) @@ -4861,7 +4867,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || + prog->type == BPF_PROG_TYPE_LSM) { load_attr.attach_btf_id = prog->attach_btf_id; } else if (prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_EXT) { @@ -4887,22 +4894,28 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_flags = prog->prog_flags; retry_load: - log_buf = malloc(log_buf_size); - if (!log_buf) - pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); + if (log_buf_size) { + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + } ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); if (ret >= 0) { - if (load_attr.log_level) + if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); *pfd = ret; ret = 0; goto out; } - if (errno == ENOSPC) { - log_buf_size <<= 1; + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, + log_buf_size << 1); + free(log_buf); goto retry_load; } @@ -4945,8 +4958,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i, btf_id; - if (prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_EXT) { + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { btf_id = libbpf_find_attach_btf_id(prog); if (btf_id <= 0) return btf_id; @@ -6185,6 +6199,7 @@ bool bpf_program__is_##NAME(const struct bpf_program *prog) \ } \ BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); +BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM); BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); @@ -6251,6 +6266,8 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, struct bpf_program *prog); static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, struct bpf_program *prog); +static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, + struct bpf_program *prog); struct bpf_sec_def { const char *sec; @@ -6290,6 +6307,10 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FENTRY, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("fmod_ret/", TRACING, + .expected_attach_type = BPF_MODIFY_RETURN, + .is_attach_btf = true, + .attach_fn = attach_trace), SEC_DEF("fexit/", TRACING, .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, @@ -6297,6 +6318,10 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("freplace/", EXT, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("lsm/", LSM, + .is_attach_btf = true, + .expected_attach_type = BPF_LSM_MAC, + .attach_fn = attach_lsm), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -6559,6 +6584,7 @@ invalid_prog: } #define BTF_TRACE_PREFIX "btf_trace_" +#define BTF_LSM_PREFIX "bpf_lsm_" #define BTF_MAX_NAME_SIZE 128 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, @@ -6586,9 +6612,15 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, if (attach_type == BPF_TRACE_RAW_TP) err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, BTF_KIND_TYPEDEF); + else if (attach_type == BPF_LSM_MAC) + err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name, + BTF_KIND_FUNC); else err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + return err; } @@ -6661,8 +6693,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog) err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, name + section_defs[i].len, attach_type); - if (err <= 0) - pr_warn("%s is not found in vmlinux BTF\n", name); return err; } pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); @@ -6742,6 +6772,17 @@ void *bpf_map__priv(const struct bpf_map *map) return map ? map->priv : ERR_PTR(-EINVAL); } +int bpf_map__set_initial_value(struct bpf_map *map, + const void *data, size_t size) +{ + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || + size != map->def.value_size || map->fd >= 0) + return -EINVAL; + + memcpy(map->mmaped, data, size); + return 0; +} + bool bpf_map__is_offload_neutral(const struct bpf_map *map) { return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; @@ -6932,9 +6973,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_link { int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ bool disconnected; }; +/* Replace link's underlying BPF program with the new one */ +int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) +{ + return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); +} + /* Release "ownership" of underlying BPF resource (typically, BPF program * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected * link, when destructed through bpf_link__destroy() call won't attempt to @@ -6961,26 +7010,109 @@ int bpf_link__destroy(struct bpf_link *link) err = link->detach(link); if (link->destroy) link->destroy(link); + if (link->pin_path) + free(link->pin_path); free(link); return err; } -struct bpf_link_fd { - struct bpf_link link; /* has to be at the top of struct */ - int fd; /* hook FD */ -}; +int bpf_link__fd(const struct bpf_link *link) +{ + return link->fd; +} + +const char *bpf_link__pin_path(const struct bpf_link *link) +{ + return link->pin_path; +} + +static int bpf_link__detach_fd(struct bpf_link *link) +{ + return close(link->fd); +} + +struct bpf_link *bpf_link__open(const char *path) +{ + struct bpf_link *link; + int fd; + + fd = bpf_obj_get(path); + if (fd < 0) { + fd = -errno; + pr_warn("failed to open link at %s: %d\n", path, fd); + return ERR_PTR(fd); + } + + link = calloc(1, sizeof(*link)); + if (!link) { + close(fd); + return ERR_PTR(-ENOMEM); + } + link->detach = &bpf_link__detach_fd; + link->fd = fd; + + link->pin_path = strdup(path); + if (!link->pin_path) { + bpf_link__destroy(link); + return ERR_PTR(-ENOMEM); + } + + return link; +} + +int bpf_link__pin(struct bpf_link *link, const char *path) +{ + int err; + + if (link->pin_path) + return -EBUSY; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); + if (err) + return err; + + link->pin_path = strdup(path); + if (!link->pin_path) + return -ENOMEM; + + if (bpf_obj_pin(link->fd, link->pin_path)) { + err = -errno; + zfree(&link->pin_path); + return err; + } + + pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); + return 0; +} + +int bpf_link__unpin(struct bpf_link *link) +{ + int err; + + if (!link->pin_path) + return -EINVAL; + + err = unlink(link->pin_path); + if (err != 0) + return -errno; + + pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); + zfree(&link->pin_path); + return 0; +} static int bpf_link__detach_perf_event(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; int err; - err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); + err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0); if (err) err = -errno; - close(l->fd); + close(link->fd); return err; } @@ -6988,7 +7120,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, err; if (pfd < 0) { @@ -7006,7 +7138,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_perf_event; + link->detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -7025,7 +7157,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } - return (struct bpf_link *)link; + return link; } /* @@ -7313,18 +7445,11 @@ out: return link; } -static int bpf_link__detach_fd(struct bpf_link *link) -{ - struct bpf_link_fd *l = (void *)link; - - return close(l->fd); -} - struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7337,7 +7462,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -7349,7 +7474,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, @@ -7360,10 +7485,11 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, return bpf_program__attach_raw_tracepoint(prog, tp_name); } -struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +/* Common logic for all BPF program types that attach to a btf_id */ +static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; - struct bpf_link_fd *link; + struct bpf_link *link; int prog_fd, pfd; prog_fd = bpf_program__fd(prog); @@ -7376,13 +7502,13 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.detach = &bpf_link__detach_fd; + link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { pfd = -errno; free(link); - pr_warn("program '%s': failed to attach to trace: %s\n", + pr_warn("program '%s': failed to attach: %s\n", bpf_program__title(prog, false), libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); @@ -7391,12 +7517,68 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return (struct bpf_link *)link; } +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +{ + return bpf_program__attach_btf_id(prog); +} + +struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) +{ + return bpf_program__attach_btf_id(prog); +} + static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, struct bpf_program *prog) { return bpf_program__attach_trace(prog); } +static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_lsm(prog); +} + +struct bpf_link * +bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) +{ + const struct bpf_sec_def *sec_def; + enum bpf_attach_type attach_type; + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int prog_fd, link_fd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = calloc(1, sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->detach = &bpf_link__detach_fd; + + attach_type = bpf_program__get_expected_attach_type(prog); + if (!attach_type) { + sec_def = find_sec_def(bpf_program__title(prog, false)); + if (sec_def) + attach_type = sec_def->attach_type; + } + link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL); + if (link_fd < 0) { + link_fd = -errno; + free(link); + pr_warn("program '%s': failed to attach to cgroup: %s\n", + bpf_program__title(prog, false), + libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + return ERR_PTR(link_fd); + } + link->fd = link_fd; + return link; +} + struct bpf_link *bpf_program__attach(struct bpf_program *prog) { const struct bpf_sec_def *sec_def; @@ -7410,10 +7592,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog) static int bpf_link__detach_struct_ops(struct bpf_link *link) { - struct bpf_link_fd *l = (void *)link; __u32 zero = 0; - if (bpf_map_delete_elem(l->fd, &zero)) + if (bpf_map_delete_elem(link->fd, &zero)) return -errno; return 0; @@ -7422,7 +7603,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) { struct bpf_struct_ops *st_ops; - struct bpf_link_fd *link; + struct bpf_link *link; __u32 i, zero = 0; int err; @@ -7454,10 +7635,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) return ERR_PTR(err); } - link->link.detach = bpf_link__detach_struct_ops; + link->detach = bpf_link__detach_struct_ops; link->fd = map->fd; - return (struct bpf_link *)link; + return link; } enum bpf_perf_event_ret @@ -8138,6 +8319,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } +int bpf_program__set_attach_target(struct bpf_program *prog, + int attach_prog_fd, + const char *attach_func_name) +{ + int btf_id; + + if (!prog || attach_prog_fd < 0 || !attach_func_name) + return -EINVAL; + + if (attach_prog_fd) + btf_id = libbpf_find_prog_btf_id(attach_func_name, + attach_prog_fd); + else + btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, + attach_func_name, + prog->expected_attach_type); + + if (btf_id < 0) + return btf_id; + + prog->attach_btf_id = btf_id; + prog->attach_prog_fd = attach_prog_fd; + return 0; +} + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { int err = 0, n, len, start, end = -1; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3fe12c9d1f92..44df1d3e7287 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -219,6 +219,13 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API struct bpf_link *bpf_link__open(const char *path); +LIBBPF_API int bpf_link__fd(const struct bpf_link *link); +LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); +LIBBPF_API int bpf_link__unpin(struct bpf_link *link); +LIBBPF_API int bpf_link__update_program(struct bpf_link *link, + struct bpf_program *prog); LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); @@ -240,11 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name); - LIBBPF_API struct bpf_link * bpf_program__attach_trace(struct bpf_program *prog); +LIBBPF_API struct bpf_link * +bpf_program__attach_lsm(struct bpf_program *prog); +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); + struct bpf_map; + LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); + struct bpf_insn; /* @@ -316,6 +329,7 @@ LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog); LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); @@ -334,10 +348,15 @@ LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); +LIBBPF_API int +bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, + const char *attach_func_name); + LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); @@ -398,6 +417,8 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, + const void *data, size_t size); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); @@ -435,7 +456,15 @@ struct xdp_link_info { __u8 attach_mode; }; +struct bpf_xdp_set_link_opts { + size_t sz; + __u32 old_fd; +}; +#define bpf_xdp_set_link_opts__last_field old_fd + LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, + const struct bpf_xdp_set_link_opts *opts); LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b035122142bb..bb8831605b25 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -235,3 +235,22 @@ LIBBPF_0.0.7 { btf__align_of; libbpf_find_kernel_btf; } LIBBPF_0.0.6; + +LIBBPF_0.0.8 { + global: + bpf_link__fd; + bpf_link__open; + bpf_link__pin; + bpf_link__pin_path; + bpf_link__unpin; + bpf_link__update_program; + bpf_link_create; + bpf_link_update; + bpf_map__set_initial_value; + bpf_program__attach_cgroup; + bpf_program__attach_lsm; + bpf_program__is_lsm; + bpf_program__set_attach_target; + bpf_program__set_lsm; + bpf_set_link_xdp_fd_opts; +} LIBBPF_0.0.7; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index b782ebef6ac9..2c92059c0c90 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -108,6 +108,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_LSM: default: break; } diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 431bd25c6cdb..18b5319025e1 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -132,7 +132,8 @@ done: return ret; } -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, + __u32 flags) { int sock, seq = 0, ret; struct nlattr *nla, *nla_xdp; @@ -178,6 +179,14 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) nla->nla_len += nla_xdp->nla_len; } + if (flags & XDP_FLAGS_REPLACE) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd); + memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd)); + nla->nla_len += nla_xdp->nla_len; + } + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { @@ -191,6 +200,29 @@ cleanup: return ret; } +int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, + const struct bpf_xdp_set_link_opts *opts) +{ + int old_fd = -1; + + if (!OPTS_VALID(opts, bpf_xdp_set_link_opts)) + return -EINVAL; + + if (OPTS_HAS(opts, old_fd)) { + old_fd = OPTS_GET(opts, old_fd, -1); + flags |= XDP_FLAGS_REPLACE; + } + + return __bpf_set_link_xdp_fd_replace(ifindex, fd, + old_fd, + flags); +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags); +} + static int __dump_link_nlmsg(struct nlmsghdr *nlh, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 9807903f121e..f7f4efb70a4c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -280,7 +280,11 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, fill->consumer = map + off.fr.consumer; fill->flags = map + off.fr.flags; fill->ring = map + off.fr.desc; - fill->cached_cons = umem->config.fill_size; + fill->cached_prod = *fill->producer; + /* cached_cons is "size" bigger than the real consumer pointer + * See xsk_prod_nb_free + */ + fill->cached_cons = *fill->consumer + umem->config.fill_size; map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, @@ -297,6 +301,8 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, comp->consumer = map + off.cr.consumer; comp->flags = map + off.cr.flags; comp->ring = map + off.cr.desc; + comp->cached_prod = *comp->producer; + comp->cached_cons = *comp->consumer; *umem_ptr = umem; return 0; @@ -672,6 +678,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, rx->consumer = rx_map + off.rx.consumer; rx->flags = rx_map + off.rx.flags; rx->ring = rx_map + off.rx.desc; + rx->cached_prod = *rx->producer; + rx->cached_cons = *rx->consumer; } xsk->rx = rx; @@ -691,7 +699,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, tx->consumer = tx_map + off.tx.consumer; tx->flags = tx_map + off.tx.flags; tx->ring = tx_map + off.tx.desc; - tx->cached_cons = xsk->config.tx_size; + tx->cached_prod = *tx->producer; + /* cached_cons is r->size bigger than the real consumer pointer + * See xsk_prod_nb_free + */ + tx->cached_cons = *tx->consumer + xsk->config.tx_size; } xsk->tx = tx; diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 6d2f3a1b2249..a7974638561c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -106,6 +106,7 @@ ifneq ($(silent),1) ifneq ($(V),1) QUIET_CC = @echo ' CC '$@; QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; QUIET_AR = @echo ' AR '$@; QUIET_LINK = @echo ' LINK '$@; QUIET_MKDIR = @echo ' MKDIR '$@; diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index 61df01cdf0b2..304fdf1a21dc 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -3,4 +3,7 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio tpm2/SpaceTest.log -tpm2/*.pyc + +# Python bytecode and cache +__pycache__/ +*.py[cod] diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b93fa645ee54..077818d0197f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -36,7 +36,6 @@ TARGETS += net TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter -TARGETS += networking/timestamping TARGETS += nsfs TARGETS += pidfd TARGETS += powerpc diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index ec464859c6b6..2198cd876675 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -31,6 +31,7 @@ test_tcp_check_syncookie_user test_sysctl test_hashmap test_btf_dump +test_current_pid_tgid_new_ns xdping test_cpp *.skel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 257a1aaaa37d..7729892e0b04 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,8 +20,9 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \ -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ + -I$(APIDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -32,7 +33,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_progs-no_alu32 + test_progs-no_alu32 \ + test_current_pid_tgid_new_ns # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) @@ -62,7 +64,8 @@ TEST_PROGS := test_kmod.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ - test_bpftool_build.sh + test_bpftool_build.sh \ + test_bpftool.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -128,10 +131,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) +VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ @@ -171,6 +177,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): $(call msg,MKDIR,,$@) mkdir -p $@ +$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) + $(call msg,GEN,,$@) + $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. @@ -189,8 +199,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ - -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ + -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types @@ -209,7 +219,7 @@ define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 + $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE @@ -223,7 +233,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ - $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 + $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE @@ -279,6 +289,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(INCLUDE_DIR)/vmlinux.h \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 8f21965ffc6c..5bf2fe9b1efa 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -6,7 +6,7 @@ #include <linux/types.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h deleted file mode 100644 index c6f1354d93fb..000000000000 --- a/tools/testing/selftests/bpf/bpf_trace_helpers.h +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BPF_TRACE_HELPERS_H -#define __BPF_TRACE_HELPERS_H - -#include <bpf/bpf_helpers.h> - -#define ___bpf_concat(a, b) a ## b -#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) -#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define ___bpf_empty(...) \ - ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) - -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] -#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] -#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] -#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) - -/* - * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and - * similar kinds of BPF programs, that accept input arguments as a single - * pointer to untyped u64 array, where each u64 can actually be a typed - * pointer or integer of different size. Instead of requring user to write - * manual casts and work with array elements by index, BPF_PROG macro - * allows user to declare a list of named and typed input arguments in the - * same syntax as for normal C function. All the casting is hidden and - * performed transparently, while user code can just assume working with - * function arguments of specified type and name. - * - * Original raw context argument is preserved as well as 'ctx' argument. - * This is useful when using BPF helpers that expect original context - * as one of the parameters (e.g., for bpf_perf_event_output()). - */ -#define BPF_PROG(name, args...) \ -name(unsigned long long *ctx); \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args); \ -typeof(name(0)) name(unsigned long long *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_ctx_cast(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) \ -____##name(unsigned long long *ctx, ##args) - -struct pt_regs; - -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) - -/* - * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for - * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific - * low-level way of getting kprobe input arguments from struct pt_regs, and - * provides a familiar typed and named function arguments syntax and - * semantics of accessing kprobe input paremeters. - * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might - * be necessary when using BPF helpers like bpf_perf_event_output(). - */ -#define BPF_KPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) - -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_argsN(x, args...) \ - ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) -#define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) - -/* - * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all - * input kprobe arguments, one last extra argument has to be specified, which - * captures kprobe return value. - */ -#define BPF_KRETPROBE(name, args...) \ -name(struct pt_regs *ctx); \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_kretprobe_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) -#endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 5dc109f4c097..60e3ae5d4e48 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -35,3 +35,5 @@ CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_IPV6_SIT=m CONFIG_BPF_JIT=y +CONFIG_BPF_LSM=y +CONFIG_SECURITY=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 8482bbc67eec..9a8f47fc0b91 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -11,6 +11,7 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static const struct timeval timeo_sec = { .tv_sec = 10 }; static const size_t timeo_optlen = sizeof(timeo_sec); +static int expected_stg = 0xeB9F; static int stop, duration; static int settimeo(int fd) @@ -88,7 +89,7 @@ done: return NULL; } -static void do_test(const char *tcp_ca) +static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) { struct sockaddr_in6 sa6 = {}; ssize_t nr_recv = 0, bytes = 0; @@ -126,14 +127,34 @@ static void do_test(const char *tcp_ca) err = listen(lfd, 1); if (CHECK(err == -1, "listen", "errno:%d\n", errno)) goto done; - err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d\n", err)) - goto done; + + if (sk_stg_map) { + err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, + &expected_stg, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", + "err:%d errno:%d\n", err, errno)) + goto done; + } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); if (CHECK(err == -1, "connect", "errno:%d\n", errno)) - goto wait_thread; + goto done; + + if (sk_stg_map) { + int tmp_stg; + + err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, + &tmp_stg); + if (CHECK(!err || errno != ENOENT, + "bpf_map_lookup_elem(sk_stg_map)", + "err:%d errno:%d\n", err, errno)) + goto done; + } + + err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); + if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + goto done; /* recv total_bytes */ while (bytes < total_bytes && !READ_ONCE(stop)) { @@ -149,7 +170,6 @@ static void do_test(const char *tcp_ca) CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", bytes, total_bytes, nr_recv, errno); -wait_thread: WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", @@ -175,7 +195,7 @@ static void test_cubic(void) return; } - do_test("bpf_cubic"); + do_test("bpf_cubic", NULL); bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); @@ -197,7 +217,10 @@ static void test_dctcp(void) return; } - do_test("bpf_dctcp"); + do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); + CHECK(dctcp_skel->bss->stg_result != expected_stg, + "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", + dctcp_skel->bss->stg_result, expected_stg); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 7390d3061065..cb33a7ee4e04 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -125,6 +125,6 @@ void test_btf_dump() { if (!test__start_subtest(t->name)) continue; - test_btf_dump_case(i, &btf_dump_test_cases[i]); + test_btf_dump_case(i, &btf_dump_test_cases[i]); } } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 5b13f2c6c402..70e94e783070 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 2ff21dbce179..139f8e82c7c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -6,7 +6,7 @@ #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int map_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9d8cb48b99de..9e96f8d87fea 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -8,7 +8,7 @@ #define BAR "/foo/bar/" #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int prog_load(int verdict) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c new file mode 100644 index 000000000000..6e04f8d1d15b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "test_cgroup_link.skel.h" + +static __u32 duration = 0; +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +static struct test_cgroup_link *skel = NULL; + +int ping_and_check(int exp_calls, int exp_alt_calls) +{ + skel->bss->calls = 0; + skel->bss->alt_calls = 0; + CHECK_FAIL(system(PING_CMD)); + if (CHECK(skel->bss->calls != exp_calls, "call_cnt", + "exp %d, got %d\n", exp_calls, skel->bss->calls)) + return -EINVAL; + if (CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt", + "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls)) + return -EINVAL; + return 0; +} + +void test_cgroup_link(void) +{ + struct { + const char *path; + int fd; + } cgs[] = { + { "/cg1" }, + { "/cg1/cg2" }, + { "/cg1/cg2/cg3" }, + { "/cg1/cg2/cg3/cg4" }, + }; + int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts); + struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link; + __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags; + int i = 0, err, prog_fd; + bool detach_legacy = false; + + skel = test_cgroup_link__open_and_load(); + if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n")) + return; + prog_fd = bpf_program__fd(skel->progs.egress); + + err = setup_cgroup_environment(); + if (CHECK(err, "cg_init", "failed: %d\n", err)) + goto cleanup; + + for (i = 0; i < cg_nr; i++) { + cgs[i].fd = create_and_get_cgroup(cgs[i].path); + if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd)) + goto cleanup; + } + + err = join_cgroup(cgs[last_cg].path); + if (CHECK(err, "cg_join", "fail: %d\n", err)) + goto cleanup; + + for (i = 0; i < cg_nr; i++) { + links[i] = bpf_program__attach_cgroup(skel->progs.egress, + cgs[i].fd); + if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n", + i, PTR_ERR(links[i]))) + goto cleanup; + } + + ping_and_check(cg_nr, 0); + + /* query the number of effective progs and attach flags in root cg */ + err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL, + &prog_cnt); + CHECK_FAIL(err); + CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); + if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt)) + goto cleanup; + + /* query the number of effective progs in last cg */ + err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, NULL, NULL, + &prog_cnt); + CHECK_FAIL(err); + CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); + if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n", + cg_nr, prog_cnt)) + goto cleanup; + + /* query the effective prog IDs in last cg */ + err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt); + CHECK_FAIL(err); + CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); + if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n", + cg_nr, prog_cnt)) + goto cleanup; + for (i = 1; i < prog_cnt; i++) { + CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check", + "idx %d, prev id %d, cur id %d\n", + i, prog_ids[i - 1], prog_ids[i]); + } + + /* detach bottom program and ping again */ + bpf_link__destroy(links[last_cg]); + links[last_cg] = NULL; + + ping_and_check(cg_nr - 1, 0); + + /* mix in with non link-based multi-attachments */ + err = bpf_prog_attach(prog_fd, cgs[last_cg].fd, + BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI); + if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno)) + goto cleanup; + detach_legacy = true; + + links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, + cgs[last_cg].fd); + if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", + PTR_ERR(links[last_cg]))) + goto cleanup; + + ping_and_check(cg_nr + 1, 0); + + /* detach link */ + bpf_link__destroy(links[last_cg]); + links[last_cg] = NULL; + + /* detach legacy */ + err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS); + if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno)) + goto cleanup; + detach_legacy = false; + + /* attach legacy exclusive prog attachment */ + err = bpf_prog_attach(prog_fd, cgs[last_cg].fd, + BPF_CGROUP_INET_EGRESS, 0); + if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno)) + goto cleanup; + detach_legacy = true; + + /* attempt to mix in with multi-attach bpf_link */ + tmp_link = bpf_program__attach_cgroup(skel->progs.egress, + cgs[last_cg].fd); + if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) { + bpf_link__destroy(tmp_link); + goto cleanup; + } + + ping_and_check(cg_nr, 0); + + /* detach */ + err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS); + if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno)) + goto cleanup; + detach_legacy = false; + + ping_and_check(cg_nr - 1, 0); + + /* attach back link-based one */ + links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, + cgs[last_cg].fd); + if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", + PTR_ERR(links[last_cg]))) + goto cleanup; + + ping_and_check(cg_nr, 0); + + /* check legacy exclusive prog can't be attached */ + err = bpf_prog_attach(prog_fd, cgs[last_cg].fd, + BPF_CGROUP_INET_EGRESS, 0); + if (CHECK(!err, "cg_attach_exclusive", "unexpected success")) { + bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS); + goto cleanup; + } + + /* replace BPF programs inside their links for all but first link */ + for (i = 1; i < cg_nr; i++) { + err = bpf_link__update_program(links[i], skel->progs.egress_alt); + if (CHECK(err, "prog_upd", "link #%d\n", i)) + goto cleanup; + } + + ping_and_check(1, cg_nr - 1); + + /* Attempt program update with wrong expected BPF program */ + link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt); + link_upd_opts.flags = BPF_F_REPLACE; + err = bpf_link_update(bpf_link__fd(links[0]), + bpf_program__fd(skel->progs.egress_alt), + &link_upd_opts); + if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1", + "unexpectedly succeeded, err %d, errno %d\n", err, -errno)) + goto cleanup; + + /* Compare-exchange single link program from egress to egress_alt */ + link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress); + link_upd_opts.flags = BPF_F_REPLACE; + err = bpf_link_update(bpf_link__fd(links[0]), + bpf_program__fd(skel->progs.egress_alt), + &link_upd_opts); + if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno)) + goto cleanup; + + /* ping */ + ping_and_check(0, cg_nr); + + /* close cgroup FDs before detaching links */ + for (i = 0; i < cg_nr; i++) { + if (cgs[i].fd > 0) { + close(cgs[i].fd); + cgs[i].fd = -1; + } + } + + /* BPF programs should still get called */ + ping_and_check(0, cg_nr); + + /* leave cgroup and remove them, don't detach programs */ + cleanup_cgroup_environment(); + + /* BPF programs should have been auto-detached */ + ping_and_check(0, 0); + +cleanup: + if (detach_legacy) + bpf_prog_detach2(prog_fd, cgs[last_cg].fd, + BPF_CGROUP_INET_EGRESS); + + for (i = 0; i < cg_nr; i++) { + if (!IS_ERR(links[i])) + bpf_link__destroy(links[i]); + } + test_cgroup_link__destroy(skel); + + for (i = 0; i < cg_nr; i++) { + if (cgs[i].fd > 0) + close(cgs[i].fd); + } + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 235ac4f67f5b..83493bd5745c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" #include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; - int err, pkt_fd, i; + int err, prog_fd, i; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; @@ -31,8 +26,8 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", @@ -49,7 +44,6 @@ void test_fentry_fexit(void) } close_prog: - test_pkt_access__destroy(pkt_skel); fentry_test__destroy(fentry_skel); fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 5cc06021f27d..04ebbf1cb390 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,20 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" void test_fentry_test(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; - int err, pkt_fd, i; + int err, prog_fd, i; __u32 duration = 0, retval; __u64 *result; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto cleanup; @@ -23,10 +18,10 @@ void test_fentry_test(void) if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto cleanup; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fentry_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); @@ -39,5 +34,4 @@ void test_fentry_test(void) cleanup: fentry_test__destroy(fentry_skel); - test_pkt_access__destroy(pkt_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d2c3655dd7a3..78d7a2765c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,64 +1,37 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include "fexit_test.skel.h" void test_fexit_test(void) { - struct bpf_prog_load_attr attr = { - .file = "./fexit_test.o", - }; - - char prog_name[] = "fexit/bpf_fentry_testX"; - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[6] = {}; - struct bpf_program *prog[6]; + struct fexit_test *fexit_skel = NULL; + int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - u64 result[6]; + __u64 *result; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) - return; - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) - goto close_prog; + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; - for (i = 0; i < 6; i++) { - prog_name[sizeof(prog_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) - goto close_prog; + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 6; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i + 1, result[i])) - goto close_prog; + result = (__u64 *)fexit_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } -close_prog: - for (i = 0; i < 6; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj); - bpf_object__close(pkt_obj); +cleanup: + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index eba9a970703b..925722217edf 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -82,6 +82,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) void test_get_stack_raw_tp(void) { const char *file = "./test_get_stack_rawtp.o"; + const char *file_err = "./test_get_stack_rawtp_err.o"; const char *prog_name = "raw_tracepoint/sys_enter"; int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; struct perf_buffer_opts pb_opts = {}; @@ -93,6 +94,10 @@ void test_get_stack_raw_tp(void) struct bpf_map *map; cpu_set_t cpu_set; + err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c new file mode 100644 index 000000000000..3bdaa5a40744 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_global_data_init(void) +{ + const char *file = "./test_global_data.o"; + int err = -ENOMEM, map_fd, zero = 0; + __u8 *buff = NULL, *newval = NULL; + struct bpf_object *obj; + struct bpf_map *map; + __u32 duration = 0; + size_t sz; + + obj = bpf_object__open_file(file, NULL); + if (CHECK_FAIL(!obj)) + return; + + map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); + if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) + goto out; + + sz = bpf_map__def(map)->value_size; + newval = malloc(sz); + if (CHECK_FAIL(!newval)) + goto out; + + memset(newval, 0, sz); + /* wrong size, should fail */ + err = bpf_map__set_initial_value(map, newval, sz - 1); + if (CHECK(!err, "reject set initial value wrong size", "err %d\n", err)) + goto out; + + err = bpf_map__set_initial_value(map, newval, sz); + if (CHECK(err, "set initial value", "err %d\n", err)) + goto out; + + err = bpf_object__load(obj); + if (CHECK_FAIL(err)) + goto out; + + map_fd = bpf_map__fd(map); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + buff = malloc(sz); + if (buff) + err = bpf_map_lookup_elem(map_fd, &zero, buff); + if (CHECK(!buff || err || memcmp(buff, newval, sz), + "compare .rodata map data override", + "err %d errno %d\n", err, errno)) + goto out; + + memset(newval, 1, sz); + /* object loaded - should fail */ + err = bpf_map__set_initial_value(map, newval, sz); + CHECK(!err, "reject set initial value after load", "err %d\n", err); +out: + free(buff); + free(newval); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c new file mode 100644 index 000000000000..a743288cf384 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <sys/stat.h> + +#include "test_link_pinning.skel.h" + +static int duration = 0; + +void test_link_pinning_subtest(struct bpf_program *prog, + struct test_link_pinning__bss *bss) +{ + const char *link_pin_path = "/sys/fs/bpf/pinned_link_test"; + struct stat statbuf = {}; + struct bpf_link *link; + int err, i; + + link = bpf_program__attach(prog); + if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + bss->in = 1; + usleep(1); + CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out); + + /* pin link */ + err = bpf_link__pin(link, link_pin_path); + if (CHECK(err, "link_pin", "err: %d\n", err)) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* check that link was pinned */ + err = stat(link_pin_path, &statbuf); + if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss->in = 2; + usleep(1); + CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out); + + /* destroy link, pinned link should keep program attached */ + bpf_link__destroy(link); + link = NULL; + + bss->in = 3; + usleep(1); + CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out); + + /* re-open link from BPFFS */ + link = bpf_link__open(link_pin_path); + if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link))) + goto cleanup; + + CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2", + "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link)); + + /* unpin link from BPFFS, program still attached */ + err = bpf_link__unpin(link); + if (CHECK(err, "link_unpin", "err: %d\n", err)) + goto cleanup; + + /* still active, as we have FD open now */ + bss->in = 4; + usleep(1); + CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out); + + bpf_link__destroy(link); + link = NULL; + + /* Validate it's finally detached. + * Actual detachment might get delayed a bit, so there is no reliable + * way to validate it immediately here, let's count up for long enough + * and see if eventually output stops being updated + */ + for (i = 5; i < 10000; i++) { + bss->in = i; + usleep(1); + if (bss->out == i - 1) + break; + } + CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i); + +cleanup: + if (!IS_ERR(link)) + bpf_link__destroy(link); +} + +void test_link_pinning(void) +{ + struct test_link_pinning* skel; + + skel = test_link_pinning__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + if (test__start_subtest("pin_raw_tp")) + test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss); + if (test__start_subtest("pin_tp_btf")) + test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss); + + test_link_pinning__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c new file mode 100644 index 000000000000..97fec70c600b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include "modify_return.skel.h" + +#define LOWER(x) ((x) & 0xffff) +#define UPPER(x) ((x) >> 16) + + +static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) +{ + struct modify_return *skel = NULL; + int err, prog_fd; + __u32 duration = 0, retval; + __u16 side_effect; + __s16 ret; + + skel = modify_return__open_and_load(); + if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + goto cleanup; + + err = modify_return__attach(skel); + if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + goto cleanup; + + skel->bss->input_retval = input_retval; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, + &retval, &duration); + + CHECK(err, "test_run", "err %d errno %d\n", err, errno); + + side_effect = UPPER(retval); + ret = LOWER(retval); + + CHECK(ret != want_ret, "test_run", + "unexpected ret: %d, expected: %d\n", ret, want_ret); + CHECK(side_effect != want_side_effect, "modify_return", + "unexpected side_effect: %d\n", side_effect); + + CHECK(skel->bss->fentry_result != 1, "modify_return", + "fentry failed\n"); + CHECK(skel->bss->fexit_result != 1, "modify_return", + "fexit failed\n"); + CHECK(skel->bss->fmod_ret_result != 1, "modify_return", + "fmod_ret failed\n"); + +cleanup: + modify_return__destroy(skel); +} + +void test_modify_return(void) +{ + run_test(0 /* input_retval */, + 1 /* want_side_effect */, + 4 /* want_ret */); + run_test(-EINVAL /* input_retval */, + 0 /* want_side_effect */, + -EINVAL /* want_ret */); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c new file mode 100644 index 000000000000..542240e16564 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#include <test_progs.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/syscall.h> + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +void test_ns_current_pid_tgid(void) +{ + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + int err, key = 0, duration = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { + perror("Failed to stat /proc/self/ns/pid"); + goto cleanup; + } + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c new file mode 100644 index 000000000000..e35c444902a7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <sys/socket.h> +#include <test_progs.h> +#include "bpf/libbpf_internal.h" +#include "test_perf_branches.skel.h" + +static void check_good_sample(struct test_perf_branches *skel) +{ + int written_global = skel->bss->written_global_out; + int required_size = skel->bss->required_size_out; + int written_stack = skel->bss->written_stack_out; + int pbe_size = sizeof(struct perf_branch_entry); + int duration = 0; + + if (CHECK(!skel->bss->valid, "output not valid", + "no valid sample from prog")) + return; + + /* + * It's hard to validate the contents of the branch entries b/c it + * would require some kind of disassembler and also encoding the + * valid jump instructions for supported architectures. So just check + * the easy stuff for now. + */ + CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size); + CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack); + CHECK(written_stack % pbe_size != 0, "read_branches_stack", + "stack bytes written=%d not multiple of struct size=%d\n", + written_stack, pbe_size); + CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global); + CHECK(written_global % pbe_size != 0, "read_branches_global", + "global bytes written=%d not multiple of struct size=%d\n", + written_global, pbe_size); + CHECK(written_global < written_stack, "read_branches_size", + "written_global=%d < written_stack=%d\n", written_global, written_stack); +} + +static void check_bad_sample(struct test_perf_branches *skel) +{ + int written_global = skel->bss->written_global_out; + int required_size = skel->bss->required_size_out; + int written_stack = skel->bss->written_stack_out; + int duration = 0; + + if (CHECK(!skel->bss->valid, "output not valid", + "no valid sample from prog")) + return; + + CHECK((required_size != -EINVAL && required_size != -ENOENT), + "read_branches_size", "err %d\n", required_size); + CHECK((written_stack != -EINVAL && written_stack != -ENOENT), + "read_branches_stack", "written %d\n", written_stack); + CHECK((written_global != -EINVAL && written_global != -ENOENT), + "read_branches_global", "written %d\n", written_global); +} + +static void test_perf_branches_common(int perf_fd, + void (*cb)(struct test_perf_branches *)) +{ + struct test_perf_branches *skel; + int err, i, duration = 0; + bool detached = false; + struct bpf_link *link; + volatile int j = 0; + cpu_set_t cpu_set; + + skel = test_perf_branches__open_and_load(); + if (CHECK(!skel, "test_perf_branches_load", + "perf_branches skeleton failed\n")) + return; + + /* attach perf_event */ + link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd); + if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link))) + goto out_destroy_skel; + + /* generate some branches on cpu 0 */ + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) + goto out_destroy; + /* spin the loop for a while (random high number) */ + for (i = 0; i < 1000000; ++i) + ++j; + + test_perf_branches__detach(skel); + detached = true; + + cb(skel); +out_destroy: + bpf_link__destroy(link); +out_destroy_skel: + if (!detached) + test_perf_branches__detach(skel); + test_perf_branches__destroy(skel); +} + +static void test_perf_branches_hw(void) +{ + struct perf_event_attr attr = {0}; + int duration = 0; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.freq = 1; + attr.sample_freq = 4000; + attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + + /* + * Some setups don't support branch records (virtual machines, !x86), + * so skip test in this case. + */ + if (pfd == -1) { + if (errno == ENOENT || errno == EOPNOTSUPP) { + printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", + __func__); + test__skip(); + return; + } + if (CHECK(pfd < 0, "perf_event_open", "err %d errno %d\n", + pfd, errno)) + return; + } + + test_perf_branches_common(pfd, check_good_sample); + + close(pfd); +} + +/* + * Tests negative case -- run bpf_read_branch_records() on improperly configured + * perf event. + */ +static void test_perf_branches_no_hw(void) +{ + struct perf_event_attr attr = {0}; + int duration = 0; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; + attr.freq = 1; + attr.sample_freq = 4000; + pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd)) + return; + + test_perf_branches_common(pfd, check_bad_sample); + + close(pfd); +} + +void test_perf_branches(void) +{ + if (test__start_subtest("perf_branches_hw")) + test_perf_branches_hw(); + if (test__start_subtest("perf_branches_no_hw")) + test_perf_branches_no_hw(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 0800036ed654..821b4146b7b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map; static __u32 expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; static int reuseport_array = -1, outer_map = -1; +static enum bpf_map_type inner_map_type; static int select_by_skb_data_prog; static int saved_tcp_syncookie = -1; static struct bpf_object *obj; @@ -63,13 +64,15 @@ static union sa46 { } \ }) -static int create_maps(void) +static int create_maps(enum bpf_map_type inner_type) { struct bpf_create_map_attr attr = {}; + inner_map_type = inner_type; + /* Creating reuseport_array */ attr.name = "reuseport_array"; - attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; + attr.map_type = inner_type; attr.key_size = sizeof(__u32); attr.value_size = sizeof(__u32); attr.max_entries = REUSEPORT_ARRAY_SIZE; @@ -506,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family) .pass_on_failure = 0, }; - if (type != SOCK_STREAM) { - test__skip(); - return; - } - /* * +1 for TCP-SYN and * +1 for the TCP-ACK (ack the syncookie) @@ -728,12 +726,36 @@ static void cleanup_per_test(bool no_inner_map) static void cleanup(void) { - if (outer_map != -1) + if (outer_map != -1) { close(outer_map); - if (reuseport_array != -1) + outer_map = -1; + } + + if (reuseport_array != -1) { close(reuseport_array); - if (obj) + reuseport_array = -1; + } + + if (obj) { bpf_object__close(obj); + obj = NULL; + } + + memset(expected_results, 0, sizeof(expected_results)); +} + +static const char *maptype_str(enum bpf_map_type type) +{ + switch (type) { + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + return "reuseport_sockarray"; + case BPF_MAP_TYPE_SOCKMAP: + return "sockmap"; + case BPF_MAP_TYPE_SOCKHASH: + return "sockhash"; + default: + return "unknown"; + } } static const char *family_str(sa_family_t family) @@ -760,7 +782,7 @@ static const char *sotype_str(int sotype) } } -#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ } +#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ } static void test_config(int sotype, sa_family_t family, bool inany) { @@ -768,12 +790,15 @@ static void test_config(int sotype, sa_family_t family, bool inany) void (*fn)(int sotype, sa_family_t family); const char *name; bool no_inner_map; + int need_sotype; } tests[] = { - TEST_INIT(test_err_inner_map, true /* no_inner_map */), + TEST_INIT(test_err_inner_map, + .no_inner_map = true), TEST_INIT(test_err_skb_data), TEST_INIT(test_err_sk_select_port), TEST_INIT(test_pass), - TEST_INIT(test_syncookie), + TEST_INIT(test_syncookie, + .need_sotype = SOCK_STREAM), TEST_INIT(test_pass_on_err), TEST_INIT(test_detach_bpf), }; @@ -781,7 +806,11 @@ static void test_config(int sotype, sa_family_t family, bool inany) const struct test *t; for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { - snprintf(s, sizeof(s), "%s/%s %s %s", + if (t->need_sotype && t->need_sotype != sotype) + continue; /* test not compatible with socket type */ + + snprintf(s, sizeof(s), "%s %s/%s %s %s", + maptype_str(inner_map_type), family_str(family), sotype_str(sotype), inany ? "INANY" : "LOOPBACK", t->name); @@ -816,13 +845,20 @@ static void test_all(void) test_config(c->sotype, c->family, c->inany); } -void test_select_reuseport(void) +void test_map_type(enum bpf_map_type mt) { - if (create_maps()) + if (create_maps(mt)) goto out; if (prepare_bpf_obj()) goto out; + test_all(); +out: + cleanup(); +} + +void test_select_reuseport(void) +{ saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); if (saved_tcp_fo < 0) goto out; @@ -835,8 +871,9 @@ void test_select_reuseport(void) if (disable_syncookie()) goto out; - test_all(); + test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + test_map_type(BPF_MAP_TYPE_SOCKMAP); + test_map_type(BPF_MAP_TYPE_SOCKHASH); out: - cleanup(); restore_sysctls(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c new file mode 100644 index 000000000000..d572e1a2c297 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook +// Copyright (c) 2019 Cloudflare +// Copyright (c) 2020 Isovalent, Inc. +/* + * Test that the socket assign program is able to redirect traffic towards a + * socket, regardless of whether the port or address destination of the traffic + * matches the port. + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#include "test_progs.h" + +#define BIND_PORT 1234 +#define CONNECT_PORT 4321 +#define TEST_DADDR (0xC0A80203) +#define NS_SELF "/proc/self/ns/net" + +static const struct timeval timeo_sec = { .tv_sec = 3 }; +static const size_t timeo_optlen = sizeof(timeo_sec); +static int stop, duration; + +static bool +configure_stack(void) +{ + char tc_cmd[BUFSIZ]; + + /* Move to a new networking namespace */ + if (CHECK_FAIL(unshare(CLONE_NEWNET))) + return false; + + /* Configure necessary links, routes */ + if (CHECK_FAIL(system("ip link set dev lo up"))) + return false; + if (CHECK_FAIL(system("ip route add local default dev lo"))) + return false; + if (CHECK_FAIL(system("ip -6 route add local default dev lo"))) + return false; + + /* Load qdisc, BPF program */ + if (CHECK_FAIL(system("tc qdisc add dev lo clsact"))) + return false; + sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", + "direct-action object-file ./test_sk_assign.o", + "section classifier/sk_assign_test", + (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : ""); + if (CHECK(system(tc_cmd), "BPF load failed;", + "run with -vv for more info\n")) + return false; + + return true; +} + +static int +start_server(const struct sockaddr *addr, socklen_t len, int type) +{ + int fd; + + fd = socket(addr->sa_family, type, 0); + if (CHECK_FAIL(fd == -1)) + goto out; + if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, + timeo_optlen))) + goto close_out; + if (CHECK_FAIL(bind(fd, addr, len) == -1)) + goto close_out; + if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1)) + goto close_out; + + goto out; +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int +connect_to_server(const struct sockaddr *addr, socklen_t len, int type) +{ + int fd = -1; + + fd = socket(addr->sa_family, type, 0); + if (CHECK_FAIL(fd == -1)) + goto out; + if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, + timeo_optlen))) + goto close_out; + if (CHECK_FAIL(connect(fd, addr, len))) + goto close_out; + + goto out; +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static in_port_t +get_port(int fd) +{ + struct sockaddr_storage ss; + socklen_t slen = sizeof(ss); + in_port_t port = 0; + + if (CHECK_FAIL(getsockname(fd, (struct sockaddr *)&ss, &slen))) + return port; + + switch (ss.ss_family) { + case AF_INET: + port = ((struct sockaddr_in *)&ss)->sin_port; + break; + case AF_INET6: + port = ((struct sockaddr_in6 *)&ss)->sin6_port; + break; + default: + CHECK(1, "Invalid address family", "%d\n", ss.ss_family); + } + return port; +} + +static ssize_t +rcv_msg(int srv_client, int type) +{ + struct sockaddr_storage ss; + char buf[BUFSIZ]; + socklen_t slen; + + if (type == SOCK_STREAM) + return read(srv_client, &buf, sizeof(buf)); + else + return recvfrom(srv_client, &buf, sizeof(buf), 0, + (struct sockaddr *)&ss, &slen); +} + +static int +run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type) +{ + int client = -1, srv_client = -1; + char buf[] = "testing"; + in_port_t port; + int ret = 1; + + client = connect_to_server(addr, len, type); + if (client == -1) { + perror("Cannot connect to server"); + goto out; + } + + if (type == SOCK_STREAM) { + srv_client = accept(server_fd, NULL, NULL); + if (CHECK_FAIL(srv_client == -1)) { + perror("Can't accept connection"); + goto out; + } + } else { + srv_client = server_fd; + } + if (CHECK_FAIL(write(client, buf, sizeof(buf)) != sizeof(buf))) { + perror("Can't write on client"); + goto out; + } + if (CHECK_FAIL(rcv_msg(srv_client, type) != sizeof(buf))) { + perror("Can't read on server"); + goto out; + } + + port = get_port(srv_client); + if (CHECK_FAIL(!port)) + goto out; + /* SOCK_STREAM is connected via accept(), so the server's local address + * will be the CONNECT_PORT rather than the BIND port that corresponds + * to the listen socket. SOCK_DGRAM on the other hand is connectionless + * so we can't really do the same check there; the server doesn't ever + * create a socket with CONNECT_PORT. + */ + if (type == SOCK_STREAM && + CHECK(port != htons(CONNECT_PORT), "Expected", "port %u but got %u", + CONNECT_PORT, ntohs(port))) + goto out; + else if (type == SOCK_DGRAM && + CHECK(port != htons(BIND_PORT), "Expected", + "port %u but got %u", BIND_PORT, ntohs(port))) + goto out; + + ret = 0; +out: + close(client); + if (srv_client != server_fd) + close(srv_client); + if (ret) + WRITE_ONCE(stop, 1); + return ret; +} + +static void +prepare_addr(struct sockaddr *addr, int family, __u16 port, bool rewrite_addr) +{ + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = family; + addr4->sin_port = htons(port); + if (rewrite_addr) + addr4->sin_addr.s_addr = htonl(TEST_DADDR); + else + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)addr; + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = family; + addr6->sin6_port = htons(port); + addr6->sin6_addr = in6addr_loopback; + if (rewrite_addr) + addr6->sin6_addr.s6_addr32[3] = htonl(TEST_DADDR); + break; + default: + fprintf(stderr, "Invalid family %d", family); + } +} + +struct test_sk_cfg { + const char *name; + int family; + struct sockaddr *addr; + socklen_t len; + int type; + bool rewrite_addr; +}; + +#define TEST(NAME, FAMILY, TYPE, REWRITE) \ +{ \ + .name = NAME, \ + .family = FAMILY, \ + .addr = (FAMILY == AF_INET) ? (struct sockaddr *)&addr4 \ + : (struct sockaddr *)&addr6, \ + .len = (FAMILY == AF_INET) ? sizeof(addr4) : sizeof(addr6), \ + .type = TYPE, \ + .rewrite_addr = REWRITE, \ +} + +void test_sk_assign(void) +{ + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + struct test_sk_cfg tests[] = { + TEST("ipv4 tcp port redir", AF_INET, SOCK_STREAM, false), + TEST("ipv4 tcp addr redir", AF_INET, SOCK_STREAM, true), + TEST("ipv6 tcp port redir", AF_INET6, SOCK_STREAM, false), + TEST("ipv6 tcp addr redir", AF_INET6, SOCK_STREAM, true), + TEST("ipv4 udp port redir", AF_INET, SOCK_DGRAM, false), + TEST("ipv4 udp addr redir", AF_INET, SOCK_DGRAM, true), + TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false), + TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), + }; + int server = -1; + int self_net; + + self_net = open(NS_SELF, O_RDONLY); + if (CHECK_FAIL(self_net < 0)) { + perror("Unable to open "NS_SELF); + return; + } + + if (!configure_stack()) { + perror("configure_stack"); + goto cleanup; + } + + for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { + struct test_sk_cfg *test = &tests[i]; + const struct sockaddr *addr; + + if (!test__start_subtest(test->name)) + continue; + prepare_addr(test->addr, test->family, BIND_PORT, false); + addr = (const struct sockaddr *)test->addr; + server = start_server(addr, test->len, test->type); + if (server == -1) + goto cleanup; + + /* connect to unbound ports */ + prepare_addr(test->addr, test->family, CONNECT_PORT, + test->rewrite_addr); + if (run_test(server, addr, test->len, test->type)) + goto close; + + close(server); + server = -1; + } + +close: + close(server); +cleanup: + if (CHECK_FAIL(setns(self_net, CLONE_NEWNET))) + perror("Failed to setns("NS_SELF")"); + close(self_net); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index c6d6b685a946..4538bd08203f 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -14,6 +14,7 @@ void test_skb_ctx(void) .wire_len = 100, .gso_segs = 8, .mark = 9, + .gso_size = 10, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c new file mode 100644 index 000000000000..06b86addc181 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +/* + * Tests for sockmap/sockhash holding kTLS sockets. + */ + +#include "test_progs.h" + +#define MAX_TEST_NAME 80 +#define TCP_ULP 31 + +static int tcp_server(int family) +{ + int err, s; + + s = socket(family, SOCK_STREAM, 0); + if (CHECK_FAIL(s == -1)) { + perror("socket"); + return -1; + } + + err = listen(s, SOMAXCONN); + if (CHECK_FAIL(err)) { + perror("listen"); + return -1; + } + + return s; +} + +static int disconnect(int fd) +{ + struct sockaddr unspec = { AF_UNSPEC }; + + return connect(fd, &unspec, sizeof(unspec)); +} + +/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */ +static void test_sockmap_ktls_disconnect_after_delete(int family, int map) +{ + struct sockaddr_storage addr = {0}; + socklen_t len = sizeof(addr); + int err, cli, srv, zero = 0; + + srv = tcp_server(family); + if (srv == -1) + return; + + err = getsockname(srv, (struct sockaddr *)&addr, &len); + if (CHECK_FAIL(err)) { + perror("getsockopt"); + goto close_srv; + } + + cli = socket(family, SOCK_STREAM, 0); + if (CHECK_FAIL(cli == -1)) { + perror("socket"); + goto close_srv; + } + + err = connect(cli, (struct sockaddr *)&addr, len); + if (CHECK_FAIL(err)) { + perror("connect"); + goto close_cli; + } + + err = bpf_map_update_elem(map, &zero, &cli, 0); + if (CHECK_FAIL(err)) { + perror("bpf_map_update_elem"); + goto close_cli; + } + + err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (CHECK_FAIL(err)) { + perror("setsockopt(TCP_ULP)"); + goto close_cli; + } + + err = bpf_map_delete_elem(map, &zero); + if (CHECK_FAIL(err)) { + perror("bpf_map_delete_elem"); + goto close_cli; + } + + err = disconnect(cli); + if (CHECK_FAIL(err)) + perror("disconnect"); + +close_cli: + close(cli); +close_srv: + close(srv); +} + +static void run_tests(int family, enum bpf_map_type map_type) +{ + char test_name[MAX_TEST_NAME]; + int map; + + map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + if (CHECK_FAIL(map == -1)) { + perror("bpf_map_create"); + return; + } + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls disconnect_after_delete %s %s", + family == AF_INET ? "IPv4" : "IPv6", + map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); + if (!test__start_subtest(test_name)) + return; + + test_sockmap_ktls_disconnect_after_delete(family, map); + + close(map); +} + +void test_sockmap_ktls(void) +{ + run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP); + run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH); + run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP); + run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c new file mode 100644 index 000000000000..d7d65a700799 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -0,0 +1,1635 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +/* + * Test suite for SOCKMAP/SOCKHASH holding listening sockets. + * Covers: + * 1. BPF map operations - bpf_map_{update,lookup delete}_elem + * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map + * 3. BPF reuseport helper - bpf_sk_select_reuseport + */ + +#include <linux/compiler.h> +#include <errno.h> +#include <error.h> +#include <limits.h> +#include <netinet/in.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <sys/select.h> +#include <unistd.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" +#include "test_progs.h" +#include "test_sockmap_listen.skel.h" + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 +#define MAX_TEST_NAME 80 + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + +/* Wrappers that fail the test on error and report it. */ + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +#define xbpf_map_delete_elem(fd, key) \ + ({ \ + int __ret = bpf_map_delete_elem((fd), (key)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_delete"); \ + __ret; \ + }) + +#define xbpf_map_lookup_elem(fd, key, val) \ + ({ \ + int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_lookup"); \ + __ret; \ + }) + +#define xbpf_map_update_elem(fd, key, val, flags) \ + ({ \ + int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("map_update"); \ + __ret; \ + }) + +#define xbpf_prog_attach(prog, target, type, flags) \ + ({ \ + int __ret = \ + bpf_prog_attach((prog), (target), (type), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("prog_attach(" #type ")"); \ + __ret; \ + }) + +#define xbpf_prog_detach2(prog, target, type) \ + ({ \ + int __ret = bpf_prog_detach2((prog), (target), (type)); \ + if (__ret == -1) \ + FAIL_ERRNO("prog_detach2(" #type ")"); \ + __ret; \ + }) + +#define xpthread_create(thread, attr, func, arg) \ + ({ \ + int __ret = pthread_create((thread), (attr), (func), (arg)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_create"); \ + __ret; \ + }) + +#define xpthread_join(thread, retval) \ + ({ \ + int __ret = pthread_join((thread), (retval)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_join"); \ + __ret; \ + }) + +static int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + +static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + +static void test_insert_invalid(int family, int sotype, int mapfd) +{ + u32 key = 0; + u64 value; + int err; + + value = -1; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EINVAL) + FAIL_ERRNO("map_update: expected EINVAL"); + + value = INT_MAX; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EBADF) + FAIL_ERRNO("map_update: expected EBADF"); +} + +static void test_insert_opened(int family, int sotype, int mapfd) +{ + u32 key = 0; + u64 value; + int err, s; + + s = xsocket(family, sotype, 0); + if (s == -1) + return; + + errno = 0; + value = s; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); + + xclose(s); +} + +static void test_insert_bound(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + u32 key = 0; + u64 value; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return; + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + errno = 0; + value = s; + err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); +close: + xclose(s); +} + +static void test_insert(int family, int sotype, int mapfd) +{ + u64 value; + u32 key; + int s; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xclose(s); +} + +static void test_delete_after_insert(int family, int sotype, int mapfd) +{ + u64 value; + u32 key; + int s; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + xclose(s); +} + +static void test_delete_after_close(int family, int sotype, int mapfd) +{ + int err, s; + u64 value; + u32 key; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + xclose(s); + + errno = 0; + err = bpf_map_delete_elem(mapfd, &key); + if (!err || (errno != EINVAL && errno != ENOENT)) + /* SOCKMAP and SOCKHASH return different error codes */ + FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); +} + +static void test_lookup_after_insert(int family, int sotype, int mapfd) +{ + u64 cookie, value; + socklen_t len; + u32 key; + int s; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + len = sizeof(cookie); + xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len); + + xbpf_map_lookup_elem(mapfd, &key, &value); + + if (value != cookie) { + FAIL("map_lookup: have %#llx, want %#llx", + (unsigned long long)value, (unsigned long long)cookie); + } + + xclose(s); +} + +static void test_lookup_after_delete(int family, int sotype, int mapfd) +{ + int err, s; + u64 value; + u32 key; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + + errno = 0; + err = bpf_map_lookup_elem(mapfd, &key, &value); + if (!err || errno != ENOENT) + FAIL_ERRNO("map_lookup: expected ENOENT"); + + xclose(s); +} + +static void test_lookup_32_bit_value(int family, int sotype, int mapfd) +{ + u32 key, value32; + int err, s; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key), + sizeof(value32), 1, 0); + if (mapfd < 0) { + FAIL_ERRNO("map_create"); + goto close; + } + + key = 0; + value32 = s; + xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST); + + errno = 0; + err = bpf_map_lookup_elem(mapfd, &key, &value32); + if (!err || errno != ENOSPC) + FAIL_ERRNO("map_lookup: expected ENOSPC"); + + xclose(mapfd); +close: + xclose(s); +} + +static void test_update_existing(int family, int sotype, int mapfd) +{ + int s1, s2; + u64 value; + u32 key; + + s1 = socket_loopback(family, sotype); + if (s1 < 0) + return; + + s2 = socket_loopback(family, sotype); + if (s2 < 0) + goto close_s1; + + key = 0; + value = s1; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + value = s2; + xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST); + xclose(s2); +close_s1: + xclose(s1); +} + +/* Exercise the code path where we destroy child sockets that never + * got accept()'ed, aka orphans, when parent socket gets closed. + */ +static void test_destroy_orphan_child(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s, c; + u64 value; + u32 key; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + xconnect(c, sockaddr(&addr), len); + xclose(c); +close_srv: + xclose(s); +} + +/* Perform a passive open after removing listening socket from SOCKMAP + * to ensure that callbacks get restored properly. + */ +static void test_clone_after_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s, c; + u64 value; + u32 key; + + s = socket_loopback(family, sotype); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); + xbpf_map_delete_elem(mapfd, &key); + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + + xconnect(c, sockaddr(&addr), len); + xclose(c); +close_srv: + xclose(s); +} + +/* Check that child socket that got created while parent was in a + * SOCKMAP, but got accept()'ed only after the parent has been removed + * from SOCKMAP, gets cloned without parent psock state or callbacks. + */ +static void test_accept_after_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + const u32 zero = 0; + int err, s, c, p; + socklen_t len; + u64 value; + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s == -1) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + value = s; + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + /* Create child while parent is in sockmap */ + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + /* Remove parent from sockmap */ + err = xbpf_map_delete_elem(mapfd, &zero); + if (err) + goto close_cli; + + p = xaccept_nonblock(s, NULL, NULL); + if (p == -1) + goto close_cli; + + /* Check that child sk_user_data is not set */ + value = p; + xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +/* Check that child socket that got created and accepted while parent + * was in a SOCKMAP is cloned without parent psock state or callbacks. + */ +static void test_accept_before_delete(int family, int sotype, int mapfd) +{ + struct sockaddr_storage addr; + const u32 zero = 0, one = 1; + int err, s, c, p; + socklen_t len; + u64 value; + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s == -1) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + value = s; + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c == -1) + goto close_srv; + + /* Create & accept child while parent is in sockmap */ + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + p = xaccept_nonblock(s, NULL, NULL); + if (p == -1) + goto close_cli; + + /* Check that child sk_user_data is not set */ + value = p; + xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST); + + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +struct connect_accept_ctx { + int sockfd; + unsigned int done; + unsigned int nr_iter; +}; + +static bool is_thread_done(struct connect_accept_ctx *ctx) +{ + return READ_ONCE(ctx->done); +} + +static void *connect_accept_thread(void *arg) +{ + struct connect_accept_ctx *ctx = arg; + struct sockaddr_storage addr; + int family, socktype; + socklen_t len; + int err, i, s; + + s = ctx->sockfd; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto done; + + len = sizeof(family); + err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len); + if (err) + goto done; + + len = sizeof(socktype); + err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len); + if (err) + goto done; + + for (i = 0; i < ctx->nr_iter; i++) { + int c, p; + + c = xsocket(family, socktype, 0); + if (c < 0) + break; + + err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr)); + if (err) { + xclose(c); + break; + } + + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) { + xclose(c); + break; + } + + xclose(p); + xclose(c); + } +done: + WRITE_ONCE(ctx->done, 1); + return NULL; +} + +static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) +{ + struct connect_accept_ctx ctx = { 0 }; + struct sockaddr_storage addr; + socklen_t len; + u32 zero = 0; + pthread_t t; + int err, s; + u64 value; + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close; + + ctx.sockfd = s; + ctx.nr_iter = 1000; + + err = xpthread_create(&t, NULL, connect_accept_thread, &ctx); + if (err) + goto close; + + value = s; + while (!is_thread_done(&ctx)) { + err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + if (err) + break; + + err = xbpf_map_delete_elem(mapfd, &zero); + if (err) + break; + } + + xpthread_join(t, NULL); +close: + xclose(s); +} + +static void *listen_thread(void *arg) +{ + struct sockaddr unspec = { AF_UNSPEC }; + struct connect_accept_ctx *ctx = arg; + int err, i, s; + + s = ctx->sockfd; + + for (i = 0; i < ctx->nr_iter; i++) { + err = xlisten(s, 1); + if (err) + break; + err = xconnect(s, &unspec, sizeof(unspec)); + if (err) + break; + } + + WRITE_ONCE(ctx->done, 1); + return NULL; +} + +static void test_race_insert_listen(int family, int socktype, int mapfd) +{ + struct connect_accept_ctx ctx = { 0 }; + const u32 zero = 0; + const int one = 1; + pthread_t t; + int err, s; + u64 value; + + s = xsocket(family, socktype, 0); + if (s < 0) + return; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + if (err) + goto close; + + ctx.sockfd = s; + ctx.nr_iter = 10000; + + err = pthread_create(&t, NULL, listen_thread, &ctx); + if (err) + goto close; + + value = s; + while (!is_thread_done(&ctx)) { + err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); + /* Expecting EOPNOTSUPP before listen() */ + if (err && errno != EOPNOTSUPP) { + FAIL_ERRNO("map_update"); + break; + } + + err = bpf_map_delete_elem(mapfd, &zero); + /* Expecting no entry after unhash on connect(AF_UNSPEC) */ + if (err && errno != EINVAL && errno != ENOENT) { + FAIL_ERRNO("map_delete"); + break; + } + } + + xpthread_join(t, NULL); +close: + xclose(s); +} + +static void zero_verdict_count(int mapfd) +{ + unsigned int zero = 0; + int key; + + key = SK_DROP; + xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); + key = SK_PASS; + xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); +} + +enum redir_mode { + REDIR_INGRESS, + REDIR_EGRESS, +}; + +static const char *redir_mode_str(enum redir_mode mode) +{ + switch (mode) { + case REDIR_INGRESS: + return "ingress"; + case REDIR_EGRESS: + return "egress"; + default: + return "unknown"; + } +} + +static void redir_to_connected(int family, int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + struct sockaddr_storage addr; + int s, c0, c1, p0, p1; + unsigned int pass; + socklen_t len; + int err, n; + u64 value; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c0 = xsocket(family, sotype, 0); + if (c0 < 0) + goto close_srv; + err = xconnect(c0, sockaddr(&addr), len); + if (err) + goto close_cli0; + + p0 = xaccept_nonblock(s, NULL, NULL); + if (p0 < 0) + goto close_cli0; + + c1 = xsocket(family, sotype, 0); + if (c1 < 0) + goto close_peer0; + err = xconnect(c1, sockaddr(&addr), len); + if (err) + goto close_cli1; + + p1 = xaccept_nonblock(s, NULL, NULL); + if (p1 < 0) + goto close_cli1; + + key = 0; + value = p0; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer1; + + key = 1; + value = p1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer1; + + n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close_peer1; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close_peer1; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + + n = read(c0, &b, 1); + if (n < 0) + FAIL_ERRNO("%s: read", log_prefix); + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close_peer1: + xclose(p1); +close_cli1: + xclose(c1); +close_peer0: + xclose(p0); +close_cli0: + xclose(c0); +close_srv: + xclose(s); +} + +static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return; + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + goto detach; + + redir_to_connected(family, sotype, sock_map, verdict_map, + REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); +detach: + xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); +} + +static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); + if (err) + return; + + redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); +} + +static void redir_to_listening(int family, int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + struct sockaddr_storage addr; + int s, c, p, err, n; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_mapfd); + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + goto close_cli; + + key = 0; + value = s; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer; + + key = 1; + value = p; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_peer; + + n = write(mode == REDIR_INGRESS ? c : p, "a", 1); + if (n < 0 && errno != EACCES) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close_peer; + + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop); + if (err) + goto close_peer; + if (drop != 1) + FAIL("%s: want drop count 1, have %d", log_prefix, drop); + +close_peer: + xclose(p); +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return; + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + goto detach; + + redir_to_listening(family, sotype, sock_map, verdict_map, + REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); +detach: + xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); +} + +static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); + if (err) + return; + + redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); +} + +static void test_reuseport_select_listening(int family, int sotype, + int sock_map, int verd_map, + int reuseport_prog) +{ + struct sockaddr_storage addr; + unsigned int pass; + int s, c, err; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, + reuseport_prog); + if (s < 0) + return; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + key = 0; + value = s; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv; + err = xconnect(c, sockaddr(&addr), len); + if (err) + goto close_cli; + + if (sotype == SOCK_STREAM) { + int p; + + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + goto close_cli; + xclose(p); + } else { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = xrecv_nonblock(s, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + } + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_map, &key, &pass); + if (err) + goto close_cli; + if (pass != 1) + FAIL("want pass count 1, have %d", pass); + +close_cli: + xclose(c); +close_srv: + xclose(s); +} + +static void test_reuseport_select_connected(int family, int sotype, + int sock_map, int verd_map, + int reuseport_prog) +{ + struct sockaddr_storage addr; + int s, c0, c1, p0, err; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + s = socket_loopback_reuseport(family, sotype, reuseport_prog); + if (s < 0) + return; + + /* Populate sock_map[0] to avoid ENOENT on first connection */ + key = 0; + value = s; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + c0 = xsocket(family, sotype, 0); + if (c0 < 0) + goto close_srv; + + err = xconnect(c0, sockaddr(&addr), len); + if (err) + goto close_cli0; + + if (sotype == SOCK_STREAM) { + p0 = xaccept_nonblock(s, NULL, NULL); + if (p0 < 0) + goto close_cli0; + } else { + p0 = xsocket(family, sotype, 0); + if (p0 < 0) + goto close_cli0; + + len = sizeof(addr); + err = xgetsockname(c0, sockaddr(&addr), &len); + if (err) + goto close_cli0; + + err = xconnect(p0, sockaddr(&addr), len); + if (err) + goto close_cli0; + } + + /* Update sock_map[0] to redirect to a connected socket */ + key = 0; + value = p0; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST); + if (err) + goto close_peer0; + + c1 = xsocket(family, sotype, 0); + if (c1 < 0) + goto close_peer0; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + goto close_srv; + + errno = 0; + err = connect(c1, sockaddr(&addr), len); + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c1, &b, sizeof(b), 0); + if (n == -1) + goto close_cli1; + + n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); + err = n == -1; + } + if (!err || errno != ECONNREFUSED) + FAIL_ERRNO("connect: expected ECONNREFUSED"); + + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_map, &key, &drop); + if (err) + goto close_cli1; + if (drop != 1) + FAIL("want drop count 1, have %d", drop); + +close_cli1: + xclose(c1); +close_peer0: + xclose(p0); +close_cli0: + xclose(c0); +close_srv: + xclose(s); +} + +/* Check that redirecting across reuseport groups is not allowed. */ +static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, + int verd_map, int reuseport_prog) +{ + struct sockaddr_storage addr; + int s1, s2, c, err; + unsigned int drop; + socklen_t len; + u64 value; + u32 key; + + zero_verdict_count(verd_map); + + /* Create two listeners, each in its own reuseport group */ + s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); + if (s1 < 0) + return; + + s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); + if (s2 < 0) + goto close_srv1; + + key = 0; + value = s1; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + if (err) + goto close_srv2; + + key = 1; + value = s2; + err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + + /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ + len = sizeof(addr); + err = xgetsockname(s2, sockaddr(&addr), &len); + if (err) + goto close_srv2; + + c = xsocket(family, sotype, 0); + if (c < 0) + goto close_srv2; + + err = connect(c, sockaddr(&addr), len); + if (sotype == SOCK_DGRAM) { + char b = 'a'; + ssize_t n; + + n = xsend(c, &b, sizeof(b), 0); + if (n == -1) + goto close_cli; + + n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); + err = n == -1; + } + if (!err || errno != ECONNREFUSED) { + FAIL_ERRNO("connect: expected ECONNREFUSED"); + goto close_cli; + } + + /* Expect drop, can't redirect outside of reuseport group */ + key = SK_DROP; + err = xbpf_map_lookup_elem(verd_map, &key, &drop); + if (err) + goto close_cli; + if (drop != 1) + FAIL("want drop count 1, have %d", drop); + +close_cli: + xclose(c); +close_srv2: + xclose(s2); +close_srv1: + xclose(s1); +} + +#define TEST(fn, ...) \ + { \ + fn, #fn, __VA_ARGS__ \ + } + +static void test_ops_cleanup(const struct bpf_map *map) +{ + const struct bpf_map_def *def; + int err, mapfd; + u32 key; + + def = bpf_map__def(map); + mapfd = bpf_map__fd(map); + + for (key = 0; key < def->max_entries; key++) { + err = bpf_map_delete_elem(mapfd, &key); + if (err && errno != EINVAL && errno != ENOENT) + FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); + } +} + +static const char *family_str(sa_family_t family) +{ + switch (family) { + case AF_INET: + return "IPv4"; + case AF_INET6: + return "IPv6"; + default: + return "unknown"; + } +} + +static const char *map_type_str(const struct bpf_map *map) +{ + const struct bpf_map_def *def; + + def = bpf_map__def(map); + if (IS_ERR(def)) + return "invalid"; + + switch (def->type) { + case BPF_MAP_TYPE_SOCKMAP: + return "sockmap"; + case BPF_MAP_TYPE_SOCKHASH: + return "sockhash"; + default: + return "unknown"; + } +} + +static const char *sotype_str(int sotype) +{ + switch (sotype) { + case SOCK_DGRAM: + return "UDP"; + case SOCK_STREAM: + return "TCP"; + default: + return "unknown"; + } +} + +static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, + int family, int sotype) +{ + const struct op_test { + void (*fn)(int family, int sotype, int mapfd); + const char *name; + int sotype; + } tests[] = { + /* insert */ + TEST(test_insert_invalid), + TEST(test_insert_opened), + TEST(test_insert_bound, SOCK_STREAM), + TEST(test_insert), + /* delete */ + TEST(test_delete_after_insert), + TEST(test_delete_after_close), + /* lookup */ + TEST(test_lookup_after_insert), + TEST(test_lookup_after_delete), + TEST(test_lookup_32_bit_value), + /* update */ + TEST(test_update_existing), + /* races with insert/delete */ + TEST(test_destroy_orphan_child, SOCK_STREAM), + TEST(test_syn_recv_insert_delete, SOCK_STREAM), + TEST(test_race_insert_listen, SOCK_STREAM), + /* child clone */ + TEST(test_clone_after_delete, SOCK_STREAM), + TEST(test_accept_after_delete, SOCK_STREAM), + TEST(test_accept_before_delete, SOCK_STREAM), + }; + const char *family_name, *map_name, *sotype_name; + const struct op_test *t; + char s[MAX_TEST_NAME]; + int map_fd; + + family_name = family_str(family); + map_name = map_type_str(map); + sotype_name = sotype_str(sotype); + map_fd = bpf_map__fd(map); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; + + if (!test__start_subtest(s)) + continue; + + t->fn(family, sotype, map_fd); + test_ops_cleanup(map); + } +} + +static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family, int sotype) +{ + const struct redir_test { + void (*fn)(struct test_sockmap_listen *skel, + struct bpf_map *map, int family, int sotype); + const char *name; + } tests[] = { + TEST(test_skb_redir_to_connected), + TEST(test_skb_redir_to_listening), + TEST(test_msg_redir_to_connected), + TEST(test_msg_redir_to_listening), + }; + const char *family_name, *map_name; + const struct redir_test *t; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, + t->name); + + if (!test__start_subtest(s)) + continue; + + t->fn(skel, map, family, sotype); + } +} + +static void test_reuseport(struct test_sockmap_listen *skel, + struct bpf_map *map, int family, int sotype) +{ + const struct reuseport_test { + void (*fn)(int family, int sotype, int socket_map, + int verdict_map, int reuseport_prog); + const char *name; + int sotype; + } tests[] = { + TEST(test_reuseport_select_listening), + TEST(test_reuseport_select_connected), + TEST(test_reuseport_mixed_groups), + }; + int socket_map, verdict_map, reuseport_prog; + const char *family_name, *map_name, *sotype_name; + const struct reuseport_test *t; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + sotype_name = sotype_str(sotype); + + socket_map = bpf_map__fd(map); + verdict_map = bpf_map__fd(skel->maps.verdict_map); + reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, + sotype_name, t->name); + + if (t->sotype != 0 && t->sotype != sotype) + continue; + + if (!test__start_subtest(s)) + continue; + + t->fn(family, sotype, socket_map, verdict_map, reuseport_prog); + } +} + +static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) +{ + test_ops(skel, map, family, SOCK_STREAM); + test_ops(skel, map, family, SOCK_DGRAM); + test_redir(skel, map, family, SOCK_STREAM); + test_reuseport(skel, map, family, SOCK_STREAM); + test_reuseport(skel, map, family, SOCK_DGRAM); +} + +void test_sockmap_listen(void) +{ + struct test_sockmap_listen *skel; + + skel = test_sockmap_listen__open_and_load(); + if (!skel) { + FAIL("skeleton open/load failed"); + return; + } + + skel->bss->test_sockmap = true; + run_tests(skel, skel->maps.sock_map, AF_INET); + run_tests(skel, skel->maps.sock_map, AF_INET6); + + skel->bss->test_sockmap = false; + run_tests(skel, skel->maps.sock_hash, AF_INET); + run_tests(skel, skel->maps.sock_hash, AF_INET6); + + test_sockmap_listen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index f4cd60d6fba2..e56b52ab41da 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -188,7 +188,7 @@ static int start_server(void) }; int fd; - fd = socket(AF_INET, SOCK_STREAM, 0); + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -205,6 +205,7 @@ static int start_server(void) static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; +static volatile bool server_done = false; static void *server_thread(void *arg) { @@ -222,23 +223,24 @@ static void *server_thread(void *arg) if (CHECK_FAIL(err < 0)) { perror("Failed to listed on socket"); - return NULL; + return ERR_PTR(err); } - client_fd = accept(fd, (struct sockaddr *)&addr, &len); + while (true) { + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (client_fd == -1 && errno == EAGAIN) { + usleep(50); + continue; + } + break; + } if (CHECK_FAIL(client_fd < 0)) { perror("Failed to accept client"); - return NULL; + return ERR_PTR(err); } - /* Wait for the next connection (that never arrives) - * to keep this thread alive to prevent calling - * close() on client_fd. - */ - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { - perror("Unexpected success in second accept"); - return NULL; - } + while (!server_done) + usleep(50); close(client_fd); @@ -249,6 +251,7 @@ void test_tcp_rtt(void) { int server_fd, cgroup_fd; pthread_t tid; + void *server_res; cgroup_fd = test__join_cgroup("/tcp_rtt"); if (CHECK_FAIL(cgroup_fd < 0)) @@ -267,6 +270,11 @@ void test_tcp_rtt(void) pthread_mutex_unlock(&server_started_mtx); CHECK_FAIL(run_test(cgroup_fd, server_fd)); + + server_done = true; + CHECK_FAIL(pthread_join(tid, &server_res)); + CHECK_FAIL(IS_ERR(server_res)); + close_server_fd: close(server_fd); close_cgroup_fd: diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c new file mode 100644 index 000000000000..1e4c258de09d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include <test_progs.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <unistd.h> +#include <malloc.h> +#include <stdlib.h> + +#include "lsm.skel.h" + +char *CMD_ARGS[] = {"true", NULL}; + +int heap_mprotect(void) +{ + void *buf; + long sz; + int ret; + + sz = sysconf(_SC_PAGESIZE); + if (sz < 0) + return sz; + + buf = memalign(sz, 2 * sz); + if (buf == NULL) + return -ENOMEM; + + ret = mprotect(buf, sz, PROT_READ | PROT_WRITE | PROT_EXEC); + free(buf); + return ret; +} + +int exec_cmd(int *monitored_pid) +{ + int child_pid, child_status; + + child_pid = fork(); + if (child_pid == 0) { + *monitored_pid = getpid(); + execvp(CMD_ARGS[0], CMD_ARGS); + return -EINVAL; + } else if (child_pid > 0) { + waitpid(child_pid, &child_status, 0); + return child_status; + } + + return -EINVAL; +} + +void test_test_lsm(void) +{ + struct lsm *skel = NULL; + int err, duration = 0; + + skel = lsm__open_and_load(); + if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + goto close_prog; + + err = lsm__attach(skel); + if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + goto close_prog; + + err = exec_cmd(&skel->bss->monitored_pid); + if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + goto close_prog; + + CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n", + skel->bss->bprm_count); + + skel->bss->monitored_pid = getpid(); + + err = heap_mprotect(); + if (CHECK(errno != EPERM, "heap_mprotect", "want errno=EPERM, got %d\n", + errno)) + goto close_prog; + + CHECK(skel->bss->mprotect_count != 1, "mprotect_count", + "mprotect_count = %d\n", skel->bss->mprotect_count); + +close_prog: + lsm__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 1f6ccdaed1ac..781c8d11604b 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -55,31 +55,40 @@ void test_trampoline_count(void) /* attach 'allowed' 40 trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + obj = NULL; goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; inst[i].obj = obj; + obj = NULL; if (rand() % 2) { - link = load(obj, fentry_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + link = load(inst[i].obj, fentry_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + link = NULL; goto cleanup; + } inst[i].link_fentry = link; } else { - link = load(obj, fexit_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + link = load(inst[i].obj, fexit_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + link = NULL; goto cleanup; + } inst[i].link_fexit = link; } } /* and try 1 extra.. */ obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + obj = NULL; goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -104,7 +113,9 @@ void test_trampoline_count(void) cleanup_extra: bpf_object__close(obj); cleanup: - while (--i) { + if (i >= MAX_TRAMP_PROGS) + i = MAX_TRAMP_PROGS - 1; + for (; i >= 0; i--) { bpf_link__destroy(inst[i].link_fentry); bpf_link__destroy(inst[i].link_fexit); bpf_object__close(inst[i].obj); diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c new file mode 100644 index 000000000000..72310cfc6474 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_vmlinux.skel.h" + +#define MY_TV_NSEC 1337 + +static void nsleep() +{ + struct timespec ts = { .tv_nsec = MY_TV_NSEC }; + + (void)syscall(__NR_nanosleep, &ts, NULL); +} + +void test_vmlinux(void) +{ + int duration = 0, err; + struct test_vmlinux* skel; + struct test_vmlinux__bss *bss; + + skel = test_vmlinux__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + err = test_vmlinux__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger everything */ + nsleep(); + + CHECK(!bss->tp_called, "tp", "not called\n"); + CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); + CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); + CHECK(!bss->kprobe_called, "kprobe", "not called\n"); + CHECK(!bss->fentry_called, "fentry", "not called\n"); + +cleanup: + test_vmlinux__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c new file mode 100644 index 000000000000..05b294d6b923 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +#define IFINDEX_LO 1 +#define XDP_FLAGS_REPLACE (1U << 4) + +void test_xdp_attach(void) +{ + struct bpf_object *obj1, *obj2, *obj3; + const char *file = "./test_xdp.o"; + int err, fd1, fd2, fd3; + __u32 duration = 0; + DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, + .old_fd = -1); + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); + if (CHECK_FAIL(err)) + return; + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); + if (CHECK_FAIL(err)) + goto out_1; + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); + if (CHECK_FAIL(err)) + goto out_2; + + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, + &opts); + if (CHECK(err, "load_ok", "initial load failed")) + goto out_close; + + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, + &opts); + if (CHECK(!err, "load_fail", "load with expected id didn't fail")) + goto out; + + opts.old_fd = fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts); + if (CHECK(err, "replace_ok", "replace valid old_fd failed")) + goto out; + + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts); + if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail")) + goto out; + + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail")) + goto out; + + opts.old_fd = fd2; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts); + if (CHECK(err, "remove_ok", "remove valid old_fd failed")) + goto out; + +out: + bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); +out_close: + bpf_object__close(obj3); +out_2: + bpf_object__close(obj2); +out_1: + bpf_object__close(obj1); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 6b56bdc73ebc..a0f688c37023 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -4,17 +4,51 @@ #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" +struct meta { + int ifindex; + int pkt_len; +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int duration = 0; + struct meta *meta = (struct meta *)data; + struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + + if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), + "check_size", "size %u < %zu\n", + size, sizeof(pkt_v4) + sizeof(*meta))) + return; + + if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + return; + + if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", + "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + return; + + if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), + "check_packet_content", "content not the same\n")) + return; + + *(bool *)ctx = true; +} + void test_xdp_bpf2bpf(void) { __u32 duration = 0, retval, size; char buf[128]; int err, pkt_fd, map_fd; + bool passed = false; struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; + struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -27,11 +61,21 @@ void test_xdp_bpf2bpf(void) bpf_map_update_elem(map_fd, &key4, &value4, 0); /* Load trace program */ - opts.attach_prog_fd = pkt_fd, - ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts); + ftrace_skel = test_xdp_bpf2bpf__open(); if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) goto out; + /* Demonstrate the bpf_program__set_attach_target() API rather than + * the load with options, i.e. opts.attach_prog_fd. + */ + prog = ftrace_skel->progs.trace_on_entry; + bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY); + bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); + + prog = ftrace_skel->progs.trace_on_exit; + bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT); + bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); + err = test_xdp_bpf2bpf__load(ftrace_skel); if (CHECK(err, "__load", "ftrace skeleton failed\n")) goto out; @@ -40,6 +84,14 @@ void test_xdp_bpf2bpf(void) if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) goto out; + /* Set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto out; + /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); @@ -50,6 +102,15 @@ void test_xdp_bpf2bpf(void) err, errno, retval, size)) goto out; + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out; + + CHECK_FAIL(!passed); + /* Verify test results */ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), "result", "fentry failed err %llu\n", @@ -60,6 +121,8 @@ void test_xdp_bpf2bpf(void) "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); out: + if (pb) + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index b631fb5032d2..3fb4260570b1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -6,14 +6,24 @@ * the kernel BPF logic. */ +#include <stddef.h> #include <linux/bpf.h> #include <linux/types.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; +int stg_result = 0; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + #define DCTCP_MAX_ALPHA 1024U struct dctcp { @@ -43,12 +53,18 @@ void BPF_PROG(dctcp_init, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); + int *stg; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->loss_cwnd = 0; ca->ce_state = 0; + stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0); + if (stg) { + stg_result = *stg; + bpf_sk_storage_delete(&sk_stg_map, (void *)tp); + } dctcp_reset(tp, ca); } diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index d4a02fe44a12..31975c96e2c9 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -13,7 +13,7 @@ enum e1 { enum e2 { C = 100, - D = -100, + D = 4294967295, E = 0, }; diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 38d3a82144ca..9365b686f84b 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index c329fccf9842..98e1efe14549 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c index 92f3fa47cf40..85c0b516d6ee 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct sk_buff { unsigned int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index 348109b9ea07..bd1e17d8024c 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 8f48a909f079..a46a264ce24e 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -4,7 +4,7 @@ #include <stdbool.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; struct { diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c new file mode 100644 index 000000000000..a4e3c223028d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +int monitored_pid = 0; +int mprotect_count = 0; +int bprm_count = 0; + +SEC("lsm/file_mprotect") +int BPF_PROG(test_int_hook, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot, int ret) +{ + if (ret != 0) + return ret; + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + int is_heap = 0; + + is_heap = (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk); + + if (is_heap && monitored_pid == pid) { + mprotect_count++; + ret = -EPERM; + } + + return ret; +} + +SEC("lsm/bprm_committed_creds") +int BPF_PROG(test_void_hook, struct linux_binprm *bprm) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + + if (monitored_pid == pid) + bprm_count++; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c new file mode 100644 index 000000000000..8b7466a15c6b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/modify_return.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static int sequence = 0; +__s32 input_retval = 0; + +__u64 fentry_result = 0; +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(fentry_test, int a, __u64 b) +{ + sequence++; + fentry_result = (sequence == 1); + return 0; +} + +__u64 fmod_ret_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) +{ + sequence++; + /* This is the first fmod_ret program, the ret passed should be 0 */ + fmod_ret_result = (sequence == 2 && ret == 0); + return input_retval; +} + +__u64 fexit_result = 0; +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, __u64 b, int ret) +{ + sequence++; + /* If the input_reval is non-zero a successful modification should have + * occurred. + */ + if (input_retval) + fexit_result = (sequence == 3 && ret == input_retval); + else + fexit_result = (sequence == 3 && ret == 4); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index a5c6d5903b22..ca283af80d4e 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -12,7 +12,6 @@ int bpf_prog1(struct __sk_buff *skb) __u32 lport = skb->local_port; __u32 rport = skb->remote_port; __u8 *d = data; - __u32 len = (__u32) data_end - (__u32) data; int err; if (data + 10 > data_end) { diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index dd8fae6660ab..8056a4c6d918 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -4,6 +4,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> int kprobe_res = 0; int kretprobe_res = 0; @@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx) } SEC("kretprobe/sys_nanosleep") -int handle_kretprobe(struct pt_regs *ctx) +int BPF_KRETPROBE(handle_kretprobe) { kretprobe_res = 2; return 0; diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c new file mode 100644 index 000000000000..77e47b9e4446 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int calls = 0; +int alt_calls = 0; + +SEC("cgroup_skb/egress1") +int egress(struct __sk_buff *skb) +{ + __sync_fetch_and_add(&calls, 1); + return 1; +} + +SEC("cgroup_skb/egress2") +int egress_alt(struct __sk_buff *skb) +{ + __sync_fetch_and_add(&alt_calls, 1); + return 1; +} + +char _license[] SEC("license") = "GPL"; + diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c new file mode 100644 index 000000000000..8941a41c2a55 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define MAX_STACK_RAWTP 10 + +SEC("raw_tracepoint/sys_enter") +int bpf_prog2(void *ctx) +{ + __u64 stack[MAX_STACK_RAWTP]; + int error; + + /* set all the flags which should return -EINVAL */ + error = bpf_get_stack(ctx, stack, 0, -1); + if (error < 0) + goto loop; + + return error; +loop: + while (1) { + error++; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index dd7a4d3dbc0d..1319be1c54ba 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -68,7 +68,7 @@ static struct foo struct3 = { bpf_map_update_elem(&result_##map, &key, var, 0); \ } while (0) -SEC("static_data_load") +SEC("classifier/static_data_load") int load_static_data(struct __sk_buff *skb) { static const __u64 bar = ~0; diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c new file mode 100644 index 000000000000..bbf2a5264dc0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int in = 0; +int out = 0; + +SEC("raw_tp/sys_enter") +int raw_tp_prog(const void *ctx) +{ + out = in; + return 0; +} + +SEC("tp_btf/sys_enter") +int tp_btf_prog(const void *ctx) +{ + out = in; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c new file mode 100644 index 000000000000..1dca70a6de2f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> + +static volatile struct { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +} res; + +SEC("raw_tracepoint/sys_enter") +int trace(void *ctx) +{ + __u64 ns_pid_tgid, expected_pid; + struct bpf_pidns_info nsdata; + __u32 key = 0; + + if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata, + sizeof(struct bpf_pidns_info))) + return 0; + + ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid; + expected_pid = res.user_pid_tgid; + + if (expected_pid != ns_pid_tgid) + return 0; + + res.pid_tgid = ns_pid_tgid; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index bfe9fbcb9684..56a50b25cd33 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -6,7 +6,6 @@ #include <linux/ptrace.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_trace_helpers.h" struct task_struct; @@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) } SEC("kretprobe/__set_task_comm") -int BPF_KRETPROBE(prog2, - struct task_struct *tsk, const char *buf, bool exec, - int ret) +int BPF_KRETPROBE(prog2, int ret) { - return !PT_REGS_PARM1(ctx) && ret; + return ret; } SEC("raw_tp/task_rename") diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c new file mode 100644 index 000000000000..a1ccc831c882 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <stddef.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int valid = 0; +int required_size_out = 0; +int written_stack_out = 0; +int written_global_out = 0; + +struct { + __u64 _a; + __u64 _b; + __u64 _c; +} fpbe[30] = {0}; + +SEC("perf_event") +int perf_branches(void *ctx) +{ + __u64 entries[4 * 3] = {0}; + int required_size, written_stack, written_global; + + /* write to stack */ + written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0); + /* ignore spurious events */ + if (!written_stack) + return 1; + + /* get required size */ + required_size = bpf_read_branch_records(ctx, NULL, 0, + BPF_F_GET_BRANCH_RECORDS_SIZE); + + written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0); + /* ignore spurious events */ + if (!written_global) + return 1; + + required_size_out = required_size; + written_stack_out = written_stack; + written_global_out = written_global; + valid = 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index ebfcc9f50c35..ad59c4c9aba8 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -4,7 +4,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_tracing.h> struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index d556b1572cc6..89b3532ccc75 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,7 +7,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_trace_helpers.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c new file mode 100644 index 000000000000..8f530843b4da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Cloudflare Ltd. +// Copyright (c) 2020 Isovalent, Inc. + +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ +static inline struct bpf_sock_tuple * +get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct bpf_sock_tuple *result; + struct ethhdr *eth; + __u64 tuple_len; + __u8 proto = 0; + __u64 ihl_len; + + eth = (struct ethhdr *)(data); + if (eth + 1 > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); + + if (iph + 1 > data_end) + return NULL; + if (iph->ihl != 5) + /* Options are not supported */ + return NULL; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); + + if (ip6h + 1 > data_end) + return NULL; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = false; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } else { + return (struct bpf_sock_tuple *)data; + } + + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) + return NULL; + + *tcp = (proto == IPPROTO_TCP); + return result; +} + +static inline int +handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) +{ + struct bpf_sock_tuple ln = {0}; + struct bpf_sock *sk; + size_t tuple_len; + int ret; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) + return TC_ACT_SHOT; + + sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); + if (sk) + goto assign; + + if (ipv4) { + if (tuple->ipv4.dport != bpf_htons(4321)) + return TC_ACT_OK; + + ln.ipv4.daddr = bpf_htonl(0x7f000001); + ln.ipv4.dport = bpf_htons(1234); + + sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4), + BPF_F_CURRENT_NETNS, 0); + } else { + if (tuple->ipv6.dport != bpf_htons(4321)) + return TC_ACT_OK; + + /* Upper parts of daddr are already zero. */ + ln.ipv6.daddr[3] = bpf_htonl(0x1); + ln.ipv6.dport = bpf_htons(1234); + + sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6), + BPF_F_CURRENT_NETNS, 0); + } + + /* workaround: We can't do a single socket lookup here, because then + * the compiler will likely spill tuple_len to the stack. This makes it + * lose all bounds information in the verifier, which then rejects the + * call as unsafe. + */ + if (!sk) + return TC_ACT_SHOT; + +assign: + ret = bpf_sk_assign(skb, sk, 0); + bpf_sk_release(sk); + return ret; +} + +static inline int +handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) +{ + struct bpf_sock_tuple ln = {0}; + struct bpf_sock *sk; + size_t tuple_len; + int ret; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) + return TC_ACT_SHOT; + + sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); + if (sk) { + if (sk->state != BPF_TCP_LISTEN) + goto assign; + bpf_sk_release(sk); + } + + if (ipv4) { + if (tuple->ipv4.dport != bpf_htons(4321)) + return TC_ACT_OK; + + ln.ipv4.daddr = bpf_htonl(0x7f000001); + ln.ipv4.dport = bpf_htons(1234); + + sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4), + BPF_F_CURRENT_NETNS, 0); + } else { + if (tuple->ipv6.dport != bpf_htons(4321)) + return TC_ACT_OK; + + /* Upper parts of daddr are already zero. */ + ln.ipv6.daddr[3] = bpf_htonl(0x1); + ln.ipv6.dport = bpf_htons(1234); + + sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6), + BPF_F_CURRENT_NETNS, 0); + } + + /* workaround: We can't do a single socket lookup here, because then + * the compiler will likely spill tuple_len to the stack. This makes it + * lose all bounds information in the verifier, which then rejects the + * call as unsafe. + */ + if (!sk) + return TC_ACT_SHOT; + + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + return TC_ACT_SHOT; + } + +assign: + ret = bpf_sk_assign(skb, sk, 0); + bpf_sk_release(sk); + return ret; +} + +SEC("classifier/sk_assign_test") +int bpf_sk_assign_test(struct __sk_buff *skb) +{ + struct bpf_sock_tuple *tuple, ln = {0}; + bool ipv4 = false; + bool tcp = false; + int tuple_len; + int ret = 0; + + tuple = get_tuple(skb, &ipv4, &tcp); + if (!tuple) + return TC_ACT_SHOT; + + /* Note that the verifier socket return type for bpf_skc_lookup_tcp() + * differs from bpf_sk_lookup_udp(), so even though the C-level type is + * the same here, if we try to share the implementations they will + * fail to verify because we're crossing pointer types. + */ + if (tcp) + ret = handle_tcp(skb, tuple, ipv4); + else + ret = handle_udp(skb, tuple, ipv4); + + return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; +} diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 202de3938494..b02ea589ce7e 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -23,6 +23,8 @@ int process(struct __sk_buff *skb) return 1; if (skb->gso_segs != 8) return 1; + if (skb->gso_size != 10) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c new file mode 100644 index 000000000000..a3a366c57ce1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare + +#include <errno.h> +#include <stdbool.h> +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, unsigned int); +} verdict_map SEC(".maps"); + +static volatile bool test_sockmap; /* toggled by user-space */ + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + unsigned int *count; + __u32 zero = 0; + int verdict; + + if (test_sockmap) + verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0); + else + verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0); + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +SEC("sk_msg") +int prog_msg_verdict(struct sk_msg_md *msg) +{ + unsigned int *count; + __u32 zero = 0; + int verdict; + + if (test_sockmap) + verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0); + else + verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0); + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +SEC("sk_reuseport") +int prog_reuseport(struct sk_reuseport_md *reuse) +{ + unsigned int *count; + int err, verdict; + __u32 zero = 0; + + if (test_sockmap) + err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0); + else + err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0); + verdict = err ? SK_DROP : SK_PASS; + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c index e51e6e3a81c2..f030e469d05b 100644 --- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -2,7 +2,8 @@ #include <stdbool.h> #include <stddef.h> #include <linux/bpf.h> -#include "bpf_trace_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> struct task_struct; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c new file mode 100644 index 000000000000..5611b564d3b1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <asm/unistd.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MY_TV_NSEC 1337 + +bool tp_called = false; +bool raw_tp_called = false; +bool tp_btf_called = false; +bool kprobe_called = false; +bool fentry_called = false; + +SEC("tp/syscalls/sys_enter_nanosleep") +int handle__tp(struct trace_event_raw_sys_enter *args) +{ + struct __kernel_timespec *ts; + + if (args->id != __NR_nanosleep) + return 0; + + ts = (void *)args->args[0]; + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_called = true; + return 0; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + raw_tp_called = true; + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) +{ + struct __kernel_timespec *ts; + + if (id != __NR_nanosleep) + return 0; + + ts = (void *)PT_REGS_PARM1_CORE(regs); + if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + return 0; + + tp_btf_called = true; + return 0; +} + +SEC("kprobe/hrtimer_nanosleep") +int BPF_KPROBE(handle__kprobe, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + kprobe_called = true; + return 0; +} + +SEC("fentry/hrtimer_nanosleep") +int BPF_PROG(handle__fentry, + ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +{ + if (rqtp == MY_TV_NSEC) + fentry_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index cb8a04ab7a78..a038e827f850 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> +#include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -#include "bpf_trace_helpers.h" + +char _license[] SEC("license") = "GPL"; struct net_device { /* Structure does not need to contain all entries, @@ -27,16 +29,38 @@ struct xdp_buff { struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); +struct meta { + int ifindex; + int pkt_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + __u64 test_result_fentry = 0; -SEC("fentry/_xdp_tx_iptunnel") +SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { + struct meta meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + meta.ifindex = xdp->rxq->dev->ifindex; + meta.pkt_len = data_end - data; + bpf_xdp_output(xdp, &perf_buf_map, + ((__u64) meta.pkt_len << 32) | + BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + test_result_fentry = xdp->rxq->dev->ifindex; return 0; } __u64 test_result_fexit = 0; -SEC("fexit/_xdp_tx_iptunnel") +SEC("fexit/FUNC") int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret) { test_result_fexit = ret; diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py new file mode 100644 index 000000000000..4fed2dc25c0a --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool.py @@ -0,0 +1,178 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020 SUSE LLC. + +import collections +import functools +import json +import os +import socket +import subprocess +import unittest + + +# Add the source tree of bpftool and /usr/local/sbin to PATH +cur_dir = os.path.dirname(os.path.realpath(__file__)) +bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..", + "tools", "bpf", "bpftool")) +os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"] + + +class IfaceNotFoundError(Exception): + pass + + +class UnprivilegedUserError(Exception): + pass + + +def _bpftool(args, json=True): + _args = ["bpftool"] + if json: + _args.append("-j") + _args.extend(args) + + return subprocess.check_output(_args) + + +def bpftool(args): + return _bpftool(args, json=False).decode("utf-8") + + +def bpftool_json(args): + res = _bpftool(args) + return json.loads(res) + + +def get_default_iface(): + for iface in socket.if_nameindex(): + if iface[1] != "lo": + return iface[1] + raise IfaceNotFoundError("Could not find any network interface to probe") + + +def default_iface(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + iface = get_default_iface() + return f(*args, iface, **kwargs) + return wrapper + + +class TestBpftool(unittest.TestCase): + @classmethod + def setUpClass(cls): + if os.getuid() != 0: + raise UnprivilegedUserError( + "This test suite needs root privileges") + + @default_iface + def test_feature_dev_json(self, iface): + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + expected_keys = [ + "syscall_config", + "program_types", + "map_types", + "helpers", + "misc", + ] + + res = bpftool_json(["feature", "probe", "dev", iface]) + # Check if the result has all expected keys. + self.assertCountEqual(res.keys(), expected_keys) + # Check if unexpected helpers are not included in helpers probes + # result. + for helpers in res["helpers"].values(): + for unexpected_helper in unexpected_helpers: + self.assertNotIn(unexpected_helper, helpers) + + def test_feature_kernel(self): + test_cases = [ + bpftool_json(["feature", "probe", "kernel"]), + bpftool_json(["feature", "probe"]), + bpftool_json(["feature"]), + ] + unexpected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + expected_keys = [ + "syscall_config", + "system_config", + "program_types", + "map_types", + "helpers", + "misc", + ] + + for tc in test_cases: + # Check if the result has all expected keys. + self.assertCountEqual(tc.keys(), expected_keys) + # Check if unexpected helpers are not included in helpers probes + # result. + for helpers in tc["helpers"].values(): + for unexpected_helper in unexpected_helpers: + self.assertNotIn(unexpected_helper, helpers) + + def test_feature_kernel_full(self): + test_cases = [ + bpftool_json(["feature", "probe", "kernel", "full"]), + bpftool_json(["feature", "probe", "full"]), + ] + expected_helpers = [ + "bpf_probe_write_user", + "bpf_trace_printk", + ] + + for tc in test_cases: + # Check if expected helpers are included at least once in any + # helpers list for any program type. Unfortunately we cannot assume + # that they will be included in all program types or a specific + # subset of programs. It depends on the kernel version and + # configuration. + found_helpers = False + + for helpers in tc["helpers"].values(): + if all(expected_helper in helpers + for expected_helper in expected_helpers): + found_helpers = True + break + + self.assertTrue(found_helpers) + + def test_feature_kernel_full_vs_not_full(self): + full_res = bpftool_json(["feature", "probe", "full"]) + not_full_res = bpftool_json(["feature", "probe"]) + not_full_set = set() + full_set = set() + + for helpers in full_res["helpers"].values(): + for helper in helpers: + full_set.add(helper) + + for helpers in not_full_res["helpers"].values(): + for helper in helpers: + not_full_set.add(helper) + + self.assertCountEqual(full_set - not_full_set, + {"bpf_probe_write_user", "bpf_trace_printk"}) + self.assertCountEqual(not_full_set - full_set, set()) + + def test_feature_macros(self): + expected_patterns = [ + r"/\*\*\* System call availability \*\*\*/", + r"#define HAVE_BPF_SYSCALL", + r"/\*\*\* eBPF program types \*\*\*/", + r"#define HAVE.*PROG_TYPE", + r"/\*\*\* eBPF map types \*\*\*/", + r"#define HAVE.*MAP_TYPE", + r"/\*\*\* eBPF helper functions \*\*\*/", + r"#define HAVE.*HELPER", + r"/\*\*\* eBPF misc features \*\*\*/", + ] + + res = bpftool(["feature", "probe", "macros"]) + for pattern in expected_patterns: + self.assertRegex(res, pattern) diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh new file mode 100755 index 000000000000..66690778e36d --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020 SUSE LLC. + +python3 -m unittest -v test_bpftool.TestBpftool diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c new file mode 100644 index 000000000000..ed253f252cd0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ +#define _GNU_SOURCE +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sched.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include "test_progs.h" + +#define CHECK_NEWNS(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + printf("%s:PASS:%s\n", __func__, tag); \ + } \ + __ret; \ +}) + +struct bss { + __u64 dev; + __u64 ino; + __u64 pid_tgid; + __u64 user_pid_tgid; +}; + +int main(int argc, char **argv) +{ + pid_t pid; + int exit_code = 1; + struct stat st; + + printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n"); + + if (stat("/proc/self/ns/pid", &st)) { + perror("stat failed on /proc/self/ns/pid ns\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS), + "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno)) + return exit_code; + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + + usleep(5); + waitpid(pid, &status, 0); + return 0; + } else { + + pid = fork(); + if (pid == -1) { + perror("Fork() failed\n"); + printf("%s:FAILED\n", argv[0]); + return exit_code; + } + + if (pid > 0) { + int status; + waitpid(pid, &status, 0); + return 0; + } else { + if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL), + "Unmounting proc", "Cannot umount proc! errno=%d\n", errno)) + return exit_code; + + if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL), + "Mounting proc", "Cannot mount proc! errno=%d\n", errno)) + return exit_code; + + const char *probe_name = "raw_tracepoint/sys_enter"; + const char *file = "test_ns_current_pid_tgid.o"; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + int exit_code = 1; + int err, key = 0; + struct bss bss; + struct stat st; + __u64 id; + + obj = bpf_object__open_file(file, NULL); + if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return exit_code; + + err = bpf_object__load(obj); + if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); + if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n", + probe_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + pid_t tid = syscall(SYS_gettid); + pid_t pid = getpid(); + + id = (__u64) tid << 32 | pid; + bss.user_pid_tgid = id; + + if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st), + "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno)) + goto cleanup; + + bss.dev = st.st_dev; + bss.ino = st.st_ino; + + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); + if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger some syscalls */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); + if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err)) + goto cleanup; + + if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) + goto cleanup; + + exit_code = 0; + printf("%s:PASS\n", argv[0]); +cleanup: + if (!link) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); + } + } +} diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 02eae1e864c2..c6766b2cff85 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data) /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (i < 2 && !err) { - printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n", - i, sfd[i]); - goto out_sockmap; - } else if (i >= 2 && err) { + if (err) { printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index bab1e6f1d8f1..b521e0a512b6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1,11 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Facebook */ +#define _GNU_SOURCE #include "test_progs.h" #include "cgroup_helpers.h" #include "bpf_rlimit.h" #include <argp.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> #include <string.h> +#include <execinfo.h> /* backtrace */ /* defined in test_progs.h */ struct test_env env = {}; @@ -27,6 +32,20 @@ struct prog_test_def { int old_error_cnt; }; +/* Override C runtime library's usleep() implementation to ensure nanosleep() + * is always called. Usleep is frequently used in selftests as a way to + * trigger kprobe and tracepoints. + */ +int usleep(useconds_t usec) +{ + struct timespec ts = { + .tv_sec = usec / 1000000, + .tv_nsec = (usec % 1000000) * 1000, + }; + + return syscall(__NR_nanosleep, &ts, NULL); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; @@ -74,6 +93,34 @@ static void skip_account(void) } } +static void stdio_restore(void); + +/* A bunch of tests set custom affinity per-thread and/or per-process. Reset + * it after each test/sub-test. + */ +static void reset_affinity() { + + cpu_set_t cpuset; + int i, err; + + CPU_ZERO(&cpuset); + for (i = 0; i < env.nr_cpus; i++) + CPU_SET(i, &cpuset); + + err = sched_setaffinity(0, sizeof(cpuset), &cpuset); + if (err < 0) { + stdio_restore(); + fprintf(stderr, "Failed to reset process affinity: %d!\n", err); + exit(-1); + } + err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + if (err < 0) { + stdio_restore(); + fprintf(stderr, "Failed to reset thread affinity: %d!\n", err); + exit(-1); + } +} + void test__end_subtest() { struct prog_test_def *test = env.test; @@ -91,6 +138,8 @@ void test__end_subtest() test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); + reset_affinity(); + free(test->subtest_name); test->subtest_name = NULL; } @@ -196,7 +245,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) map = bpf_object__find_map_by_name(obj, name); if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); + fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name); test__fail(); return -1; } @@ -367,7 +416,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, { if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) return 0; - vprintf(format, args); + vfprintf(stdout, format, args); return 0; } @@ -408,7 +457,7 @@ err: int parse_num_list(const char *s, struct test_selector *sel) { - int i, set_len = 0, num, start = 0, end = -1; + int i, set_len = 0, new_len, num, start = 0, end = -1; bool *set = NULL, *tmp, parsing_end = false; char *next; @@ -443,18 +492,19 @@ int parse_num_list(const char *s, struct test_selector *sel) return -EINVAL; if (end + 1 > set_len) { - set_len = end + 1; - tmp = realloc(set, set_len); + new_len = end + 1; + tmp = realloc(set, new_len); if (!tmp) { free(set); return -ENOMEM; } + for (i = set_len; i < start; i++) + tmp[i] = false; set = tmp; + set_len = new_len; } - for (i = start; i <= end; i++) { + for (i = start; i <= end; i++) set[i] = true; - } - } if (!set) @@ -613,10 +663,27 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - printf("Switching to flavor '%s' subdirectory...\n", flavor); + fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); return chdir(flavor); } +#define MAX_BACKTRACE_SZ 128 +void crash_handler(int signum) +{ + void *bt[MAX_BACKTRACE_SZ]; + size_t sz; + + sz = backtrace(bt, ARRAY_SIZE(bt)); + + if (env.test) + dump_test_log(env.test, true); + if (env.stdout) + stdio_restore(); + + fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); + backtrace_symbols_fd(bt, sz, STDERR_FILENO); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -624,8 +691,14 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; + struct sigaction sigact = { + .sa_handler = crash_handler, + .sa_flags = SA_RESETHAND, + }; int err, i; + sigaction(SIGSEGV, &sigact, NULL); + err = argp_parse(&argp, argc, argv, 0, NULL, &env); if (err) return err; @@ -639,6 +712,12 @@ int main(int argc, char **argv) srand(time(NULL)); env.jit_enabled = is_jit_enabled(); + env.nr_cpus = libbpf_num_possible_cpus(); + if (env.nr_cpus < 0) { + fprintf(stderr, "Failed to get number of CPUs: %d!\n", + env.nr_cpus); + return -1; + } stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { @@ -669,12 +748,13 @@ int main(int argc, char **argv) test->test_num, test->test_name, test->error_cnt ? "FAIL" : "OK"); + reset_affinity(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } stdio_restore(); - printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); free(env.test_selector.blacklist.strs); free(env.test_selector.whitelist.strs); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index bcfa9ef23fda..f4aff6b8284b 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -71,6 +71,7 @@ struct test_env { FILE *stderr; char *log_buf; size_t log_cnt; + int nr_cpus; int succ_cnt; /* successful tests */ int sub_succ_cnt; /* successful sub-tests */ @@ -109,10 +110,10 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%s ", __func__, tag); \ - printf(format); \ + fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \ + fprintf(stdout, ##format); \ } else { \ - printf("%s:PASS:%s %d nsec\n", \ + fprintf(stdout, "%s:PASS:%s %d nsec\n", \ __func__, tag, duration); \ } \ errno = __save_errno; \ @@ -124,7 +125,7 @@ extern struct ipv6_packet pkt_v6; int __save_errno = errno; \ if (__ret) { \ test__fail(); \ - printf("%s:FAIL:%d\n", __func__, __LINE__); \ + fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \ } \ errno = __save_errno; \ __ret; \ diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7f989b3e4e22..4d0e913bbb22 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -4,12 +4,15 @@ #include <string.h> #include <assert.h> #include <errno.h> +#include <fcntl.h> #include <poll.h> #include <unistd.h> #include <linux/perf_event.h> #include <sys/mman.h> #include "trace_helpers.h" +#define DEBUGFS "/sys/kernel/debug/tracing/" + #define MAX_SYMS 300000 static struct ksym syms[MAX_SYMS]; static int sym_cnt; @@ -86,3 +89,23 @@ long ksym_get_addr(const char *name) return 0; } + +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[4096]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 0383c9b8adc1..25ef597dd03f 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,5 +12,6 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); +void read_trace_pipe(void); #endif diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index d55f476f2237..4d0d09574bf4 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -257,17 +257,15 @@ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] */ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* no-op or OOB pointer computation */ + /* error on OOB pointer computation */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), /* exit */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr = "R0 unbounded memory access", + .errstr = "value 72057594021150720 makes map_value pointer be out of bounds", .result = REJECT }, { @@ -299,17 +297,15 @@ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] */ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* no-op or OOB pointer computation */ + /* error on OOB pointer computation */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), /* exit */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr = "R0 unbounded memory access", + .errstr = "value 72057594021150720 makes map_value pointer be out of bounds", .result = REJECT }, { @@ -411,16 +407,14 @@ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), /* r1 = 0xffff'fffe (NOT 0!) */ BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), - /* computes OOB pointer */ + /* error on computing OOB pointer */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), /* exit */ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", + .errstr = "math between map_value pointer and 4294967294 is not allowed", .result = REJECT, }, { @@ -506,3 +500,42 @@ .errstr = "map_value pointer and 1000000000000", .result = REJECT }, +{ + "bounds check mixed 32bit and 64bit arithmatic. test1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + /* r1 = 0xffffFFFF00000001 */ + BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3), + /* check ALU64 op keeps 32bit bounds */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_JMP_A(1), + /* invalid ldx if bounds are lost above */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT +}, +{ + "bounds check mixed 32bit and 64bit arithmatic. test2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + /* r1 = 0xffffFFFF00000001 */ + BPF_MOV64_IMM(BPF_REG_2, 3), + /* r1 = 0x2 */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1), + /* check ALU32 op zero extends 64bit bounds */ + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1), + BPF_JMP_A(1), + /* invalid ldx if bounds are lost above */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT +}, diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c index f24d50f09dbe..69b048cf46d9 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c +++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c @@ -9,17 +9,17 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2), BPF_MOV64_IMM(BPF_REG_4, 256), BPF_EMIT_CALL(BPF_FUNC_get_stack), BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), - BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), + BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16), BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), @@ -29,7 +29,7 @@ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index 92762c08f5e3..93d6b1641481 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -91,3 +91,108 @@ .result = REJECT, .errstr = "variable ctx access var_off=(0x0; 0x4)", }, +{ + "pass ctx or null check, 1: ctx", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, + .result = ACCEPT, +}, +{ + "pass ctx or null check, 2: null", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, + .result = ACCEPT, +}, +{ + "pass ctx or null check, 3: 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, + .result = REJECT, + .errstr = "R1 type=inv expected=ctx", +}, +{ + "pass ctx or null check, 4: ctx - const", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", +}, +{ + "pass ctx or null check, 5: null (connect)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + .expected_attach_type = BPF_CGROUP_INET4_CONNECT, + .result = ACCEPT, +}, +{ + "pass ctx or null check, 6: null (bind)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_netns_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ACCEPT, +}, +{ + "pass ctx or null check, 7: ctx (bind)", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ACCEPT, +}, +{ + "pass ctx or null check, 8: null (bind)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = REJECT, + .errstr = "R1 type=inv expected=ctx", +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index d438193804b2..2e16b8e268f2 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1011,6 +1011,53 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write gso_size from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=176 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_size from CLS", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ "check wire_len is not readable by sockets", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh index 5ba5bef44d5b..bdffe698e1d1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh @@ -45,6 +45,7 @@ ALL_TESTS=" blackhole_ipv6 " NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -123,7 +124,7 @@ blackhole_ipv4() skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \ action pass - ip -4 route show 198.51.100.0/30 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30 check_err $? "route not marked as offloaded when should" ping_do $h1 198.51.100.1 @@ -147,7 +148,7 @@ blackhole_ipv6() skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \ ip_proto icmpv6 action pass - ip -6 route show 2001:db8:2::/120 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120 check_err $? "route not marked as offloaded when should" ping6_do $h1 2001:db8:2::1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh new file mode 100755 index 000000000000..26044e397157 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap ACL drops functionality over mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ingress_flow_action_drop_test + egress_flow_action_drop_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ingress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101 + + log_test "ingress_flow_action_drop" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +egress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102 + + log_test "egress_flow_action_drop" + + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index 58cdbfb608e9..e7aecb065409 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -107,11 +107,11 @@ source_mac_is_multicast_test() RET=0 - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 log_test "Source MAC is multicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } __vlan_tag_mismatch_test() @@ -132,7 +132,7 @@ __vlan_tag_mismatch_test() $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Add PVID and make sure packets are no longer dropped. bridge vlan add vid 1 dev $swp1 pvid untagged master @@ -148,7 +148,7 @@ __vlan_tag_mismatch_test() devlink_trap_action_set $trap_name "drop" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } vlan_tag_mismatch_untagged_test() @@ -193,7 +193,7 @@ ingress_vlan_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Add the VLAN on the bridge port and make sure packets are no longer # dropped. @@ -212,7 +212,7 @@ ingress_vlan_filter_test() log_test "Ingress VLAN filter" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 bridge vlan del vid $vid dev $swp1 master bridge vlan del vid $vid dev $swp2 master @@ -237,7 +237,7 @@ __ingress_stp_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Change STP state to forwarding and make sure packets are no longer # dropped. @@ -254,7 +254,7 @@ __ingress_stp_filter_test() devlink_trap_action_set $trap_name "drop" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 bridge vlan del vid $vid dev $swp1 master bridge vlan del vid $vid dev $swp2 master @@ -308,7 +308,7 @@ port_list_is_empty_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave flood on @@ -326,7 +326,7 @@ port_list_is_empty_uc_test() log_test "Port list is empty - unicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 ip link set dev $swp1 type bridge_slave flood on } @@ -354,7 +354,7 @@ port_list_is_empty_mc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave mcast_flood on @@ -372,7 +372,7 @@ port_list_is_empty_mc_test() log_test "Port list is empty - multicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 ip link set dev $swp1 type bridge_slave mcast_flood on } @@ -401,7 +401,7 @@ port_loopback_filter_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 + devlink_trap_drop_test $trap_name $group_name $swp2 101 # Allow packets to be flooded. ip link set dev $swp2 type bridge_slave flood on @@ -419,7 +419,7 @@ port_loopback_filter_uc_test() log_test "Port loopback filter - unicast" - devlink_trap_drop_cleanup $mz_pid $swp2 ip + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 } port_loopback_filter_test() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index d88d8e47d11b..616f47d86a61 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -176,11 +176,11 @@ non_ip_test() 00:00 de:ad:be:ef" & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Non IP" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } __uc_dip_over_mc_dmac_test() @@ -206,11 +206,11 @@ __uc_dip_over_mc_dmac_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Unicast destination IP over multicast destination MAC: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } uc_dip_over_mc_dmac_test() @@ -242,11 +242,11 @@ __sip_is_loopback_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Source IP is loopback address: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } sip_is_loopback_test() @@ -277,11 +277,11 @@ __dip_is_loopback_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Destination IP is loopback address: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } dip_is_loopback_test() @@ -313,11 +313,11 @@ __sip_is_mc_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Source IP is multicast: $desc" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 } sip_is_mc_test() @@ -345,11 +345,11 @@ ipv4_sip_is_limited_bc_test() -B $h2_ipv4 -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv4 source IP is limited broadcast" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ipv4_payload_get() @@ -399,11 +399,11 @@ __ipv4_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IP header corrupted: $desc: IPv4" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ipv6_payload_get() @@ -446,11 +446,11 @@ __ipv6_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IP header corrupted: $desc: IPv6" - devlink_trap_drop_cleanup $mz_pid $rp2 "ip" + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 } ip_header_corrupted_test() @@ -485,11 +485,11 @@ ipv6_mc_dip_reserved_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv6 multicast destination IP reserved scope" - devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 } ipv6_mc_dip_interface_local_scope_test() @@ -511,11 +511,11 @@ ipv6_mc_dip_interface_local_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "IPv6 multicast destination IP interface-local scope" - devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 } __blackhole_route_test() @@ -542,10 +542,10 @@ __blackhole_route_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 + devlink_trap_drop_test $trap_name $group_name $rp2 101 log_test "Blackhole route: IPv$flags" - devlink_trap_drop_cleanup $mz_pid $rp2 $proto + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 ip -$flags route del blackhole $subnet } @@ -641,13 +641,9 @@ erif_disabled_test() mz_pid=$! sleep 5 - # In order to see this trap we need a route that points to disabled RIF. - # When ipv6 address is flushed, there is a delay and the routes are - # deleted before the RIF and we cannot get state that we have route - # to disabled RIF. - # Delete IPv6 address first and then check this trap with flushing IPv4. - ip -6 add flush dev br0 - ip -4 add flush dev br0 + # Unlinking the port from the bridge will disable the RIF associated + # with br0 as it is no longer an upper of any mlxsw port. + ip link set dev $rp1 nomaster t1_packets=$(devlink_trap_rx_packets_get $trap_name) t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) @@ -659,7 +655,6 @@ erif_disabled_test() log_test "Egress RIF disabled" kill $mz_pid && wait $mz_pid &> /dev/null - ip link set dev $rp1 nomaster __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge devlink_trap_action_set $trap_name "drop" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh new file mode 100755 index 000000000000..47edf099a17e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -0,0 +1,384 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap policer functionality over mlxsw. + +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | | +# | | default via 192.0.2.2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rate_limits_test + burst_limits_test + rate_test + burst_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + mtu_set $h1 10000 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 + mtu_set $h2 10000 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 +} + +h2_destroy() +{ + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + mtu_restore $h2 + simple_if_fini $h2 198.51.100.1/24 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + __addr_add_del $rp1 add 192.0.2.2/24 + __addr_add_del $rp2 add 198.51.100.2/24 + mtu_set $rp1 10000 + mtu_set $rp2 10000 + + ip -4 route add blackhole 198.51.100.100 + + devlink trap set $DEVLINK_DEV trap blackhole_route action trap +} + +router_destroy() +{ + devlink trap set $DEVLINK_DEV trap blackhole_route action drop + + ip -4 route del blackhole 198.51.100.100 + + mtu_restore $rp2 + mtu_restore $rp1 + __addr_add_del $rp2 del 198.51.100.2/24 + __addr_add_del $rp1 del 192.0.2.2/24 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1_mac=$(mac_get $rp1) + + vrf_prepare + + h1_create + h2_create + + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup + + # Reload to ensure devlink-trap settings are back to default. + devlink_reload +} + +rate_limits_test() +{ + RET=0 + + devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null + check_fail $? "Policer rate was changed to rate lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 \ + rate 2000000001 &> /dev/null + check_fail $? "Policer rate was changed to rate higher than limit" + + devlink trap policer set $DEVLINK_DEV policer 1 rate 1 + check_err $? "Failed to set policer rate to minimum" + devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000 + check_err $? "Failed to set policer rate to maximum" + + log_test "Trap policer rate limits" +} + +burst_limits_test() +{ + RET=0 + + devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null + check_fail $? "Policer burst size was changed to 0" + devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null + check_fail $? "Policer burst size was changed to burst size that is not power of 2" + devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null + check_fail $? "Policer burst size was changed to burst size lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 \ + burst $((2**25)) &> /dev/null + check_fail $? "Policer burst size was changed to burst size higher than limit" + + devlink trap policer set $DEVLINK_DEV policer 1 burst 16 + check_err $? "Failed to set policer burst size to minimum" + devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24)) + check_err $? "Failed to set policer burst size to maximum" + + log_test "Trap policer burst size limits" +} + +trap_rate_get() +{ + local t0 t1 + + t0=$(devlink_trap_rx_packets_get blackhole_route) + sleep 10 + t1=$(devlink_trap_rx_packets_get blackhole_route) + + echo $(((t1 - t0) / 10)) +} + +policer_drop_rate_get() +{ + local id=$1; shift + local t0 t1 + + t0=$(devlink_trap_policer_rx_dropped_get $id) + sleep 10 + t1=$(devlink_trap_policer_rx_dropped_get $id) + + echo $(((t1 - t0) / 10)) +} + +__rate_test() +{ + local rate pct drop_rate + local id=$1; shift + + RET=0 + + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16 + devlink trap group set $DEVLINK_DEV group l3_drops policer $id + + # Send packets at highest possible rate and make sure they are dropped + # by the policer. Make sure measured received rate is about 1000 pps + log_info "=== Tx rate: Highest, Policer rate: 1000 pps ===" + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + + sleep 5 # Take measurements when rate is stable + + rate=$(trap_rate_get) + pct=$((100 * (rate - 1000) / 1000)) + ((-5 <= pct && pct <= 5)) + check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%" + log_info "Expected rate 1000 pps, measured rate $rate pps" + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate > 0 )) + check_err $? "Expected non-zero policer drop rate, got 0" + log_info "Measured policer drop rate of $drop_rate pps" + + stop_traffic + + # Send packets at a rate of 1000 pps and make sure they are not dropped + # by the policer + log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ===" + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec + + sleep 5 # Take measurements when rate is stable + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate == 0 )) + check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" + log_info "Measured policer drop rate of $drop_rate pps" + + stop_traffic + + # Unbind the policer and send packets at highest possible rate. Make + # sure they are not dropped by the policer and that the measured + # received rate is higher than 1000 pps + log_info "=== Tx rate: Highest, Policer rate: No policer ===" + + devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + + rate=$(trap_rate_get) + (( rate > 1000 )) + check_err $? "Expected rate higher than 1000 pps, got $rate pps" + log_info "Measured rate $rate pps" + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate == 0 )) + check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" + log_info "Measured policer drop rate of $drop_rate pps" + + stop_traffic + + log_test "Trap policer rate" +} + +rate_test() +{ + local id + + for id in $(devlink_trap_policer_ids_get); do + echo + log_info "Running rate test for policer $id" + __rate_test $id + done +} + +__burst_test() +{ + local t0_rx t0_drop t1_rx t1_drop rx drop + local id=$1; shift + + RET=0 + + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32 + devlink trap group set $DEVLINK_DEV group l3_drops policer $id + + # Send a burst of 64 packets and make sure that about 32 are received + # and the rest are dropped by the policer + log_info "=== Tx burst size: 64, Policer burst size: 32 pps ===" + + t0_rx=$(devlink_trap_rx_packets_get blackhole_route) + t0_drop=$(devlink_trap_policer_rx_dropped_get $id) + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 + + t1_rx=$(devlink_trap_rx_packets_get blackhole_route) + t1_drop=$(devlink_trap_policer_rx_dropped_get $id) + + rx=$((t1_rx - t0_rx)) + pct=$((100 * (rx - 32) / 32)) + ((-20 <= pct && pct <= 20)) + check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%" + log_info "Expected burst size of 32 packets, measured burst size of $rx packets" + + drop=$((t1_drop - t0_drop)) + (( drop > 0 )) + check_err $? "Expected non-zero policer drops, got 0" + log_info "Measured policer drops of $drop packets" + + # Send a burst of 16 packets and make sure that 16 are received + # and that none are dropped by the policer + log_info "=== Tx burst size: 16, Policer burst size: 32 pps ===" + + t0_rx=$(devlink_trap_rx_packets_get blackhole_route) + t0_drop=$(devlink_trap_policer_rx_dropped_get $id) + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16 + + t1_rx=$(devlink_trap_rx_packets_get blackhole_route) + t1_drop=$(devlink_trap_policer_rx_dropped_get $id) + + rx=$((t1_rx - t0_rx)) + (( rx == 16 )) + check_err $? "Expected burst size of 16 packets, got $rx packets" + log_info "Expected burst size of 16 packets, measured burst size of $rx packets" + + drop=$((t1_drop - t0_drop)) + (( drop == 0 )) + check_err $? "Expected zero policer drops, got $drop" + log_info "Measured policer drops of $drop packets" + + # Unbind the policer and send a burst of 64 packets. Make sure that + # 64 packets are received and that none are dropped by the policer + log_info "=== Tx burst size: 64, Policer burst size: No policer ===" + + devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + + t0_rx=$(devlink_trap_rx_packets_get blackhole_route) + t0_drop=$(devlink_trap_policer_rx_dropped_get $id) + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 + + t1_rx=$(devlink_trap_rx_packets_get blackhole_route) + t1_drop=$(devlink_trap_policer_rx_dropped_get $id) + + rx=$((t1_rx - t0_rx)) + (( rx == 64 )) + check_err $? "Expected burst size of 64 packets, got $rx packets" + log_info "Expected burst size of 64 packets, measured burst size of $rx packets" + + drop=$((t1_drop - t0_drop)) + (( drop == 0 )) + check_err $? "Expected zero policer drops, got $drop" + log_info "Measured policer drops of $drop packets" + + log_test "Trap policer burst size" +} + +burst_test() +{ + local id + + for id in $(devlink_trap_policer_ids_get); do + echo + log_info "Running burst size test for policer $id" + __burst_test $id + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index fd19161dd4ec..e11a416323cf 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -314,11 +314,11 @@ overlay_smac_is_mc_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp1 + devlink_trap_drop_test $trap_name $group_name $swp1 101 log_test "Overlay source MAC is multicast" - devlink_trap_drop_cleanup $mz_pid $swp1 "ip" + devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101 } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index d72d8488a3b2..7a0a99c1d22f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" netdev_pre_up_test vxlan_vlan_add_test - port_vlan_add_test + vxlan_bridge_create_test + bridge_create_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -106,32 +107,56 @@ vxlan_vlan_add_test() ip link del dev br1 } -port_vlan_add_test() +vxlan_bridge_create_test() { RET=0 - ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 - # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". ip link add name vx1 up type vxlan id 1000 \ local 192.0.2.17 remote 192.0.2.18 \ dstport 4789 tos inherit ttl 100 - ip link set dev $swp1 master br1 - check_err $? - - bridge vlan del dev $swp1 vid 1 + # Test with VLAN-aware bridge. + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 ip link set dev vx1 master br1 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum check_err $? - bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ + # Test with VLAN-unaware bridge. + ip link set dev br1 type bridge vlan_filtering 0 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ | grep -q mlxsw_spectrum check_err $? - log_test "extack - map VLAN at port" + log_test "extack - bridge creation with VXLAN" + ip link del dev br1 ip link del dev vx1 +} + +bridge_create_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br2 up type bridge vlan_filtering 1 + + ip link set dev $swp1 master br1 + check_err $? + + # Only one VLAN-aware bridge is supported, so this should fail with + # an extack. + ip link set dev $swp2 master br2 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - multiple VLAN-aware bridges creation" + + ip link del dev br2 ip link del dev br1 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh new file mode 100644 index 000000000000..cbe50f260a40 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +if [[ ! -v MLXSW_CHIP ]]; then + MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]') + if [ -z "$MLXSW_CHIP" ]; then + echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command" + exit 1 + fi +fi diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh index eff6393ce974..71066bc4b886 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh @@ -114,23 +114,12 @@ ping_ipv4() ping_test $h1 192.0.2.2 } -wait_for_packets() -{ - local t0=$1; shift - local prio_observe=$1; shift - - local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) - local delta=$((t1 - t0)) - echo $delta - ((delta >= 10)) -} - __test_defprio() { local prio_install=$1; shift local prio_observe=$1; shift - local delta local key + local t1 local i RET=0 @@ -139,9 +128,10 @@ __test_defprio() local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) mausezahn -q $h1 -d 100m -c 10 -t arp reply - delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe) + t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \ + ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) - check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta." + check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))." log_test "Default priority $prio_install/$prio_observe" defprio_uninstall $swp1 $prio_install diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh index c745ce3befee..4cb2aa65278a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh @@ -31,6 +31,7 @@ ALL_TESTS=" ping_ipv4 test_update test_no_update + test_pedit_norewrite test_dscp_leftover " @@ -56,6 +57,11 @@ zero() echo 0 } +three() +{ + echo 3 +} + h1_create() { simple_if_init $h1 192.0.2.1/28 @@ -103,6 +109,9 @@ switch_create() simple_if_init $swp1 192.0.2.2/28 __simple_if_init $swp2 v$swp1 192.0.2.17/28 + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null lldpad_app_wait_set $swp1 @@ -115,6 +124,9 @@ switch_destroy() lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null lldpad_app_wait_del + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + __simple_if_fini $swp2 192.0.2.17/28 simple_if_fini $swp1 192.0.2.2/28 } @@ -223,18 +235,36 @@ __test_update() test_update() { + echo "Test net.ipv4.ip_forward_update_priority=1" __test_update 1 reprioritize } test_no_update() { + echo "Test net.ipv4.ip_forward_update_priority=0" __test_update 0 echo } +# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off. +test_pedit_norewrite() +{ + echo "Test no DSCP rewrite after DSCP is updated by pedit" + + tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \ + action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \ + action skbedit priority 3 + + __test_update 0 three + + tc filter del dev $swp1 ingress pref 1 +} + # Test that when the last APP rule is removed, the prio->DSCP map is properly # set to zeroes, and that the last APP rule does not stay active in the ASIC. test_dscp_leftover() { + echo "Test that last removed DSCP rule is deconfigured correctly" + lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null lldpad_app_wait_del diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh index d231649b4f01..e93878d42596 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -2,16 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 ROUTER_NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms router_h1_create() { simple_if_init $h1 192.0.1.1/24 - ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1 } router_h1_destroy() { - ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1 simple_if_fini $h1 192.0.1.1/24 } @@ -64,13 +63,15 @@ router_setup_prepare() router_create } -router_offload_validate() +wait_for_routes() { - local route_count=$1 - local offloaded_count + local t0=$1; shift + local route_count=$1; shift - offloaded_count=$(ip route | grep -o 'offload' | wc -l) - [[ $offloaded_count -ge $route_count ]] + local t1=$(ip route | grep -o 'offload' | wc -l) + local delta=$((t1 - t0)) + echo $delta + [[ $delta -ge $route_count ]] } router_routes_create() @@ -90,8 +91,8 @@ router_routes_create() break 3 fi - echo route add 193.${i}.${j}.${k}/32 via \ - 192.0.2.1 dev $rp2 >> $ROUTE_FILE + echo route add 193.${i}.${j}.${k}/32 dev $rp2 \ + >> $ROUTE_FILE ((count++)) done done @@ -111,45 +112,19 @@ router_test() { local route_count=$1 local should_fail=$2 - local count=0 + local delta RET=0 + local t0=$(ip route | grep -o 'offload' | wc -l) router_routes_create $route_count + delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count) - router_offload_validate $route_count - check_err_fail $should_fail $? "Offload of $route_count routes" + check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta." if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then return fi - tc filter add dev $h2 ingress protocol ip pref 1 flower \ - skip_sw dst_ip 193.0.0.0/8 action drop - - for i in {0..255} - do - for j in {0..255} - do - for k in {0..255} - do - if [[ $count -eq $route_count ]]; then - break 3 - fi - - $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \ - -A 192.0.1.1 -B 193.${i}.${j}.${k} \ - -t ip -q - ((count++)) - done - done - done - - tc_check_packets "dev $h2 ingress" 1 $route_count - check_err $? "Offload mismatch" - - tc filter del dev $h2 ingress protocol ip pref 1 flower \ - skip_sw dst_ip 193.0.0.0/8 action drop - router_routes_destroy } diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 5c39e5f6a480..f4031002d5e9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -32,6 +32,7 @@ ALL_TESTS=" devlink_reload_test " NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh @@ -360,20 +361,24 @@ vlan_rif_refcount_test() ip link add link br0 name br0.10 up type vlan id 10 ip -6 address add 2001:db8:1::1/64 dev br0.10 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was not created before adding port to vlan" bridge vlan add vid 10 dev $swp1 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was destroyed after adding port to vlan" bridge vlan del vid 10 dev $swp1 - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 check_err $? "vlan rif was destroyed after removing port from vlan" ip link set dev $swp1 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload - check_fail $? "vlan rif was not destroyed after unlinking port from bridge" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was not destroyed after unlinking port from bridge" log_test "vlan rif refcount" @@ -401,22 +406,28 @@ subport_rif_refcount_test() ip -6 address add 2001:db8:1::1/64 dev bond1 ip -6 address add 2001:db8:2::1/64 dev bond1.10 - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 check_err $? "subport rif was not created on lag device" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 check_err $? "subport rif was not created on vlan device" ip link set dev $swp1 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 check_err $? "subport rif of lag device was destroyed when should not" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 check_err $? "subport rif of vlan device was destroyed when should not" ip link set dev $swp2 nomaster - ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload - check_fail $? "subport rif of lag device was not destroyed when should" - ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload - check_fail $? "subport rif of vlan device was not destroyed when should" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was not destroyed when should" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 + check_err $? "subport rif of vlan device was not destroyed when should" log_test "subport rif refcount" @@ -575,7 +586,8 @@ bridge_extern_learn_test() bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn - bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + bridge fdb show brport $swp1 de:ad:be:ef:13:37 check_err $? "fdb entry not marked as offloaded when should" log_test "externally learned fdb entry" @@ -595,9 +607,11 @@ neigh_offload_test() ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \ dev $swp1 - ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 neigh show dev $swp1 192.0.2.2 check_err $? "ipv4 neigh entry not marked as offloaded when should" - ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 neigh show dev $swp1 2001:db8:1::2 check_err $? "ipv6 neigh entry not marked as offloaded when should" log_test "neighbour offload indication" @@ -623,25 +637,31 @@ nexthop_offload_test() ip -6 route add 2001:db8:2::/64 vrf v$swp1 \ nexthop via 2001:db8:1::2 dev $swp1 - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 check_err $? "ipv4 nexthop not marked as offloaded when should" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 check_err $? "ipv6 nexthop not marked as offloaded when should" ip link set dev $swp2 down sleep 1 - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload - check_fail $? "ipv4 nexthop marked as offloaded when should not" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload - check_fail $? "ipv6 nexthop marked as offloaded when should not" + busywait "$TIMEOUT" not wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 + check_err $? "ipv4 nexthop marked as offloaded when should not" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 + check_err $? "ipv6 nexthop marked as offloaded when should not" ip link set dev $swp2 up setup_wait - ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 check_err $? "ipv4 nexthop not marked as offloaded after neigh add" - ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 check_err $? "ipv6 nexthop not marked as offloaded after neigh add" log_test "nexthop offload indication" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index c9fc4d4885c1..94c37124a840 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -56,11 +56,19 @@ switch_destroy() } # Callback from sch_ets_tests.sh -get_stats() +collect_stats() { - local band=$1; shift + local -a streams=("$@") + local stream - ethtool_stats_get "$h2" rx_octets_prio_$band + # Wait for qdisc counter update so that we don't get it mid-way through. + busywait_for_counter 1000 +1 \ + qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \ + > /dev/null + + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done } bail_on_lldpad diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh new file mode 100644 index 000000000000..0d347d48c112 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -0,0 +1,533 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a >1Gbps stream of traffic from H1, to the switch, which +# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the +# switch and forwarded to the port under test $swp3, which is also 1Gbps. +# +# This way, $swp3 should be 100% filled with traffic without any of it spilling +# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That +# is what H2 is used for--it sends the extra traffic to create backlog. +# +# A RED Qdisc is installed on $swp3. The configuration is such that the minimum +# and maximum size are 1 byte apart, so there is a very clear border under which +# no marking or dropping takes place, and above which everything is marked or +# dropped. +# +# The test uses the buffer build-up behavior to test the installed RED. +# +# In order to test WRED, $swp3 actually contains RED under PRIO, with two +# different configurations. Traffic is prioritized using 802.1p and relies on +# the implicit mlxsw configuration, where packet priority is taken 1:1 from the +# 802.1p marking. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | $h1.11 + | | | $h2.11 + | +# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | | +# | | | | | | | | +# | \______ ______/ | | \______ ______/ | +# | \ / | | \ / | +# | + $h1 | | + $h2 | +# +-------------|------------+ +-------------|------------+ +# | >1Gbps | +# +-------------|------------------------------------------------|------------+ +# | SW + $swp1 + $swp2 | +# | _______/ \___________ ___________/ \_______ | +# | / \ / \ | +# | +-|-----------------+ | +-|-----------------+ | | +# | | + $swp1.10 | | | + $swp2.10 | | | +# | | | | .-------------+ $swp5.10 | | | +# | | BR1_10 | | | | | | | +# | | | | | | BR2_10 | | | +# | | + $swp2.10 | | | | | | | +# | +-|-----------------+ | | | + $swp3.10 | | | +# | | | | +-|-----------------+ | | +# | | +-----------------|-+ | | +-----------------|-+ | +# | | | $swp1.11 + | | | | $swp2.11 + | | +# | | | | | .-----------------+ $swp5.11 | | +# | | | BR1_11 | | | | | | | +# | | | | | | | | BR2_11 | | +# | | | $swp2.11 + | | | | | | | +# | | +-----------------|-+ | | | | $swp3.11 + | | +# | | | | | | +-----------------|-+ | +# | \_______ ___________/ | | \___________ _______/ | +# | \ / \ / \ / | +# | + $swp4 + $swp5 + $swp3 | +# +-------------|----------------------|-------------------------|------------+ +# | | | 1Gbps +# \________1Gbps_________/ | +# +----------------------------|------------+ +# | H3 + $h3 | +# | _____________________/ \_______ | +# | / \ | +# | | | | +# | + $h3.10 $h3.11 + | +# | 192.0.2.3/28 192.0.2.19/28 | +# +-----------------------------------------+ + +NUM_NETIFS=8 +CHECK_TC="yes" +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + simple_if_init $dev + mtu_set $dev 10000 + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + ip link set dev $dev.11 type vlan egress 0:1 +} + +host_destroy() +{ + local dev=$1; shift + + vlan_destroy $dev 11 + vlan_destroy $dev 10 + mtu_restore $dev + simple_if_fini $dev +} + +h1_create() +{ + host_create $h1 1 +} + +h1_destroy() +{ + host_destroy $h1 +} + +h2_create() +{ + host_create $h2 2 + + # Some of the tests in this suite use multicast traffic. As this traffic + # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus + # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the + # intended destination) and $swp5 (which is intended as ingress for + # another stream of traffic). + # + # This is generally not a problem, but if the $swp5 throughput is lower + # than $swp2 throughput, there will be a build-up at $swp5. That may + # cause packets to fail to queue up at $swp3 due to shared buffer + # quotas, and the test to spuriously fail. + # + # Prevent this by setting the speed of $h2 to 1Gbps. + + ethtool -s $h2 speed 1000 autoneg off +} + +h2_destroy() +{ + ethtool -s $h2 autoneg on + host_destroy $h2 +} + +h3_create() +{ + host_create $h3 3 + ethtool -s $h3 speed 1000 autoneg off +} + +h3_destroy() +{ + ethtool -s $h3 autoneg on + host_destroy $h3 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br1_10 type bridge + ip link add dev br1_11 type bridge + + ip link add dev br2_10 type bridge + ip link add dev br2_11 type bridge + + for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do + ip link set dev $intf up + mtu_set $intf 10000 + done + + for intf in $swp1 $swp4; do + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br1_$vlan + ip link set dev $intf.$vlan up + done + done + + for intf in $swp2 $swp3 $swp5; do + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br2_$vlan + ip link set dev $intf.$vlan up + done + done + + ip link set dev $swp4.10 type vlan egress 0:0 + ip link set dev $swp4.11 type vlan egress 0:1 + for intf in $swp1 $swp2 $swp5; do + for vlan in 10 11; do + ip link set dev $intf.$vlan type vlan ingress 0:0 1:1 + done + done + + for intf in $swp2 $swp3 $swp4 $swp5; do + ethtool -s $intf speed 1000 autoneg off + done + + ip link set dev br1_10 up + ip link set dev br1_11 up + ip link set dev br2_10 up + ip link set dev br2_11 up + + local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_set $swp3 8 $size +} + +switch_destroy() +{ + local intf + local vlan + + devlink_port_pool_th_restore $swp3 8 + + tc qdisc del dev $swp3 root 2>/dev/null + + ip link set dev br2_11 down + ip link set dev br2_10 down + ip link set dev br1_11 down + ip link set dev br1_10 down + + for intf in $swp5 $swp4 $swp3 $swp2; do + ethtool -s $intf autoneg on + done + + for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do + for vlan in 11 10; do + ip link set dev $intf.$vlan down + ip link set dev $intf.$vlan nomaster + vlan_destroy $intf $vlan + done + + mtu_restore $intf + ip link set dev $intf down + done + + ip link del dev br2_11 + ip link del dev br2_10 + ip link del dev br1_11 + ip link del dev br1_10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h3_mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10" + ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11" + ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10" + ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11" +} + +get_tc() +{ + local vlan=$1; shift + + echo $((vlan - 10)) +} + +get_qdisc_handle() +{ + local vlan=$1; shift + + local tc=$(get_tc $vlan) + local band=$((8 - tc)) + + # Handle is 107: for TC1, 108: for TC0. + echo "10$band:" +} + +get_qdisc_backlog() +{ + local vlan=$1; shift + + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog +} + +get_mc_transmit_queue() +{ + local vlan=$1; shift + + local tc=$(($(get_tc $vlan) + 8)) + ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc +} + +get_nmarked() +{ + local vlan=$1; shift + + ethtool_stats_get $swp3 ecn_marked +} + +get_qdisc_npackets() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 10 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. +build_backlog() +{ + local vlan=$1; shift + local size=$1; shift + local proto=$1; shift + + local tc=$((vlan - 10)) + local band=$((8 - tc)) + local cur=-1 + local i=0 + + while :; do + local cur=$(busywait 1100 until_counter_is "> $cur" \ + get_qdisc_backlog $vlan) + local diff=$((size - cur)) + local pkts=$(((diff + 7999) / 8000)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \ + -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \ + -t $proto -q -c $pkts "$@" + done +} + +check_marking() +{ + local vlan=$1; shift + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets $vlan) + local nmarked_0=$(get_nmarked $vlan) + sleep 5 + local npackets_1=$(get_qdisc_npackets $vlan) + local nmarked_1=$(get_nmarked $vlan) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +ecn_test_common() +{ + local name=$1; shift + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): $name backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "TC $((vlan - 10)): $name backlog > limit" +} + +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name=ECN + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name="ECN nodrop" + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + sleep 1 + + ecn_test_common "$name" $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" + + stop_traffic + sleep 1 +} + +do_red_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + local diff=$((limit - backlog)) + pct=$((100 * diff / limit)) + ((0 <= pct && pct <= 5)) + check_err $? "backlog $backlog / $limit expected <= 5% distance" + log_test "TC $((vlan - 10)): RED backlog > limit" + + stop_traffic + sleep 1 +} + +do_mc_backlog_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc + start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc + + qbl=$(busywait 5000 until_counter_is ">= 500000" \ + get_qdisc_backlog $vlan) + check_err $? "Could not build MC backlog" + + # Verify that we actually see the backlog on BUM TC. Do a busywait as + # well, performance blips might cause false fail. + local ebl + ebl=$(busywait 5000 until_counter_is ">= 500000" \ + get_mc_transmit_queue $vlan) + check_err $? "MC backlog reported by qdisc not visible in ethtool" + + stop_traffic + stop_traffic + + log_test "TC $((vlan - 10)): Qdisc reports MC backlog" +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh new file mode 100755 index 000000000000..1c36c576613b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_nodrop_test + red_test + mc_backlog_test +" +: ${QDISC:=ets} +source sch_red_core.sh + +# do_ecn_test first build 2/3 of the requested backlog and expects no marking, +# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and +# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do +# see (do not see) marking, it is actually due to the configuration of that one +# TC, and not due to configuration of the other TC leaking over. +BACKLOG1=200000 +BACKLOG2=500000 + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 root handle 10: $QDISC \ + bands 8 priomap 7 6 5 4 3 2 1 0 + tc qdisc add dev $swp3 parent 10:8 handle 108: red \ + limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ + probability 1.0 avpkt 8000 burst 38 "${args[@]}" + tc qdisc add dev $swp3 parent 10:7 handle 107: red \ + limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ + probability 1.0 avpkt 8000 burst 63 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 10:7 + tc qdisc del dev $swp3 parent 10:8 + tc qdisc del dev $swp3 root +} + +ecn_test() +{ + install_qdisc ecn + + do_ecn_test 10 $BACKLOG1 + do_ecn_test 11 $BACKLOG2 + + uninstall_qdisc +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + + do_ecn_nodrop_test 10 $BACKLOG1 + do_ecn_nodrop_test 11 $BACKLOG2 + + uninstall_qdisc +} + +red_test() +{ + install_qdisc + + do_red_test 10 $BACKLOG1 + do_red_test 11 $BACKLOG2 + + uninstall_qdisc +} + +mc_backlog_test() +{ + install_qdisc + + # Note that the backlog numbers here do not correspond to RED + # configuration, but are arbitrary. + do_mc_backlog_test 10 $BACKLOG1 + do_mc_backlog_test 11 $BACKLOG2 + + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +bail_on_lldpad +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh new file mode 100755 index 000000000000..76820a0e9a1b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC=prio +source sch_red_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh new file mode 100755 index 000000000000..558667ea11ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_nodrop_test + red_test + mc_backlog_test +" +source sch_red_core.sh + +BACKLOG=300000 + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 root handle 108: red \ + limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \ + probability 1.0 avpkt 8000 burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 root +} + +ecn_test() +{ + install_qdisc ecn + do_ecn_test 10 $BACKLOG + uninstall_qdisc +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test 10 $BACKLOG + uninstall_qdisc +} + +red_test() +{ + install_qdisc + do_red_test 10 $BACKLOG + uninstall_qdisc +} + +mc_backlog_test() +{ + install_qdisc + # Note that the backlog value here does not correspond to RED + # configuration, but is arbitrary. + do_mc_backlog_test 10 $BACKLOG + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +bail_on_lldpad +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh new file mode 100755 index 000000000000..58f3a05f08af --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -0,0 +1,222 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + port_pool_test + port_tc_ip_test + port_tc_arp_test +" + +NUM_NETIFS=2 +source ../../../net/forwarding/lib.sh +source ../../../net/forwarding/devlink_lib.sh +source mlxsw_lib.sh + +SB_POOL_ING=0 +SB_POOL_EGR_CPU=10 + +SB_ITC_CPU_IP=3 +SB_ITC_CPU_ARP=2 +SB_ITC=0 + +h1_create() +{ + simple_if_init $h1 192.0.1.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.1.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.1.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.1.2/24 +} + +sb_occ_pool_check() +{ + local dl_port=$1; shift + local pool=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_itc_check() +{ + local dl_port=$1; shift + local itc=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_etc_check() +{ + local dl_port=$1; shift + local etc=$1; shift + local exp_max_occ=$1; shift + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +port_pool_test() +{ + local exp_max_occ=288 + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) + check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress pool" + + RET=0 + max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) + check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress pool" + + RET=0 + max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) + check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress pool" +} + +port_tc_ip_test() +{ + local exp_max_occ=288 + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress TC - IP packet" + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - IP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - IP packet" +} + +port_tc_arp_test() +{ + local exp_max_occ=96 + local max_occ + + if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then + exp_max_occ=144 + fi + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h1) ingress TC - ARP packet" + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - ARP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - ARP packet" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + dl_port1=$(devlink_port_by_netdev $h1) + dl_port2=$(devlink_port_by_netdev $h2) + + cpu_dl_port=$(devlink_cpu_port_get) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py new file mode 100755 index 000000000000..0d4b9327c9b3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py @@ -0,0 +1,416 @@ +#!/usr/bin/python +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import json as j +import random + + +class SkipTest(Exception): + pass + + +class RandomValuePicker: + """ + Class for storing shared buffer configuration. Can handle 3 different + objects, pool, tcbind and portpool. Provide an interface to get random + values for a specific object type as the follow: + 1. Pool: + - random size + + 2. TcBind: + - random pool number + - random threshold + + 3. PortPool: + - random threshold + """ + def __init__(self, pools): + self._pools = [] + for pool in pools: + self._pools.append(pool) + + def _cell_size(self): + return self._pools[0]["cell_size"] + + def _get_static_size(self, th): + # For threshold of 16, this works out to be about 12MB on Spectrum-1, + # and about 17MB on Spectrum-2. + return th * 8000 * self._cell_size() + + def _get_size(self): + return self._get_static_size(16) + + def _get_thtype(self): + return "static" + + def _get_th(self, pool): + # Threshold value could be any integer between 3 to 16 + th = random.randint(3, 16) + if pool["thtype"] == "dynamic": + return th + else: + return self._get_static_size(th) + + def _get_pool(self, direction): + ing_pools = [] + egr_pools = [] + for pool in self._pools: + if pool["type"] == "ingress": + ing_pools.append(pool) + else: + egr_pools.append(pool) + if direction == "ingress": + arr = ing_pools + else: + arr = egr_pools + return arr[random.randint(0, len(arr) - 1)] + + def get_value(self, objid): + if isinstance(objid, Pool): + if objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + else: + return (self._get_size(), self._get_thtype()) + if isinstance(objid, TcBind): + if objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + else: + pool = self._get_pool(objid["type"]) + th = self._get_th(pool) + pool_n = pool["pool"] + return (pool_n, th) + if isinstance(objid, PortPool): + pool_n = objid["pool"] + pool = self._pools[pool_n] + assert pool["pool"] == pool_n + th = self._get_th(pool) + return (th,) + + +class RecordValuePickerException(Exception): + pass + + +class RecordValuePicker: + """ + Class for storing shared buffer configuration. Can handle 2 different + objects, pool and tcbind. Provide an interface to get the stored values per + object type. + """ + def __init__(self, objlist): + self._recs = [] + for item in objlist: + self._recs.append({"objid": item, "value": item.var_tuple()}) + + def get_value(self, objid): + if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + if isinstance(objid, TcBind) and objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + for rec in self._recs: + if rec["objid"].weak_eq(objid): + return rec["value"] + raise RecordValuePickerException() + + +def run_cmd(cmd, json=False): + out = subprocess.check_output(cmd, shell=True) + if json: + return j.loads(out) + return out + + +def run_json_cmd(cmd): + return run_cmd(cmd, json=True) + + +def log_test(test_name, err_msg=None): + if err_msg: + print("\t%s" % err_msg) + print("TEST: %-80s [FAIL]" % test_name) + else: + print("TEST: %-80s [ OK ]" % test_name) + + +class CommonItem(dict): + varitems = [] + + def var_tuple(self): + ret = [] + self.varitems.sort() + for key in self.varitems: + ret.append(self[key]) + return tuple(ret) + + def weak_eq(self, other): + for key in self: + if key in self.varitems: + continue + if self[key] != other[key]: + return False + return True + + +class CommonList(list): + def get_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + return item + return None + + def del_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + self.remove(item) + + +class Pool(CommonItem): + varitems = ["size", "thtype"] + + def dl_set(self, dlname, size, thtype): + run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"], + self["pool"], + size, thtype)) + + +class PoolList(CommonList): + pass + + +def get_pools(dlname, direction=None): + d = run_json_cmd("devlink sb pool show -j") + pools = PoolList() + for pooldict in d["pool"][dlname]: + if not direction or direction == pooldict["type"]: + pools.append(Pool(pooldict)) + return pools + + +def do_check_pools(dlname, pools, vp): + for pool in pools: + pre_pools = get_pools(dlname) + try: + (size, thtype) = vp.get_value(pool) + except SkipTest: + continue + pool.dl_set(dlname, size, thtype) + post_pools = get_pools(dlname) + pool = post_pools.get_by(pool) + + err_msg = None + if pool["size"] != size: + err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size) + if pool["thtype"] != thtype: + err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype) + + pre_pools.del_by(pool) + post_pools.del_by(pool) + if pre_pools != post_pools: + err_msg = "Other pool setup changed as well" + log_test("pool {} of sb {} set verification".format(pool["pool"], + pool["sb"]), err_msg) + + +def check_pools(dlname, pools): + # Save defaults + record_vp = RecordValuePicker(pools) + + # For each pool, set random size and static threshold type + do_check_pools(dlname, pools, RandomValuePicker(pools)) + + # Restore defaults + do_check_pools(dlname, pools, record_vp) + + +class TcBind(CommonItem): + varitems = ["pool", "threshold"] + + def __init__(self, port, d): + super(TcBind, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, pool, th): + run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["tc"], + self["type"], + pool, th)) + + +class TcBindList(CommonList): + pass + + +def get_tcbinds(ports, verify_existence=False): + d = run_json_cmd("devlink sb tc bind show -j -n") + tcbinds = TcBindList() + for port in ports: + err_msg = None + if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0: + err_msg = "No tc bind for port" + else: + for tcbinddict in d["tc_bind"][port.name]: + tcbinds.append(TcBind(port, tcbinddict)) + if verify_existence: + log_test("tc bind existence for port {} verification".format(port.name), err_msg) + return tcbinds + + +def do_check_tcbind(ports, tcbinds, vp): + for tcbind in tcbinds: + pre_tcbinds = get_tcbinds(ports) + try: + (pool, th) = vp.get_value(tcbind) + except SkipTest: + continue + tcbind.dl_set(pool, th) + post_tcbinds = get_tcbinds(ports) + tcbind = post_tcbinds.get_by(tcbind) + + err_msg = None + if tcbind["pool"] != pool: + err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool) + if tcbind["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th) + + pre_tcbinds.del_by(tcbind) + post_tcbinds.del_by(tcbind) + if pre_tcbinds != post_tcbinds: + err_msg = "Other tc bind setup changed as well" + log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"], + tcbind["tc"], + tcbind["sb"]), err_msg) + + +def check_tcbind(dlname, ports, pools): + tcbinds = get_tcbinds(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(tcbinds) + + # Bind each port and unicast TC (TCs < 8) to a random pool and a random + # threshold + do_check_tcbind(ports, tcbinds, RandomValuePicker(pools)) + + # Restore defaults + do_check_tcbind(ports, tcbinds, record_vp) + + +class PortPool(CommonItem): + varitems = ["threshold"] + + def __init__(self, port, d): + super(PortPool, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, th): + run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["pool"], th)) + + +class PortPoolList(CommonList): + pass + + +def get_portpools(ports, verify_existence=False): + d = run_json_cmd("devlink sb port pool -j -n") + portpools = PortPoolList() + for port in ports: + err_msg = None + if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0: + err_msg = "No port pool for port" + else: + for portpooldict in d["port_pool"][port.name]: + portpools.append(PortPool(port, portpooldict)) + if verify_existence: + log_test("port pool existence for port {} verification".format(port.name), err_msg) + return portpools + + +def do_check_portpool(ports, portpools, vp): + for portpool in portpools: + pre_portpools = get_portpools(ports) + (th,) = vp.get_value(portpool) + portpool.dl_set(th) + post_portpools = get_portpools(ports) + portpool = post_portpools.get_by(portpool) + + err_msg = None + if portpool["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th) + + pre_portpools.del_by(portpool) + post_portpools.del_by(portpool) + if pre_portpools != post_portpools: + err_msg = "Other port pool setup changed as well" + log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"], + portpool["pool"], + portpool["sb"]), err_msg) + + +def check_portpool(dlname, ports, pools): + portpools = get_portpools(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(portpools) + + # For each port pool, set a random threshold + do_check_portpool(ports, portpools, RandomValuePicker(pools)) + + # Restore defaults + do_check_portpool(ports, portpools, record_vp) + + +class Port: + def __init__(self, name): + self.name = name + + +class PortList(list): + pass + + +def get_ports(dlname): + d = run_json_cmd("devlink port show -j") + ports = PortList() + for name in d["port"]: + if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical": + ports.append(Port(name)) + return ports + + +def get_device(): + devices_info = run_json_cmd("devlink -j dev info")["info"] + for d in devices_info: + if "mlxsw_spectrum" in devices_info[d]["driver"]: + return d + return None + + +class UnavailableDevlinkNameException(Exception): + pass + + +def test_sb_configuration(): + # Use static seed + random.seed(0) + + dlname = get_device() + if not dlname: + raise UnavailableDevlinkNameException() + + ports = get_ports(dlname) + pools = get_pools(dlname) + + check_pools(dlname, pools) + check_tcbind(dlname, ports, pools) + check_portpool(dlname, ports, pools) + + +test_sb_configuration() diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 7b2acba82a49..fd583a171db7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -8,8 +8,9 @@ source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh -if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then - echo "SKIP: test is tailored for Mellanox Spectrum-2" +if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \ + "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then + echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3" exit 1 fi diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index a0795227216e..efd798a85931 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -8,9 +8,9 @@ tc_flower_get_target() # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 12K (12,288) flow counters and six of + # Currently, the driver supports 30K (30,720) flow counters and six of # these are used for multicast routing. - local target=12282 + local target=30714 if ((! should_fail)); then echo $target diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh new file mode 100755 index 000000000000..20ed98fe5a60 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + default_hw_stats_test + immediate_hw_stats_test + delayed_hw_stats_test + disabled_hw_stats_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +hw_stats_test() +{ + RET=0 + + local name=$1 + local action_hw_stats=$2 + local occ_delta=$3 + local expected_packet_count=$4 + + local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats + check_err $? "Failed to add rule with $name hw_stats" + + local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + local expected_occ=$((orig_occ + occ_delta)) + [ "$new_occ" == "$expected_occ" ] + check_err $? "Expected occupancy of $expected_occ, got $new_occ" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count + check_err $? "Did not match incoming packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "$name hw_stats" +} + +default_hw_stats_test() +{ + hw_stats_test "default" "" 2 1 +} + +immediate_hw_stats_test() +{ + hw_stats_test "immediate" "hw_stats immediate" 2 1 +} + +delayed_hw_stats_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed + check_fail $? "Unexpected success in adding rule with delayed hw_stats" + + log_test "delayed hw_stats" +} + +disabled_hw_stats_test() +{ + hw_stats_test "disabled" "hw_stats disabled" 0 0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + swp1mac=$(mac_get $swp1) + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +check_tc_action_hw_stats_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh new file mode 100755 index 000000000000..68c80d0ec1ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh @@ -0,0 +1,186 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + shared_block_drop_test + egress_redirect_test + multi_mirror_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +switch_create() +{ + simple_if_init $swp1 192.0.2.1/24 + simple_if_init $swp2 192.0.2.2/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.2/24 + simple_if_fini $swp1 192.0.2.1/24 +} + +shared_block_drop_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mixed-bound + # shared block with a drop rule. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add drop rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to egress bound shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + +egress_redirect_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mirred redirect on + # egress-bound block. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_err $? "Failed to add redirect rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound shared block" + + tc qdisc del dev $swp2 clsact + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound block" + + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + +multi_mirror_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have multiple mirror + # actions in a single rule. + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 + check_err $? "Failed to add rule with single mirror action" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 \ + action mirred egress mirror dev $swp1 + check_fail $? "Incorrect success to add rule with two mirror actions" + + tc qdisc del dev $swp1 clsact + + log_test "multi mirror" +} + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + vrf_prepare + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + vrf_cleanup +} + +check_tc_shblock_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index a6d733d2a4b4..cc0f07e72cf2 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -2,9 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 # Test for resource limit of offloaded flower rules. The test adds a given -# number of flower matches for different IPv6 addresses, then generates traffic, -# and ensures each was hit exactly once. This file contains functions to set up -# a testing topology and run the test, and is meant to be sourced from a test +# number of flower matches for different IPv6 addresses, then check the offload +# indication for all of the tc flower rules. This file contains functions to set +# up a testing topology and run the test, and is meant to be sourced from a test # script that calls the testing routine with a given number of rules. TC_FLOWER_NUM_NETIFS=2 @@ -94,22 +94,15 @@ __tc_flower_test() tc_flower_rules_create $count $should_fail - for ((i = 0; i < count; ++i)); do - $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \ - -A 2001:db8:2::1 \ - -B $(tc_flower_addr $i) - done - - MISMATCHES=$( - tc -j -s filter show dev $h2 ingress | - jq -r '[ .[] | select(.kind == "flower") | .options | - values as $rule | .actions[].stats.packets | - select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] | - join(", ")' - ) - - test -z "$MISMATCHES" - check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES" + offload_count=$(tc -j -s filter show dev $h2 ingress | + jq -r '[ .[] | select(.kind == "flower") | + .options | .in_hw ]' | jq .[] | wc -l) + [[ $((offload_count - 1)) -eq $count ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Offload mismatch" + else + check_err_fail $should_fail $? "Offload more than expacted" + fi } tc_flower_test() diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 4632f51af7ab..729a86cc4ede 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -9,6 +9,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="sanitization_test offload_indication_test \ sanitization_vlan_aware_test offload_indication_vlan_aware_test" NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh setup_prepare() @@ -470,8 +471,8 @@ offload_indication_fdb_flood_test() bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ + bridge fdb show brport vxlan0 check_err $? bridge fdb del 00:00:00:00:00:00 dev vxlan0 self @@ -486,11 +487,11 @@ offload_indication_fdb_bridge_test() bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - initial state" @@ -500,9 +501,9 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? log_test "vxlan entry offload indication - after removal from bridge" @@ -511,11 +512,11 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - after re-add to bridge" @@ -525,9 +526,9 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? log_test "vxlan entry offload indication - after removal from vxlan" @@ -536,11 +537,11 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? - bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 check_err $? log_test "vxlan entry offload indication - after re-add to vxlan" @@ -558,27 +559,32 @@ offload_indication_decap_route_test() { RET=0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 down - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan1 down - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - vxlan device down" RET=0 ip link set dev vxlan1 up - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev vxlan0 up - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vxlan device up" @@ -586,11 +592,13 @@ offload_indication_decap_route_test() RET=0 ip address delete 198.51.100.1/32 dev lo - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? ip address add 198.51.100.1/32 dev lo - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - add local route" @@ -598,16 +606,19 @@ offload_indication_decap_route_test() RET=0 ip link set dev $swp1 nomaster - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link set dev $swp2 nomaster - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - local ports enslavement" @@ -615,12 +626,14 @@ offload_indication_decap_route_test() RET=0 ip link del dev br0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev br1 - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - bridge device deletion" @@ -632,16 +645,19 @@ offload_indication_decap_route_test() ip link set dev $swp2 master br1 ip link set dev vxlan0 master br0 ip link set dev vxlan1 master br1 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? ip link del dev vxlan1 - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? log_test "vxlan decap route - vxlan device deletion" @@ -656,12 +672,15 @@ check_fdb_offloaded() local mac=00:11:22:33:44:55 local zmac=00:00:00:00:00:00 - bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 check_err $? - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 check_err $? } @@ -672,13 +691,15 @@ check_vxlan_fdb_not_offloaded() bridge fdb show dev vxlan0 | grep $mac | grep -q self check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 + check_err $? bridge fdb show dev vxlan0 | grep $zmac | grep -q self check_err $? - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? } check_bridge_fdb_not_offloaded() @@ -688,8 +709,9 @@ check_bridge_fdb_not_offloaded() bridge fdb show dev vxlan0 | grep $mac | grep -q master check_err $? - bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 + check_err $? } __offload_indication_join_vxlan_first() @@ -771,12 +793,14 @@ __offload_indication_join_vxlan_last() ip link set dev $swp1 master br0 - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? ip link set dev vxlan0 master br0 - bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 check_err $? log_test "offload indication - attach vxlan last" @@ -854,20 +878,26 @@ sanitization_vlan_aware_test() bridge vlan del vid 10 dev vxlan20 bridge vlan add vid 20 dev vxlan20 pvid untagged - # Test that offloading of an unsupported tunnel fails when it is - # triggered by addition of VLAN to a local port - RET=0 + # Test that when two VXLAN tunnels with conflicting configurations + # (i.e., different TTL) are enslaved to the same VLAN-aware bridge, + # then the enslavement of a port to the bridge is denied. - # TOS must be set to inherit - ip link set dev vxlan10 type vxlan tos 42 + # Use the offload indication of the local route to ensure the VXLAN + # configuration was correctly rollbacked. + ip address add 198.51.100.1/32 dev lo - ip link set dev $swp1 master br0 - bridge vlan add vid 10 dev $swp1 &> /dev/null + ip link set dev vxlan10 type vxlan ttl 10 + ip link set dev $swp1 master br0 &> /dev/null check_fail $? - log_test "vlan-aware - failed vlan addition to a local port" + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? + + log_test "vlan-aware - failed enslavement to bridge due to conflict" - ip link set dev vxlan10 type vxlan tos inherit + ip link set dev vxlan10 type vxlan ttl 20 + ip address del 198.51.100.1/32 dev lo ip link del dev vxlan20 ip link del dev vxlan10 @@ -924,11 +954,11 @@ offload_indication_vlan_aware_fdb_test() bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ dst 198.51.100.2 vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - initial state" @@ -938,9 +968,9 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? log_test "vxlan entry offload indication - after removal from bridge" @@ -949,11 +979,11 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - after re-add to bridge" @@ -963,9 +993,9 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? log_test "vxlan entry offload indication - after removal from vxlan" @@ -974,11 +1004,11 @@ offload_indication_vlan_aware_fdb_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? - bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 check_err $? log_test "vxlan entry offload indication - after re-add to vxlan" @@ -990,28 +1020,31 @@ offload_indication_vlan_aware_decap_route_test() { RET=0 - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag on one VxLAN device and make sure route is still # marked as offloaded bridge vlan add vid 10 dev vxlan10 untagged - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload \ + ip route show table local 198.51.100.1 check_err $? # Toggle PVID flag on second VxLAN device and make sure route is no # longer marked as offloaded bridge vlan add vid 20 dev vxlan20 untagged - ip route show table local | grep 198.51.100.1 | grep -q offload - check_fail $? + busywait "$TIMEOUT" not wait_for_offload \ + ip route show table local 198.51.100.1 + check_err $? # Toggle PVID flag back and make sure route is marked as offloaded bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged - ip route show table local | grep 198.51.100.1 | grep -q offload + busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1 check_err $? log_test "vxlan decap route - vni map/unmap" @@ -1064,35 +1097,33 @@ offload_indication_vlan_aware_l3vni_test() ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged - # No local port or router port is member in the VLAN, so tunnel should - # not be offloaded - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload - check_fail $? "vxlan tunnel offloaded when should not" + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel not offloaded when should" # Configure a VLAN interface and make sure tunnel is offloaded ip link add link br0 name br10 up type vlan id 10 sysctl_set net.ipv6.conf.br10.disable_ipv6 0 ip -6 address add 2001:db8:1::1/64 dev br10 - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 check_err $? "vxlan tunnel not offloaded when should" # Unlink the VXLAN device, make sure tunnel is no longer offloaded, # then add it back to the bridge and make sure it is offloaded ip link set dev vxlan0 nomaster - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload - check_fail $? "vxlan tunnel offloaded after unlinked from bridge" + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded after unlinked from bridge" ip link set dev vxlan0 master br0 - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload - check_fail $? "vxlan tunnel offloaded despite no matching vid" + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded despite no matching vid" bridge vlan add dev vxlan0 vid 10 pvid untagged - bridge fdb show brport vxlan0 | grep $zmac | grep self \ - | grep -q offload + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 check_err $? "vxlan tunnel not offloaded after adding vid" log_test "vxlan - l3 vni" diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 025a84c2ab5a..9f9741444549 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -141,6 +141,16 @@ regions_test() check_region_snapshot_count dummy post-first-delete 2 + devlink region new $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to create a new snapshot with id 25" + + check_region_snapshot_count dummy post-first-request 3 + + devlink region del $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to delete snapshot with id 25" + + check_region_snapshot_count dummy post-second-delete 2 + log_test "regions test" } @@ -367,6 +377,11 @@ dummy_reporter_test() { RET=0 + check_reporter_info dummy healthy 0 0 0 true + + devlink health set $DL_HANDLE reporter dummy auto_recover false + check_err $? "Failed to dummy reporter auto_recover option" + check_reporter_info dummy healthy 0 0 0 false local BREAK_MSG="foo bar" diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index f101ab9441e2..dbd1e014ba17 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -16,6 +16,8 @@ ALL_TESTS=" trap_group_action_test bad_trap_group_test trap_group_stats_test + trap_policer_test + trap_policer_bind_test port_del_test dev_del_test " @@ -23,6 +25,7 @@ NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} DEVLINK_DEV=netdevsim/${DEV} +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ SLEEP_TIME=1 NETDEV="" NUM_NETIFS=0 @@ -103,6 +106,11 @@ trap_metadata_test() for trap_name in $(devlink_traps_get); do devlink_trap_metadata_test $trap_name "input_port" check_err $? "Input port not reported as metadata of trap $trap_name" + if [ $trap_name == "ingress_flow_action_drop" ] || + [ $trap_name == "egress_flow_action_drop" ]; then + devlink_trap_metadata_test $trap_name "flow_action_cookie" + check_err $? "Flow action cookie not reported as metadata of trap $trap_name" + fi done log_test "Trap metadata" @@ -251,6 +259,119 @@ trap_group_stats_test() log_test "Trap group statistics" } +trap_policer_test() +{ + local packets_t0 + local packets_t1 + + if [ $(devlink_trap_policers_num_get) -eq 0 ]; then + check_err 1 "Failed to dump policers" + fi + + devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null + check_fail $? "Did not get an error for setting a non-existing policer" + devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null + check_fail $? "Did not get an error for getting a non-existing policer" + + devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16 + check_err $? "Failed to set valid parameters for a valid policer" + if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then + check_err 1 "Policer rate was not changed" + fi + if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then + check_err 1 "Policer burst size was not changed" + fi + + devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null + check_fail $? "Policer rate was changed to rate lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null + check_fail $? "Policer rate was changed to rate higher than limit" + devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null + check_fail $? "Policer burst size was changed to burst size lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null + check_fail $? "Policer burst size was changed to burst size higher than limit" + echo "y" > $DEBUGFS_DIR/fail_trap_policer_set + devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null + check_fail $? "Managed to set policer rate when should not" + echo "n" > $DEBUGFS_DIR/fail_trap_policer_set + if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then + check_err 1 "Policer rate was changed to an invalid value" + fi + if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then + check_err 1 "Policer burst size was changed to an invalid value" + fi + + packets_t0=$(devlink_trap_policer_rx_dropped_get 1) + sleep .5 + packets_t1=$(devlink_trap_policer_rx_dropped_get 1) + if [ ! $packets_t1 -gt $packets_t0 ]; then + check_err 1 "Policer drop counter was not incremented" + fi + + echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get + devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null + check_fail $? "Managed to read policer drop counter when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get + devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null + check_err $? "Did not manage to read policer drop counter when should" + + log_test "Trap policer" +} + +trap_group_check_policer() +{ + local group_name=$1; shift + + devlink -j -p trap group show $DEVLINK_DEV group $group_name \ + | jq -e '.[][][]["policer"]' &> /dev/null +} + +trap_policer_bind_test() +{ + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then + check_err 1 "Bound policer was not changed" + fi + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \ + &> /dev/null + check_fail $? "Did not get an error for binding a non-existing policer" + if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then + check_err 1 "Bound policer was changed when should not" + fi + + devlink trap group set $DEVLINK_DEV group l2_drops policer 0 + check_err $? "Failed to unbind a policer when using ID 0" + trap_group_check_policer "l2_drops" + check_fail $? "Trap group has a policer after unbinding with ID 0" + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + + devlink trap group set $DEVLINK_DEV group l2_drops nopolicer + check_err $? "Failed to unbind a policer when using 'nopolicer' keyword" + trap_group_check_policer "l2_drops" + check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword" + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + + echo "y"> $DEBUGFS_DIR/fail_trap_group_set + devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \ + &> /dev/null + check_fail $? "Managed to bind a policer when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_group_set + devlink trap group set $DEVLINK_DEV group l2_drops policer 2 + check_err $? "Did not manage to bind a policer when should" + + devlink trap group set $DEVLINK_DEV group l2_drops action drop \ + policer 1337 &> /dev/null + check_fail $? "Did not get an error for partially modified trap group" + + log_test "Trap policer binding" +} + port_del_test() { local group_name diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ecc52d4c034d..997c65dcad68 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -23,3 +23,8 @@ so_txtime tcp_fastopen_backup_key nettest fin_ack_lat +reuseaddr_ports_exhausted +hwtstamp_config +rxtimestamp +timestamping +txtimestamp diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 4c1bd03ffa1c..3f386eb9e7d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -14,6 +14,8 @@ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh +TEST_PROGS += reuseaddr_ports_exhausted.sh +TEST_PROGS += txtimestamp.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -22,6 +24,8 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat +TEST_GEN_FILES += reuseaddr_ports_exhausted +TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index b8503a8119b0..3b42c06b5985 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -12,6 +12,7 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_IFB=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -27,5 +28,6 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_NETEM=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 40b076983239..155d48bd4d9e 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then exit 1 fi +devlink dev help 2>&1 | grep info &> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing devlink dev info support" + exit 1 +fi + ############################################################################## # Devlink helpers @@ -373,6 +379,7 @@ devlink_trap_drop_test() local trap_name=$1; shift local group_name=$1; shift local dev=$1; shift + local handle=$1; shift # This is the common part of all the tests. It checks that stats are # initially idle, then non-idle after changing the trap action and @@ -397,7 +404,7 @@ devlink_trap_drop_test() devlink_trap_group_stats_idle_test $group_name check_err $? "Trap group stats not idle after setting action to drop" - tc_check_packets "dev $dev egress" 101 0 + tc_check_packets "dev $dev egress" $handle 0 check_err $? "Packets were not dropped" } @@ -406,7 +413,68 @@ devlink_trap_drop_cleanup() local mz_pid=$1; shift local dev=$1; shift local proto=$1; shift + local pref=$1; shift + local handle=$1; shift kill $mz_pid && wait $mz_pid &> /dev/null - tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower + tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower +} + +devlink_trap_policers_num_get() +{ + devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length' +} + +devlink_trap_policer_rate_get() +{ + local policer_id=$1; shift + + devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["rate"]' +} + +devlink_trap_policer_burst_get() +{ + local policer_id=$1; shift + + devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["burst"]' +} + +devlink_trap_policer_rx_dropped_get() +{ + local policer_id=$1; shift + + devlink -j -p -s trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["stats"]["rx"]["dropped"]' +} + +devlink_trap_group_policer_get() +{ + local group_name=$1; shift + + devlink -j -p trap group show $DEVLINK_DEV group $group_name \ + | jq '.[][][]["policer"]' +} + +devlink_trap_policer_ids_get() +{ + devlink -j -p trap policer show \ + | jq '.[]["'$DEVLINK_DEV'"][]["policer"]' +} + +devlink_port_by_netdev() +{ + local if_name=$1 + + devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]' +} + +devlink_cpu_port_get() +{ + local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" | + grep cpu | cut -d/ -f3 | cut -d: -f1 | + sed -n '1p') + + echo "$DEVLINK_DEV/$cpu_dl_port_num" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 2f5da414aaa7..977fc2b326a2 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -60,6 +60,15 @@ check_tc_chain_support() fi } +check_tc_action_hw_stats_support() +{ + tc actions help 2>&1 | grep -q hw_stats + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing action hw_stats support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 @@ -248,13 +257,40 @@ busywait() done } +not() +{ + "$@" + [[ $? != 0 ]] +} + +grep_bridge_fdb() +{ + local addr=$1; shift + local word + local flag + + if [ "$1" == "self" ] || [ "$1" == "master" ]; then + word=$1; shift + if [ "$1" == "-v" ]; then + flag=$1; shift + fi + fi + + $@ | grep $addr | grep $flag "$word" +} + +wait_for_offload() +{ + "$@" | grep -q offload +} + until_counter_is() { - local value=$1; shift + local expr=$1; shift local current=$("$@") echo $((current)) - ((current >= value)) + ((current $expr)) } busywait_for_counter() @@ -263,7 +299,7 @@ busywait_for_counter() local delta=$1; shift local base=$("$@") - busywait "$timeout" until_counter_is $((base + delta)) "$@" + busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } setup_wait_dev() @@ -599,6 +635,17 @@ tc_rule_stats_get() | jq ".[1].options.actions[].stats$selector" } +tc_rule_handle_stats_get() +{ + local id=$1; shift + local handle=$1; shift + local selector=${1:-.packets}; shift + + tc -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats$selector" +} + ethtool_stats_get() { local dev=$1; shift @@ -607,6 +654,26 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +qdisc_stats_get() +{ + local dev=$1; shift + local handle=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" \ + | jq '.[] | select(.handle == "'"$handle"'") | '"$selector" +} + +qdisc_parent_stats_get() +{ + local dev=$1; shift + local parent=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" invisible \ + | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" +} + humanize() { local speed=$1; shift @@ -1132,18 +1199,29 @@ flood_test() flood_multicast_test $br_port $host1_if $host2_if } -start_traffic() +__start_traffic() { + local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ - -a own -b $dmac -t udp -q & + -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic() +{ + __start_traffic udp "$@" +} + +start_tcp_traffic() +{ + __start_traffic tcp "$@" +} + stop_traffic() { # Suppress noise from killing mausezahn. diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh new file mode 100755 index 000000000000..b50081855913 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ip_dsfield + test_ip_dscp + test_ip_ecn + test_ip_dscp_ecn +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +do_test_pedit_dsfield_common() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local mz_flags=$1; shift + + RET=0 + + # TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is + # overwritten when zero is selected. + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345 + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + log_test "$pedit_locus pedit $pedit_action" +} + +do_test_pedit_dsfield() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + local saddr=$1; shift + local daddr=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_ip_dsfield() +{ + local locus=$1; shift + local dsfield + + for dsfield in 0 1 2 3 128 252 253 254 255; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $dsfield" \ + ip "ip_tos $dsfield" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dsfield() +{ + do_test_ip_dsfield "dev $swp1 ingress" + do_test_ip_dsfield "dev $swp2 egress" +} + +do_test_ip_dscp() +{ + local locus=$1; shift + local dscp + + for dscp in 0 1 2 3 32 61 62 63; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $((dscp << 2)) retain 0xfc" \ + ip "ip_tos $(((dscp << 2) | 1))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dscp() +{ + do_test_ip_dscp "dev $swp1 ingress" + do_test_ip_dscp "dev $swp2 egress" +} + +do_test_ip_ecn() +{ + local locus=$1; shift + local ecn + + for ecn in 0 1 2 3; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $ecn retain 0x03" \ + ip "ip_tos $((124 | $ecn))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_ecn() +{ + do_test_ip_ecn "dev $swp1 ingress" + do_test_ip_ecn "dev $swp2 egress" +} + +do_test_ip_dscp_ecn() +{ + local locus=$1; shift + + tc filter add $locus handle 101 pref 1 \ + flower action pedit ex munge ip dsfield set 124 retain 0xfc \ + action pedit ex munge ip dsfield set 1 retain 0x03 + tc filter add dev $h2 ingress handle 101 pref 1 prot ip \ + flower skip_hw ip_tos 125 action pass + + do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \ + "-A 192.0.2.1 -B 192.0.2.2" + + tc filter del dev $h2 ingress pref 1 + tc filter del $locus pref 1 +} + +test_ip_dscp_ecn() +{ + do_test_ip_dscp_ecn "dev $swp1 ingress" + do_test_ip_dscp_ecn "dev $swp2 egress" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 40e0ad1bc4f2..e60c8b4818cc 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -34,11 +34,14 @@ switch_destroy() } # Callback from sch_ets_tests.sh -get_stats() +collect_stats() { - local stream=$1; shift + local -a streams=("$@") + local stream - link_stats_get $h2.1$stream rx bytes + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done } ets_run diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 3c3b204d47e8..cdf689e99458 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -2,7 +2,7 @@ # Global interface: # $put -- port under test (e.g. $swp2) -# get_stats($band) -- A function to collect stats for band +# collect_stats($streams...) -- A function to get stats for individual streams # ets_start_traffic($band) -- Start traffic for this band # ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc @@ -94,15 +94,11 @@ __ets_dwrr_test() sleep 10 - t0=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t0=($(collect_stats "${streams[@]}")) sleep 10 - t1=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t1=($(collect_stats "${streams[@]}")) d=($(for ((i = 0; i < ${#streams[@]}; i++)); do echo $((${t1[$i]} - ${t0[$i]})) done)) diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh new file mode 100755 index 000000000000..e3bd8a6bb8b4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by an action skbedit priority. The +# new priority should be taken into account when classifying traffic on the PRIO +# qdisc at $swp2. The test verifies that for different priority values, the +# traffic ends up in expected PRIO band. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | PRIO | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ingress + test_egress +" + +NUM_NETIFS=4 +source lib.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + tc qdisc add dev $swp2 root handle 10: \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 +} + +switch_destroy() +{ + tc qdisc del dev $swp2 root + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +test_skbedit_priority_one() +{ + local locus=$1; shift + local prio=$1; shift + local classid=$1; shift + + RET=0 + + tc filter add $locus handle 101 pref 1 \ + flower action skbedit priority $prio + + local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets) + local pkt2=$(tc_rule_handle_stats_get "$locus" 101) + $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q + + local pkt1 + pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \ + qdisc_parent_stats_get $swp2 $classid .packets) + check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))." + + local pkt3=$(tc_rule_handle_stats_get "$locus" 101) + ((pkt3 >= pkt2 + 10)) + check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))." + + log_test "$locus skbedit priority $prio -> classid $classid" + + tc filter del $locus pref 1 +} + +test_ingress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp1 ingress" \ + $prio 10:$((8 - prio)) + done +} + +test_egress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp2 egress" \ + $prio 10:$((8 - prio)) + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 64f652633585..0e18e8be6e2a 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -6,39 +6,14 @@ CHECK_TC="yes" # Can be overridden by the configuration file. See lib.sh TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms -__tc_check_packets() -{ - local id=$1 - local handle=$2 - local count=$3 - local operator=$4 - - start_time="$(date -u +%s%3N)" - while true - do - cmd_jq "tc -j -s filter show $id" \ - ".[] | select(.options.handle == $handle) | \ - select(.options.actions[0].stats.packets $operator $count)" \ - &> /dev/null - ret=$? - if [[ $ret -eq 0 ]]; then - return $ret - fi - current_time="$(date -u +%s%3N)" - diff=$(expr $current_time - $start_time) - if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then - return 1 - fi - done -} - tc_check_packets() { local id=$1 local handle=$2 local count=$3 - __tc_check_packets "$id" "$handle" "$count" "==" + busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } tc_check_packets_hitting() @@ -46,5 +21,6 @@ tc_check_packets_hitting() local id=$1 local handle=$2 - __tc_check_packets "$id" "$handle" 0 ">" + busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c index e1fdee841021..e1fdee841021 100644 --- a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c +++ b/tools/testing/selftests/net/hwtstamp_config.c diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index d72f07642738..ea13b255a99d 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,2 +1,3 @@ mptcp_connect +pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index ba450e62dc5b..f50976ee7d44 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 top_srcdir = ../../../../.. +KSFT_KHDR_INSTALL := 1 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh -TEST_GEN_FILES = mptcp_connect +TEST_GEN_FILES = mptcp_connect pm_nl_ctl TEST_FILES := settings diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 99579c0223c1..cedee5b952ba 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -34,8 +34,8 @@ extern int optind; #define TCP_ULP 31 #endif +static int poll_timeout = 10 * 1000; static bool listen_mode; -static int poll_timeout; enum cfg_mode { CFG_MODE_POLL, @@ -50,11 +50,21 @@ static int cfg_sock_proto = IPPROTO_MPTCP; static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; +static int cfg_rcvbuf; +static bool cfg_join; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]" - "[ -l ] [ -t timeout ] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" + "[-l] connect_address\n"); + fprintf(stderr, "\t-6 use ipv6\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); + fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-u -- check mptcp ulp\n"); exit(1); } @@ -97,6 +107,17 @@ static void xgetaddrinfo(const char *node, const char *service, } } +static void set_rcvbuf(int fd, unsigned int size) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); + if (err) { + perror("set SO_RCVBUF"); + exit(1); + } +} + static void set_sndbuf(int fd, unsigned int size) { int err; @@ -230,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, static size_t do_rnd_write(const int fd, char *buf, const size_t len) { + static bool first = true; unsigned int do_w; ssize_t bw; @@ -237,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (do_w == 0 || do_w > len) do_w = len; + if (cfg_join && first && do_w > 100) + do_w = 100; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); + /* let the join handshake complete, before going on */ + if (cfg_join && first) { + usleep(200000); + first = false; + } + return bw; } @@ -365,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) break; /* ... but we still receive. - * Close our write side. + * Close our write side, ev. give some time + * for address notification */ + if (cfg_join) + usleep(400000); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -383,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } } + /* leave some time for late join/announce */ + if (cfg_join) + usleep(400000); + close(peerfd); return 0; } @@ -638,7 +676,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (r & 1) + if (!cfg_join && (r & 1)) close(fd); } @@ -704,6 +742,8 @@ int main_loop(void) check_getpeername_connect(fd); + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); @@ -745,7 +785,7 @@ int parse_mode(const char *mode) return 0; } -int parse_sndbuf(const char *size) +static int parse_int(const char *size) { unsigned long s; @@ -765,17 +805,19 @@ int parse_sndbuf(const char *size) die_usage(); } - cfg_sndbuf = s; - - return 0; + return (int)s; } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) { + while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) { switch (c) { + case 'j': + cfg_join = true; + cfg_mode = CFG_MODE_POLL; + break; case 'l': listen_mode = true; break; @@ -802,8 +844,11 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_mode = parse_mode(optarg); break; - case 'b': - cfg_sndbuf = parse_sndbuf(optarg); + case 'S': + cfg_sndbuf = parse_int(optarg); + break; + case 'R': + cfg_rcvbuf = parse_int(optarg); break; } } @@ -831,6 +876,8 @@ int main(int argc, char *argv[]) if (fd < 0) return 1; + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index d573a0feb98d..acf02e156d20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="b:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:" ret=0 sin="" sout="" @@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101)) tc_reorder="" testmode="" sndbuf=0 +rcvbuf=0 options_log=true if [ $tc_loss -eq 100 ];then @@ -39,7 +40,8 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" - echo -e "\t-b: set sndbuf value (default: use kernel default)" + echo -e "\t-S: set sndbuf value (default: use kernel default)" + echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" } @@ -73,11 +75,19 @@ while getopts "$optstring" option;do "c") capture=true ;; - "b") + "S") if [ $OPTARG -ge 0 ];then sndbuf="$OPTARG" else - echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2 + echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "R") + if [ $OPTARG -ge 0 ];then + rcvbuf="$OPTARG" + else + echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 exit 1 fi ;; @@ -342,8 +352,12 @@ do_transfer() port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) + if [ "$rcvbuf" -gt 0 ]; then + extra_args="$extra_args -R $rcvbuf" + fi + if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -b $sndbuf" + extra_args="$extra_args -S $sndbuf" fi if [ -n "$testmode" ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh new file mode 100755 index 000000000000..dd42c2f692d0 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -0,0 +1,357 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +sin="" +sout="" +cin="" +cout="" +ksft_skip=4 +timeout=30 +capture=0 + +TEST_COUNT=0 + +init() +{ + capout=$(mktemp) + + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) + + ns1="ns1-$rndh" + ns2="ns2-$rndh" + + for netns in "$ns1" "$ns2";do + ip netns add $netns || exit $ksft_skip + ip -net $netns link set lo up + ip netns exec $netns sysctl -q net.mptcp.enabled=1 + ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + done + + # ns1 ns2 + # ns1eth1 ns2eth1 + # ns1eth2 ns2eth2 + # ns1eth3 ns2eth3 + # ns1eth4 ns2eth4 + + for i in `seq 1 4`; do + ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" + ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i + ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad + ip -net "$ns1" link set ns1eth$i up + + ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i + ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad + ip -net "$ns2" link set ns2eth$i up + + # let $ns2 reach any $ns1 address from any interface + ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + done +} + +cleanup_partial() +{ + rm -f "$capout" + + for netns in "$ns1" "$ns2"; do + ip netns del $netns + done +} + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + cleanup_partial +} + +reset() +{ + cleanup_partial + init +} + +for arg in "$@"; do + if [ "$arg" = "-c" ]; then + capture=1 + fi +done + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + + +check_transfer() +{ + in=$1 + out=$2 + what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? -ne 0 ] ;then + echo "[ FAIL ] $what does not match (in, out):" + print_file_err "$in" + print_file_err "$out" + + return 1 + fi + + return 0 +} + +do_ping() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + + ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + fi +} + +do_transfer() +{ + listener_ns="$1" + connector_ns="$2" + cl_proto="$3" + srv_proto="$4" + connect_addr="$5" + + port=$((10000+$TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + :> "$cout" + :> "$sout" + :> "$capout" + + if [ $capture -eq 1 ]; then + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile="mp_join-${listener_ns}.pcap" + + echo "Capturing traffic for test $TEST_COUNT into $capfile" + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi + + ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + spid=$! + + sleep 1 + + ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + cpid=$! + + wait $cpid + retc=$? + wait $spid + rets=$? + + if [ $capture -eq 1 ]; then + sleep 1 + kill $cappid + fi + + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " client exit code $retc, server $rets" 1>&2 + echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + + cat "$capout" + return 1 + fi + + check_transfer $sin $cout "file received by client" + retc=$? + check_transfer $cin $sout "file received by server" + rets=$? + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + cat "$capout" + return 0 + fi + + cat "$capout" + return 1 +} + +make_file() +{ + name=$1 + who=$2 + + SIZE=1 + + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + + echo "Created $name (size $SIZE KB) containing data sent by $who" +} + +run_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +chk_join_nr() +{ + local msg="$1" + local syn_nr=$2 + local syn_ack_nr=$3 + local ack_nr=$4 + local count + local dump_stats + + printf "%-36s %s" "$msg" "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn expected $syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - synack" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_ack_nr" ]; then + echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack expected $ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +init +make_file "$cin" "client" +make_file "$sin" "server" +trap cleanup EXIT + +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN" "0" "0" "0" + +# subflow limted by client +reset +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow, limited by client" 0 0 0 + +# subflow limted by server +reset +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow, limited by server" 1 1 0 + +# subflow +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow" 1 1 1 + +# multiple subflows +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows" 2 2 2 + +# multiple subflows limited by serverf +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows, limited by server" 2 2 1 + +# add_address, unused +reset +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "unused signal address" 0 0 0 + +# accept and use add_addr +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address" 1 1 1 + +# accept and use add_addr with an additional subflow +# note: signal address in server ns and local addresses in client ns must +# belong to different subnets or one of the listed local address could be +# used for 'add_addr' subflow +reset +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflow and signal" 2 2 2 + +# accept and use add_addr with additional subflows +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows and signal" 3 3 3 + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh new file mode 100755 index 000000000000..9172746b6cf0 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ksft_skip=4 +ret=0 + +usage() { + echo "Usage: $0 [ -h ]" +} + + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +err=$(mktemp) +ret=0 + +cleanup() +{ + rm -f $out + ip netns del $ns1 +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +ip netns add $ns1 || exit $ksft_skip +ip -net $ns1 link set lo up +ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 + +check() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local out=`$cmd 2>$err` + local cmd_ret=$? + + printf "%-50s %s" "$msg" + if [ $cmd_ret -ne 0 ]; then + echo "[FAIL] command execution '$cmd' stderr " + cat $err + ret=1 + elif [ "$out" = "$expected" ]; then + echo "[ OK ]" + else + echo -n "[FAIL] " + echo "expected '$expected' got '$out'" + ret=1 + fi +} + +check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "defaults limits" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup +check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr" + +check "ip netns exec $ns1 ./pm_nl_ctl dump" \ +"id 1 flags 10.0.1.1 +id 2 flags subflow dev lo 10.0.1.2 +id 3 flags signal,backup 10.0.1.3" "dump addrs" + +ip netns exec $ns1 ./pm_nl_ctl del 2 +check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr" +check "ip netns exec $ns1 ./pm_nl_ctl dump" \ +"id 1 flags 10.0.1.1 +id 3 flags signal,backup 10.0.1.3" "dump addrs after del" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 +check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal +check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" + +for i in `seq 5 9`; do + ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 +done +check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" +check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" + +for i in `seq 9 256`; do + ip netns exec $ns1 ./pm_nl_ctl del $i + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 +done +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 3 flags signal,backup 10.0.1.3 +id 4 flags signal 10.0.1.4 +id 5 flags signal 10.0.1.5 +id 6 flags signal 10.0.1.6 +id 7 flags signal 10.0.1.7 +id 8 flags signal 10.0.1.8" "id limit" + +ip netns exec $ns1 ./pm_nl_ctl flush +check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" + +ip netns exec $ns1 ./pm_nl_ctl limits 9 1 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "rcv addrs above hard limit" + +ip netns exec $ns1 ./pm_nl_ctl limits 1 9 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "subflows above hard limit" + +ip netns exec $ns1 ./pm_nl_ctl limits 8 8 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 +subflows 8" "set limits" + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c new file mode 100644 index 000000000000..b24a2f17d415 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <error.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/types.h> + +#include <arpa/inet.h> +#include <net/if.h> + +#include <linux/rtnetlink.h> +#include <linux/genetlink.h> + +#include "linux/mptcp.h" + +#ifndef MPTCP_PM_NAME +#define MPTCP_PM_NAME "mptcp_pm" +#endif + +static void syntax(char *argv[]) +{ + fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]); + fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "\tget <id>\n"); + fprintf(stderr, "\tflush\n"); + fprintf(stderr, "\tdump\n"); + fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); + exit(0); +} + +static int init_genl_req(char *data, int family, int cmd, int version) +{ + struct nlmsghdr *nh = (void *)data; + struct genlmsghdr *gh; + int off = 0; + + nh->nlmsg_type = family; + nh->nlmsg_flags = NLM_F_REQUEST; + nh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + off += NLMSG_ALIGN(sizeof(*nh)); + + gh = (void *)(data + off); + gh->cmd = cmd; + gh->version = version; + off += NLMSG_ALIGN(sizeof(*gh)); + return off; +} + +static void nl_error(struct nlmsghdr *nh) +{ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh); + int len = nh->nlmsg_len - sizeof(*nh); + uint32_t off; + + if (len < sizeof(struct nlmsgerr)) + error(1, 0, "netlink error message truncated %d min %ld", len, + sizeof(struct nlmsgerr)); + + if (!err->error) { + /* check messages from kernel */ + struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh); + + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == NLMSGERR_ATTR_MSG) + fprintf(stderr, "netlink ext ack msg: %s\n", + (char *)RTA_DATA(attrs)); + if (attrs->rta_type == NLMSGERR_ATTR_OFFS) { + memcpy(&off, RTA_DATA(attrs), 4); + fprintf(stderr, "netlink err off %d\n", + (int)off); + } + attrs = RTA_NEXT(attrs, len); + } + } else { + fprintf(stderr, "netlink error %d", err->error); + } +} + +/* do a netlink command and, if max > 0, fetch the reply */ +static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + socklen_t addr_len; + void *data = nh; + int rem, ret; + int err = 0; + + nh->nlmsg_len = len; + ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != len) + error(1, errno, "send netlink: %uB != %uB\n", ret, len); + if (max == 0) + return 0; + + addr_len = sizeof(nladdr); + rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); + if (ret < 0) + error(1, errno, "recv netlink: %uB\n", ret); + + /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ + for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { + if (nh->nlmsg_type == NLMSG_ERROR) { + nl_error(nh); + err = 1; + } + } + if (err) + error(1, 0, "bailing out due to netlink error[s]"); + return ret; +} + +static int genl_parse_getfamily(struct nlmsghdr *nlh) +{ + struct genlmsghdr *ghdr = NLMSG_DATA(nlh); + int len = nlh->nlmsg_len; + struct rtattr *attrs; + + if (nlh->nlmsg_type != GENL_ID_CTRL) + error(1, errno, "Not a controller message, len=%d type=0x%x\n", + nlh->nlmsg_len, nlh->nlmsg_type); + + len -= NLMSG_LENGTH(GENL_HDRLEN); + + if (len < 0) + error(1, errno, "wrong controller message len %d\n", len); + + if (ghdr->cmd != CTRL_CMD_NEWFAMILY) + error(1, errno, "Unknown controller command %d\n", ghdr->cmd); + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) + return *(__u16 *)RTA_DATA(attrs); + attrs = RTA_NEXT(attrs, len); + } + + error(1, errno, "can't find CTRL_ATTR_FAMILY_ID attr"); + return -1; +} + +static int resolve_mptcp_pm_netlink(int fd) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + struct rtattr *rta; + int namelen; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 0); + + rta = (void *)(data + off); + namelen = strlen(MPTCP_PM_NAME) + 1; + rta->rta_type = CTRL_ATTR_FAMILY_NAME; + rta->rta_len = RTA_LENGTH(namelen); + memcpy(RTA_DATA(rta), MPTCP_PM_NAME, namelen); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, sizeof(data)); + return genl_parse_getfamily((void *)data); +} + +int add_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int16_t family; + u_int32_t flags; + int nest_start; + u_int8_t id; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ADD_ADDR, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", argv[2]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + flags = 0; + if (++arg >= argc) + error(1, 0, " missing flags value"); + + /* do not support flag list yet */ + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "subflow")) + flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + else if (!strcmp(tok, "signal")) + flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "dev")) { + int32_t ifindex; + + if (++arg >= argc) + error(1, 0, " missing dev name"); + + ifindex = if_nametoindex(argv[arg]); + if (!ifindex) + error(1, errno, "unknown device %s", argv[arg]); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &ifindex, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + +int del_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + int nest_start; + u_int8_t id; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, + MPTCP_PM_VER); + + /* the only argument is the address id */ + if (argc != 3) + syntax(argv); + + id = atoi(argv[2]); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* build a dummy addr with only the ID set */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + +static void print_addr(struct rtattr *attrs, int len) +{ + uint16_t family = 0; + char str[1024]; + uint32_t flags; + uint8_t id; + + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY) + memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { + if (family != AF_INET) + error(1, errno, "wrong IP (v4) for family %d", + family); + inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); + printf("%s", str); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { + if (family != AF_INET6) + error(1, errno, "wrong IP (v6) for family %d", + family); + inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); + printf("%s", str); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { + memcpy(&id, RTA_DATA(attrs), 1); + printf("id %d ", id); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FLAGS) { + memcpy(&flags, RTA_DATA(attrs), 4); + + printf("flags "); + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + printf("signal"); + flags &= ~MPTCP_PM_ADDR_FLAG_SIGNAL; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + printf("subflow"); + flags &= ~MPTCP_PM_ADDR_FLAG_SUBFLOW; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { + printf("backup"); + flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + if (flags) + printf(","); + } + + /* bump unknown flags, if any */ + if (flags) + printf("0x%x", flags); + printf(" "); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_IF_IDX) { + char name[IF_NAMESIZE], *ret; + int32_t ifindex; + + memcpy(&ifindex, RTA_DATA(attrs), 4); + ret = if_indextoname(ifindex, name); + if (ret) + printf("dev %s ", ret); + else + printf("dev unknown/%d", ifindex); + } + + attrs = RTA_NEXT(attrs, len); + } + printf("\n"); +} + +static void print_addrs(struct nlmsghdr *nh, int pm_family, int total_len) +{ + struct rtattr *attrs; + + for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) { + int len = nh->nlmsg_len; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + if (nh->nlmsg_type == NLMSG_ERROR) + nl_error(nh); + if (nh->nlmsg_type != pm_family) + continue; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) + + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == + (MPTCP_PM_ATTR_ADDR | NLA_F_NESTED)) + print_addr((void *)RTA_DATA(attrs), + attrs->rta_len); + attrs = RTA_NEXT(attrs, len); + } + } +} + +int get_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + int nest_start; + u_int8_t id; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, + MPTCP_PM_VER); + + /* the only argument is the address id */ + if (argc != 3) + syntax(argv); + + id = atoi(argv[2]); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* build a dummy addr with only the ID set */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + nest->rta_len = off - nest_start; + + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); + return 0; +} + +int dump_addrs(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + pid_t pid = getpid(); + struct nlmsghdr *nh; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, + MPTCP_PM_VER); + nh->nlmsg_flags |= NLM_F_DUMP; + nh->nlmsg_seq = 1; + nh->nlmsg_pid = pid; + nh->nlmsg_len = off; + + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); + return 0; +} + +int flush_addrs(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_FLUSH_ADDRS, + MPTCP_PM_VER); + + do_nl_req(fd, nh, off, 0); + return 0; +} + +static void print_limits(struct nlmsghdr *nh, int pm_family, int total_len) +{ + struct rtattr *attrs; + uint32_t max; + + for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) { + int len = nh->nlmsg_len; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + if (nh->nlmsg_type == NLMSG_ERROR) + nl_error(nh); + if (nh->nlmsg_type != pm_family) + continue; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) + + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + int type = attrs->rta_type; + + if (type != MPTCP_PM_ATTR_RCV_ADD_ADDRS && + type != MPTCP_PM_ATTR_SUBFLOWS) + goto next; + + memcpy(&max, RTA_DATA(attrs), 4); + printf("%s %u\n", type == MPTCP_PM_ATTR_SUBFLOWS ? + "subflows" : "accept", max); + +next: + attrs = RTA_NEXT(attrs, len); + } + } +} + +int get_set_limits(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + uint32_t rcv_addr = 0, subflows = 0; + int cmd, len = sizeof(data); + struct nlmsghdr *nh; + int off = 0; + + /* limit */ + if (argc == 4) { + rcv_addr = atoi(argv[2]); + subflows = atoi(argv[3]); + cmd = MPTCP_PM_CMD_SET_LIMITS; + } else { + cmd = MPTCP_PM_CMD_GET_LIMITS; + } + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, cmd, MPTCP_PM_VER); + + /* limit */ + if (cmd == MPTCP_PM_CMD_SET_LIMITS) { + struct rtattr *rta = (void *)(data + off); + + rta->rta_type = MPTCP_PM_ATTR_RCV_ADD_ADDRS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &rcv_addr, 4); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_SUBFLOWS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &subflows, 4); + off += NLMSG_ALIGN(rta->rta_len); + + /* do not expect a reply */ + len = 0; + } + + len = do_nl_req(fd, nh, off, len); + if (cmd == MPTCP_PM_CMD_GET_LIMITS) + print_limits(nh, pm_family, len); + return 0; +} + +int main(int argc, char *argv[]) +{ + int fd, pm_family; + + if (argc < 2) + syntax(argv); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd == -1) + error(1, errno, "socket netlink"); + + pm_family = resolve_mptcp_pm_netlink(fd); + + if (!strcmp(argv[1], "add")) + return add_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "del")) + return del_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "flush")) + return flush_addrs(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "get")) + return get_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "dump")) + return dump_addrs(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "limits")) + return get_set_limits(fd, pm_family, argc, argv); + + fprintf(stderr, "unknown sub-command: %s", argv[1]); + syntax(argv); + return 0; +} diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c new file mode 100644 index 000000000000..7b01b7c2ec10 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Check if we can fully utilize 4-tuples for connect(). + * + * Rules to bind sockets to the same port when all ephemeral ports are + * exhausted. + * + * 1. if there are TCP_LISTEN sockets on the port, fail to bind. + * 2. if there are sockets without SO_REUSEADDR, fail to bind. + * 3. if SO_REUSEADDR is disabled, fail to bind. + * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled, + * succeed to bind. + * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and + * there is no socket having the both options and the same EUID, + * succeed to bind. + * 6. fail to bind. + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include "../kselftest_harness.h" + +struct reuse_opts { + int reuseaddr[2]; + int reuseport[2]; +}; + +struct reuse_opts unreusable_opts[12] = { + {0, 0, 0, 0}, + {0, 0, 0, 1}, + {0, 0, 1, 0}, + {0, 0, 1, 1}, + {0, 1, 0, 0}, + {0, 1, 0, 1}, + {0, 1, 1, 0}, + {0, 1, 1, 1}, + {1, 0, 0, 0}, + {1, 0, 0, 1}, + {1, 0, 1, 0}, + {1, 0, 1, 1}, +}; + +struct reuse_opts reusable_opts[4] = { + {1, 1, 0, 0}, + {1, 1, 0, 1}, + {1, 1, 1, 0}, + {1, 1, 1, 1}, +}; + +int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) +{ + struct sockaddr_in local_addr; + int len = sizeof(local_addr); + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(-1, fd) TH_LOG("failed to open socket."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT."); + + local_addr.sin_family = AF_INET; + local_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + local_addr.sin_port = 0; + + if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) { + close(fd); + return -1; + } + + return fd; +} + +TEST(reuseaddr_ports_exhausted_unreusable) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 12; i++) { + opts = &unreusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind."); + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_same_euid) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + + if (opts->reuseport[0] && opts->reuseport[1]) { + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + } else { + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_different_euid) +{ + struct reuse_opts *opts; + int i, j, ret, fd[2]; + uid_t euid[2] = {10, 20}; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) { + ret = seteuid(euid[j]); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]); + + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ret = seteuid(0); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0."); + } + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid."); + + if (fd[1] != -1) { + ret = listen(fd[0], 5); + ASSERT_EQ(0, ret) TH_LOG("failed to listen."); + + ret = listen(fd[1], 5); + EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh new file mode 100755 index 000000000000..20e3a2913d06 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run tests when all ephemeral ports are exhausted. +# +# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_local_port_range="32768 32768" \ + > /dev/null 2>&1 + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted +} + +do_test +echo "tests done" diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 6dee9e636a95..6dee9e636a95 100644 --- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/net/timestamping.c index aca3491174a1..aca3491174a1 100644 --- a/tools/testing/selftests/networking/timestamping/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 7e386be47120..011b0da6b033 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -41,6 +41,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/select.h> #include <sys/socket.h> @@ -49,6 +50,10 @@ #include <time.h> #include <unistd.h> +#define NSEC_PER_USEC 1000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000LL + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -61,12 +66,16 @@ static int cfg_delay_snd; static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_busy_poll; +static int cfg_sleep_usec = 50 * 1000; static bool cfg_loop_nodata; -static bool cfg_no_delay; static bool cfg_use_cmsg; static bool cfg_use_pf_packet; +static bool cfg_use_epoll; +static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; +static bool cfg_print_nsec; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -75,11 +84,48 @@ static struct timespec ts_usr; static int saved_tskey = -1; static int saved_tskey_type = -1; +struct timing_event { + int64_t min; + int64_t max; + int64_t total; + int count; +}; + +static struct timing_event usr_enq; +static struct timing_event usr_snd; +static struct timing_event usr_ack; + static bool test_failed; +static int64_t timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + static int64_t timespec_to_us64(struct timespec *ts) { - return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; + return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC; +} + +static void init_timing_event(struct timing_event *te) +{ + te->min = INT64_MAX; + te->max = 0; + te->total = 0; + te->count = 0; +} + +static void add_timing_event(struct timing_event *te, + struct timespec *t_start, struct timespec *t_end) +{ + int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start); + + te->count++; + if (ts_delta < te->min) + te->min = ts_delta; + if (ts_delta > te->max) + te->max = ts_delta; + te->total += ts_delta; } static void validate_key(int tskey, int tstype) @@ -113,25 +159,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay) start64 = timespec_to_us64(&ts_usr); if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { - fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); test_failed = true; } } +static void __print_ts_delta_formatted(int64_t ts_delta) +{ + if (cfg_print_nsec) + fprintf(stderr, "%lu ns", ts_delta); + else + fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC); +} + static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) { + int64_t ts_delta; + if (!(cur->tv_sec | cur->tv_nsec)) return; - fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", - name, cur->tv_sec, cur->tv_nsec / 1000, - key, payload_len); - - if (cur != &ts_usr) - fprintf(stderr, " (USR %+" PRId64 " us)", - timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); + if (cfg_print_nsec) + fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec, + key, payload_len); + else + fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC, + key, payload_len); + + if (cur != &ts_usr) { + ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr); + fprintf(stderr, " (USR +"); + __print_ts_delta_formatted(ts_delta); + fprintf(stderr, ")"); + } fprintf(stderr, "\n"); } @@ -155,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, case SCM_TSTAMP_SCHED: tsname = " ENQ"; validate_timestamp(&tss->ts[0], 0); + add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_SND: tsname = " SND"; validate_timestamp(&tss->ts[0], cfg_delay_snd); + add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]); break; case SCM_TSTAMP_ACK: tsname = " ACK"; validate_timestamp(&tss->ts[0], cfg_delay_ack); + add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]); break; default: error(1, 0, "unknown timestamp type: %u", @@ -171,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +static void print_timing_event(char *name, struct timing_event *te) +{ + if (!te->count) + return; + + fprintf(stderr, " %s: count=%d", name, te->count); + fprintf(stderr, ", avg="); + __print_ts_delta_formatted((int64_t)(te->total / te->count)); + fprintf(stderr, ", min="); + __print_ts_delta_formatted(te->min); + fprintf(stderr, ", max="); + __print_ts_delta_formatted(te->max); + fprintf(stderr, "\n"); +} + /* TODO: convert to check_and_print payload once API is stable */ static void print_payload(char *data, int len) { @@ -198,6 +280,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); } +static void __epoll(int epfd) +{ + struct epoll_event events; + int ret; + + memset(&events, 0, sizeof(events)); + ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout); + if (ret != 1) + error(1, errno, "epoll_wait"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -391,7 +484,11 @@ static void do_test(int family, unsigned int report_opt) struct msghdr msg; struct iovec iov; char *buf; - int fd, i, val = 1, total_len; + int fd, i, val = 1, total_len, epfd = 0; + + init_timing_event(&usr_enq); + init_timing_event(&usr_snd); + init_timing_event(&usr_ack); total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { @@ -418,6 +515,20 @@ static void do_test(int family, unsigned int report_opt) if (fd < 0) error(1, errno, "socket"); + if (cfg_use_epoll) { + struct epoll_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.data.fd = fd; + if (cfg_epollet) + ev.events |= EPOLLET; + epfd = epoll_create(1); + if (epfd <= 0) + error(1, errno, "epoll_create"); + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "epoll_ctl"); + } + /* reset expected key on each new socket */ saved_tskey = -1; @@ -525,19 +636,28 @@ static void do_test(int family, unsigned int report_opt) error(1, errno, "send"); /* wait for all errors to be queued, else ACKs arrive OOO */ - if (!cfg_no_delay) - usleep(50 * 1000); + if (cfg_sleep_usec) + usleep(cfg_sleep_usec); - __poll(fd); + if (!cfg_busy_poll) { + if (cfg_use_epoll) + __epoll(epfd); + else + __poll(fd); + } while (!recv_errmsg(fd)) {} } + print_timing_event("USR-ENQ", &usr_enq); + print_timing_event("USR-SND", &usr_snd); + print_timing_event("USR-ACK", &usr_ack); + if (close(fd)) error(1, errno, "close"); free(buf); - usleep(100 * 1000); + usleep(100 * NSEC_PER_USEC); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -547,18 +667,22 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -b: busy poll to read from error queue\n" " -c N: number of packets for each test\n" " -C: use cmsg to set tstamp recording options\n" - " -D: no delay between packets\n" - " -F: poll() waits forever for an event\n" + " -e: use level-triggered epoll() instead of poll()\n" + " -E: use event-triggered epoll() instead of poll()\n" + " -F: poll()/epoll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -L listen on hostname and port\n" " -n: set no-payload option\n" + " -N: print timestamps and durations in nsec (instead of usec)\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" + " -S N: usec to sleep before reading error queue\n" " -u: use udp\n" " -v: validate SND delay (usec)\n" " -V: validate ACK delay (usec)\n" @@ -572,7 +696,8 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { + while ((c = getopt(argc, argv, + "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -580,15 +705,21 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'b': + cfg_busy_poll = true; + break; case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; case 'C': cfg_use_cmsg = true; break; - case 'D': - cfg_no_delay = true; + case 'e': + cfg_use_epoll = true; break; + case 'E': + cfg_use_epoll = true; + cfg_epollet = true; case 'F': cfg_poll_timeout = -1; break; @@ -604,6 +735,9 @@ static void parse_opt(int argc, char **argv) case 'n': cfg_loop_nodata = true; break; + case 'N': + cfg_print_nsec = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -623,6 +757,9 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_RAW; cfg_ipproto = IPPROTO_RAW; break; + case 'S': + cfg_sleep_usec = strtoul(optarg, NULL, 10); + break; case 'u': proto_count++; cfg_proto = SOCK_DGRAM; @@ -653,6 +790,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "pass -P, -r, -R or -u, not multiple"); if (cfg_do_pktinfo && cfg_use_pf_packet) error(1, 0, "cannot ask for pktinfo over pf_packet"); + if (cfg_busy_poll && cfg_use_epoll) + error(1, 0, "pass epoll or busy_poll, not both"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index df0d86ca72b7..eea6f5193693 100755 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -43,15 +43,40 @@ run_test_tcpudpraw() { } run_test_all() { + setup run_test_tcpudpraw # setsockopt run_test_tcpudpraw -C # cmsg run_test_tcpudpraw -n # timestamp w/o data + echo "OK. All tests passed" +} + +run_test_one() { + setup + ./txtimestamp $@ +} + +usage() { + echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]" + echo " (no args) Run all tests" + echo " -r|--run Run an individual test with arguments" + echo " -h|--help Help" +} + +main() { + if [[ $# -eq 0 ]]; then + run_test_all + else + if [[ "$1" = "-r" || "$1" == "--run" ]]; then + shift + run_test_one $@ + else + usage + fi + fi } if [[ "$(ip netns identify)" == "root" ]]; then - ../../net/in_netns.sh $0 $@ + ./in_netns.sh $0 $@ else - setup - run_test_all - echo "OK. All tests passed" + main $@ fi diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore deleted file mode 100644 index d9355035e746..000000000000 --- a/tools/testing/selftests/networking/timestamping/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -timestamping -rxtimestamp -txtimestamp -hwtstamp_config diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile deleted file mode 100644 index 1de8bd8ccf5d..000000000000 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I../../../../../usr/include - -TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp -TEST_PROGS := txtimestamp.sh - -all: $(TEST_PROGS) - -top_srcdir = ../../../../.. -KSFT_KHDR_INSTALL := 1 -include ../../lib.mk diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config deleted file mode 100644 index a13e3169b0a4..000000000000 --- a/tools/testing/selftests/networking/timestamping/config +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_IFB=y -CONFIG_NET_SCH_NETEM=y diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c03af4600281..c33a7aac27ff 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -31,6 +31,7 @@ CONFIG_NET_EMATCH_U32=m CONFIG_NET_EMATCH_META=m CONFIG_NET_EMATCH_TEXT=m CONFIG_NET_EMATCH_IPSET=m +CONFIG_NET_EMATCH_CANID=m CONFIG_NET_EMATCH_IPT=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m @@ -58,3 +59,8 @@ CONFIG_NET_IFE_SKBPRIO=m CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m + +# +## Network testing +# +CONFIG_CAN=m diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 98a20faf3198..e788c114a484 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -372,5 +372,907 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "bae4", + "name": "Add basic filter with u32 ematch u8/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 0x0f at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(01000000/0f000000 at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e6cb", + "name": "Add basic filter with u32 ematch u8/zero offset and invalid value >0xFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/0f000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7727", + "name": "Add basic filter with u32 ematch u8/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0x1f at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17000000/1f000000 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "a429", + "name": "Add basic filter with u32 ematch u8/invalid mask >0xFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff00 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000/ff000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "8373", + "name": "Add basic filter with u32 ematch u8/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "ab8e", + "name": "Add basic filter with u32 ematch u8/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "712d", + "name": "Add basic filter with u32 ematch u8/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "350f", + "name": "Add basic filter with u32 ematch u8/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 zero 0xff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e28f", + "name": "Add basic filter with u32 ematch u8/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11000000/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6d5f", + "name": "Add basic filter with u32 ematch u8/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at -14)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(0000a000/0000f000 at -16\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "12dc", + "name": "Add basic filter with u32 ematch u8/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "1d85", + "name": "Add basic filter with u32 ematch u16/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x1122 0xffff at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/ffff0000 at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "3672", + "name": "Add basic filter with u32 ematch u16/zero offset and invalid value >0xFFFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x112233 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223300/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7fb0", + "name": "Add basic filter with u32 ematch u16/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0x1fff at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17880000/1fff0000 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "19af", + "name": "Add basic filter with u32 ematch u16/invalid mask >0xFFFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffffffff at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffffffff at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "446d", + "name": "Add basic filter with u32 ematch u16/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000 at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "151b", + "name": "Add basic filter with u32 ematch u16/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bb23", + "name": "Add basic filter with u32 ematch u16/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "decc", + "name": "Add basic filter with u32 ematch u16/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 zero 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "e988", + "name": "Add basic filter with u32 ematch u16/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "07d8", + "name": "Add basic filter with u32 ematch u16/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xffff at -12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabb0000/ffff0000 at -12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f474", + "name": "Add basic filter with u32 ematch u16/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "47a0", + "name": "Add basic filter with u32 ematch u32/zero offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at 0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at 0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "849f", + "name": "Add basic filter with u32 ematch u32/positive offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227080/1ffff0f0 at 12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "d288", + "name": "Add basic filter with u32 ematch u32/missing offset", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0xffffffff at)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227788/ffffffff at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "4998", + "name": "Add basic filter with u32 ematch u32/missing AT keyword", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x77889900 0xfffff0f0 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77889900/fffff0f0 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "1f0a", + "name": "Add basic filter with u32 ematch u32/missing value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 at 12)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "848e", + "name": "Add basic filter with u32 ematch u32/non-numeric value", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 zero 0xffff at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f748", + "name": "Add basic filter with u32 ematch u32/non-numeric mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11223344 mask at 0)' classid 1:1", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223344/00000000 at 0\\)", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "55a6", + "name": "Add basic filter with u32 ematch u32/negative offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aa00cc00/ff00ff00 at -12\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7282", + "name": "Add basic filter with u32 ematch u32/nexthdr+ offset and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "b2b6", + "name": "Add basic filter with canid ematch and single SFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "f67f", + "name": "Add basic filter with canid ematch and single SFF with mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaabb:0x00ff)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x2BB:0xFF\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bd5c", + "name": "Add basic filter with canid ematch and multiple SFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1 sff 2 sff 3)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "83c7", + "name": "Add basic filter with canid ematch and multiple SFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "a8f5", + "name": "Add basic filter with canid ematch and single EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "98ae", + "name": "Add basic filter with canid ematch and single EFF with mask", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaabb:0xf1f1)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAABB:0xF1F1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6056", + "name": "Add basic filter with canid ematch and multiple EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1 eff 2 eff 3)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "d188", + "name": "Add basic filter with canid ematch and multiple EFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "25d1", + "name": "Add basic filter with canid ematch and a combination of SFF/EFF", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01 eff 0x02)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2 sff 0x1\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "b438", + "name": "Add basic filter with canid ematch and a combination of SFF/EFF with masks", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01:0xf eff 0x02:0xf)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json new file mode 100644 index 000000000000..0703a2a255eb --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json @@ -0,0 +1,185 @@ +[ + { + "id": "8b6e", + "name": "Create RED with no flags", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "342e", + "name": "Create RED with adaptive flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2d4b", + "name": "Create RED with ECN flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "650f", + "name": "Create RED with flags ECN, adaptive", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5f15", + "name": "Create RED with flags ECN, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "53e8", + "name": "Create RED with flags ECN, nodrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "d091", + "name": "Fail to create RED with only nodrop flag", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "af8e", + "name": "Create RED with flags ECN, nodrop, harddrop", + "category": [ + "qdisc", + "red" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 1d8b93f1af31..5a4fb80fa832 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -55,6 +55,78 @@ static void test_stream_connection_reset(const struct test_opts *opts) close(fd); } +static void test_stream_bind_only_client(const struct test_opts *opts) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = opts->peer_cid, + }, + }; + int ret; + int fd; + + /* Wait for the server to be ready */ + control_expectln("BIND"); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret != -1) { + fprintf(stderr, "expected connect(2) failure, got %d\n", ret); + exit(EXIT_FAILURE); + } + if (errno != ECONNRESET) { + fprintf(stderr, "unexpected connect(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + + /* Notify the server that the client has finished */ + control_writeln("DONE"); + + close(fd); +} + +static void test_stream_bind_only_server(const struct test_opts *opts) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = VMADDR_CID_ANY, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + /* Notify the client that the server is ready */ + control_writeln("BIND"); + + /* Wait for the client to finish */ + control_expectln("DONE"); + + close(fd); +} + static void test_stream_client_close_client(const struct test_opts *opts) { int fd; @@ -213,6 +285,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_connection_reset, }, { + .name = "SOCK_STREAM bind only", + .run_client = test_stream_bind_only_client, + .run_server = test_stream_bind_only_server, + }, + { .name = "SOCK_STREAM client close", .run_client = test_stream_client_close_client, .run_server = test_stream_client_close_server, |