diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2020-08-06 13:15:47 +0200 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2020-08-06 13:15:47 +0200 |
commit | 94fb1afb14c4f0ceb8c5508ddddac6819f662e95 (patch) | |
tree | 4988e5769dc7482caa7f441475ae31f50bbd37ef /tools/testing/selftests | |
parent | perf evsel: Don't set sample_regs_intr/sample_regs_user for dummy event (diff) | |
parent | Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next (diff) | |
download | linux-94fb1afb14c4f0ceb8c5508ddddac6819f662e95.tar.xz linux-94fb1afb14c4f0ceb8c5508ddddac6819f662e95.zip |
Mgerge remote-tracking branch 'torvalds/master' into perf/core
To sync headers, for instance, in this case tools/perf was ahead of
upstream till Linus merged tip/perf/core to get the
PERF_RECORD_TEXT_POKE changes:
Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/testing/selftests')
201 files changed, 14011 insertions, 1144 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1195bd85af38..e03bc15ce731 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,6 +6,7 @@ TARGETS += breakpoints TARGETS += capabilities TARGETS += cgroup TARGETS += clone3 +TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += drivers/dma-buf @@ -15,6 +16,7 @@ TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll TARGETS += firmware +TARGETS += fpu TARGETS += ftrace TARGETS += futex TARGETS += gpio @@ -54,6 +56,7 @@ TARGETS += splice TARGETS += static_keys TARGETS += sync TARGETS += sysctl +TARGETS += tc-testing TARGETS += timens ifneq (1, $(quicktest)) TARGETS += timers diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 22aaec74ea0a..e7a8cf83ba48 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -111,6 +111,7 @@ SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build INCLUDE_DIR := $(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a +RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. @@ -134,12 +135,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -177,13 +178,28 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers -$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): +$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) +ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(call msg,CP,,$@) + cp "$(VMLINUX_H)" $@ +endif + +$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ + $(TOOLSDIR)/bpf/resolve_btfids/main.c \ + $(TOOLSDIR)/lib/rbtree.c \ + $(TOOLSDIR)/lib/zalloc.c \ + $(TOOLSDIR)/lib/string.c \ + $(TOOLSDIR)/lib/ctype.c \ + $(TOOLSDIR)/lib/str_error_r.c + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ + OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, @@ -347,9 +363,11 @@ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ + $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 6f8988738bc1..719ab56cdb5d 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,20 +2,6 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ -/* - * legacy bpf_map_def with extra fields supported only by bpf_load(), do not - * use outside of samples/bpf - */ -struct bpf_map_def_legacy { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - #define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ struct ____btf_map_##name { \ type_key key; \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 0fb910df5387..033051717ba5 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -290,3 +290,26 @@ free_mem: free(fhp); return ret; } + +int cgroup_setup_and_join(const char *path) { + int cg_fd; + + if (setup_cgroup_environment()) { + fprintf(stderr, "Failed to setup cgroup environment\n"); + return -EINVAL; + } + + cg_fd = create_and_get_cgroup(path); + if (cg_fd < 0) { + fprintf(stderr, "Failed to create test cgroup\n"); + cleanup_cgroup_environment(); + return cg_fd; + } + + if (join_cgroup(path)) { + fprintf(stderr, "Failed to join cgroup\n"); + cleanup_cgroup_environment(); + return -EINVAL; + } + return cg_fd; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index d64bb8957090..5fe3d88e4f0d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -9,6 +9,7 @@ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) +int cgroup_setup_and_join(const char *path); int create_and_get_cgroup(const char *path); int join_cgroup(const char *path); int setup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index e8da7b39158d..b8d6aef99db4 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -58,20 +58,10 @@ int main(int argc, char **argv) int exit_code = 1; char buf[256]; - err = setup_cgroup_environment(); - if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err, - errno)) + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n", - cgroup_fd, errno)) - goto cleanup_cgroup_env; - - err = join_cgroup(TEST_CGROUP); - if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno)) - goto cleanup_cgroup_env; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index e36dd1a1780d..f56655690f9b 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -7,8 +7,6 @@ #include <arpa/inet.h> -#include <sys/epoll.h> - #include <linux/err.h> #include <linux/in.h> #include <linux/in6.h> @@ -17,8 +15,13 @@ #include "network_helpers.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) -#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ - __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) +#define log_err(MSG, ...) ({ \ + int __save = errno; \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), \ + ##__VA_ARGS__); \ + errno = __save; \ +}) struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -37,131 +40,169 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -int start_server_with_port(int family, int type, __u16 port) +static int settimeo(int fd, int timeout_ms) { - struct sockaddr_storage addr = {}; - socklen_t len; - int fd; + struct timeval timeout = { .tv_sec = 3 }; - if (family == AF_INET) { - struct sockaddr_in *sin = (void *)&addr; + if (timeout_ms > 0) { + timeout.tv_sec = timeout_ms / 1000; + timeout.tv_usec = (timeout_ms % 1000) * 1000; + } - sin->sin_family = AF_INET; - sin->sin_port = htons(port); - len = sizeof(*sin); - } else { - struct sockaddr_in6 *sin6 = (void *)&addr; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_RCVTIMEO"); + return -1; + } - sin6->sin6_family = AF_INET6; - sin6->sin6_port = htons(port); - len = sizeof(*sin6); + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_SNDTIMEO"); + return -1; } - fd = socket(family, type | SOCK_NONBLOCK, 0); + return 0; +} + +#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) + +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct sockaddr_storage addr = {}; + socklen_t len; + int fd; + + if (make_sockaddr(family, addr_str, port, &addr, &len)) + return -1; + + fd = socket(family, type, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; } + if (settimeo(fd, timeout_ms)) + goto error_close; + if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { log_err("Failed to bind socket"); - close(fd); - return -1; + goto error_close; } if (type == SOCK_STREAM) { if (listen(fd, 1) < 0) { log_err("Failed to listed on socket"); - close(fd); - return -1; + goto error_close; } } return fd; + +error_close: + save_errno_close(fd); + return -1; } -int start_server(int family, int type) +static int connect_fd_to_addr(int fd, + const struct sockaddr_storage *addr, + socklen_t addrlen) { - return start_server_with_port(family, type, 0); -} + if (connect(fd, (const struct sockaddr *)addr, addrlen)) { + log_err("Failed to connect to server"); + return -1; + } -static const struct timeval timeo_sec = { .tv_sec = 3 }; -static const size_t timeo_optlen = sizeof(timeo_sec); + return 0; +} -int connect_to_fd(int family, int type, int server_fd) +int connect_to_fd(int server_fd, int timeout_ms) { - int fd, save_errno; + struct sockaddr_storage addr; + struct sockaddr_in *addr_in; + socklen_t addrlen, optlen; + int fd, type; - fd = socket(family, type, 0); - if (fd < 0) { - log_err("Failed to create client socket"); + optlen = sizeof(type); + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); return -1; } - if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) { - save_errno = errno; - close(fd); - errno = save_errno; + addrlen = sizeof(addr); + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { + log_err("Failed to get server addr"); return -1; } + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, timeout_ms)) + goto error_close; + + if (connect_fd_to_addr(fd, &addr, addrlen)) + goto error_close; + return fd; + +error_close: + save_errno_close(fd); + return -1; } -int connect_fd_to_fd(int client_fd, int server_fd) +int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) { struct sockaddr_storage addr; socklen_t len = sizeof(addr); - int save_errno; - if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, - timeo_optlen)) { - log_err("Failed to set SO_RCVTIMEO"); + if (settimeo(client_fd, timeout_ms)) return -1; - } if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { log_err("Failed to get server addr"); return -1; } - if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) { - if (errno != EINPROGRESS) { - save_errno = errno; - log_err("Failed to connect to server"); - errno = save_errno; - } + if (connect_fd_to_addr(client_fd, &addr, len)) return -1; - } return 0; } -int connect_wait(int fd) +int make_sockaddr(int family, const char *addr_str, __u16 port, + struct sockaddr_storage *addr, socklen_t *len) { - struct epoll_event ev = {}, events[2]; - int timeout_ms = 1000; - int efd, nfd; - - efd = epoll_create1(EPOLL_CLOEXEC); - if (efd < 0) { - log_err("Failed to open epoll fd"); - return -1; - } + if (family == AF_INET) { + struct sockaddr_in *sin = (void *)addr; - ev.events = EPOLLRDHUP | EPOLLOUT; - ev.data.fd = fd; + sin->sin_family = AF_INET; + sin->sin_port = htons(port); + if (addr_str && + inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { + log_err("inet_pton(AF_INET, %s)", addr_str); + return -1; + } + if (len) + *len = sizeof(*sin); + return 0; + } else if (family == AF_INET6) { + struct sockaddr_in6 *sin6 = (void *)addr; - if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) { - log_err("Failed to register fd=%d on epoll fd=%d", fd, efd); - close(efd); - return -1; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + if (addr_str && + inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { + log_err("inet_pton(AF_INET6, %s)", addr_str); + return -1; + } + if (len) + *len = sizeof(*sin6); + return 0; } - - nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms); - if (nfd < 0) - log_err("Failed to wait for I/O event on epoll fd=%d", efd); - - close(efd); - return nfd; + return -1; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 6a8009605670..c3728f6667e4 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,10 +33,11 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; -int start_server(int family, int type); -int start_server_with_port(int family, int type, __u16 port); -int connect_to_fd(int family, int type, int server_fd); -int connect_fd_to_fd(int client_fd, int server_fd); -int connect_wait(int client_fd); +int start_server(int family, int type, const char *addr, __u16 port, + int timeout_ms); +int connect_to_fd(int server_fd, int timeout_ms); +int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); +int make_sockaddr(int family, const char *addr_str, __u16 port, + struct sockaddr_storage *addr, socklen_t *len); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c new file mode 100644 index 000000000000..3693f7d133eb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/autoload.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_autoload.skel.h" + +void test_autoload(void) +{ + int duration = 0, err; + struct test_autoload* skel; + + skel = test_autoload__open_and_load(); + /* prog3 should be broken */ + if (CHECK(skel, "skel_open_and_load", "unexpected success\n")) + goto cleanup; + + skel = test_autoload__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + /* don't load prog3 */ + bpf_program__set_autoload(skel->progs.prog3, false); + + err = test_autoload__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + err = test_autoload__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(!skel->bss->prog1_called, "prog1", "not called\n"); + CHECK(!skel->bss->prog2_called, "prog2", "not called\n"); + CHECK(skel->bss->prog3_called, "prog3", "called?!\n"); + +cleanup: + test_autoload__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 87c29dde1cf9..4ffefdc1130f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -5,11 +5,23 @@ #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_task.skel.h" +#include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_tcp4.skel.h" +#include "bpf_iter_tcp6.skel.h" +#include "bpf_iter_udp4.skel.h" +#include "bpf_iter_udp6.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" #include "bpf_iter_test_kern4.skel.h" +#include "bpf_iter_bpf_hash_map.skel.h" +#include "bpf_iter_bpf_percpu_hash_map.skel.h" +#include "bpf_iter_bpf_array_map.skel.h" +#include "bpf_iter_bpf_percpu_array_map.skel.h" +#include "bpf_iter_bpf_sk_storage_map.skel.h" +#include "bpf_iter_test_kern5.skel.h" +#include "bpf_iter_test_kern6.skel.h" static int duration; @@ -106,6 +118,20 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_stack(void) +{ + struct bpf_iter_task_stack *skel; + + skel = bpf_iter_task_stack__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task_stack); + + bpf_iter_task_stack__destroy(skel); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; @@ -120,6 +146,62 @@ static void test_task_file(void) bpf_iter_task_file__destroy(skel); } +static void test_tcp4(void) +{ + struct bpf_iter_tcp4 *skel; + + skel = bpf_iter_tcp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp4); + + bpf_iter_tcp4__destroy(skel); +} + +static void test_tcp6(void) +{ + struct bpf_iter_tcp6 *skel; + + skel = bpf_iter_tcp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp6); + + bpf_iter_tcp6__destroy(skel); +} + +static void test_udp4(void) +{ + struct bpf_iter_udp4 *skel; + + skel = bpf_iter_udp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp4); + + bpf_iter_udp4__destroy(skel); +} + +static void test_udp6(void) +{ + struct bpf_iter_udp6 *skel; + + skel = bpf_iter_udp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp6); + + bpf_iter_udp6__destroy(skel); +} + /* The expected string is less than 16 bytes */ static int do_read_with_fd(int iter_fd, const char *expected, bool read_one_char) @@ -380,6 +462,440 @@ out: bpf_iter_test_kern4__destroy(skel); } +static void test_bpf_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_hash_map *skel; + int err, i, len, map_fd, iter_fd; + __u64 val, expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + + skel = bpf_iter_bpf_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__open", + "skeleton open failed\n")) + return; + + skel->bss->in_test_mode = true; + + err = bpf_iter_bpf_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* iterator with hashmap2 and hashmap3 should fail */ + opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap2 unexpected succeeded\n")) + goto out; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap3 unexpected succeeded\n")) + goto out; + + /* hashmap1 should be good, update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + val = i + 4; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_hash_map__destroy(skel); +} + +static void test_bpf_percpu_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_hash_map *skel; + int err, i, j, len, map_fd, iter_fd; + __u32 expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + void *val; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_hash_map__destroy(skel); +} + +static void test_bpf_array_map(void) +{ + __u64 val, expected_val = 0, res_first_val, first_val = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + __u32 expected_key = 0, res_first_key; + struct bpf_iter_bpf_array_map *skel; + int err, i, map_fd, iter_fd; + struct bpf_link *link; + char buf[64] = {}; + int len, start; + + skel = bpf_iter_bpf_array_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + val = i + 4; + expected_key += i; + expected_val += val; + + if (i == 0) + first_val = val; + + err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + start = 0; + while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) + start += len; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + res_first_key = *(__u32 *)buf; + res_first_val = *(__u64 *)(buf + sizeof(__u32)); + if (CHECK(res_first_key != 0 || res_first_val != first_val, + "bpf_seq_write", + "seq_write failure: first key %u vs expected 0, " + " first value %llu vs expected %llu\n", + res_first_key, res_first_val, first_val)) + goto close_iter; + + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + err = bpf_map_lookup_elem(map_fd, &i, &val); + if (CHECK(err, "map_lookup", "map_lookup failed\n")) + goto out; + if (CHECK(i != val, "invalid_val", + "got value %llu expected %u\n", val, i)) + goto out; + } + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_array_map__destroy(skel); +} + +static void test_bpf_percpu_array_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_array_map *skel; + __u32 expected_key = 0, expected_val = 0; + int err, i, j, map_fd, iter_fd; + struct bpf_link *link; + char buf[64]; + void *val; + int len; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_array_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_array_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + expected_key += i; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_array_map__destroy(skel); +} + +static void test_bpf_sk_storage_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, i, len, map_fd, iter_fd, num_sockets; + struct bpf_iter_bpf_sk_storage_map *skel; + int sock_fd[3] = {-1, -1, -1}; + __u32 val, expected_val = 0; + struct bpf_link *link; + char buf[64]; + + skel = bpf_iter_bpf_sk_storage_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + num_sockets = ARRAY_SIZE(sock_fd); + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno)) + goto out; + + val = i + 1; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &sock_fd[i], &val, + BPF_NOEXIST); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->ipv6_sk_count != num_sockets, + "ipv6_sk_count", "got %u expected %u\n", + skel->bss->ipv6_sk_count, num_sockets)) + goto close_iter; + + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; i < num_sockets; i++) { + if (sock_fd[i] >= 0) + close(sock_fd[i]); + } + bpf_iter_bpf_sk_storage_map__destroy(skel); +} + +static void test_rdonly_buf_out_of_bound(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_test_kern5 *skel; + struct bpf_link *link; + + skel = bpf_iter_test_kern5__open_and_load(); + if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load", + "skeleton open_and_load failed\n")) + return; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) + bpf_link__destroy(link); + + bpf_iter_test_kern5__destroy(skel); +} + +static void test_buf_neg_offset(void) +{ + struct bpf_iter_test_kern6 *skel; + + skel = bpf_iter_test_kern6__open_and_load(); + if (CHECK(skel, "bpf_iter_test_kern6__open_and_load", + "skeleton open_and_load unexpected success\n")) + bpf_iter_test_kern6__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -392,8 +908,18 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_stack")) + test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("tcp4")) + test_tcp4(); + if (test__start_subtest("tcp6")) + test_tcp6(); + if (test__start_subtest("udp4")) + test_udp4(); + if (test__start_subtest("udp6")) + test_udp6(); if (test__start_subtest("anon")) test_anon_iter(false); if (test__start_subtest("anon-read-one-char")) @@ -406,4 +932,18 @@ void test_bpf_iter(void) test_overflow(true, false); if (test__start_subtest("prog-ret-1")) test_overflow(false, true); + if (test__start_subtest("bpf_hash_map")) + test_bpf_hash_map(); + if (test__start_subtest("bpf_percpu_hash_map")) + test_bpf_percpu_hash_map(); + if (test__start_subtest("bpf_array_map")) + test_bpf_array_map(); + if (test__start_subtest("bpf_percpu_array_map")) + test_bpf_percpu_array_map(); + if (test__start_subtest("bpf_sk_storage_map")) + test_bpf_sk_storage_map(); + if (test__start_subtest("rdonly-buf-out-of-bound")) + test_rdonly_buf_out_of_bound(); + if (test__start_subtest("buf-neg-offset")) + test_buf_neg_offset(); } diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c new file mode 100644 index 000000000000..643dfa35419c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "progs/cg_storage_multi.h" + +#include "cg_storage_multi_egress_only.skel.h" +#include "cg_storage_multi_isolated.skel.h" +#include "cg_storage_multi_shared.skel.h" + +#define PARENT_CGROUP "/cgroup_storage" +#define CHILD_CGROUP "/cgroup_storage/child" + +static int duration; + +static bool assert_storage(struct bpf_map *map, const void *key, + struct cgroup_value *expected) +{ + struct cgroup_value value; + int map_fd; + + map_fd = bpf_map__fd(map); + + if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0, + "map-lookup", "errno %d", errno)) + return true; + if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)), + "assert-storage", "storages differ")) + return true; + + return false; +} + +static bool assert_storage_noexist(struct bpf_map *map, const void *key) +{ + struct cgroup_value value; + int map_fd; + + map_fd = bpf_map__fd(map); + + if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0, + "map-lookup", "succeeded, expected ENOENT")) + return true; + if (CHECK(errno != ENOENT, + "map-lookup", "errno %d, expected ENOENT", errno)) + return true; + + return false; +} + +static bool connect_send(const char *cgroup_path) +{ + bool res = true; + int server_fd = -1, client_fd = -1; + + if (join_cgroup(cgroup_path)) + goto out_clean; + + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); + if (server_fd < 0) + goto out_clean; + + client_fd = connect_to_fd(server_fd, 0); + if (client_fd < 0) + goto out_clean; + + if (send(client_fd, "message", strlen("message"), 0) < 0) + goto out_clean; + + res = false; + +out_clean: + close(client_fd); + close(server_fd); + return res; +} + +static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_egress_only *obj; + struct cgroup_value expected_cgroup_value; + struct bpf_cgroup_storage_key key; + struct bpf_link *parent_link = NULL, *child_link = NULL; + bool err; + + key.attach_type = BPF_CGROUP_INET_EGRESS; + + obj = cg_storage_multi_egress_only__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there is only one run and in that run the storage is + * parent cgroup's storage. + * Also assert that child cgroup's storage does not exist + */ + parent_link = bpf_program__attach_cgroup(obj->progs.egress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_link), "parent-cg-attach", + "err %ld", PTR_ERR(parent_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 1, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there are two additional runs, one that run with parent + * cgroup's storage and one with child cgroup's storage. + */ + child_link = bpf_program__attach_cgroup(obj->progs.egress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_link), "child-cg-attach", + "err %ld", PTR_ERR(child_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_link)) + bpf_link__destroy(parent_link); + if (!IS_ERR(child_link)) + bpf_link__destroy(child_link); + + cg_storage_multi_egress_only__destroy(obj); +} + +static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_isolated *obj; + struct cgroup_value expected_cgroup_value; + struct bpf_cgroup_storage_key key; + struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL; + struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL; + struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL; + bool err; + + obj = cg_storage_multi_isolated__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there is three runs, two with parent cgroup egress and + * one with parent cgroup ingress, stored in separate parent storages. + * Also assert that child cgroup's storages does not exist + */ + parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", + "err %ld", PTR_ERR(parent_egress1_link))) + goto close_bpf_object; + parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", + "err %ld", PTR_ERR(parent_egress2_link))) + goto close_bpf_object; + parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", + "err %ld", PTR_ERR(parent_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there is six additional runs, parent cgroup egresses and + * ingress, child cgroup egresses and ingress. + * Assert that egree and ingress storages are separate. + */ + child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", + "err %ld", PTR_ERR(child_egress1_link))) + goto close_bpf_object; + child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", + "err %ld", PTR_ERR(child_egress2_link))) + goto close_bpf_object; + child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", + "err %ld", PTR_ERR(child_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 9, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_egress1_link)) + bpf_link__destroy(parent_egress1_link); + if (!IS_ERR(parent_egress2_link)) + bpf_link__destroy(parent_egress2_link); + if (!IS_ERR(parent_ingress_link)) + bpf_link__destroy(parent_ingress_link); + if (!IS_ERR(child_egress1_link)) + bpf_link__destroy(child_egress1_link); + if (!IS_ERR(child_egress2_link)) + bpf_link__destroy(child_egress2_link); + if (!IS_ERR(child_ingress_link)) + bpf_link__destroy(child_ingress_link); + + cg_storage_multi_isolated__destroy(obj); +} + +static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_shared *obj; + struct cgroup_value expected_cgroup_value; + __u64 key; + struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL; + struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL; + struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL; + bool err; + + obj = cg_storage_multi_shared__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there is three runs, two with parent cgroup egress and + * one with parent cgroup ingress. + * Also assert that child cgroup's storage does not exist + */ + parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", + "err %ld", PTR_ERR(parent_egress1_link))) + goto close_bpf_object; + parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", + "err %ld", PTR_ERR(parent_egress2_link))) + goto close_bpf_object; + parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", + "err %ld", PTR_ERR(parent_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 2, + .ingress_pkts = 1, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key = get_cgroup_id(CHILD_CGROUP); + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there is six additional runs, parent cgroup egresses and + * ingress, child cgroup egresses and ingress. + */ + child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", + "err %ld", PTR_ERR(child_egress1_link))) + goto close_bpf_object; + child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", + "err %ld", PTR_ERR(child_egress2_link))) + goto close_bpf_object; + child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", + "err %ld", PTR_ERR(child_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 9, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 4, + .ingress_pkts = 2, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key = get_cgroup_id(CHILD_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 2, + .ingress_pkts = 1, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_egress1_link)) + bpf_link__destroy(parent_egress1_link); + if (!IS_ERR(parent_egress2_link)) + bpf_link__destroy(parent_egress2_link); + if (!IS_ERR(parent_ingress_link)) + bpf_link__destroy(parent_ingress_link); + if (!IS_ERR(child_egress1_link)) + bpf_link__destroy(child_egress1_link); + if (!IS_ERR(child_egress2_link)) + bpf_link__destroy(child_egress2_link); + if (!IS_ERR(child_ingress_link)) + bpf_link__destroy(child_ingress_link); + + cg_storage_multi_shared__destroy(obj); +} + +void test_cg_storage_multi(void) +{ + int parent_cgroup_fd = -1, child_cgroup_fd = -1; + + parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP); + if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno)) + goto close_cgroup_fd; + child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP); + if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno)) + goto close_cgroup_fd; + + if (test__start_subtest("egress_only")) + test_egress_only(parent_cgroup_fd, child_cgroup_fd); + + if (test__start_subtest("isolated")) + test_isolated(parent_cgroup_fd, child_cgroup_fd); + + if (test__start_subtest("shared")) + test_shared(parent_cgroup_fd, child_cgroup_fd); + +close_cgroup_fd: + close(child_cgroup_fd); + close(parent_cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 6e04f8d1d15b..4d9b514b3fd9 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -2,6 +2,7 @@ #include <test_progs.h> #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "test_cgroup_link.skel.h" static __u32 duration = 0; @@ -37,7 +38,8 @@ void test_cgroup_link(void) int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs); DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts); struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link; - __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags; + __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id; + struct bpf_link_info info; int i = 0, err, prog_fd; bool detach_legacy = false; @@ -219,6 +221,22 @@ void test_cgroup_link(void) /* BPF programs should still get called */ ping_and_check(0, cg_nr); + prog_id = link_info_prog_id(links[0], &info); + CHECK(prog_id == 0, "link_info", "failed\n"); + CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id); + + err = bpf_link__detach(links[0]); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto cleanup; + + /* cgroup_id should be zero in link_info */ + prog_id = link_info_prog_id(links[0], &info); + CHECK(prog_id == 0, "link_info", "failed\n"); + CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id); + + /* First BPF program shouldn't be called anymore */ + ping_and_check(0, cg_nr - 1); + /* leave cgroup and remove them, don't detach programs */ cleanup_cgroup_environment(); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c index 059047af7df3..464edc1c1708 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) socklen_t addr_len = sizeof(addr); __u32 duration = 0; - serv_sk = start_server(AF_INET6, SOCK_STREAM); + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) return; @@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) *g_serv_port = addr.sin6_port; /* Client outside of test cgroup should fail to connect by timeout. */ - err = connect_fd_to_fd(out_sk, serv_sk); + err = connect_fd_to_fd(out_sk, serv_sk, 1000); if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd", "unexpected result err %d errno %d\n", err, errno)) goto cleanup; - err = connect_wait(out_sk); - if (CHECK(err, "connect_wait", "unexpected result %d\n", err)) - goto cleanup; - /* Client inside test cgroup should connect just fine. */ - in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk); + in_sk = connect_to_fd(serv_sk, 0); if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno)) goto cleanup; @@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void) * differs from that of testing cgroup. Moving selftests process to * testing cgroup won't change cgroup id of an already created socket. */ - out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + out_sk = socket(AF_INET6, SOCK_STREAM, 0); if (CHECK_FAIL(out_sk < 0)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 17bbf76812ca..9229db2f5ca5 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - fd = connect_to_fd(family, type, server_fd); + fd = connect_to_fd(server_fd, 0); if (fd < 0) { err = -1; goto close_bpf_object; @@ -137,25 +137,25 @@ void test_connect_force_port(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124); + server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123); + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124); + server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM)); diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c new file mode 100644 index 000000000000..6acb0e94d4d7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#define _GNU_SOURCE +#include <test_progs.h> +#include "test_core_retro.skel.h" + +void test_core_retro(void) +{ + int err, zero = 0, res, duration = 0, my_pid = getpid(); + struct test_core_retro *skel; + + /* load program */ + skel = test_core_retro__open_and_load(); + if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) + goto out_close; + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0); + if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno)) + goto out_close; + + /* attach probe */ + err = test_core_retro__attach(skel); + if (CHECK(err, "attach_kprobe", "err %d\n", err)) + goto out_close; + + /* trigger */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res); + if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno)) + goto out_close; + + CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid); + +out_close: + test_core_retro__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c new file mode 100644 index 000000000000..1a11612ace6c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/endian.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include "test_endian.skel.h" + +static int duration; + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +#define OUT16 0x3412 +#define OUT32 0x78563412U +#define OUT64 0xf0debc9a78563412ULL + +void test_endian(void) +{ + struct test_endian* skel; + struct test_endian__bss *bss; + int err; + + skel = test_endian__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + bss->in16 = IN16; + bss->in32 = IN32; + bss->in64 = IN64; + + err = test_endian__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out16, (__u64)OUT16); + CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out32, (__u64)OUT32); + CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out64, (__u64)OUT64); + + CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const16, (__u64)OUT16); + CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const32, (__u64)OUT32); + CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const64, (__u64)OUT64); +cleanup: + test_endian__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c new file mode 100644 index 000000000000..d884b2ed5bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" + +void test_get_stackid_cannot_attach(void) +{ + struct perf_event_attr attr = { + /* .type = PERF_TYPE_SOFTWARE, */ + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .precise_ip = 1, + .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK, + .branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_CALL_STACK, + .sample_period = 5000, + .size = sizeof(struct perf_event_attr), + }; + struct test_stacktrace_build_id *skel; + __u32 duration = 0; + int pmu_fd, err; + + skel = test_stacktrace_build_id__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + return; + + /* override program type */ + bpf_program__set_perf_event(skel->progs.oncpu); + + err = test_stacktrace_build_id__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n", + __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain", + "should have failed\n"); + close(pmu_fd); + + /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */ + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain", + "err: %ld\n", PTR_ERR(skel->links.oncpu)); + close(pmu_fd); + + /* add exclude_callchain_kernel, attach should fail */ + attr.exclude_callchain_kernel = 1; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel", + "should have failed\n"); + close(pmu_fd); + +cleanup: + test_stacktrace_build_id__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c new file mode 100644 index 000000000000..e3d6777226a8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include "test_ksyms.skel.h" +#include <sys/stat.h> + +static int duration; + +static __u64 kallsyms_find(const char *sym) +{ + char type, name[500]; + __u64 addr, res = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) + return 0; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + res = addr; + goto out; + } + } + + CHECK(false, "not_found", "symbol %s not found\n", sym); +out: + fclose(f); + return res; +} + +void test_ksyms(void) +{ + __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); + const char *btf_path = "/sys/kernel/btf/vmlinux"; + struct test_ksyms *skel; + struct test_ksyms__data *data; + struct stat st; + __u64 btf_size; + int err; + + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) + return; + btf_size = st.st_size; + + skel = test_ksyms__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + return; + + err = test_ksyms__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops", + "got 0x%llx, exp 0x%llx\n", + data->out__bpf_link_fops, link_fops_addr); + CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1", + "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); + CHECK(data->out__btf_size != btf_size, "btf_size", + "got %llu, exp %llu\n", data->out__btf_size, btf_size); + CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + +cleanup: + test_ksyms__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c index c1168e4a9036..5a2a689dbb68 100644 --- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -23,7 +23,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; @@ -49,7 +49,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(err)) goto close_bpf_object; - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (CHECK_FAIL(client_fd < 0)) goto close_bpf_object; close(client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c new file mode 100644 index 000000000000..c230a573c373 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <test_progs.h> +#include <network_helpers.h> + +#include "map_ptr_kern.skel.h" + +void test_map_ptr(void) +{ + struct map_ptr_kern *skel; + __u32 duration = 0, retval; + char buf[128]; + int err; + + skel = map_ptr_kern__open_and_load(); + if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + return; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, + sizeof(pkt_v4), buf, NULL, &retval, NULL); + + if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + goto cleanup; + + if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, + skel->bss->g_map_type, skel->bss->g_line)) + goto cleanup; + +cleanup: + map_ptr_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index a122ce3b360e..c33ec180b3f2 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,6 +4,7 @@ #include <sched.h> #include <sys/socket.h> #include <test_progs.h> +#include "test_perf_buffer.skel.h" #include "bpf/libbpf_internal.h" /* AddressSanitizer sometimes crashes due to data dereference below, due to @@ -25,16 +26,11 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void test_perf_buffer(void) { - int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; - const char *prog_name = "kprobe/sys_nanosleep"; - const char *file = "./test_perf_buffer.o"; + int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; struct perf_buffer_opts pb_opts = {}; - struct bpf_map *perf_buf_map; + struct test_perf_buffer *skel; cpu_set_t cpu_set, cpu_seen; - struct bpf_program *prog; - struct bpf_object *obj; struct perf_buffer *pb; - struct bpf_link *link; bool *online; nr_cpus = libbpf_num_possible_cpus(); @@ -51,33 +47,21 @@ void test_perf_buffer(void) nr_on_cpus++; /* load program */ - err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { - obj = NULL; - goto out_close; - } - - prog = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) + skel = test_perf_buffer__open_and_load(); + if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; - /* load map */ - perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map"); - if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) - goto out_close; - - /* attach kprobe */ - link = bpf_program__attach_kprobe(prog, false /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); - if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) + /* attach probe */ + err = test_perf_buffer__attach(skel); + if (CHECK(err, "attach_kprobe", "err %d\n", err)) goto out_close; /* set up perf buffer */ pb_opts.sample_cb = on_sample; pb_opts.ctx = &cpu_seen; - pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts); if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) - goto out_detach; + goto out_close; /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); @@ -94,7 +78,7 @@ void test_perf_buffer(void) &cpu_set); if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", i, err)) - goto out_detach; + goto out_close; usleep(1); } @@ -110,9 +94,7 @@ void test_perf_buffer(void) out_free_pb: perf_buffer__free(pb); -out_detach: - bpf_link__destroy(link); out_close: - bpf_object__close(obj); + test_perf_buffer__destroy(skel); free(online); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c new file mode 100644 index 000000000000..72c3690844fb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <test_progs.h> +#include "perf_event_stackmap.skel.h" + +#ifndef noinline +#define noinline __attribute__((noinline)) +#endif + +noinline int func_1(void) +{ + static int val = 1; + + val += 1; + + usleep(100); + return val; +} + +noinline int func_2(void) +{ + return func_1(); +} + +noinline int func_3(void) +{ + return func_2(); +} + +noinline int func_4(void) +{ + return func_3(); +} + +noinline int func_5(void) +{ + return func_4(); +} + +noinline int func_6(void) +{ + int i, val = 1; + + for (i = 0; i < 100; i++) + val += func_5(); + + return val; +} + +void test_perf_event_stackmap(void) +{ + struct perf_event_attr attr = { + /* .type = PERF_TYPE_SOFTWARE, */ + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .precise_ip = 2, + .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_CALLCHAIN, + .branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_CALL_STACK, + .sample_period = 5000, + .size = sizeof(struct perf_event_attr), + }; + struct perf_event_stackmap *skel; + __u32 duration = 0; + cpu_set_t cpu_set; + int pmu_fd, err; + + skel = perf_event_stackmap__open(); + + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + return; + + err = perf_event_stackmap__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; + + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno)) + goto cleanup; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (pmu_fd < 0) { + printf("%s:SKIP:cpu doesn't support the event\n", __func__); + test__skip(); + goto cleanup; + } + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.oncpu))) { + close(pmu_fd); + goto cleanup; + } + + /* create kernel and user stack traces for testing */ + func_6(); + + CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n"); + CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n"); + CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n"); + CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n"); + +cleanup: + perf_event_stackmap__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c new file mode 100644 index 000000000000..3b127cab4864 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/err.h> +#include <string.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> +#include <linux/btf.h> +#include <linux/kernel.h> +#define CONFIG_DEBUG_INFO_BTF +#include <linux/btf_ids.h> +#include "test_progs.h" + +static int duration; + +struct symbol { + const char *name; + int type; + int id; +}; + +struct symbol test_symbols[] = { + { "unused", BTF_KIND_UNKN, 0 }, + { "S", BTF_KIND_TYPEDEF, -1 }, + { "T", BTF_KIND_TYPEDEF, -1 }, + { "U", BTF_KIND_TYPEDEF, -1 }, + { "S", BTF_KIND_STRUCT, -1 }, + { "U", BTF_KIND_UNION, -1 }, + { "func", BTF_KIND_FUNC, -1 }, +}; + +BTF_ID_LIST(test_list_local) +BTF_ID_UNUSED +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) + +extern __u32 test_list_global[]; +BTF_ID_LIST_GLOBAL(test_list_global) +BTF_ID_UNUSED +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) + +static int +__resolve_symbol(struct btf *btf, int type_id) +{ + const struct btf_type *type; + const char *str; + unsigned int i; + + type = btf__type_by_id(btf, type_id); + if (!type) { + PRINT_FAIL("Failed to get type for ID %d\n", type_id); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { + if (test_symbols[i].id != -1) + continue; + + if (BTF_INFO_KIND(type->info) != test_symbols[i].type) + continue; + + str = btf__name_by_offset(btf, type->name_off); + if (!str) { + PRINT_FAIL("Failed to get name for BTF ID %d\n", type_id); + return -1; + } + + if (!strcmp(str, test_symbols[i].name)) + test_symbols[i].id = type_id; + } + + return 0; +} + +static int resolve_symbols(void) +{ + struct btf *btf; + int type_id; + __u32 nr; + + btf = btf__parse_elf("btf_data.o", NULL); + if (CHECK(libbpf_get_error(btf), "resolve", + "Failed to load BTF from btf_data.o\n")) + return -1; + + nr = btf__get_nr_types(btf); + + for (type_id = 1; type_id <= nr; type_id++) { + if (__resolve_symbol(btf, type_id)) + break; + } + + btf__free(btf); + return 0; +} + +int test_resolve_btfids(void) +{ + __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; + unsigned int i, j; + int ret = 0; + + if (resolve_symbols()) + return -1; + + /* Check BTF_ID_LIST(test_list_local) and + * BTF_ID_LIST_GLOBAL(test_list_global) IDs + */ + for (j = 0; j < ARRAY_SIZE(test_lists); j++) { + test_list = test_lists[j]; + for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { + ret = CHECK(test_list[i] != test_symbols[i].id, + "id_check", + "wrong ID for %s (%d != %d)\n", + test_symbols[i].name, + test_list[i], test_symbols[i].id); + } + } + + return ret; +} diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 713167449c98..8b571890c57e 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -35,7 +35,7 @@ static struct sec_name_test tests[] = { {-EINVAL, 0}, }, {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} }, - {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, + {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} }, {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} }, diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c new file mode 100644 index 000000000000..c571584c00f5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -0,0 +1,1329 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2020 Cloudflare +/* + * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP). + * + * Tests exercise: + * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook, + * - redirecting socket lookup to a socket selected by BPF program, + * - failing a socket lookup on BPF program's request, + * - error scenarios for selecting a socket from BPF program, + * - accessing BPF program context, + * - attaching and running multiple BPF programs. + * + * Tests run in a dedicated network namespace. + */ + +#define _GNU_SOURCE +#include <arpa/inet.h> +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <bpf/libbpf.h> +#include <bpf/bpf.h> + +#include "test_progs.h" +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "testing_helpers.h" +#include "test_sk_lookup.skel.h" + +/* External (address, port) pairs the client sends packets to. */ +#define EXT_IP4 "127.0.0.1" +#define EXT_IP6 "fd00::1" +#define EXT_PORT 7007 + +/* Internal (address, port) pairs the server listens/receives at. */ +#define INT_IP4 "127.0.0.2" +#define INT_IP4_V6 "::ffff:127.0.0.2" +#define INT_IP6 "fd00::2" +#define INT_PORT 8008 + +#define IO_TIMEOUT_SEC 3 + +enum server { + SERVER_A = 0, + SERVER_B = 1, + MAX_SERVERS, +}; + +enum { + PROG1 = 0, + PROG2, +}; + +struct inet_addr { + const char *ip; + unsigned short port; +}; + +struct test { + const char *desc; + struct bpf_program *lookup_prog; + struct bpf_program *reuseport_prog; + struct bpf_map *sock_map; + int sotype; + struct inet_addr connect_to; + struct inet_addr listen_at; + enum server accept_on; + bool reuseport_has_conns; /* Add a connected socket to reuseport group */ +}; + +static __u32 duration; /* for CHECK macro */ + +static bool is_ipv6(const char *ip) +{ + return !!strchr(ip, ':'); +} + +static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog) +{ + int err, prog_fd; + + prog_fd = bpf_program__fd(reuseport_prog); + if (prog_fd < 0) { + errno = -prog_fd; + return -1; + } + + err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, + &prog_fd, sizeof(prog_fd)); + if (err) + return -1; + + return 0; +} + +static socklen_t inetaddr_len(const struct sockaddr_storage *addr) +{ + return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) : + addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0); +} + +static int make_socket(int sotype, const char *ip, int port, + struct sockaddr_storage *addr) +{ + struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC }; + int err, family, fd; + + family = is_ipv6(ip) ? AF_INET6 : AF_INET; + err = make_sockaddr(family, ip, port, addr, NULL); + if (CHECK(err, "make_address", "failed\n")) + return -1; + + fd = socket(addr->ss_family, sotype, 0); + if (CHECK(fd < 0, "socket", "failed\n")) { + log_err("failed to make socket"); + return -1; + } + + err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) { + log_err("failed to set SNDTIMEO"); + close(fd); + return -1; + } + + err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) { + log_err("failed to set RCVTIMEO"); + close(fd); + return -1; + } + + return fd; +} + +static int make_server(int sotype, const char *ip, int port, + struct bpf_program *reuseport_prog) +{ + struct sockaddr_storage addr = {0}; + const int one = 1; + int err, fd = -1; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + + /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */ + if (sotype == SOCK_DGRAM) { + err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one, + sizeof(one)); + if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) { + log_err("failed to enable IP_RECVORIGDSTADDR"); + goto fail; + } + } + + if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) { + err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one, + sizeof(one)); + if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) { + log_err("failed to enable IPV6_RECVORIGDSTADDR"); + goto fail; + } + } + + if (sotype == SOCK_STREAM) { + err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one)); + if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) { + log_err("failed to enable SO_REUSEADDR"); + goto fail; + } + } + + if (reuseport_prog) { + err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, + sizeof(one)); + if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) { + log_err("failed to enable SO_REUSEPORT"); + goto fail; + } + } + + err = bind(fd, (void *)&addr, inetaddr_len(&addr)); + if (CHECK(err, "bind", "failed\n")) { + log_err("failed to bind listen socket"); + goto fail; + } + + if (sotype == SOCK_STREAM) { + err = listen(fd, SOMAXCONN); + if (CHECK(err, "make_server", "listen")) { + log_err("failed to listen on port %d", port); + goto fail; + } + } + + /* Late attach reuseport prog so we can have one init path */ + if (reuseport_prog) { + err = attach_reuseport(fd, reuseport_prog); + if (CHECK(err, "attach_reuseport", "failed\n")) { + log_err("failed to attach reuseport prog"); + goto fail; + } + } + + return fd; +fail: + close(fd); + return -1; +} + +static int make_client(int sotype, const char *ip, int port) +{ + struct sockaddr_storage addr = {0}; + int err, fd; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + + err = connect(fd, (void *)&addr, inetaddr_len(&addr)); + if (CHECK(err, "make_client", "connect")) { + log_err("failed to connect client socket"); + goto fail; + } + + return fd; +fail: + close(fd); + return -1; +} + +static int send_byte(int fd) +{ + ssize_t n; + + errno = 0; + n = send(fd, "a", 1, 0); + if (CHECK(n <= 0, "send_byte", "send")) { + log_err("failed/partial send"); + return -1; + } + return 0; +} + +static int recv_byte(int fd) +{ + char buf[1]; + ssize_t n; + + n = recv(fd, buf, sizeof(buf), 0); + if (CHECK(n <= 0, "recv_byte", "recv")) { + log_err("failed/partial recv"); + return -1; + } + return 0; +} + +static int tcp_recv_send(int server_fd) +{ + char buf[1]; + int ret, fd; + ssize_t n; + + fd = accept(server_fd, NULL, NULL); + if (CHECK(fd < 0, "accept", "failed\n")) { + log_err("failed to accept"); + return -1; + } + + n = recv(fd, buf, sizeof(buf), 0); + if (CHECK(n <= 0, "recv", "failed\n")) { + log_err("failed/partial recv"); + ret = -1; + goto close; + } + + n = send(fd, buf, n, 0); + if (CHECK(n <= 0, "send", "failed\n")) { + log_err("failed/partial send"); + ret = -1; + goto close; + } + + ret = 0; +close: + close(fd); + return ret; +} + +static void v4_to_v6(struct sockaddr_storage *ss) +{ + struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss; + struct sockaddr_in v4 = *(struct sockaddr_in *)ss; + + v6->sin6_family = AF_INET6; + v6->sin6_port = v4.sin_port; + v6->sin6_addr.s6_addr[10] = 0xff; + v6->sin6_addr.s6_addr[11] = 0xff; + memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4); +} + +static int udp_recv_send(int server_fd) +{ + char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))]; + struct sockaddr_storage _src_addr = { 0 }; + struct sockaddr_storage *src_addr = &_src_addr; + struct sockaddr_storage *dst_addr = NULL; + struct msghdr msg = { 0 }; + struct iovec iov = { 0 }; + struct cmsghdr *cm; + char buf[1]; + int ret, fd; + ssize_t n; + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + msg.msg_name = src_addr; + msg.msg_namelen = sizeof(*src_addr); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); + + errno = 0; + n = recvmsg(server_fd, &msg, 0); + if (CHECK(n <= 0, "recvmsg", "failed\n")) { + log_err("failed to receive"); + return -1; + } + if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n")) + return -1; + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_ORIGDSTADDR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_ORIGDSTADDR)) { + dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm); + break; + } + log_err("warning: ignored cmsg at level %d type %d", + cm->cmsg_level, cm->cmsg_type); + } + if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n")) + return -1; + + /* Server socket bound to IPv4-mapped IPv6 address */ + if (src_addr->ss_family == AF_INET6 && + dst_addr->ss_family == AF_INET) { + v4_to_v6(dst_addr); + } + + /* Reply from original destination address. */ + fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0); + if (CHECK(fd < 0, "socket", "failed\n")) { + log_err("failed to create tx socket"); + return -1; + } + + ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr)); + if (CHECK(ret, "bind", "failed\n")) { + log_err("failed to bind tx socket"); + goto out; + } + + msg.msg_control = NULL; + msg.msg_controllen = 0; + n = sendmsg(fd, &msg, 0); + if (CHECK(n <= 0, "sendmsg", "failed\n")) { + log_err("failed to send echo reply"); + ret = -1; + goto out; + } + + ret = 0; +out: + close(fd); + return ret; +} + +static int tcp_echo_test(int client_fd, int server_fd) +{ + int err; + + err = send_byte(client_fd); + if (err) + return -1; + err = tcp_recv_send(server_fd); + if (err) + return -1; + err = recv_byte(client_fd); + if (err) + return -1; + + return 0; +} + +static int udp_echo_test(int client_fd, int server_fd) +{ + int err; + + err = send_byte(client_fd); + if (err) + return -1; + err = udp_recv_send(server_fd); + if (err) + return -1; + err = recv_byte(client_fd); + if (err) + return -1; + + return 0; +} + +static struct bpf_link *attach_lookup_prog(struct bpf_program *prog) +{ + struct bpf_link *link; + int net_fd; + + net_fd = open("/proc/self/ns/net", O_RDONLY); + if (CHECK(net_fd < 0, "open", "failed\n")) { + log_err("failed to open /proc/self/ns/net"); + return NULL; + } + + link = bpf_program__attach_netns(prog, net_fd); + if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) { + errno = -PTR_ERR(link); + log_err("failed to attach program '%s' to netns", + bpf_program__name(prog)); + link = NULL; + } + + close(net_fd); + return link; +} + +static int update_lookup_map(struct bpf_map *map, int index, int sock_fd) +{ + int err, map_fd; + uint64_t value; + + map_fd = bpf_map__fd(map); + if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) { + errno = -map_fd; + log_err("failed to get map FD"); + return -1; + } + + value = (uint64_t)sock_fd; + err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem", "failed\n")) { + log_err("failed to update redir_map @ %d", index); + return -1; + } + + return 0; +} + +static void query_lookup_prog(struct test_sk_lookup *skel) +{ + struct bpf_link *link[3] = {}; + struct bpf_link_info info; + __u32 attach_flags = 0; + __u32 prog_ids[3] = {}; + __u32 prog_cnt = 3; + __u32 prog_id; + int net_fd; + int err; + + net_fd = open("/proc/self/ns/net", O_RDONLY); + if (CHECK(net_fd < 0, "open", "failed\n")) { + log_err("failed to open /proc/self/ns/net"); + return; + } + + link[0] = attach_lookup_prog(skel->progs.lookup_pass); + if (!link[0]) + goto close; + link[1] = attach_lookup_prog(skel->progs.lookup_pass); + if (!link[1]) + goto detach; + link[2] = attach_lookup_prog(skel->progs.lookup_drop); + if (!link[2]) + goto detach; + + err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */, + &attach_flags, prog_ids, &prog_cnt); + if (CHECK(err, "bpf_prog_query", "failed\n")) { + log_err("failed to query lookup prog"); + goto detach; + } + + errno = 0; + if (CHECK(attach_flags != 0, "bpf_prog_query", + "wrong attach_flags on query: %u", attach_flags)) + goto detach; + if (CHECK(prog_cnt != 3, "bpf_prog_query", + "wrong program count on query: %u", prog_cnt)) + goto detach; + prog_id = link_info_prog_id(link[0], &info); + CHECK(prog_ids[0] != prog_id, "bpf_prog_query", + "invalid program #0 id on query: %u != %u\n", + prog_ids[0], prog_id); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + prog_id = link_info_prog_id(link[1], &info); + CHECK(prog_ids[1] != prog_id, "bpf_prog_query", + "invalid program #1 id on query: %u != %u\n", + prog_ids[1], prog_id); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + prog_id = link_info_prog_id(link[2], &info); + CHECK(prog_ids[2] != prog_id, "bpf_prog_query", + "invalid program #2 id on query: %u != %u\n", + prog_ids[2], prog_id); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + + err = bpf_link__detach(link[0]); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto detach; + + /* prog id is still there, but netns_ino is zeroed out */ + prog_id = link_info_prog_id(link[0], &info); + CHECK(prog_ids[0] != prog_id, "bpf_prog_query", + "invalid program #0 id on query: %u != %u\n", + prog_ids[0], prog_id); + CHECK(info.netns.netns_ino != 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + +detach: + if (link[2]) + bpf_link__destroy(link[2]); + if (link[1]) + bpf_link__destroy(link[1]); + if (link[0]) + bpf_link__destroy(link[0]); +close: + close(net_fd); +} + +static void run_lookup_prog(const struct test *t) +{ + int server_fds[MAX_SERVERS] = { -1 }; + int client_fd, reuse_conn_fd = -1; + struct bpf_link *lookup_link; + int i, err; + + lookup_link = attach_lookup_prog(t->lookup_prog); + if (!lookup_link) + return; + + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { + server_fds[i] = make_server(t->sotype, t->listen_at.ip, + t->listen_at.port, + t->reuseport_prog); + if (server_fds[i] < 0) + goto close; + + err = update_lookup_map(t->sock_map, i, server_fds[i]); + if (err) + goto close; + + /* want just one server for non-reuseport test */ + if (!t->reuseport_prog) + break; + } + + /* Regular UDP socket lookup with reuseport behaves + * differently when reuseport group contains connected + * sockets. Check that adding a connected UDP socket to the + * reuseport group does not affect how reuseport works with + * BPF socket lookup. + */ + if (t->reuseport_has_conns) { + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + + /* Add an extra socket to reuseport group */ + reuse_conn_fd = make_server(t->sotype, t->listen_at.ip, + t->listen_at.port, + t->reuseport_prog); + if (reuse_conn_fd < 0) + goto close; + + /* Connect the extra socket to itself */ + err = getsockname(reuse_conn_fd, (void *)&addr, &len); + if (CHECK(err, "getsockname", "errno %d\n", errno)) + goto close; + err = connect(reuse_conn_fd, (void *)&addr, len); + if (CHECK(err, "connect", "errno %d\n", errno)) + goto close; + } + + client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port); + if (client_fd < 0) + goto close; + + if (t->sotype == SOCK_STREAM) + tcp_echo_test(client_fd, server_fds[t->accept_on]); + else + udp_echo_test(client_fd, server_fds[t->accept_on]); + + close(client_fd); +close: + if (reuse_conn_fd != -1) + close(reuse_conn_fd); + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { + if (server_fds[i] != -1) + close(server_fds[i]); + } + bpf_link__destroy(lookup_link); +} + +static void test_redirect_lookup(struct test_sk_lookup *skel) +{ + const struct test tests[] = { + { + .desc = "TCP IPv4 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, INT_PORT }, + }, + { + .desc = "TCP IPv4 redir addr", + .lookup_prog = skel->progs.redir_ip4, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, EXT_PORT }, + }, + { + .desc = "TCP IPv4 redir with reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_B, + }, + { + .desc = "TCP IPv4 redir skip reuseport", + .lookup_prog = skel->progs.select_sock_a_no_reuseport, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_A, + }, + { + .desc = "TCP IPv6 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, INT_PORT }, + }, + { + .desc = "TCP IPv6 redir addr", + .lookup_prog = skel->progs.redir_ip6, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, EXT_PORT }, + }, + { + .desc = "TCP IPv4->IPv6 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4_V6, INT_PORT }, + }, + { + .desc = "TCP IPv6 redir with reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_B, + }, + { + .desc = "TCP IPv6 redir skip reuseport", + .lookup_prog = skel->progs.select_sock_a_no_reuseport, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_A, + }, + { + .desc = "UDP IPv4 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, INT_PORT }, + }, + { + .desc = "UDP IPv4 redir addr", + .lookup_prog = skel->progs.redir_ip4, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, EXT_PORT }, + }, + { + .desc = "UDP IPv4 redir with reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_B, + }, + { + .desc = "UDP IPv4 redir and reuseport with conns", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_B, + .reuseport_has_conns = true, + }, + { + .desc = "UDP IPv4 redir skip reuseport", + .lookup_prog = skel->progs.select_sock_a_no_reuseport, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_A, + }, + { + .desc = "UDP IPv6 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, INT_PORT }, + }, + { + .desc = "UDP IPv6 redir addr", + .lookup_prog = skel->progs.redir_ip6, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, EXT_PORT }, + }, + { + .desc = "UDP IPv4->IPv6 redir port", + .lookup_prog = skel->progs.redir_port, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .listen_at = { INT_IP4_V6, INT_PORT }, + .connect_to = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "UDP IPv6 redir and reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_B, + }, + { + .desc = "UDP IPv6 redir and reuseport with conns", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_B, + .reuseport_has_conns = true, + }, + { + .desc = "UDP IPv6 redir skip reuseport", + .lookup_prog = skel->progs.select_sock_a_no_reuseport, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_A, + }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (test__start_subtest(t->desc)) + run_lookup_prog(t); + } +} + +static void drop_on_lookup(const struct test *t) +{ + struct sockaddr_storage dst = {}; + int client_fd, server_fd, err; + struct bpf_link *lookup_link; + ssize_t n; + + lookup_link = attach_lookup_prog(t->lookup_prog); + if (!lookup_link) + return; + + server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, + t->reuseport_prog); + if (server_fd < 0) + goto detach; + + client_fd = make_socket(t->sotype, t->connect_to.ip, + t->connect_to.port, &dst); + if (client_fd < 0) + goto close_srv; + + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); + if (t->sotype == SOCK_DGRAM) { + err = send_byte(client_fd); + if (err) + goto close_all; + + /* Read out asynchronous error */ + n = recv(client_fd, NULL, 0, 0); + err = n == -1; + } + if (CHECK(!err || errno != ECONNREFUSED, "connect", + "unexpected success or error\n")) + log_err("expected ECONNREFUSED on connect"); + +close_all: + close(client_fd); +close_srv: + close(server_fd); +detach: + bpf_link__destroy(lookup_link); +} + +static void test_drop_on_lookup(struct test_sk_lookup *skel) +{ + const struct test tests[] = { + { + .desc = "TCP IPv4 drop on lookup", + .lookup_prog = skel->progs.lookup_drop, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "TCP IPv6 drop on lookup", + .lookup_prog = skel->progs.lookup_drop, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, EXT_PORT }, + }, + { + .desc = "UDP IPv4 drop on lookup", + .lookup_prog = skel->progs.lookup_drop, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "UDP IPv6 drop on lookup", + .lookup_prog = skel->progs.lookup_drop, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, INT_PORT }, + }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (test__start_subtest(t->desc)) + drop_on_lookup(t); + } +} + +static void drop_on_reuseport(const struct test *t) +{ + struct sockaddr_storage dst = { 0 }; + int client, server1, server2, err; + struct bpf_link *lookup_link; + ssize_t n; + + lookup_link = attach_lookup_prog(t->lookup_prog); + if (!lookup_link) + return; + + server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, + t->reuseport_prog); + if (server1 < 0) + goto detach; + + err = update_lookup_map(t->sock_map, SERVER_A, server1); + if (err) + goto detach; + + /* second server on destination address we should never reach */ + server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, + NULL /* reuseport prog */); + if (server2 < 0) + goto close_srv1; + + client = make_socket(t->sotype, t->connect_to.ip, + t->connect_to.port, &dst); + if (client < 0) + goto close_srv2; + + err = connect(client, (void *)&dst, inetaddr_len(&dst)); + if (t->sotype == SOCK_DGRAM) { + err = send_byte(client); + if (err) + goto close_all; + + /* Read out asynchronous error */ + n = recv(client, NULL, 0, 0); + err = n == -1; + } + if (CHECK(!err || errno != ECONNREFUSED, "connect", + "unexpected success or error\n")) + log_err("expected ECONNREFUSED on connect"); + +close_all: + close(client); +close_srv2: + close(server2); +close_srv1: + close(server1); +detach: + bpf_link__destroy(lookup_link); +} + +static void test_drop_on_reuseport(struct test_sk_lookup *skel) +{ + const struct test tests[] = { + { + .desc = "TCP IPv4 drop on reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.reuseport_drop, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "TCP IPv6 drop on reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.reuseport_drop, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + }, + { + .desc = "UDP IPv4 drop on reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.reuseport_drop, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "TCP IPv6 drop on reuseport", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.reuseport_drop, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (test__start_subtest(t->desc)) + drop_on_reuseport(t); + } +} + +static void run_sk_assign(struct test_sk_lookup *skel, + struct bpf_program *lookup_prog, + const char *listen_ip, const char *connect_ip) +{ + int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 }; + struct bpf_link *lookup_link; + int i, err; + + lookup_link = attach_lookup_prog(lookup_prog); + if (!lookup_link) + return; + + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { + server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL); + if (server_fds[i] < 0) + goto close_servers; + + err = update_lookup_map(skel->maps.redir_map, i, + server_fds[i]); + if (err) + goto close_servers; + } + + client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT); + if (client_fd < 0) + goto close_servers; + + peer_fd = accept(server_fds[SERVER_B], NULL, NULL); + if (CHECK(peer_fd < 0, "accept", "failed\n")) + goto close_client; + + close(peer_fd); +close_client: + close(client_fd); +close_servers: + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { + if (server_fds[i] != -1) + close(server_fds[i]); + } + bpf_link__destroy(lookup_link); +} + +static void run_sk_assign_v4(struct test_sk_lookup *skel, + struct bpf_program *lookup_prog) +{ + run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4); +} + +static void run_sk_assign_v6(struct test_sk_lookup *skel, + struct bpf_program *lookup_prog) +{ + run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6); +} + +static void run_sk_assign_connected(struct test_sk_lookup *skel, + int sotype) +{ + int err, client_fd, connected_fd, server_fd; + struct bpf_link *lookup_link; + + server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL); + if (server_fd < 0) + return; + + connected_fd = make_client(sotype, EXT_IP4, EXT_PORT); + if (connected_fd < 0) + goto out_close_server; + + /* Put a connected socket in redirect map */ + err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd); + if (err) + goto out_close_connected; + + lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport); + if (!lookup_link) + goto out_close_connected; + + /* Try to redirect TCP SYN / UDP packet to a connected socket */ + client_fd = make_client(sotype, EXT_IP4, EXT_PORT); + if (client_fd < 0) + goto out_unlink_prog; + if (sotype == SOCK_DGRAM) { + send_byte(client_fd); + recv_byte(server_fd); + } + + close(client_fd); +out_unlink_prog: + bpf_link__destroy(lookup_link); +out_close_connected: + close(connected_fd); +out_close_server: + close(server_fd); +} + +static void test_sk_assign_helper(struct test_sk_lookup *skel) +{ + if (test__start_subtest("sk_assign returns EEXIST")) + run_sk_assign_v4(skel, skel->progs.sk_assign_eexist); + if (test__start_subtest("sk_assign honors F_REPLACE")) + run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag); + if (test__start_subtest("sk_assign accepts NULL socket")) + run_sk_assign_v4(skel, skel->progs.sk_assign_null); + if (test__start_subtest("access ctx->sk")) + run_sk_assign_v4(skel, skel->progs.access_ctx_sk); + if (test__start_subtest("narrow access to ctx v4")) + run_sk_assign_v4(skel, skel->progs.ctx_narrow_access); + if (test__start_subtest("narrow access to ctx v6")) + run_sk_assign_v6(skel, skel->progs.ctx_narrow_access); + if (test__start_subtest("sk_assign rejects TCP established")) + run_sk_assign_connected(skel, SOCK_STREAM); + if (test__start_subtest("sk_assign rejects UDP connected")) + run_sk_assign_connected(skel, SOCK_DGRAM); +} + +struct test_multi_prog { + const char *desc; + struct bpf_program *prog1; + struct bpf_program *prog2; + struct bpf_map *redir_map; + struct bpf_map *run_map; + int expect_errno; + struct inet_addr listen_at; +}; + +static void run_multi_prog_lookup(const struct test_multi_prog *t) +{ + struct sockaddr_storage dst = {}; + int map_fd, server_fd, client_fd; + struct bpf_link *link1, *link2; + int prog_idx, done, err; + + map_fd = bpf_map__fd(t->run_map); + + done = 0; + prog_idx = PROG1; + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); + if (CHECK(err, "bpf_map_update_elem", "failed\n")) + return; + prog_idx = PROG2; + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); + if (CHECK(err, "bpf_map_update_elem", "failed\n")) + return; + + link1 = attach_lookup_prog(t->prog1); + if (!link1) + return; + link2 = attach_lookup_prog(t->prog2); + if (!link2) + goto out_unlink1; + + server_fd = make_server(SOCK_STREAM, t->listen_at.ip, + t->listen_at.port, NULL); + if (server_fd < 0) + goto out_unlink2; + + err = update_lookup_map(t->redir_map, SERVER_A, server_fd); + if (err) + goto out_close_server; + + client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst); + if (client_fd < 0) + goto out_close_server; + + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); + if (CHECK(err && !t->expect_errno, "connect", + "unexpected error %d\n", errno)) + goto out_close_client; + if (CHECK(err && t->expect_errno && errno != t->expect_errno, + "connect", "unexpected error %d\n", errno)) + goto out_close_client; + + done = 0; + prog_idx = PROG1; + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); + CHECK(err, "bpf_map_lookup_elem", "failed\n"); + CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n"); + + done = 0; + prog_idx = PROG2; + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); + CHECK(err, "bpf_map_lookup_elem", "failed\n"); + CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n"); + +out_close_client: + close(client_fd); +out_close_server: + close(server_fd); +out_unlink2: + bpf_link__destroy(link2); +out_unlink1: + bpf_link__destroy(link1); +} + +static void test_multi_prog_lookup(struct test_sk_lookup *skel) +{ + struct test_multi_prog tests[] = { + { + .desc = "multi prog - pass, pass", + .prog1 = skel->progs.multi_prog_pass1, + .prog2 = skel->progs.multi_prog_pass2, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "multi prog - drop, drop", + .prog1 = skel->progs.multi_prog_drop1, + .prog2 = skel->progs.multi_prog_drop2, + .listen_at = { EXT_IP4, EXT_PORT }, + .expect_errno = ECONNREFUSED, + }, + { + .desc = "multi prog - pass, drop", + .prog1 = skel->progs.multi_prog_pass1, + .prog2 = skel->progs.multi_prog_drop2, + .listen_at = { EXT_IP4, EXT_PORT }, + .expect_errno = ECONNREFUSED, + }, + { + .desc = "multi prog - drop, pass", + .prog1 = skel->progs.multi_prog_drop1, + .prog2 = skel->progs.multi_prog_pass2, + .listen_at = { EXT_IP4, EXT_PORT }, + .expect_errno = ECONNREFUSED, + }, + { + .desc = "multi prog - pass, redir", + .prog1 = skel->progs.multi_prog_pass1, + .prog2 = skel->progs.multi_prog_redir2, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "multi prog - redir, pass", + .prog1 = skel->progs.multi_prog_redir1, + .prog2 = skel->progs.multi_prog_pass2, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "multi prog - drop, redir", + .prog1 = skel->progs.multi_prog_drop1, + .prog2 = skel->progs.multi_prog_redir2, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "multi prog - redir, drop", + .prog1 = skel->progs.multi_prog_redir1, + .prog2 = skel->progs.multi_prog_drop2, + .listen_at = { INT_IP4, INT_PORT }, + }, + { + .desc = "multi prog - redir, redir", + .prog1 = skel->progs.multi_prog_redir1, + .prog2 = skel->progs.multi_prog_redir2, + .listen_at = { INT_IP4, INT_PORT }, + }, + }; + struct test_multi_prog *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + t->redir_map = skel->maps.redir_map; + t->run_map = skel->maps.run_map; + if (test__start_subtest(t->desc)) + run_multi_prog_lookup(t); + } +} + +static void run_tests(struct test_sk_lookup *skel) +{ + if (test__start_subtest("query lookup prog")) + query_lookup_prog(skel); + test_redirect_lookup(skel); + test_drop_on_lookup(skel); + test_drop_on_reuseport(skel); + test_sk_assign_helper(skel); + test_multi_prog_lookup(skel); +} + +static int switch_netns(void) +{ + static const char * const setup_script[] = { + "ip -6 addr add dev lo " EXT_IP6 "/128", + "ip -6 addr add dev lo " INT_IP6 "/128", + "ip link set dev lo up", + NULL, + }; + const char * const *cmd; + int err; + + err = unshare(CLONE_NEWNET); + if (CHECK(err, "unshare", "failed\n")) { + log_err("unshare(CLONE_NEWNET)"); + return -1; + } + + for (cmd = setup_script; *cmd; cmd++) { + err = system(*cmd); + if (CHECK(err, "system", "failed\n")) { + log_err("system(%s)", *cmd); + return -1; + } + } + + return 0; +} + +void test_sk_lookup(void) +{ + struct test_sk_lookup *skel; + int err; + + err = switch_netns(); + if (err) + return; + + skel = test_sk_lookup__open_and_load(); + if (CHECK(!skel, "skel open_and_load", "failed\n")) + return; + + run_tests(skel); + + test_sk_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index 7021b92af313..25de86af2d03 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -11,6 +11,7 @@ void test_skb_ctx(void) .cb[3] = 4, .cb[4] = 5, .priority = 6, + .ifindex = 1, .tstamp = 7, .wire_len = 100, .gso_segs = 8, @@ -92,6 +93,10 @@ void test_skb_ctx(void) "ctx_out_priority", "skb->priority == %d, expected %d\n", skb.priority, 7); + CHECK_ATTR(skb.ifindex != 1, + "ctx_out_ifindex", + "skb->ifindex == %d, expected %d\n", + skb.ifindex, 1); CHECK_ATTR(skb.tstamp != 8, "ctx_out_tstamp", "skb->tstamp == %lld, expected %d\n", diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index fa153cf67b1b..fe87b77af459 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -41,7 +41,7 @@ void test_skeleton(void) CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL); CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL); - CHECK(rodata->in6 != 0, "in6", "got %d != exp %d\n", rodata->in6, 0); + CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0); CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0); /* validate we can pre-setup global variables, even in .bss */ @@ -49,7 +49,7 @@ void test_skeleton(void) data->in2 = 11; bss->in3 = 12; bss->in4 = 13; - rodata->in6 = 14; + rodata->in.in6 = 14; err = test_skeleton__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) @@ -60,7 +60,7 @@ void test_skeleton(void) CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL); CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12); CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL); - CHECK(rodata->in6 != 14, "in6", "got %d != exp %d\n", rodata->in6, 14); + CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14); /* now set new values and attach to get them into outX variables */ data->in1 = 1; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 8547ecbdc61f..ec281b0363b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -193,11 +193,10 @@ static void run_test(int cgroup_fd) if (CHECK_FAIL(server_fd < 0)) goto close_bpf_object; + pthread_mutex_lock(&server_started_mtx); if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, (void *)&server_fd))) goto close_server_fd; - - pthread_mutex_lock(&server_started_mtx); pthread_cond_wait(&server_started, &server_started_mtx); pthread_mutex_unlock(&server_started_mtx); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 9013a0c01eed..d207e968e6b1 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd) goto close_bpf_object; } - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (client_fd < 0) { err = -1; goto close_bpf_object; @@ -161,7 +161,7 @@ void test_tcp_rtt(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c new file mode 100644 index 000000000000..39b0decb1bb2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ + +#include <test_progs.h> + +#include "trace_printk.skel.h" + +#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define SEARCHMSG "testing,testing" + +void test_trace_printk(void) +{ + int err, iter = 0, duration = 0, found = 0; + struct trace_printk__bss *bss; + struct trace_printk *skel; + char *buf = NULL; + FILE *fp = NULL; + size_t buflen; + + skel = trace_printk__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = trace_printk__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + + err = trace_printk__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + fp = fopen(TRACEBUF, "r"); + if (CHECK(fp == NULL, "could not open trace buffer", + "error %d opening %s", errno, TRACEBUF)) + goto cleanup; + + /* We do not want to wait forever if this test fails... */ + fcntl(fileno(fp), F_SETFL, O_NONBLOCK); + + /* wait for tracepoint to trigger */ + usleep(1); + trace_printk__detach(skel); + + if (CHECK(bss->trace_printk_ran == 0, + "bpf_trace_printk never ran", + "ran == %d", bss->trace_printk_ran)) + goto cleanup; + + if (CHECK(bss->trace_printk_ret <= 0, + "bpf_trace_printk returned <= 0 value", + "got %d", bss->trace_printk_ret)) + goto cleanup; + + /* verify our search string is in the trace buffer */ + while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) { + if (strstr(buf, SEARCHMSG) != NULL) + found++; + if (found == bss->trace_printk_ran) + break; + if (++iter > 1000) + break; + } + + if (CHECK(!found, "message from bpf_trace_printk not found", + "no instance of %s in %s", SEARCHMSG, TRACEBUF)) + goto cleanup; + +cleanup: + trace_printk__destroy(skel); + free(buf); + if (fp) + fclose(fp); +} diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c new file mode 100644 index 000000000000..2aba09d4d01b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "udp_limit.skel.h" + +#include <sys/types.h> +#include <sys/socket.h> + +static int duration; + +void test_udp_limit(void) +{ + struct udp_limit *skel; + int fd1 = -1, fd2 = -1; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/udp_limit"); + if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + return; + + skel = udp_limit__open_and_load(); + if (CHECK(!skel, "skel-load", "errno %d", errno)) + goto close_cgroup_fd; + + skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd); + skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd); + if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release), + "cg-attach", "sock %ld sock_release %ld", + PTR_ERR(skel->links.sock), + PTR_ERR(skel->links.sock_release))) + goto close_skeleton; + + /* BPF program enforces a single UDP socket per cgroup, + * verify that. + */ + fd1 = socket(AF_INET, SOCK_DGRAM, 0); + if (CHECK(fd1 < 0, "fd1", "errno %d", errno)) + goto close_skeleton; + + fd2 = socket(AF_INET, SOCK_DGRAM, 0); + if (CHECK(fd2 >= 0, "fd2", "errno %d", errno)) + goto close_skeleton; + + /* We can reopen again after close. */ + close(fd1); + fd1 = -1; + + fd1 = socket(AF_INET, SOCK_DGRAM, 0); + if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno)) + goto close_skeleton; + + /* Make sure the program was invoked the expected + * number of times: + * - open fd1 - BPF_CGROUP_INET_SOCK_CREATE + * - attempt to openfd2 - BPF_CGROUP_INET_SOCK_CREATE + * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE + * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE + */ + if (CHECK(skel->bss->invocations != 4, "bss-invocations", + "invocations=%d", skel->bss->invocations)) + goto close_skeleton; + + /* We should still have a single socket in use */ + if (CHECK(skel->bss->in_use != 1, "bss-in_use", + "in_use=%d", skel->bss->in_use)) + goto close_skeleton; + +close_skeleton: + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + udp_limit__destroy(skel); +close_cgroup_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c new file mode 100644 index 000000000000..c75525eab02c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_varlen.skel.h" + +#define CHECK_VAL(got, exp) \ + CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \ + (long)(got), (long)(exp)) + +void test_varlen(void) +{ + int duration = 0, err; + struct test_varlen* skel; + struct test_varlen__bss *bss; + struct test_varlen__data *data; + const char str1[] = "Hello, "; + const char str2[] = "World!"; + const char exp_str[] = "Hello, \0World!\0"; + const int size1 = sizeof(str1); + const int size2 = sizeof(str2); + + skel = test_varlen__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + data = skel->data; + + err = test_varlen__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + bss->test_pid = getpid(); + + /* trigger everything */ + memcpy(bss->buf_in1, str1, size1); + memcpy(bss->buf_in2, str2, size2); + bss->capture = true; + usleep(1); + bss->capture = false; + + CHECK_VAL(bss->payload1_len1, size1); + CHECK_VAL(bss->payload1_len2, size2); + CHECK_VAL(bss->total1, size1 + size2); + CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload2_len1, size1); + CHECK_VAL(data->payload2_len2, size2); + CHECK_VAL(data->total2, size1 + size2); + CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload3_len1, size1); + CHECK_VAL(data->payload3_len2, size2); + CHECK_VAL(data->total3, size1 + size2); + CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload4_len1, size1); + CHECK_VAL(data->payload4_len2, size2); + CHECK_VAL(data->total4, size1 + size2); + CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", + "doesn't match!"); +cleanup: + test_varlen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c new file mode 100644 index 000000000000..0176573fe4e7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/bpf.h> +#include <linux/if_link.h> +#include <test_progs.h> + +#include "test_xdp_with_cpumap_helpers.skel.h" + +#define IFINDEX_LO 1 + +void test_xdp_with_cpumap_helpers(void) +{ + struct test_xdp_with_cpumap_helpers *skel; + struct bpf_prog_info info = {}; + struct bpf_cpumap_val val = { + .qsize = 192, + }; + __u32 duration = 0, idx = 0; + __u32 len = sizeof(info); + int err, prog_fd, map_fd; + + skel = test_xdp_with_cpumap_helpers__open_and_load(); + if (CHECK_FAIL(!skel)) { + perror("test_xdp_with_cpumap_helpers__open_and_load"); + return; + } + + /* can not attach program with cpumaps that allow programs + * as xdp generic + */ + prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP", + "should have failed\n"); + + prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); + map_fd = bpf_map__fd(skel->maps.cpu_map); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); + if (CHECK_FAIL(err)) + goto out_close; + + val.bpf_prog.fd = prog_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + CHECK(err, "Add program to cpumap entry", "err %d errno %d\n", + err, errno); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno); + CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry", + "expected %u read %u\n", info.id, val.bpf_prog.id); + + /* can not attach BPF_XDP_CPUMAP program to a device */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program", + "should have failed\n"); + + val.qsize = 192; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry", + "should have failed\n"); + +out_close: + test_xdp_with_cpumap_helpers__destroy(skel); +} + +void test_xdp_cpumap_attach(void) +{ + if (test__start_subtest("cpumap_with_progs")) + test_xdp_with_cpumap_helpers(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c new file mode 100644 index 000000000000..6f814999b395 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <uapi/linux/if_link.h> +#include <test_progs.h> +#include "test_xdp_link.skel.h" + +#define IFINDEX_LO 1 + +void test_xdp_link(void) +{ + __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; + DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); + struct test_xdp_link *skel1 = NULL, *skel2 = NULL; + struct bpf_link_info link_info; + struct bpf_prog_info prog_info; + struct bpf_link *link; + __u32 link_info_len = sizeof(link_info); + __u32 prog_info_len = sizeof(prog_info); + + skel1 = test_xdp_link__open_and_load(); + if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) + goto cleanup; + prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); + + skel2 = test_xdp_link__open_and_load(); + if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) + goto cleanup; + prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); + if (CHECK(err, "fd_info1", "failed %d\n", -errno)) + goto cleanup; + id1 = prog_info.id; + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); + if (CHECK(err, "fd_info2", "failed %d\n", -errno)) + goto cleanup; + id2 = prog_info.id; + + /* set initial prog attachment */ + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); + if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) + goto cleanup; + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err); + + /* BPF link is not allowed to replace prog attachment */ + link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); + if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + bpf_link__destroy(link); + /* best-effort detach prog */ + opts.old_fd = prog_fd1; + bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + goto cleanup; + } + + /* detach BPF program */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + if (CHECK(err, "prog_detach", "failed %d\n", err)) + goto cleanup; + + /* now BPF link should attach successfully */ + link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto cleanup; + skel1->links.xdp_handler = link; + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + goto cleanup; + + /* BPF prog attach is not allowed to replace BPF link */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); + if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) + goto cleanup; + + /* Can't force-update when BPF link is active */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); + if (CHECK(!err, "prog_update_fail", "unexpected success\n")) + goto cleanup; + + /* Can't force-detach when BPF link is active */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) + goto cleanup; + + /* BPF link is not allowed to replace another BPF link */ + link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); + if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + bpf_link__destroy(link); + goto cleanup; + } + + bpf_link__destroy(skel1->links.xdp_handler); + skel1->links.xdp_handler = NULL; + + /* new link attach should succeed */ + link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto cleanup; + skel2->links.xdp_handler = link; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id2, "id2_check", + "loaded prog id %u != id2 %u, err %d", id0, id1, err)) + goto cleanup; + + /* updating program under active BPF link works as expected */ + err = bpf_link__update_program(link, skel1->progs.xdp_handler); + if (CHECK(err, "link_upd", "failed: %d\n", err)) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto cleanup; + + CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", + "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); + CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + + err = bpf_link__detach(link); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto cleanup; + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); + /* ifindex should be zeroed out */ + CHECK(link_info.xdp.ifindex != 0, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, 0); + +cleanup: + test_xdp_link__destroy(skel1); + test_xdp_link__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h new file mode 100644 index 000000000000..c196280df90d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used +#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used +#define bpf_iter__netlink bpf_iter__netlink___not_used +#define bpf_iter__task bpf_iter__task___not_used +#define bpf_iter__task_file bpf_iter__task_file___not_used +#define bpf_iter__tcp bpf_iter__tcp___not_used +#define tcp6_sock tcp6_sock___not_used +#define bpf_iter__udp bpf_iter__udp___not_used +#define udp6_sock udp6_sock___not_used +#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used +#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__bpf_map +#undef bpf_iter__ipv6_route +#undef bpf_iter__netlink +#undef bpf_iter__task +#undef bpf_iter__task_file +#undef bpf_iter__tcp +#undef tcp6_sock +#undef bpf_iter__udp +#undef udp6_sock +#undef bpf_iter__bpf_map_elem +#undef bpf_iter__bpf_sk_storage_map + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__ipv6_route { + struct bpf_iter_meta *meta; + struct fib6_info *rt; +} __attribute__((preserve_access_index)); + +struct bpf_iter__netlink { + struct bpf_iter_meta *meta; + struct netlink_sock *sk; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task_file { + struct bpf_iter_meta *meta; + struct task_struct *task; + __u32 fd; + struct file *file; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; +} __attribute__((preserve_access_index)); + +struct bpf_iter__tcp { + struct bpf_iter_meta *meta; + struct sock_common *sk_common; + uid_t uid; +} __attribute__((preserve_access_index)); + +struct tcp6_sock { + struct tcp_sock tcp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); + +struct bpf_iter__udp { + struct bpf_iter_meta *meta; + struct udp_sock *udp_sk; + uid_t uid __attribute__((aligned(8))); + int bucket __attribute__((aligned(8))); +} __attribute__((preserve_access_index)); + +struct udp6_sock { + struct udp_sock udp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map_elem { + struct bpf_iter_meta *meta; + struct bpf_map *map; + void *key; + void *value; +}; + +struct bpf_iter__bpf_sk_storage_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; + struct sock *sk; + void *value; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c new file mode 100644 index 000000000000..6286023fd62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} arraymap1 SEC(".maps"); + +__u32 key_sum = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + __u64 *val = ctx->value; + + if (key == (void *)0 || val == (void *)0) + return 0; + + bpf_seq_write(ctx->meta->seq, key, sizeof(__u32)); + bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); + key_sum += *key; + val_sum += *val; + *val = *key; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c new file mode 100644 index 000000000000..07ddbfdbcab7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, __u64); + __type(value, __u64); +} hashmap2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap3 SEC(".maps"); + +/* will set before prog run */ +bool in_test_mode = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + __u32 seq_num = ctx->meta->seq_num; + struct bpf_map *map = ctx->map; + struct key_t *key = ctx->key; + __u64 *val = ctx->value; + + if (in_test_mode) { + /* test mode is used by selftests to + * test functionality of bpf_hash_map iter. + * + * the above hashmap1 will have correct size + * and will be accepted, hashmap2 and hashmap3 + * should be rejected due to smaller key/value + * size. + */ + if (key == (void *)0 || val == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + val_sum += *val; + return 0; + } + + /* non-test mode, the map is prepared with the + * below bpftool command sequence: + * bpftool map create /sys/fs/bpf/m1 type hash \ + * key 12 value 8 entries 3 name map1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \ + * value 0 0 0 1 0 0 0 1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \ + * value 0 0 0 1 0 0 0 2 + * The bpftool iter command line: + * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \ + * map id 77 + * The below output will be: + * map dump starts + * 77: (1000000 0 2000000) (200000001000000) + * 77: (1000000 0 1000000) (100000001000000) + * map dump ends + */ + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, "map dump starts\n"); + + if (key == (void *)0 || val == (void *)0) { + BPF_SEQ_PRINTF(seq, "map dump ends\n"); + return 0; + } + + BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id, + key->a, key->b, key->c, *val); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index b57bd6fef208..08651b23edba 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c new file mode 100644 index 000000000000..85fa710fad90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u32); +} arraymap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +__u32 key_sum = 0, val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum += *key; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c new file mode 100644 index 000000000000..feaaa2b89c57 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u32 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct key_t *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c new file mode 100644 index 000000000000..6b70ccaba301 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +__u32 val_sum = 0; +__u32 ipv6_sk_count = 0; + +SEC("iter/bpf_sk_storage_map") +int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) +{ + struct sock *sk = ctx->sk; + __u32 *val = ctx->value; + + if (sk == (void *)0 || val == (void *)0) + return 0; + + if (sk->sk_family == AF_INET6) + ipv6_sk_count++; + + val_sum += *val; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index c8e9ca74c87b..d58d9f1642b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,35 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__ipv6_route +#include "bpf_iter.h" +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__ipv6_route { - struct bpf_iter_meta *meta; - struct fib6_info *rt; -} __attribute__((preserve_access_index)); - char _license[] SEC("license") = "GPL"; extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; -#define RTF_GATEWAY 0x0002 -#define IFNAMSIZ 16 -#define fib_nh_gw_family nh_common.nhc_gw_family -#define fib_nh_gw6 nh_common.nhc_gw.ipv6 -#define fib_nh_dev nh_common.nhc_dev - SEC("iter/ipv6_route") int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index 75ecf956a2df..95989f4c99b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,30 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__netlink bpf_iter__netlink___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__netlink +#include "bpf_iter.h" +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -#define sk_rmem_alloc sk_backlog.rmem_alloc -#define sk_refcnt __sk_common.skc_refcnt - -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__netlink { - struct bpf_iter_meta *meta; - struct netlink_sock *sk; -} __attribute__((preserve_access_index)); - static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket) { return &container_of(socket, struct socket_alloc, socket)->vfs_inode; @@ -54,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx) if (!nlk->groups) { group = 0; } else { - /* FIXME: temporary use bpf_probe_read here, needs + /* FIXME: temporary use bpf_probe_read_kernel here, needs * verifier support to do direct access. */ - bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); + bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]); } BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", nlk->portid, (u32)group, @@ -74,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx) * with current verifier. */ inode = SOCK_INODE(sk); - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); } BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index ee754021f98e..4983087852a0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 0f0ec3db20ba..8b787baa2654 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -1,29 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task_file bpf_iter__task_file___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task_file +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_file { - struct bpf_iter_meta *meta; - struct task_struct *task; - __u32 fd; - struct file *file; -} __attribute__((preserve_access_index)); - SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c new file mode 100644 index 000000000000..50e59a2e142e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define MAX_STACK_TRACE_DEPTH 64 +unsigned long entries[MAX_STACK_TRACE_DEPTH] = {}; +#define SIZE_OF_ULONG (sizeof(unsigned long)) + +SEC("iter/task") +int dump_task_stack(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + long i, retlen; + + if (task == (void *)0) + return 0; + + retlen = bpf_get_task_stack(task, entries, + MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0); + if (retlen < 0) + return 0; + + BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid, + retlen / SIZE_OF_ULONG); + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { + if (retlen > i * SIZE_OF_ULONG) + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); + } + BPF_SEQ_PRINTF(seq, "\n"); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c new file mode 100644 index 000000000000..54380c5e1069 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + __be32 dest, src; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->rcv_nxt - tp->copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, destp, destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->write_seq - tp->snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + __u16 destp, srcp; + __be32 dest, src; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = tw->tw_daddr; + src = tw->tw_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, irsk->ir_loc_addr, + irsk->ir_num, irsk->ir_rmt_addr, + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp4(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct tcp_request_sock *req; + struct tcp_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "rem_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET) + return 0; + + tp = bpf_skc_to_tcp_sock(sk_common); + if (tp) + return dump_tcp_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c new file mode 100644 index 000000000000..b4fbddfa4e10 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct in6_addr *dest, *src; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->tcp.inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->tcp.write_seq - tp->tcp.snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->tcp.snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(&tp->tcp) ? -1 + : tp->tcp.snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + const struct in6_addr *dest, *src; + __u16 destp, srcp; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + struct in6_addr *src, *dest; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + src = &irsk->ir_v6_loc_addr; + dest = &irsk->ir_v6_rmt_addr; + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + irsk->ir_num, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp6(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct tcp_request_sock *req; + struct tcp6_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "remote_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET6) + return 0; + + tp = bpf_skc_to_tcp6_sock(sk_common); + if (tp) + return dump_tcp6_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c index 13c2c90c835f..2a4647f20c46 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 0aa71b333cf3..ee49493dc125 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - __u32 map1_id = 0, map2_id = 0; __u32 map1_accessed = 0, map2_accessed = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c new file mode 100644 index 000000000000..e3a7575e81d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +__u32 key_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + void *key = ctx->key; + + if (key == (void *)0) + return 0; + + /* out of bound access w.r.t. hashmap1 */ + key_sum += *(__u32 *)(key + sizeof(struct key_t)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c new file mode 100644 index 000000000000..1c7304f56b1e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + void *value = ctx->value; + + if (value == (void *)0) + return 0; + + /* negative offset, verifier failure. */ + value_sum += *(__u32 *)(value - 4); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h index dee1339e6905..d5e3df66ad9a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -1,27 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; int count = 0; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c new file mode 100644 index 000000000000..f258583afbbd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp4(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __be32 dest, src; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode ref pointer drops\n"); + + /* filter out udp6 sockets */ + inet = &udp_sk->inet; + if (inet->sk.sk_family == AF_INET6) + return 0; + + inet = &udp_sk->inet; + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + + BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ", + ctx->bucket, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c new file mode 100644 index 000000000000..65f93bb03f0f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +#define IPV6_SEQ_DGRAM_HEADER \ + " sl " \ + "local_address " \ + "remote_address " \ + "st tx_queue rx_queue tr tm->when retrnsmt" \ + " uid timeout inode ref pointer drops\n" + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp6(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + const struct in6_addr *dest, *src; + struct udp6_sock *udp6_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER); + + udp6_sk = bpf_skc_to_udp6_sock(udp_sk); + if (udp6_sk == (void *)0) + return 0; + + inet = &udp_sk->inet; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + dest = &inet->sk.sk_v6_daddr; + src = &inet->sk.sk_v6_rcv_saddr; + + BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + ctx->bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h new file mode 100644 index 000000000000..01378911252b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_NET_H__ +#define __BPF_TRACING_NET_H__ + +#define AF_INET 2 +#define AF_INET6 10 + +#define ICSK_TIME_RETRANS 1 +#define ICSK_TIME_PROBE0 3 +#define ICSK_TIME_LOSS_PROBE 5 +#define ICSK_TIME_REO_TIMEOUT 6 + +#define IFNAMSIZ 16 + +#define RTF_GATEWAY 0x0002 + +#define TCP_INFINITE_SSTHRESH 0x7fffffff +#define TCP_PINGPONG_THRESH 3 + +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 + +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr +#define inet_dport sk.__sk_common.skc_dport + +#define ir_loc_addr req.__req_common.skc_rcv_saddr +#define ir_num req.__req_common.skc_num +#define ir_rmt_addr req.__req_common.skc_daddr +#define ir_rmt_port req.__req_common.skc_dport +#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr +#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr + +#define sk_family __sk_common.skc_family +#define sk_rmem_alloc sk_backlog.rmem_alloc +#define sk_refcnt __sk_common.skc_refcnt +#define sk_state __sk_common.skc_state +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + +#define s6_addr32 in6_u.u6_addr32 + +#define tw_daddr __tw_common.skc_daddr +#define tw_rcv_saddr __tw_common.skc_rcv_saddr +#define tw_dport __tw_common.skc_dport +#define tw_refcnt __tw_common.skc_refcnt +#define tw_v6_daddr __tw_common.skc_v6_daddr +#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr + +#endif diff --git a/tools/testing/selftests/bpf/progs/btf_data.c b/tools/testing/selftests/bpf/progs/btf_data.c new file mode 100644 index 000000000000..baa525275bde --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_data.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +struct S { + int a; + int b; + int c; +}; + +union U { + int a; + int b; + int c; +}; + +struct S1 { + int a; + int b; + int c; +}; + +union U1 { + int a; + int b; + int c; +}; + +typedef int T; +typedef int S; +typedef int U; +typedef int T1; +typedef int S1; +typedef int U1; + +struct root_struct { + S m_1; + T m_2; + U m_3; + S1 m_4; + T1 m_5; + U1 m_6; + struct S m_7; + struct S1 m_8; + union U m_9; + union U1 m_10; +}; + +int func(struct root_struct *root) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h new file mode 100644 index 000000000000..a0778fe7857a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __PROGS_CG_STORAGE_MULTI_H +#define __PROGS_CG_STORAGE_MULTI_H + +#include <asm/types.h> + +struct cgroup_value { + __u32 egress_pkts; + __u32 ingress_pkts; +}; + +#endif diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c new file mode 100644 index 000000000000..44ad46b33539 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress") +int egress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c new file mode 100644 index 000000000000..a25373002055 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress/1") +int egress1(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/egress/2") +int egress2(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/ingress") +int ingress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c new file mode 100644 index 000000000000..a149f33bc533 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress/1") +int egress1(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/egress/2") +int egress2(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/ingress") +int ingress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1ab2c5eba86c..b1b2773c0b9d 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_keepalive(struct bpf_sock_addr *ctx) +{ + int zero = 0, one = 1; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one))) + return 1; + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one))) + return 1; + } + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero))) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + if (set_keepalive(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c new file mode 100644 index 000000000000..473665cac67e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define LOOP_BOUND 0xf +#define MAX_ENTRIES 8 +#define HALF_ENTRIES (MAX_ENTRIES >> 1) + +_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); + +enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; +__u32 g_line = 0; + +#define VERIFY_TYPE(type, func) ({ \ + g_map_type = type; \ + if (!func()) \ + return 0; \ +}) + + +#define VERIFY(expr) ({ \ + g_line = __LINE__; \ + if (!(expr)) \ + return 0; \ +}) + +struct bpf_map_memory { + __u32 pages; +} __attribute__((preserve_access_index)); + +struct bpf_map { + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 id; + struct bpf_map_memory memory; +} __attribute__((preserve_access_index)); + +static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, + __u32 value_size, __u32 max_entries) +{ + VERIFY(map->map_type == g_map_type); + VERIFY(map->key_size == key_size); + VERIFY(map->value_size == value_size); + VERIFY(map->max_entries == max_entries); + VERIFY(map->id > 0); + VERIFY(map->memory.pages > 0); + + return 1; +} + +static inline int check_bpf_map_ptr(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(indirect->map_type == direct->map_type); + VERIFY(indirect->key_size == direct->key_size); + VERIFY(indirect->value_size == direct->value_size); + VERIFY(indirect->max_entries == direct->max_entries); + VERIFY(indirect->id == direct->id); + VERIFY(indirect->memory.pages == direct->memory.pages); + + return 1; +} + +static inline int check(struct bpf_map *indirect, struct bpf_map *direct, + __u32 key_size, __u32 value_size, __u32 max_entries) +{ + VERIFY(check_bpf_map_ptr(indirect, direct)); + VERIFY(check_bpf_map_fields(indirect, key_size, value_size, + max_entries)); + return 1; +} + +static inline int check_default(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + +typedef struct { + int counter; +} atomic_t; + +struct bpf_htab { + struct bpf_map map; + atomic_t count; + __u32 n_buckets; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */ + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_hash SEC(".maps"); + +static inline int check_hash(void) +{ + struct bpf_htab *hash = (struct bpf_htab *)&m_hash; + struct bpf_map *map = (struct bpf_map *)&m_hash; + int i; + + VERIFY(check_default(&hash->map, map)); + + VERIFY(hash->n_buckets == MAX_ENTRIES); + VERIFY(hash->elem_size == 64); + + VERIFY(hash->count.counter == 0); + for (i = 0; i < HALF_ENTRIES; ++i) { + const __u32 key = i; + const __u32 val = 1; + + if (bpf_map_update_elem(hash, &key, &val, 0)) + return 0; + } + VERIFY(hash->count.counter == HALF_ENTRIES); + + return 1; +} + +struct bpf_array { + struct bpf_map map; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_array SEC(".maps"); + +static inline int check_array(void) +{ + struct bpf_array *array = (struct bpf_array *)&m_array; + struct bpf_map *map = (struct bpf_map *)&m_array; + int i, n_lookups = 0, n_keys = 0; + + VERIFY(check_default(&array->map, map)); + + VERIFY(array->elem_size == 8); + + for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) { + const __u32 key = i; + __u32 *val = bpf_map_lookup_elem(array, &key); + + ++n_lookups; + if (val) + ++n_keys; + } + + VERIFY(n_lookups == MAX_ENTRIES); + VERIFY(n_keys == MAX_ENTRIES); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_prog_array SEC(".maps"); + +static inline int check_prog_array(void) +{ + struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array; + struct bpf_map *map = (struct bpf_map *)&m_prog_array; + + VERIFY(check_default(&prog_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_perf_event_array SEC(".maps"); + +static inline int check_perf_event_array(void) +{ + struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array; + struct bpf_map *map = (struct bpf_map *)&m_perf_event_array; + + VERIFY(check_default(&perf_event_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_hash SEC(".maps"); + +static inline int check_percpu_hash(void) +{ + struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_percpu_hash; + + VERIFY(check_default(&percpu_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_array SEC(".maps"); + +static inline int check_percpu_array(void) +{ + struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array; + struct bpf_map *map = (struct bpf_map *)&m_percpu_array; + + VERIFY(check_default(&percpu_array->map, map)); + + return 1; +} + +struct bpf_stack_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} m_stack_trace SEC(".maps"); + +static inline int check_stack_trace(void) +{ + struct bpf_stack_map *stack_trace = + (struct bpf_stack_map *)&m_stack_trace; + struct bpf_map *map = (struct bpf_map *)&m_stack_trace; + + VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64), + MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cgroup_array SEC(".maps"); + +static inline int check_cgroup_array(void) +{ + struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_array; + + VERIFY(check_default(&cgroup_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_hash SEC(".maps"); + +static inline int check_lru_hash(void) +{ + struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_hash; + + VERIFY(check_default(&lru_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_percpu_hash SEC(".maps"); + +static inline int check_lru_percpu_hash(void) +{ + struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash; + + VERIFY(check_default(&lru_percpu_hash->map, map)); + + return 1; +} + +struct lpm_trie { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct lpm_key { + struct bpf_lpm_trie_key trie_key; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct lpm_key); + __type(value, __u32); +} m_lpm_trie SEC(".maps"); + +static inline int check_lpm_trie(void) +{ + struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie; + struct bpf_map *map = (struct bpf_map *)&m_lpm_trie; + + VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32), + MAX_ENTRIES)); + + return 1; +} + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + }); +} m_array_of_maps SEC(".maps") = { + .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +static inline int check_array_of_maps(void) +{ + struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; + + VERIFY(check_default(&array_of_maps->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct inner_map); +} m_hash_of_maps SEC(".maps") = { + .values = { + [2] = &inner_map, + }, +}; + +static inline int check_hash_of_maps(void) +{ + struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; + + VERIFY(check_default(&hash_of_maps->map, map)); + + return 1; +} + +struct bpf_dtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap SEC(".maps"); + +static inline int check_devmap(void) +{ + struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap; + struct bpf_map *map = (struct bpf_map *)&m_devmap; + + VERIFY(check_default(&devmap->map, map)); + + return 1; +} + +struct bpf_stab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockmap SEC(".maps"); + +static inline int check_sockmap(void) +{ + struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap; + struct bpf_map *map = (struct bpf_map *)&m_sockmap; + + VERIFY(check_default(&sockmap->map, map)); + + return 1; +} + +struct bpf_cpu_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cpumap SEC(".maps"); + +static inline int check_cpumap(void) +{ + struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap; + struct bpf_map *map = (struct bpf_map *)&m_cpumap; + + VERIFY(check_default(&cpumap->map, map)); + + return 1; +} + +struct xsk_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_xskmap SEC(".maps"); + +static inline int check_xskmap(void) +{ + struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap; + struct bpf_map *map = (struct bpf_map *)&m_xskmap; + + VERIFY(check_default(&xskmap->map, map)); + + return 1; +} + +struct bpf_shtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockhash SEC(".maps"); + +static inline int check_sockhash(void) +{ + struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash; + struct bpf_map *map = (struct bpf_map *)&m_sockhash; + + VERIFY(check_default(&sockhash->map, map)); + + return 1; +} + +struct bpf_cgroup_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_cgroup_storage SEC(".maps"); + +static inline int check_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage; + + VERIFY(check(&cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct reuseport_array { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_reuseport_sockarray SEC(".maps"); + +static inline int check_reuseport_sockarray(void) +{ + struct reuseport_array *reuseport_sockarray = + (struct reuseport_array *)&m_reuseport_sockarray; + struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray; + + VERIFY(check_default(&reuseport_sockarray->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_percpu_cgroup_storage SEC(".maps"); + +static inline int check_percpu_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *percpu_cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage; + + VERIFY(check(&percpu_cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct bpf_queue_stack { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_queue SEC(".maps"); + +static inline int check_queue(void) +{ + struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue; + struct bpf_map *map = (struct bpf_map *)&m_queue; + + VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_STACK); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_stack SEC(".maps"); + +static inline int check_stack(void) +{ + struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack; + struct bpf_map *map = (struct bpf_map *)&m_stack; + + VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct bpf_sk_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, __u32); + __type(value, __u32); +} m_sk_storage SEC(".maps"); + +static inline int check_sk_storage(void) +{ + struct bpf_sk_storage_map *sk_storage = + (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_map *map = (struct bpf_map *)&m_sk_storage; + + VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap_hash SEC(".maps"); + +static inline int check_devmap_hash(void) +{ + struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash; + struct bpf_map *map = (struct bpf_map *)&m_devmap_hash; + + VERIFY(check_default(&devmap_hash->map, map)); + + return 1; +} + +struct bpf_ringbuf_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} m_ringbuf SEC(".maps"); + +static inline int check_ringbuf(void) +{ + struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; + struct bpf_map *map = (struct bpf_map *)&m_ringbuf; + + VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + + return 1; +} + +SEC("cgroup_skb/egress") +int cg_skb(void *ctx) +{ + VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array); + VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array); + VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap); + VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap); + VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + check_reuseport_sockarray); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + check_percpu_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue); + VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack); + VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash); + VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf); + + return 1; +} + +__u32 _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c new file mode 100644 index 000000000000..25467d13c356 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(stack_trace_t)); +} stackmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, stack_trace_t); +} stackdata_map SEC(".maps"); + +long stackid_kernel = 1; +long stackid_user = 1; +long stack_kernel = 1; +long stack_user = 1; + +SEC("perf_event") +int oncpu(void *ctx) +{ + stack_trace_t *trace; + __u32 key = 0; + long val; + + val = bpf_get_stackid(ctx, &stackmap, 0); + if (val > 0) + stackid_kernel = 2; + val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); + if (val > 0) + stackid_user = 2; + + trace = bpf_map_lookup_elem(&stackdata_map, &key); + if (!trace) + return 0; + + val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0); + if (val > 0) + stack_kernel = 2; + + val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK); + if (val > 0) + stack_user = 2; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c new file mode 100644 index 000000000000..62c8cdec6d5d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_autoload.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +bool prog1_called = false; +bool prog2_called = false; +bool prog3_called = false; + +SEC("raw_tp/sys_enter") +int prog1(const void *ctx) +{ + prog1_called = true; + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(const void *ctx) +{ + prog2_called = true; + return 0; +} + +struct fake_kernel_struct { + int whatever; +} __attribute__((preserve_access_index)); + +SEC("fentry/unexisting-kprobe-will-fail-if-loaded") +int prog3(const void *ctx) +{ + struct fake_kernel_struct *fake = (void *)ctx; + fake->whatever = 123; + prog3_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c new file mode 100644 index 000000000000..20861ec2f674 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_retro.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct task_struct { + int tgid; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} exp_tgid_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} results SEC(".maps"); + +SEC("tp/raw_syscalls/sys_enter") +int handle_sys_enter(void *ctx) +{ + struct task_struct *task = (void *)bpf_get_current_task(); + int tgid = BPF_CORE_READ(task, tgid); + int zero = 0; + int real_tgid = bpf_get_current_pid_tgid() >> 32; + int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero); + + /* only pass through sys_enters from test process */ + if (!exp_tgid || *exp_tgid != real_tgid) + return 0; + + bpf_map_update_elem(&results, &zero, &tgid, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c new file mode 100644 index 000000000000..ddb687c5d125 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_endian.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +__u16 in16 = 0; +__u32 in32 = 0; +__u64 in64 = 0; + +__u16 out16 = 0; +__u32 out32 = 0; +__u64 out64 = 0; + +__u16 const16 = 0; +__u32 const32 = 0; +__u64 const64 = 0; + +SEC("raw_tp/sys_enter") +int sys_enter(const void *ctx) +{ + out16 = __builtin_bswap16(in16); + out32 = __builtin_bswap32(in32); + out64 = __builtin_bswap64(in64); + const16 = ___bpf_swab16(IN16); + const32 = ___bpf_swab32(IN32); + const64 = ___bpf_swab64(IN64); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 29817a703984..b6a6eb279e54 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -57,8 +57,9 @@ struct { SEC("raw_tracepoint/sys_enter") int bpf_prog1(void *ctx) { - int max_len, max_buildid_len, usize, ksize, total_size; + int max_len, max_buildid_len, total_size; struct stack_trace_t *data; + long usize, ksize; void *raw_data; __u32 key = 0; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c new file mode 100644 index 000000000000..6c9cbb5a3bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u64 out__bpf_link_fops = -1; +__u64 out__bpf_link_fops1 = -1; +__u64 out__btf_size = -1; +__u64 out__per_cpu_start = -1; + +extern const void bpf_link_fops __ksym; +extern const void __start_BTF __ksym; +extern const void __stop_BTF __ksym; +extern const void __per_cpu_start __ksym; +/* non-existing symbol, weak, default to zero */ +extern const void bpf_link_fops1 __ksym __weak; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + out__bpf_link_fops = (__u64)&bpf_link_fops; + out__btf_size = (__u64)(&__stop_BTF - &__start_BTF); + out__per_cpu_start = (__u64)&__per_cpu_start; + + out__bpf_link_fops1 = (__u64)&bpf_link_fops1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index ad59c4c9aba8..8207a2dc2f9d 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -12,8 +12,8 @@ struct { __uint(value_size, sizeof(int)); } perf_buf_map SEC(".maps"); -SEC("kprobe/sys_nanosleep") -int BPF_KPROBE(handle_sys_nanosleep_entry) +SEC("tp/raw_syscalls/sys_enter") +int handle_sys_enter(void *ctx) { int cpu = bpf_get_smp_processor_id(); diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c new file mode 100644 index 000000000000..bbf8296f4d66 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2020 Cloudflare + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <sys/socket.h> + +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define IP4(a, b, c, d) \ + bpf_htonl((((__u32)(a) & 0xffU) << 24) | \ + (((__u32)(b) & 0xffU) << 16) | \ + (((__u32)(c) & 0xffU) << 8) | \ + (((__u32)(d) & 0xffU) << 0)) +#define IP6(aaaa, bbbb, cccc, dddd) \ + { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } + +#define MAX_SOCKS 32 + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, MAX_SOCKS); + __type(key, __u32); + __type(value, __u64); +} redir_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} run_map SEC(".maps"); + +enum { + PROG1 = 0, + PROG2, +}; + +enum { + SERVER_A = 0, + SERVER_B, +}; + +/* Addressable key/value constants for convenience */ +static const int KEY_PROG1 = PROG1; +static const int KEY_PROG2 = PROG2; +static const int PROG_DONE = 1; + +static const __u32 KEY_SERVER_A = SERVER_A; +static const __u32 KEY_SERVER_B = SERVER_B; + +static const __u16 DST_PORT = 7007; /* Host byte order */ +static const __u32 DST_IP4 = IP4(127, 0, 0, 1); +static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); + +SEC("sk_lookup/lookup_pass") +int lookup_pass(struct bpf_sk_lookup *ctx) +{ + return SK_PASS; +} + +SEC("sk_lookup/lookup_drop") +int lookup_drop(struct bpf_sk_lookup *ctx) +{ + return SK_DROP; +} + +SEC("sk_reuseport/reuse_pass") +int reuseport_pass(struct sk_reuseport_md *ctx) +{ + return SK_PASS; +} + +SEC("sk_reuseport/reuse_drop") +int reuseport_drop(struct sk_reuseport_md *ctx) +{ + return SK_DROP; +} + +/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */ +SEC("sk_lookup/redir_port") +int redir_port(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + if (ctx->local_port != DST_PORT) + return SK_PASS; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_PASS; + + err = bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + return err ? SK_DROP : SK_PASS; +} + +/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */ +SEC("sk_lookup/redir_ip4") +int redir_ip4(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + if (ctx->family != AF_INET) + return SK_PASS; + if (ctx->local_port != DST_PORT) + return SK_PASS; + if (ctx->local_ip4 != DST_IP4) + return SK_PASS; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_PASS; + + err = bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + return err ? SK_DROP : SK_PASS; +} + +/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */ +SEC("sk_lookup/redir_ip6") +int redir_ip6(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + if (ctx->family != AF_INET6) + return SK_PASS; + if (ctx->local_port != DST_PORT) + return SK_PASS; + if (ctx->local_ip6[0] != DST_IP6[0] || + ctx->local_ip6[1] != DST_IP6[1] || + ctx->local_ip6[2] != DST_IP6[2] || + ctx->local_ip6[3] != DST_IP6[3]) + return SK_PASS; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_PASS; + + err = bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + return err ? SK_DROP : SK_PASS; +} + +SEC("sk_lookup/select_sock_a") +int select_sock_a(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_PASS; + + err = bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + return err ? SK_DROP : SK_PASS; +} + +SEC("sk_lookup/select_sock_a_no_reuseport") +int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_DROP; + + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT); + bpf_sk_release(sk); + return err ? SK_DROP : SK_PASS; +} + +SEC("sk_reuseport/select_sock_b") +int select_sock_b(struct sk_reuseport_md *ctx) +{ + __u32 key = KEY_SERVER_B; + int err; + + err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0); + return err ? SK_DROP : SK_PASS; +} + +/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */ +SEC("sk_lookup/sk_assign_eexist") +int sk_assign_eexist(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err, ret; + + ret = SK_DROP; + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); + if (!sk) + goto out; + err = bpf_sk_assign(ctx, sk, 0); + if (err) + goto out; + bpf_sk_release(sk); + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + goto out; + err = bpf_sk_assign(ctx, sk, 0); + if (err != -EEXIST) { + bpf_printk("sk_assign returned %d, expected %d\n", + err, -EEXIST); + goto out; + } + + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ +out: + if (sk) + bpf_sk_release(sk); + return ret; +} + +/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */ +SEC("sk_lookup/sk_assign_replace_flag") +int sk_assign_replace_flag(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err, ret; + + ret = SK_DROP; + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + goto out; + err = bpf_sk_assign(ctx, sk, 0); + if (err) + goto out; + bpf_sk_release(sk); + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); + if (!sk) + goto out; + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); + if (err) { + bpf_printk("sk_assign returned %d, expected 0\n", err); + goto out; + } + + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ +out: + if (sk) + bpf_sk_release(sk); + return ret; +} + +/* Check that bpf_sk_assign(sk=NULL) is accepted. */ +SEC("sk_lookup/sk_assign_null") +int sk_assign_null(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk = NULL; + int err, ret; + + ret = SK_DROP; + + err = bpf_sk_assign(ctx, NULL, 0); + if (err) { + bpf_printk("sk_assign returned %d, expected 0\n", err); + goto out; + } + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); + if (!sk) + goto out; + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); + if (err) { + bpf_printk("sk_assign returned %d, expected 0\n", err); + goto out; + } + + if (ctx->sk != sk) + goto out; + err = bpf_sk_assign(ctx, NULL, 0); + if (err != -EEXIST) + goto out; + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); + if (err) + goto out; + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); + if (err) + goto out; + + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ +out: + if (sk) + bpf_sk_release(sk); + return ret; +} + +/* Check that selected sk is accessible through context. */ +SEC("sk_lookup/access_ctx_sk") +int access_ctx_sk(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk1 = NULL, *sk2 = NULL; + int err, ret; + + ret = SK_DROP; + + /* Try accessing unassigned (NULL) ctx->sk field */ + if (ctx->sk && ctx->sk->family != AF_INET) + goto out; + + /* Assign a value to ctx->sk */ + sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk1) + goto out; + err = bpf_sk_assign(ctx, sk1, 0); + if (err) + goto out; + if (ctx->sk != sk1) + goto out; + + /* Access ctx->sk fields */ + if (ctx->sk->family != AF_INET || + ctx->sk->type != SOCK_STREAM || + ctx->sk->state != BPF_TCP_LISTEN) + goto out; + + /* Reset selection */ + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); + if (err) + goto out; + if (ctx->sk) + goto out; + + /* Assign another socket */ + sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); + if (!sk2) + goto out; + err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE); + if (err) + goto out; + if (ctx->sk != sk2) + goto out; + + /* Access reassigned ctx->sk fields */ + if (ctx->sk->family != AF_INET || + ctx->sk->type != SOCK_STREAM || + ctx->sk->state != BPF_TCP_LISTEN) + goto out; + + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ +out: + if (sk1) + bpf_sk_release(sk1); + if (sk2) + bpf_sk_release(sk2); + return ret; +} + +/* Check narrow loads from ctx fields that support them. + * + * Narrow loads of size >= target field size from a non-zero offset + * are not covered because they give bogus results, that is the + * verifier ignores the offset. + */ +SEC("sk_lookup/ctx_narrow_access") +int ctx_narrow_access(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err, family; + __u16 *half; + __u8 *byte; + bool v4; + + v4 = (ctx->family == AF_INET); + + /* Narrow loads from family field */ + byte = (__u8 *)&ctx->family; + half = (__u16 *)&ctx->family; + if (byte[0] != (v4 ? AF_INET : AF_INET6) || + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + return SK_DROP; + if (half[0] != (v4 ? AF_INET : AF_INET6)) + return SK_DROP; + + byte = (__u8 *)&ctx->protocol; + if (byte[0] != IPPROTO_TCP || + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + return SK_DROP; + half = (__u16 *)&ctx->protocol; + if (half[0] != IPPROTO_TCP) + return SK_DROP; + + /* Narrow loads from remote_port field. Expect non-0 value. */ + byte = (__u8 *)&ctx->remote_port; + if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) + return SK_DROP; + half = (__u16 *)&ctx->remote_port; + if (half[0] == 0) + return SK_DROP; + + /* Narrow loads from local_port field. Expect DST_PORT. */ + byte = (__u8 *)&ctx->local_port; + if (byte[0] != ((DST_PORT >> 0) & 0xff) || + byte[1] != ((DST_PORT >> 8) & 0xff) || + byte[2] != 0 || byte[3] != 0) + return SK_DROP; + half = (__u16 *)&ctx->local_port; + if (half[0] != DST_PORT) + return SK_DROP; + + /* Narrow loads from IPv4 fields */ + if (v4) { + /* Expect non-0.0.0.0 in remote_ip4 */ + byte = (__u8 *)&ctx->remote_ip4; + if (byte[0] == 0 && byte[1] == 0 && + byte[2] == 0 && byte[3] == 0) + return SK_DROP; + half = (__u16 *)&ctx->remote_ip4; + if (half[0] == 0 && half[1] == 0) + return SK_DROP; + + /* Expect DST_IP4 in local_ip4 */ + byte = (__u8 *)&ctx->local_ip4; + if (byte[0] != ((DST_IP4 >> 0) & 0xff) || + byte[1] != ((DST_IP4 >> 8) & 0xff) || + byte[2] != ((DST_IP4 >> 16) & 0xff) || + byte[3] != ((DST_IP4 >> 24) & 0xff)) + return SK_DROP; + half = (__u16 *)&ctx->local_ip4; + if (half[0] != ((DST_IP4 >> 0) & 0xffff) || + half[1] != ((DST_IP4 >> 16) & 0xffff)) + return SK_DROP; + } else { + /* Expect 0.0.0.0 IPs when family != AF_INET */ + byte = (__u8 *)&ctx->remote_ip4; + if (byte[0] != 0 || byte[1] != 0 && + byte[2] != 0 || byte[3] != 0) + return SK_DROP; + half = (__u16 *)&ctx->remote_ip4; + if (half[0] != 0 || half[1] != 0) + return SK_DROP; + + byte = (__u8 *)&ctx->local_ip4; + if (byte[0] != 0 || byte[1] != 0 && + byte[2] != 0 || byte[3] != 0) + return SK_DROP; + half = (__u16 *)&ctx->local_ip4; + if (half[0] != 0 || half[1] != 0) + return SK_DROP; + } + + /* Narrow loads from IPv6 fields */ + if (!v4) { + /* Expenct non-:: IP in remote_ip6 */ + byte = (__u8 *)&ctx->remote_ip6; + if (byte[0] == 0 && byte[1] == 0 && + byte[2] == 0 && byte[3] == 0 && + byte[4] == 0 && byte[5] == 0 && + byte[6] == 0 && byte[7] == 0 && + byte[8] == 0 && byte[9] == 0 && + byte[10] == 0 && byte[11] == 0 && + byte[12] == 0 && byte[13] == 0 && + byte[14] == 0 && byte[15] == 0) + return SK_DROP; + half = (__u16 *)&ctx->remote_ip6; + if (half[0] == 0 && half[1] == 0 && + half[2] == 0 && half[3] == 0 && + half[4] == 0 && half[5] == 0 && + half[6] == 0 && half[7] == 0) + return SK_DROP; + + /* Expect DST_IP6 in local_ip6 */ + byte = (__u8 *)&ctx->local_ip6; + if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || + byte[1] != ((DST_IP6[0] >> 8) & 0xff) || + byte[2] != ((DST_IP6[0] >> 16) & 0xff) || + byte[3] != ((DST_IP6[0] >> 24) & 0xff) || + byte[4] != ((DST_IP6[1] >> 0) & 0xff) || + byte[5] != ((DST_IP6[1] >> 8) & 0xff) || + byte[6] != ((DST_IP6[1] >> 16) & 0xff) || + byte[7] != ((DST_IP6[1] >> 24) & 0xff) || + byte[8] != ((DST_IP6[2] >> 0) & 0xff) || + byte[9] != ((DST_IP6[2] >> 8) & 0xff) || + byte[10] != ((DST_IP6[2] >> 16) & 0xff) || + byte[11] != ((DST_IP6[2] >> 24) & 0xff) || + byte[12] != ((DST_IP6[3] >> 0) & 0xff) || + byte[13] != ((DST_IP6[3] >> 8) & 0xff) || + byte[14] != ((DST_IP6[3] >> 16) & 0xff) || + byte[15] != ((DST_IP6[3] >> 24) & 0xff)) + return SK_DROP; + half = (__u16 *)&ctx->local_ip6; + if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || + half[1] != ((DST_IP6[0] >> 16) & 0xffff) || + half[2] != ((DST_IP6[1] >> 0) & 0xffff) || + half[3] != ((DST_IP6[1] >> 16) & 0xffff) || + half[4] != ((DST_IP6[2] >> 0) & 0xffff) || + half[5] != ((DST_IP6[2] >> 16) & 0xffff) || + half[6] != ((DST_IP6[3] >> 0) & 0xffff) || + half[7] != ((DST_IP6[3] >> 16) & 0xffff)) + return SK_DROP; + } else { + /* Expect :: IPs when family != AF_INET6 */ + byte = (__u8 *)&ctx->remote_ip6; + if (byte[0] != 0 || byte[1] != 0 || + byte[2] != 0 || byte[3] != 0 || + byte[4] != 0 || byte[5] != 0 || + byte[6] != 0 || byte[7] != 0 || + byte[8] != 0 || byte[9] != 0 || + byte[10] != 0 || byte[11] != 0 || + byte[12] != 0 || byte[13] != 0 || + byte[14] != 0 || byte[15] != 0) + return SK_DROP; + half = (__u16 *)&ctx->remote_ip6; + if (half[0] != 0 || half[1] != 0 || + half[2] != 0 || half[3] != 0 || + half[4] != 0 || half[5] != 0 || + half[6] != 0 || half[7] != 0) + return SK_DROP; + + byte = (__u8 *)&ctx->local_ip6; + if (byte[0] != 0 || byte[1] != 0 || + byte[2] != 0 || byte[3] != 0 || + byte[4] != 0 || byte[5] != 0 || + byte[6] != 0 || byte[7] != 0 || + byte[8] != 0 || byte[9] != 0 || + byte[10] != 0 || byte[11] != 0 || + byte[12] != 0 || byte[13] != 0 || + byte[14] != 0 || byte[15] != 0) + return SK_DROP; + half = (__u16 *)&ctx->local_ip6; + if (half[0] != 0 || half[1] != 0 || + half[2] != 0 || half[3] != 0 || + half[4] != 0 || half[5] != 0 || + half[6] != 0 || half[7] != 0) + return SK_DROP; + } + + /* Success, redirect to KEY_SERVER_B */ + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); + if (sk) { + bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + } + return SK_PASS; +} + +/* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */ +SEC("sk_lookup/sk_assign_esocknosupport") +int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err, ret; + + ret = SK_DROP; + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + goto out; + + err = bpf_sk_assign(ctx, sk, 0); + if (err != -ESOCKTNOSUPPORT) { + bpf_printk("sk_assign returned %d, expected %d\n", + err, -ESOCKTNOSUPPORT); + goto out; + } + + ret = SK_PASS; /* Success, pass to regular lookup */ +out: + if (sk) + bpf_sk_release(sk); + return ret; +} + +SEC("sk_lookup/multi_prog_pass1") +int multi_prog_pass1(struct bpf_sk_lookup *ctx) +{ + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); + return SK_PASS; +} + +SEC("sk_lookup/multi_prog_pass2") +int multi_prog_pass2(struct bpf_sk_lookup *ctx) +{ + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); + return SK_PASS; +} + +SEC("sk_lookup/multi_prog_drop1") +int multi_prog_drop1(struct bpf_sk_lookup *ctx) +{ + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); + return SK_DROP; +} + +SEC("sk_lookup/multi_prog_drop2") +int multi_prog_drop2(struct bpf_sk_lookup *ctx) +{ + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); + return SK_DROP; +} + +static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) +{ + struct bpf_sock *sk; + int err; + + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); + if (!sk) + return SK_DROP; + + err = bpf_sk_assign(ctx, sk, 0); + bpf_sk_release(sk); + if (err) + return SK_DROP; + + return SK_PASS; +} + +SEC("sk_lookup/multi_prog_redir1") +int multi_prog_redir1(struct bpf_sk_lookup *ctx) +{ + int ret; + + ret = select_server_a(ctx); + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); + return SK_PASS; +} + +SEC("sk_lookup/multi_prog_redir2") +int multi_prog_redir2(struct bpf_sk_lookup *ctx) +{ + int ret; + + ret = select_server_a(ctx); + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); + return SK_PASS; +} + +char _license[] SEC("license") = "Dual BSD/GPL"; +__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 77ae86f44db5..374ccef704e1 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -20,7 +20,9 @@ long long in4 __attribute__((aligned(64))) = 0; struct s in5 = {}; /* .rodata section */ -const volatile int in6 = 0; +const volatile struct { + const int in6; +} in = {}; /* .data section */ int out1 = -1; @@ -46,7 +48,7 @@ int handler(const void *ctx) out3 = in3; out4 = in4; out5 = in5; - out6 = in6; + out6 = in.in6; bpf_syscall = CONFIG_BPF_SYSCALL; kern_ver = LINUX_KERNEL_VERSION; diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c new file mode 100644 index 000000000000..cd4b72c55dfe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MAX_LEN 256 + +char buf_in1[MAX_LEN] = {}; +char buf_in2[MAX_LEN] = {}; + +int test_pid = 0; +bool capture = false; + +/* .bss */ +long payload1_len1 = 0; +long payload1_len2 = 0; +long total1 = 0; +char payload1[MAX_LEN + MAX_LEN] = {}; + +/* .data */ +int payload2_len1 = -1; +int payload2_len2 = -1; +int total2 = -1; +char payload2[MAX_LEN + MAX_LEN] = { 1 }; + +int payload3_len1 = -1; +int payload3_len2 = -1; +int total3= -1; +char payload3[MAX_LEN + MAX_LEN] = { 1 }; + +int payload4_len1 = -1; +int payload4_len2 = -1; +int total4= -1; +char payload4[MAX_LEN + MAX_LEN] = { 1 }; + +SEC("raw_tp/sys_enter") +int handler64_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload1; + u64 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len2 = len; + } + + total1 = payload - (void *)payload1; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handler64_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload3; + long len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload3_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload3_len2 = len; + } + total3 = payload - (void *)payload3; + + return 0; +} + +SEC("tp/raw_syscalls/sys_enter") +int handler32_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload2; + u32 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len2 = len; + } + + total2 = payload - (void *)payload2; + + return 0; +} + +SEC("tp/raw_syscalls/sys_exit") +int handler32_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload4; + int len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload4_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload4_len2 = len; + } + total4 = payload - (void *)payload4; + + return 0; +} + +SEC("tp/syscalls/sys_exit_getpid") +int handler_exit(void *regs) +{ + long bla; + + if (bpf_probe_read_kernel(&bla, sizeof(bla), 0)) + return 1; + else + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 5611b564d3b1..29fa09d6a6c6 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/hrtimer_nanosleep") -int BPF_KPROBE(handle__kprobe, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("kprobe/hrtimer_start_range_ns") +int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC) + if (tim == MY_TV_NSEC) kprobe_called = true; return 0; } -SEC("fentry/hrtimer_nanosleep") -int BPF_PROG(handle__fentry, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("fentry/hrtimer_start_range_ns") +int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC) + if (tim == MY_TV_NSEC) fentry_called = true; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c new file mode 100644 index 000000000000..eb93ea95d1d8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char LICENSE[] SEC("license") = "GPL"; + +SEC("xdp/handler") +int xdp_handler(struct xdp_md *xdp) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c new file mode 100644 index 000000000000..59ee4f182ff8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define IFINDEX_LO 1 + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); + __uint(max_entries, 4); +} cpu_map SEC(".maps"); + +SEC("xdp_redir") +int xdp_redir_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&cpu_map, 1, 0); +} + +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp_cpumap/dummy_cm") +int xdp_dummy_cm(struct xdp_md *ctx) +{ + if (ctx->ingress_ifindex == IFINDEX_LO) + return XDP_DROP; + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c new file mode 100644 index 000000000000..8ca7f399b670 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020, Oracle and/or its affiliates. + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int trace_printk_ret = 0; +int trace_printk_ran = 0; + +SEC("tp/raw_syscalls/sys_enter") +int sys_enter(void *ctx) +{ + static const char fmt[] = "testing,testing %d\n"; + + trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), + ++trace_printk_ran); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c new file mode 100644 index 000000000000..165e3c2dd9a3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/udp_limit.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <sys/socket.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int invocations = 0, in_use = 0; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_map SEC(".maps"); + +SEC("cgroup/sock_create") +int sock(struct bpf_sock *ctx) +{ + int *sk_storage; + __u32 key; + + if (ctx->type != SOCK_DGRAM) + return 1; + + sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!sk_storage) + return 0; + *sk_storage = 0xdeadbeef; + + __sync_fetch_and_add(&invocations, 1); + + if (in_use > 0) { + /* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called + * when we return an error from the BPF + * program! + */ + return 0; + } + + __sync_fetch_and_add(&in_use, 1); + return 1; +} + +SEC("cgroup/sock_release") +int sock_release(struct bpf_sock *ctx) +{ + int *sk_storage; + __u32 key; + + if (ctx->type != SOCK_DGRAM) + return 1; + + sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0); + if (!sk_storage || *sk_storage != 0xdeadbeef) + return 0; + + __sync_fetch_and_add(&invocations, 1); + __sync_fetch_and_add(&in_use, -1); + return 1; +} diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index a53ed58528d6..bfff82be3fc1 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -34,7 +34,7 @@ serverPort = int(sys.argv[1]) # create active socket sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) try: - sock.connect(('localhost', serverPort)) + sock.connect(('::1', serverPort)) except socket.error as e: sys.exit(1) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index 0ca60d193bed..42ab8882f00f 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -38,7 +38,7 @@ serverSocket = None # create passive socket serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -try: serverSocket.bind(('localhost', 0)) +try: serverSocket.bind(('::1', 0)) except socket.error as msg: print('bind fails: ' + str(msg)) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 655729004391..d946252a25bb 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -74,22 +74,7 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to setup cgroup environment\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (cgroup_fd < 0) { - printf("Failed to create test cgroup\n"); - goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); - goto err; - } + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); /* Attach the bpf program */ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index d850fb9076b5..804dddd97d4c 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -33,21 +33,10 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to load DEV_CGROUP program\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); - goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); - goto err; + goto out; } /* Attach bpf program */ diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index 9df0d2ac45f8..4f6444bcd53f 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then exit $ksft_skip fi -SRC_TREE=../../../../ +if [ "$building_out_of_srctree" ]; then + # We are in linux-build/kselftest/bpf + OUTPUT=../../ +else + # We are in linux/tools/testing/selftests/bpf + OUTPUT=../../../../ +fi test_run() { @@ -19,8 +25,8 @@ test_run() echo "[ JIT enabled:$1 hardened:$2 ]" dmesg -C - if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then - insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null + if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then + insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null if [ $? -ne 0 ]; then rc=1 fi diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 785eabf2a593..5620919fde9e 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment -kill -INT $! +kill -TERM $! if [[ $(< $TMP_FILE) != "foobar" ]]; then exit 1 diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c index c1da5404454a..a7b9a69f4fd5 100644 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -58,22 +58,9 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to load bpf program\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (cgroup_fd < 0) { - printf("Failed to create test cgroup\n"); + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (cgroup_fd < 0) goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); - goto err; - } /* Attach bpf program */ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { @@ -82,9 +69,9 @@ int main(int argc, char **argv) } if (system("which ping6 &>/dev/null") == 0) - assert(!system("ping6 localhost -c 10000 -f -q > /dev/null")); + assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null")); else - assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null")); + assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null")); if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, &prog_cnt)) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 54fa5fa688ce..b1e4dadacd9b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -12,6 +12,9 @@ #include <string.h> #include <execinfo.h> /* backtrace */ +#define EXIT_NO_TEST 2 +#define EXIT_ERR_SETUP_INFRA 3 + /* defined in test_progs.h */ struct test_env env = {}; @@ -111,13 +114,31 @@ static void reset_affinity() { if (err < 0) { stdio_restore(); fprintf(stderr, "Failed to reset process affinity: %d!\n", err); - exit(-1); + exit(EXIT_ERR_SETUP_INFRA); } err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); if (err < 0) { stdio_restore(); fprintf(stderr, "Failed to reset thread affinity: %d!\n", err); - exit(-1); + exit(EXIT_ERR_SETUP_INFRA); + } +} + +static void save_netns(void) +{ + env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (env.saved_netns_fd == -1) { + perror("open(/proc/self/ns/net)"); + exit(EXIT_ERR_SETUP_INFRA); + } +} + +static void restore_netns(void) +{ + if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) { + stdio_restore(); + perror("setns(CLONE_NEWNS)"); + exit(EXIT_ERR_SETUP_INFRA); } } @@ -138,8 +159,6 @@ void test__end_subtest() test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); - reset_affinity(); - free(test->subtest_name); test->subtest_name = NULL; } @@ -366,6 +385,8 @@ enum ARG_KEYS { ARG_TEST_NAME_BLACKLIST = 'b', ARG_VERIFIER_STATS = 's', ARG_VERBOSE = 'v', + ARG_GET_TEST_CNT = 'c', + ARG_LIST_TEST_NAMES = 'l', }; static const struct argp_option opts[] = { @@ -379,6 +400,10 @@ static const struct argp_option opts[] = { "Output verifier statistics", }, { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, "Verbose output (use -vv or -vvv for progressively verbose output)" }, + { "count", ARG_GET_TEST_CNT, NULL, 0, + "Get number of selected top-level tests " }, + { "list", ARG_LIST_TEST_NAMES, NULL, 0, + "List test names that would run (without running them) " }, {}, }; @@ -511,6 +536,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } } break; + case ARG_GET_TEST_CNT: + env->get_test_cnt = true; + break; + case ARG_LIST_TEST_NAMES: + env->list_test_names = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -643,6 +674,7 @@ int main(int argc, char **argv) return -1; } + save_netns(); stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; @@ -654,6 +686,17 @@ int main(int argc, char **argv) test->test_num, test->test_name)) continue; + if (env.get_test_cnt) { + env.succ_cnt++; + continue; + } + + if (env.list_test_names) { + fprintf(env.stdout, "%s\n", test->test_name); + env.succ_cnt++; + continue; + } + test->run_test(); /* ensure last sub-test is finalized properly */ if (test->subtest_name) @@ -673,19 +716,34 @@ int main(int argc, char **argv) test->error_cnt ? "FAIL" : "OK"); reset_affinity(); + restore_netns(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } stdio_restore(); + + if (env.get_test_cnt) { + printf("%d\n", env.succ_cnt); + goto out; + } + + if (env.list_test_names) + goto out; + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); +out: free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); + close(env.saved_netns_fd); + + if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) + return EXIT_NO_TEST; return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f4503c926aca..6e09bf738473 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -66,6 +66,8 @@ struct test_env { enum verbosity verbosity; bool jit_enabled; + bool get_test_cnt; + bool list_test_names; struct prog_test_def *test; FILE *stdout; @@ -78,6 +80,8 @@ struct test_env { int sub_succ_cnt; /* successful sub-tests */ int fail_cnt; /* total failed tests + sub-tests */ int skip_cnt; /* skipped tests */ + + int saved_netns_fd; }; extern struct test_env env; diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 356351c0ac28..4a64306728ab 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -160,16 +160,10 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CGROUP_PATH); + cgfd = cgroup_setup_and_join(CGROUP_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CGROUP_PATH)) - goto err; - if (send_packet(argv[1])) goto err; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 52bf14955797..9613f7538840 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -464,16 +464,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 0358814c67dc..b8c72c1d9cf7 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -1638,16 +1638,10 @@ int main(int argc, char **argv) exit(err); } - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index f0fc103261a4..6c9f269c396d 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -421,19 +421,11 @@ int main(int argc, char **argv) struct bpf_object *obj; struct bpf_map *map; - err = setup_cgroup_environment(); - CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", - err, errno); - - atexit(cleanup_cgroup_environment); - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - CHECK(cgroup_fd == -1, "create_and_get_cgroup()", + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", "cgroup_fd:%d errno:%d", cgroup_fd, errno); - - err = join_cgroup(TEST_CGROUP); - CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); + atexit(cleanup_cgroup_environment); err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 15653b0e26eb..154a8fd2a48d 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -191,16 +191,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_test(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 78789b27e573..9b6fb00dc7a0 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1963,23 +1963,9 @@ int main(int argc, char **argv) } if (!cg_fd) { - if (setup_cgroup_environment()) { - fprintf(stderr, "ERROR: cgroup env failed\n"); - return -EINVAL; - } - - cg_fd = create_and_get_cgroup(CG_PATH); - if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, strerror(errno)); + cg_fd = cgroup_setup_and_join(CG_PATH); + if (cg_fd < 0) return cg_fd; - } - - if (join_cgroup(CG_PATH)) { - fprintf(stderr, "ERROR: failed to join cgroup\n"); - return -EINVAL; - } cg_created = 1; } diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index d196e2a4a6e0..a20a919244c0 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -1619,16 +1619,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 3ae127620463..74a9e49988b6 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -102,16 +102,10 @@ int main(int argc, char **argv) __u32 key = 0; int rv; - if (setup_cgroup_environment()) - goto err; - - cg_fd = create_and_get_cgroup(cg_path); + cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; - if (join_cgroup(cg_path)) - goto err; - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index f9765ddf0761..8549b31716ab 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -86,16 +86,10 @@ int main(int argc, char **argv) CPU_SET(0, &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - if (setup_cgroup_environment()) - goto err; - - cg_fd = create_and_get_cgroup(cg_path); + cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; - if (join_cgroup(cg_path)) - goto err; - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index c4b17e08d431..dd80f0c84afb 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -10,52 +10,72 @@ # | xdp forwarding | # ------------------ -cleanup() +ret=0 + +setup() { - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_redirect [PASS]"; - else - echo "selftests: test_xdp_redirect [FAILED]"; - fi - set +e + local xdpmode=$1 + + ip netns add ns1 + ip netns add ns2 + + ip link add veth1 index 111 type veth peer name veth11 netns ns1 + ip link add veth2 index 222 type veth peer name veth22 netns ns2 + + ip link set veth1 up + ip link set veth2 up + ip -n ns1 link set dev veth11 up + ip -n ns2 link set dev veth22 up + + ip -n ns1 addr add 10.1.1.11/24 dev veth11 + ip -n ns2 addr add 10.1.1.22/24 dev veth22 +} + +cleanup() +{ ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } -ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null -if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" - exit 0 -fi -set -e - -ip netns add ns1 -ip netns add ns2 +test_xdp_redirect() +{ + local xdpmode=$1 -trap cleanup 0 2 3 6 9 + setup -ip link add veth1 index 111 type veth peer name veth11 -ip link add veth2 index 222 type veth peer name veth22 + ip link set dev veth1 $xdpmode off &> /dev/null + if [ $? -ne 0 ];then + echo "selftests: test_xdp_redirect $xdpmode [SKIP]" + return 0 + fi -ip link set veth11 netns ns1 -ip link set veth22 netns ns2 + ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null + ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null -ip link set veth1 up -ip link set veth2 up + ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null + local ret1=$? + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null + local ret2=$? -ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11 -ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22 + if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then + echo "selftests: test_xdp_redirect $xdpmode [PASS]"; + else + ret=1 + echo "selftests: test_xdp_redirect $xdpmode [FAILED]"; + fi -ip netns exec ns1 ip link set dev veth11 up -ip netns exec ns2 ip link set dev veth22 up + cleanup +} -ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222 -ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111 +set -e +trap cleanup 2 3 6 9 -ip netns exec ns1 ping -c 1 10.1.1.22 -ip netns exec ns2 ping -c 1 10.1.1.11 +test_xdp_redirect xdpgeneric +test_xdp_redirect xdpdrv -exit 0 +exit $ret diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 0af6337a8962..800d503e5cb4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } + +__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) +{ + __u32 info_len = sizeof(*info); + int err; + + memset(info, 0, sizeof(*info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len); + if (err) { + printf("failed to get link info: %d\n", -errno); + return 0; + } + return info->prog_id; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 923b51762759..d4f8e749611b 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (C) 2020 Facebook, Inc. */ #include <stdbool.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> int parse_num_list(const char *s, bool **set, int *set_len); +__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c new file mode 100644 index 000000000000..2ad5f974451c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -0,0 +1,492 @@ +{ + "valid 1,2,4,8-byte reads from bpf_sk_lookup", + .insns = { + /* 1-byte read from family field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family) + 3), + /* 2-byte read from family field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family) + 2), + /* 4-byte read from family field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family)), + + /* 1-byte read from protocol field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol) + 3), + /* 2-byte read from protocol field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol) + 2), + /* 4-byte read from protocol field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol)), + + /* 1-byte read from remote_ip4 field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4) + 3), + /* 2-byte read from remote_ip4 field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4) + 2), + /* 4-byte read from remote_ip4 field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4)), + + /* 1-byte read from remote_ip6 field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 7), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 9), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 11), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 13), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 14), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 15), + /* 2-byte read from remote_ip6 field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 14), + /* 4-byte read from remote_ip6 field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), + + /* 1-byte read from remote_port field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port) + 3), + /* 2-byte read from remote_port field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port) + 2), + /* 4-byte read from remote_port field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port)), + + /* 1-byte read from local_ip4 field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4) + 3), + /* 2-byte read from local_ip4 field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4) + 2), + /* 4-byte read from local_ip4 field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4)), + + /* 1-byte read from local_ip6 field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 4), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 6), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 7), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 8), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 9), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 10), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 11), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 12), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 13), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 14), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 15), + /* 2-byte read from local_ip6 field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 4), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 6), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 8), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 10), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 12), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 14), + /* 4-byte read from local_ip6 field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 4), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6) + 12), + + /* 1-byte read from local_port field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port) + 3), + /* 2-byte read from local_port field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port) + 2), + /* 4-byte read from local_port field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port)), + + /* 8-byte read from sk field */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, sk)), + + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ +{ + "invalid 8-byte read from bpf_sk_lookup family field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, family)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup protocol field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, protocol)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup remote_ip4 field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip4)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup remote_ip6 field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_ip6)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup remote_port field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, remote_port)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup local_ip4 field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip4)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup local_ip6 field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_ip6)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 8-byte read from bpf_sk_lookup local_port field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ +{ + "invalid 4-byte read from bpf_sk_lookup sk field", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, sk)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 2-byte read from bpf_sk_lookup sk field", + .insns = { + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, sk)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 1-byte read from bpf_sk_lookup sk field", + .insns = { + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, sk)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +/* out of bounds and unaligned reads from bpf_sk_lookup */ +{ + "invalid 4-byte read past end of bpf_sk_lookup", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + sizeof(struct bpf_sk_lookup)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 4-byte unaligned read from bpf_sk_lookup at even offset", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +/* in-bound and out-of-bound writes to bpf_sk_lookup */ +{ + "invalid 8-byte write to bpf_sk_lookup", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 4-byte write to bpf_sk_lookup", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 2-byte write to bpf_sk_lookup", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 1-byte write to bpf_sk_lookup", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, +{ + "invalid 4-byte write past end of bpf_sk_lookup", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + sizeof(struct bpf_sk_lookup)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c new file mode 100644 index 000000000000..b52209db8250 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -0,0 +1,62 @@ +{ + "bpf_map_ptr: read with negative offset rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "R1 is bpf_array invalid negative access: off=-8", +}, +{ + "bpf_map_ptr: write rejected", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "only read from bpf_array is supported", +}, +{ + "bpf_map_ptr: read non-existent field rejected", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", +}, +{ + "bpf_map_ptr: read ops field accepted", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c index cd26ee6b7b1d..1f2b8c4cb26d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -56,7 +56,7 @@ .fixup_map_in_map = { 16 }, .fixup_map_array_48b = { 13 }, .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", + .errstr = "only read from bpf_array is supported", }, { "cond: two branches returning different map pointers for lookup (tail, tail)", diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 97ee658e1242..ed4e76b24649 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -836,3 +836,41 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "32bit pkt_ptr -= scalar", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7), + BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "32bit scalar -= pkt_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6), + BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index b3ead29c6089..2cf6f10ab7c4 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -47,7 +47,7 @@ void child(int cpu) _exit(0); } -bool run_test(int cpu) +int run_test(int cpu) { int status; pid_t pid = fork(); @@ -55,7 +55,7 @@ bool run_test(int cpu) if (pid < 0) { ksft_print_msg("fork() failed: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (pid == 0) child(cpu); @@ -63,67 +63,68 @@ bool run_test(int cpu) wpid = waitpid(pid, &status, __WALL); if (wpid != pid) { ksft_print_msg("waitpid() failed: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (!WIFSTOPPED(status)) { ksft_print_msg("child did not stop: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (WSTOPSIG(status) != SIGSTOP) { ksft_print_msg("child did not stop with SIGSTOP: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) { if (errno == EIO) { - ksft_exit_skip( + ksft_print_msg( "ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n", strerror(errno)); + return KSFT_SKIP; } ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } wpid = waitpid(pid, &status, __WALL); if (wpid != pid) { ksft_print_msg("waitpid() failed: $s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (WIFEXITED(status)) { ksft_print_msg("child did not single-step: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (!WIFSTOPPED(status)) { ksft_print_msg("child did not stop: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (WSTOPSIG(status) != SIGTRAP) { ksft_print_msg("child did not stop with SIGTRAP: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) { ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } wpid = waitpid(pid, &status, __WALL); if (wpid != pid) { ksft_print_msg("waitpid() failed: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } if (!WIFEXITED(status)) { ksft_print_msg("child did not exit after PTRACE_CONT: %s\n", strerror(errno)); - return false; + return KSFT_FAIL; } - return true; + return KSFT_PASS; } void suspend(void) @@ -183,32 +184,38 @@ int main(int argc, char **argv) } } + err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus); + if (err < 0) + ksft_exit_fail_msg("sched_getaffinity() failed\n"); + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { if (!CPU_ISSET(cpu, &available_cpus)) continue; tests++; } - ksft_set_plan(tests); if (do_suspend) suspend(); - err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus); - if (err < 0) - ksft_exit_fail_msg("sched_getaffinity() failed\n"); - + ksft_set_plan(tests); for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { - bool test_success; + int test_success; if (!CPU_ISSET(cpu, &available_cpus)) continue; test_success = run_test(cpu); - if (test_success) { + switch (test_success) { + case KSFT_PASS: ksft_test_result_pass("CPU %d\n", cpu); - } else { + break; + case KSFT_SKIP: + ksft_test_result_skip("CPU %d\n", cpu); + break; + case KSFT_FAIL: ksft_test_result_fail("CPU %d\n", cpu); succeeded = false; + break; } } diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore index a81085742d40..83c0f6246055 100644 --- a/tools/testing/selftests/clone3/.gitignore +++ b/tools/testing/selftests/clone3/.gitignore @@ -2,3 +2,4 @@ clone3 clone3_clear_sighand clone3_set_tid +clone3_cap_checkpoint_restore diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile index cf976c732906..ef7564cb7abe 100644 --- a/tools/testing/selftests/clone3/Makefile +++ b/tools/testing/selftests/clone3/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g -I../../../../usr/include/ +LDLIBS += -lcap -TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid +TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \ + clone3_cap_checkpoint_restore include ../lib.mk diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index f14c269a5a18..b7e6dec36173 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -131,9 +131,9 @@ int main(int argc, char *argv[]) uid_t uid = getuid(); - test_clone3_supported(); ksft_print_header(); ksft_set_plan(17); + test_clone3_supported(); /* Just a simple clone3() should return 0.*/ test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c new file mode 100644 index 000000000000..9562425aa0a9 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Based on Christian Brauner's clone3() example. + * These tests are assuming to be running in the host's + * PID namespace. + */ + +/* capabilities related code based on selftests/bpf/test_verifier.c */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/capability.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sched.h> + +#include "../kselftest_harness.h" +#include "clone3_selftests.h" + +#ifndef MAX_PID_NS_LEVEL +#define MAX_PID_NS_LEVEL 32 +#endif + +static void child_exit(int ret) +{ + fflush(stdout); + fflush(stderr); + _exit(ret); +} + +static int call_clone3_set_tid(struct __test_metadata *_metadata, + pid_t *set_tid, size_t set_tid_size) +{ + int status; + pid_t pid = -1; + + struct clone_args args = { + .exit_signal = SIGCHLD, + .set_tid = ptr_to_u64(set_tid), + .set_tid_size = set_tid_size, + }; + + pid = sys_clone3(&args, sizeof(struct clone_args)); + if (pid < 0) { + TH_LOG("%s - Failed to create new process", strerror(errno)); + return -errno; + } + + if (pid == 0) { + int ret; + char tmp = 0; + + TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]); + + if (set_tid[0] != getpid()) + child_exit(EXIT_FAILURE); + child_exit(EXIT_SUCCESS); + } + + TH_LOG("I am the parent (%d). My child's pid is %d", getpid(), pid); + + if (waitpid(pid, &status, 0) < 0) { + TH_LOG("Child returned %s", strerror(errno)); + return -errno; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static int test_clone3_set_tid(struct __test_metadata *_metadata, + pid_t *set_tid, size_t set_tid_size) +{ + int ret; + + TH_LOG("[%d] Trying clone3() with CLONE_SET_TID to %d", getpid(), set_tid[0]); + ret = call_clone3_set_tid(_metadata, set_tid, set_tid_size); + TH_LOG("[%d] clone3() with CLONE_SET_TID %d says:%d", getpid(), set_tid[0], ret); + return ret; +} + +struct libcap { + struct __user_cap_header_struct hdr; + struct __user_cap_data_struct data[2]; +}; + +static int set_capability(void) +{ + cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID }; + struct libcap *cap; + int ret = -1; + cap_t caps; + + caps = cap_get_proc(); + if (!caps) { + perror("cap_get_proc"); + return -1; + } + + /* Drop all capabilities */ + if (cap_clear(caps)) { + perror("cap_clear"); + goto out; + } + + cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET); + cap_set_flag(caps, CAP_PERMITTED, 2, cap_values, CAP_SET); + + cap = (struct libcap *) caps; + + /* 40 -> CAP_CHECKPOINT_RESTORE */ + cap->data[1].effective |= 1 << (40 - 32); + cap->data[1].permitted |= 1 << (40 - 32); + + if (cap_set_proc(caps)) { + perror("cap_set_proc"); + goto out; + } + ret = 0; +out: + if (cap_free(caps)) + perror("cap_free"); + return ret; +} + +TEST(clone3_cap_checkpoint_restore) +{ + pid_t pid; + int status; + int ret = 0; + pid_t set_tid[1]; + + test_clone3_supported(); + + EXPECT_EQ(getuid(), 0) + XFAIL(return, "Skipping all tests as non-root\n"); + + memset(&set_tid, 0, sizeof(set_tid)); + + /* Find the current active PID */ + pid = fork(); + if (pid == 0) { + TH_LOG("Child has PID %d", getpid()); + child_exit(EXIT_SUCCESS); + } + ASSERT_GT(waitpid(pid, &status, 0), 0) + TH_LOG("Waiting for child %d failed", pid); + + /* After the child has finished, its PID should be free. */ + set_tid[0] = pid; + + ASSERT_EQ(set_capability(), 0) + TH_LOG("Could not set CAP_CHECKPOINT_RESTORE"); + + ASSERT_EQ(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), 0); + + EXPECT_EQ(setgid(65534), 0) + TH_LOG("Failed to setgid(65534)"); + ASSERT_EQ(setuid(65534), 0); + + set_tid[0] = pid; + /* This would fail without CAP_CHECKPOINT_RESTORE */ + ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), -EPERM); + ASSERT_EQ(set_capability(), 0) + TH_LOG("Could not set CAP_CHECKPOINT_RESTORE"); + /* This should work as we have CAP_CHECKPOINT_RESTORE as non-root */ + ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index 9e1af8aa7698..db5fc9c5edcf 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -119,9 +119,8 @@ static void test_clone3_clear_sighand(void) int main(int argc, char **argv) { ksft_print_header(); - test_clone3_supported(); - ksft_set_plan(1); + test_clone3_supported(); test_clone3_clear_sighand(); diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 25beb22f35b5..5831c1082d6d 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -157,8 +157,8 @@ int main(int argc, char *argv[]) pid_t set_tid[MAX_PID_NS_LEVEL * 2]; ksft_print_header(); - test_clone3_supported(); ksft_set_plan(29); + test_clone3_supported(); if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0) ksft_exit_fail_msg("pipe() failed\n"); diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore new file mode 100644 index 000000000000..6e6712ce5817 --- /dev/null +++ b/tools/testing/selftests/core/.gitignore @@ -0,0 +1 @@ +close_range_test diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile new file mode 100644 index 000000000000..f6f2d6f473c6 --- /dev/null +++ b/tools/testing/selftests/core/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -g -I../../../../usr/include/ + +TEST_GEN_PROGS := close_range_test + +include ../lib.mk + diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c new file mode 100644 index 000000000000..c99b98b0d461 --- /dev/null +++ b/tools/testing/selftests/core/close_range_test.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/kernel.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" + +#ifndef __NR_close_range +#define __NR_close_range -1 +#endif + +#ifndef CLOSE_RANGE_UNSHARE +#define CLOSE_RANGE_UNSHARE (1U << 1) +#endif + +static inline int sys_close_range(unsigned int fd, unsigned int max_fd, + unsigned int flags) +{ + return syscall(__NR_close_range, fd, max_fd, flags); +} + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +TEST(close_range) +{ + int i, ret; + int open_fds[101]; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + XFAIL(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { + if (errno == ENOSYS) + XFAIL(return, "close_range() syscall not supported"); + } + + EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); + + for (i = 0; i <= 50; i++) + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); + + for (i = 51; i <= 100; i++) + EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); + + /* create a couple of gaps */ + close(57); + close(78); + close(81); + close(82); + close(84); + close(90); + + EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); + + for (i = 51; i <= 92; i++) + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); + + for (i = 93; i <= 100; i++) + EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); + + /* test that the kernel caps and still closes all fds */ + EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); + + for (i = 93; i <= 99; i++) + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); + + EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); + + EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); + + EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); +} + +TEST(close_range_unshare) +{ + int i, ret, status; + pid_t pid; + int open_fds[101]; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + XFAIL(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(open_fds[0], open_fds[50], + CLOSE_RANGE_UNSHARE); + if (ret) + exit(EXIT_FAILURE); + + for (i = 0; i <= 50; i++) + if (fcntl(open_fds[i], F_GETFL) != -1) + exit(EXIT_FAILURE); + + for (i = 51; i <= 100; i++) + if (fcntl(open_fds[i], F_GETFL) == -1) + exit(EXIT_FAILURE); + + /* create a couple of gaps */ + close(57); + close(78); + close(81); + close(82); + close(84); + close(90); + + ret = sys_close_range(open_fds[51], open_fds[92], + CLOSE_RANGE_UNSHARE); + if (ret) + exit(EXIT_FAILURE); + + for (i = 51; i <= 92; i++) + if (fcntl(open_fds[i], F_GETFL) != -1) + exit(EXIT_FAILURE); + + for (i = 93; i <= 100; i++) + if (fcntl(open_fds[i], F_GETFL) == -1) + exit(EXIT_FAILURE); + + /* test that the kernel caps and still closes all fds */ + ret = sys_close_range(open_fds[93], open_fds[99], + CLOSE_RANGE_UNSHARE); + if (ret) + exit(EXIT_FAILURE); + + for (i = 93; i <= 99; i++) + if (fcntl(open_fds[i], F_GETFL) != -1) + exit(EXIT_FAILURE); + + if (fcntl(open_fds[100], F_GETFL) == -1) + exit(EXIT_FAILURE); + + ret = sys_close_range(open_fds[100], open_fds[100], + CLOSE_RANGE_UNSHARE); + if (ret) + exit(EXIT_FAILURE); + + if (fcntl(open_fds[100], F_GETFL) != -1) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(close_range_unshare_capped) +{ + int i, ret, status; + pid_t pid; + int open_fds[101]; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { + int fd; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + XFAIL(return, "Skipping test since /dev/null does not exist"); + } + + open_fds[i] = fd; + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = sys_close_range(open_fds[0], UINT_MAX, + CLOSE_RANGE_UNSHARE); + if (ret) + exit(EXIT_FAILURE); + + for (i = 0; i <= 100; i++) + if (fcntl(open_fds[i], F_GETFL) != -1) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 0d347d48c112..517297a14ecf 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -121,6 +121,7 @@ h1_destroy() h2_create() { host_create $h2 2 + tc qdisc add dev $h2 clsact # Some of the tests in this suite use multicast traffic. As this traffic # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus @@ -141,6 +142,7 @@ h2_create() h2_destroy() { ethtool -s $h2 autoneg on + tc qdisc del dev $h2 clsact host_destroy $h2 } @@ -336,6 +338,17 @@ get_qdisc_npackets() qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets } +send_packets() +{ + local vlan=$1; shift + local proto=$1; shift + local pkts=$1; shift + + $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \ + -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \ + -t $proto -q -c $pkts "$@" +} + # This sends traffic in an attempt to build a backlog of $size. Returns 0 on # success. After 10 failed attempts it bails out and returns 1. It dumps the # backlog size to stdout. @@ -364,9 +377,7 @@ build_backlog() return 1 fi - $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \ - -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \ - -t $proto -q -c $pkts "$@" + send_packets $vlan $proto $pkts "$@" done } @@ -531,3 +542,115 @@ do_mc_backlog_test() log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } + +do_drop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local trigger=$1; shift + local subtest=$1; shift + local fetch_counter=$1; shift + local backlog + local base + local now + local pct + + RET=0 + + start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac + + # Create a bit of a backlog and observe no mirroring due to drops. + qevent_rule_install_$subtest + base=$($fetch_counter) + + build_backlog $vlan $((2 * limit / 3)) udp >/dev/null + + busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null + check_fail $? "Spurious packets observed without buffer pressure" + + # Push to the queue until it's at the limit. The configured limit is + # rounded by the qdisc and then by the driver, so this is the best we + # can do to get to the real limit of the system. + build_backlog $vlan $((3 * limit / 2)) udp >/dev/null + + base=$($fetch_counter) + send_packets $vlan udp 11 + + now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter) + check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen" + + # When no extra traffic is injected, there should be no mirroring. + busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null + check_fail $? "Spurious packets observed" + + # When the rule is uninstalled, there should be no mirroring. + qevent_rule_uninstall_$subtest + send_packets $vlan udp 11 + busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null + check_fail $? "Spurious packets observed after uninstall" + + log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd" + + stop_traffic + sleep 1 +} + +qevent_rule_install_mirror() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action mirred egress mirror dev $swp2 hw_stats disabled +} + +qevent_rule_uninstall_mirror() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} + +qevent_counter_fetch_mirror() +{ + tc_rule_handle_stats_get "dev $h2 ingress" 101 +} + +do_drop_mirror_test() +{ + local vlan=$1; shift + local limit=$1; shift + local qevent_name=$1; shift + + tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ + flower skip_sw ip_proto udp \ + action drop + + do_drop_test "$vlan" "$limit" "$qevent_name" mirror \ + qevent_counter_fetch_mirror + + tc filter del dev $h2 ingress pref 1 handle 101 flower +} + +qevent_rule_install_trap() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action trap hw_stats disabled +} + +qevent_rule_uninstall_trap() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} + +qevent_counter_fetch_trap() +{ + local trap_name=$1; shift + + devlink_trap_rx_packets_get "$trap_name" +} + +do_drop_trap_test() +{ + local vlan=$1; shift + local limit=$1; shift + local trap_name=$1; shift + + do_drop_test "$vlan" "$limit" "$trap_name" trap \ + "qevent_counter_fetch_trap $trap_name" +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 1c36c576613b..3f007c5f8361 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -7,6 +7,8 @@ ALL_TESTS=" ecn_nodrop_test red_test mc_backlog_test + red_mirror_test + red_trap_test " : ${QDISC:=ets} source sch_red_core.sh @@ -83,6 +85,26 @@ mc_backlog_test() uninstall_qdisc } +red_mirror_test() +{ + install_qdisc qevent early_drop block 10 + + do_drop_mirror_test 10 $BACKLOG1 early_drop + do_drop_mirror_test 11 $BACKLOG2 early_drop + + uninstall_qdisc +} + +red_trap_test() +{ + install_qdisc qevent early_drop block 10 + + do_drop_trap_test 10 $BACKLOG1 early_drop + do_drop_trap_test 11 $BACKLOG2 early_drop + + uninstall_qdisc +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index 558667ea11ec..ede9c38d3eff 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -7,6 +7,7 @@ ALL_TESTS=" ecn_nodrop_test red_test mc_backlog_test + red_mirror_test " source sch_red_core.sh @@ -57,6 +58,13 @@ mc_backlog_test() uninstall_qdisc } +red_mirror_test() +{ + install_qdisc qevent early_drop block 10 + do_drop_mirror_test 10 $BACKLOG + uninstall_qdisc +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index fd583a171db7..d7cf33a3f18d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -28,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre" +ALL_TESTS="router tc_flower mirror_gre tc_police" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh new file mode 100644 index 000000000000..e79ac0dad1f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_police_scale.sh + +tc_police_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get global_policers single_rate_policers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 43ba1b438f6d..43f662401bc3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre" +ALL_TESTS="router tc_flower mirror_gre tc_police" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh new file mode 100644 index 000000000000..e79ac0dad1f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_police_scale.sh + +tc_police_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get global_policers single_rate_policers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh new file mode 100755 index 000000000000..448b75c1545a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that policers shared by different tc filters are correctly reference +# counted by observing policers' occupancy via devlink-resource. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + tc_police_occ_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +tc_police_occ_get() +{ + devlink_resource_occ_get global_policers single_rate_policers +} + +tc_police_occ_test() +{ + RET=0 + + local occ=$(tc_police_occ_get) + + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $occ" + + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok \ + index 10 + tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \ + flower skip_sw action police index 10 + + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 2 handle 102 flower + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $occ" + + log_test "tc police occupancy" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh new file mode 100644 index 000000000000..4b96561c462f --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0 + +TC_POLICE_NUM_NETIFS=2 + +tc_police_h1_create() +{ + simple_if_init $h1 +} + +tc_police_h1_destroy() +{ + simple_if_fini $h1 +} + +tc_police_switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +tc_police_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +tc_police_rules_create() +{ + local count=$1; shift + local should_fail=$1; shift + + TC_POLICE_BATCH_FILE="$(mktemp)" + + for ((i = 0; i < count; ++i)); do + cat >> $TC_POLICE_BATCH_FILE <<-EOF + filter add dev $swp1 ingress \ + prot ip \ + flower skip_sw \ + action police rate 10mbit burst 100k \ + conform-exceed drop/ok + EOF + done + + tc -b $TC_POLICE_BATCH_FILE + check_err_fail $should_fail $? "Rule insertion" +} + +__tc_police_test() +{ + local count=$1; shift + local should_fail=$1; shift + + tc_police_rules_create $count $should_fail + + offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l) + ((offload_count == count)) + check_err_fail $should_fail $? "tc police offload count" +} + +tc_police_test() +{ + local count=$1; shift + local should_fail=$1; shift + + if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then + check_err 1 "Could not test offloaded functionality" + return + fi + + __tc_police_test $count $should_fail +} + +tc_police_setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + tc_police_h1_create + tc_police_switch_create +} + +tc_police_cleanup() +{ + pre_cleanup + + tc_police_switch_destroy + tc_police_h1_destroy + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 9241250c5921..553cb9fad508 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -11,6 +11,8 @@ ALL_TESTS=" matchall_mirror_behind_flower_ingress_test matchall_sample_behind_flower_ingress_test matchall_mirror_behind_flower_egress_test + police_limits_test + multi_police_test " NUM_NETIFS=2 @@ -287,6 +289,80 @@ matchall_mirror_behind_flower_egress_test() matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2" } +police_limits_test() +{ + RET=0 + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 0.5kbit burst 1m conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too low rate" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 2.5tbit burst 1g conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too high rate" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 1m conform-exceed drop/ok + check_err $? "Failed to add police action with low rate" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.9tbit burst 1g conform-exceed drop/ok + check_err $? "Failed to add police action with high rate" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 512b conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too low burst size" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 2k conform-exceed drop/ok + check_err $? "Failed to add police action with low burst size" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc qdisc del dev $swp1 clsact + + log_test "police rate and burst limits" +} + +multi_police_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have multiple police + # actions in a single rule. + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok + check_err $? "Failed to add rule with single police action" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/pipe \ + action police rate 200mbit burst 200k conform-exceed drop/ok + check_fail $? "Incorrect success to add rule with two police actions" + + tc qdisc del dev $swp1 clsact + + log_test "multi police" +} + setup_prepare() { swp1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh new file mode 100644 index 000000000000..ba1d53b9f815 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +VNI_GEN=$RANDOM +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_NETDEV= +HAS_ETHTOOL= +EXIT_STATUS=0 +num_cases=0 +num_errors=0 + +clean_up_devs=( ) + +function err_cnt { + echo "ERROR:" $@ + EXIT_STATUS=1 + ((num_errors++)) + ((num_cases++)) +} + +function pass_cnt { + ((num_cases++)) +} + +function cleanup_tuns { + for dev in "${clean_up_devs[@]}"; do + [ -e /sys/class/net/$dev ] && ip link del dev $dev + done + clean_up_devs=( ) +} + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_tuns + cleanup_nsim +} + +trap cleanup EXIT + +function new_vxlan { + local dev=$1 + local dstport=$2 + local lower=$3 + local ipver=$4 + local flags=$5 + + local group ipfl + + [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))" + + ip $ipfl link add $dev type vxlan \ + group $group \ + dev $lower \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function new_geneve { + local dev=$1 + local dstport=$2 + local ipver=$3 + local flags=$4 + + local group ipfl + + [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))" + + ip $ipfl link add $dev type geneve \ + remote $remote \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function del_dev { + local dev=$1 + + ip link del dev $dev + check_tables +} + +# Helpers for netdevsim port/type encoding +function mke { + local port=$1 + local type=$2 + + echo $((port << 16 | type)) +} + +function pre { + local val=$1 + + echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))" +} + +function pre_ethtool { + local val=$1 + local port=$((val >> 16)) + local type=$((val & 0xffff)) + + case $type in + 1) + type_name="vxlan" + ;; + 2) + type_name="geneve" + ;; + 4) + type_name="vxlan-gpe" + ;; + *) + type_name="bit X" + ;; + esac + + echo "port $port, $type_name" +} + +function check_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local -n expected=$2 + local last=$3 + + read -a have < $path + + if [ ${#expected[@]} -ne ${#have[@]} ]; then + echo "check_table: BAD NUMBER OF ITEMS" + return 0 + fi + + for i in "${!expected[@]}"; do + if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then + pp_expected=`pre_ethtool ${expected[i]}` + ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null + if [ $? -ne 0 -a $last -ne 0 ]; then + err_cnt "ethtool table $1 on port $port: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + return 0 + + fi + fi + + if [ ${expected[i]} != ${have[i]} ]; then + if [ $last -ne 0 ]; then + err_cnt "table $1 on port $port: $pfx - $msg" + echo " check_table: wrong entry $i" + echo " expected: `pre ${expected[i]}`" + echo " have: `pre ${have[i]}`" + return 0 + fi + return 1 + fi + done + + pass_cnt + return 0 +} + +function check_tables { + # Need retries in case we have workqueue making the changes + local retries=10 + + while ! check_table 0 exp0 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done + while ! check_table 1 exp1 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done +} + +function print_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + read -a have < $path + + tree $NSIM_DEV_DFS/ + + echo "Port $port table $1:" + + for i in "${!have[@]}"; do + echo " `pre ${have[i]}`" + done + +} + +function print_tables { + print_table 0 + print_table 1 +} + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +### +### Code start +### + +# Probe ethtool support +ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y + +modprobe netdevsim + +# Basic test +pfx="basic" + +for port in 0 1; do + old_netdevs=$(ls /sys/class/net) + if [ $port -eq 0 ]; then + echo $NSIM_ID > /sys/bus/netdevsim/new_device + else + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + echo 1 > $NSIM_DEV_SYS/new_port + fi + NSIM_NETDEV=`get_netdev_name old_netdevs` + + msg="new NIC device created" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + msg="VxLAN v4 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlan0 4789 $NSIM_NETDEV + new_vxlan vxlan1 4789 $NSIM_NETDEV + + msg="VxLAN v4 devices go down" + exp0=( 0 0 0 0 ) + ifconfig vxlan1 down + ifconfig vxlan0 down + check_tables + + msg="VxLAN v6 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlanA 4789 $NSIM_NETDEV 6 + + for ifc in vxlan0 vxlan1; do + ifconfig $ifc up + done + + new_vxlan vxlanB 4789 $NSIM_NETDEV 6 + + msg="another VxLAN v6 devices" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + + msg="Geneve device" + exp1=( `mke 6081 2` 0 0 0 ) + new_geneve gnv0 6081 + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + check_tables + + cleanup_tuns + + msg="tunnels destroyed" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + modprobe -r geneve + modprobe -r vxlan + modprobe -r udp_tunnel + + check_tables +done + +modprobe -r netdevsim + +# Module tests +pfx="module tests" + +if modinfo netdevsim | grep udp_tunnel >/dev/null; then + err_cnt "netdevsim depends on udp_tunnel" +else + pass_cnt +fi + +modprobe netdevsim + +old_netdevs=$(ls /sys/class/net) +port=0 +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep +echo 0 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +modprobe -r vxlan +modprobe -r udp_tunnel + +msg="remove tunnels" +exp0=( 0 0 0 0 ) +check_tables + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +exp0=( 0 0 0 0 ) + +modprobe -r netdevsim +modprobe netdevsim + +# Overflow the table + +function overflow_table0 { + local pfx=$1 + + msg="create VxLANs 1/5" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="create VxLANs 2/5" + exp0=( `mke 10000 1` `mke 10001 1` 0 0 ) + new_vxlan vxlan1 10001 $NSIM_NETDEV + + msg="create VxLANs 3/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 ) + new_vxlan vxlan2 10002 $NSIM_NETDEV + + msg="create VxLANs 4/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` ) + new_vxlan vxlan3 10003 $NSIM_NETDEV + + msg="create VxLANs 5/5" + new_vxlan vxlan4 10004 $NSIM_NETDEV +} + +function overflow_table1 { + local pfx=$1 + + msg="create GENEVE 1/5" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="create GENEVE 2/5" + exp1=( `mke 20000 2` `mke 20001 2` 0 0 ) + new_geneve gnv1 20001 + + msg="create GENEVE 3/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 ) + new_geneve gnv2 20002 + + msg="create GENEVE 4/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` ) + new_geneve gnv3 20003 + + msg="create GENEVE 5/5" + new_geneve gnv4 20004 +} + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Sync all +pfx="sync all" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Destroy full NIC +pfx="destroy full" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "destroy NIC" + overflow_table1 "destroy NIC" + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# IPv4 only +pfx="IPv4 only" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v6" + new_vxlan vxlanA1 10000 $NSIM_NETDEV 6 + + ip link set dev vxlanA0 down + ip link set dev vxlanA0 up + check_tables + + msg="create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="down VxLANs v4" + exp0=( 0 0 0 0 ) + ip link set dev vxlan0 down + check_tables + + msg="up VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + ip link set dev vxlan0 up + check_tables + + msg="destroy VxLANs v4" + exp0=( 0 0 0 0 ) + del_dev vxlan0 + + msg="recreate VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + del_dev vxlanA0 + del_dev vxlanA1 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Failures +pfx="error injection" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + + msg="1 - create VxLANs v6" + exp0=( 0 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="1 - create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="1 - remove VxLANs v4" + del_dev vxlan0 + + msg="1 - remove VxLANs v6" + exp0=( 0 0 0 0 ) + del_dev vxlanA0 + + msg="2 - create GENEVE" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="2 - destroy GENEVE" + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + exp1=( `mke 20000 2` 0 0 0 ) + del_dev gnv0 + + msg="2 - create second GENEVE" + exp1=( 0 `mke 20001 2` 0 0 ) + new_geneve gnv0 20001 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# netdev flags +pfx="netdev flags" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="turn off" + exp0=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + + msg="create VxLANs v4 - off" + exp0=( 0 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="created off - turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# device initiated reset +pfx="reset notification" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 10000 1` 0 0 0 ) + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +modprobe -r netdevsim + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $num_cases checks" +else + echo "FAILED $num_errors/$num_cases checks" +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 8a6b507e34a8..1d27f52c61e6 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -21,7 +21,6 @@ #include <linux/android/binder.h> #include <linux/android/binderfs.h> -#include "../../kselftest.h" #include "../../kselftest_harness.h" #define DEFAULT_THREADS 4 @@ -37,37 +36,26 @@ fd = -EBADF; \ } -#define log_exit(format, ...) \ - ({ \ - fprintf(stderr, format "\n", ##__VA_ARGS__); \ - exit(EXIT_FAILURE); \ - }) - -static void change_mountns(void) +static void change_mountns(struct __test_metadata *_metadata) { int ret; ret = unshare(CLONE_NEWNS); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n", - strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to unshare mount namespace", + strerror(errno)); + } ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to mount / as private\n", - strerror(errno)); -} - -static void rmdir_protect_errno(const char *dir) -{ - int saved_errno = errno; - (void)rmdir(dir); - errno = saved_errno; + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to mount / as private", + strerror(errno)); + } } -static int __do_binderfs_test(void) +static int __do_binderfs_test(struct __test_metadata *_metadata) { - int fd, ret, saved_errno; + int fd, ret, saved_errno, result = 1; size_t len; ssize_t wret; struct binderfs_device device = { 0 }; @@ -75,113 +63,107 @@ static int __do_binderfs_test(void) char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; - change_mountns(); + change_mountns(_metadata); - if (!mkdtemp(binderfs_mntpt)) - ksft_exit_fail_msg( - "%s - Failed to create binderfs mountpoint\n", + EXPECT_NE(mkdtemp(binderfs_mntpt), NULL) { + TH_LOG("%s - Failed to create binderfs mountpoint", strerror(errno)); + goto out; + } ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); - if (ret < 0) { - if (errno != ENODEV) - ksft_exit_fail_msg("%s - Failed to mount binderfs\n", - strerror(errno)); - - rmdir_protect_errno(binderfs_mntpt); - return 1; + EXPECT_EQ(ret, 0) { + if (errno == ENODEV) + XFAIL(goto out, "binderfs missing"); + TH_LOG("%s - Failed to mount binderfs", strerror(errno)); + goto rmdir; } - /* binderfs mount test passed */ - ksft_inc_pass_cnt(); + /* success: binderfs mounted */ memcpy(device.name, "my-binder", strlen("my-binder")); snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt); fd = open(device_path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - ksft_exit_fail_msg( - "%s - Failed to open binder-control device\n", + EXPECT_GE(fd, 0) { + TH_LOG("%s - Failed to open binder-control device", strerror(errno)); + goto umount; + } ret = ioctl(fd, BINDER_CTL_ADD, &device); saved_errno = errno; close(fd); errno = saved_errno; - if (ret < 0) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg( - "%s - Failed to allocate new binder device\n", + EXPECT_GE(ret, 0) { + TH_LOG("%s - Failed to allocate new binder device", strerror(errno)); + goto umount; } - ksft_print_msg( - "Allocated new binder device with major %d, minor %d, and name %s\n", + TH_LOG("Allocated new binder device with major %d, minor %d, and name %s", device.major, device.minor, device.name); - /* binder device allocation test passed */ - ksft_inc_pass_cnt(); + /* success: binder device allocation */ snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt); fd = open(device_path, O_CLOEXEC | O_RDONLY); - if (fd < 0) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg("%s - Failed to open my-binder device\n", - strerror(errno)); + EXPECT_GE(fd, 0) { + TH_LOG("%s - Failed to open my-binder device", + strerror(errno)); + goto umount; } ret = ioctl(fd, BINDER_VERSION, &version); saved_errno = errno; close(fd); errno = saved_errno; - if (ret < 0) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg( - "%s - Failed to open perform BINDER_VERSION request\n", + EXPECT_GE(ret, 0) { + TH_LOG("%s - Failed to open perform BINDER_VERSION request", strerror(errno)); + goto umount; } - ksft_print_msg("Detected binder version: %d\n", - version.protocol_version); + TH_LOG("Detected binder version: %d", version.protocol_version); - /* binder transaction with binderfs binder device passed */ - ksft_inc_pass_cnt(); + /* success: binder transaction with binderfs binder device */ ret = unlink(device_path); - if (ret < 0) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg("%s - Failed to delete binder device\n", - strerror(errno)); + EXPECT_EQ(ret, 0) { + TH_LOG("%s - Failed to delete binder device", + strerror(errno)); + goto umount; } - /* binder device removal passed */ - ksft_inc_pass_cnt(); + /* success: binder device removal */ snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt); ret = unlink(device_path); - if (!ret) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg("Managed to delete binder-control device\n"); - } else if (errno != EPERM) { - rmdir_protect_errno(binderfs_mntpt); - ksft_exit_fail_msg( - "%s - Failed to delete binder-control device but exited with unexpected error code\n", + EXPECT_NE(ret, 0) { + TH_LOG("Managed to delete binder-control device"); + goto umount; + } + EXPECT_EQ(errno, EPERM) { + TH_LOG("%s - Failed to delete binder-control device but exited with unexpected error code", strerror(errno)); + goto umount; } - /* binder-control device removal failed as expected */ - ksft_inc_xfail_cnt(); + /* success: binder-control device removal failed as expected */ + result = 0; -on_error: +umount: ret = umount2(binderfs_mntpt, MNT_DETACH); - rmdir_protect_errno(binderfs_mntpt); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", - strerror(errno)); - - /* binderfs unmount test passed */ - ksft_inc_pass_cnt(); - return 0; + EXPECT_EQ(ret, 0) { + TH_LOG("%s - Failed to unmount binderfs", strerror(errno)); + } +rmdir: + ret = rmdir(binderfs_mntpt); + EXPECT_EQ(ret, 0) { + TH_LOG("%s - Failed to rmdir binderfs mount", strerror(errno)); + } +out: + return result; } static int wait_for_pid(pid_t pid) @@ -291,7 +273,7 @@ static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf, return 0; } -static void change_userns(int syncfds[2]) +static void change_userns(struct __test_metadata *_metadata, int syncfds[2]) { int ret; char buf; @@ -299,25 +281,29 @@ static void change_userns(int syncfds[2]) close_prot_errno_disarm(syncfds[1]); ret = unshare(CLONE_NEWUSER); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", - strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to unshare user namespace", + strerror(errno)); + } ret = write_nointr(syncfds[0], "1", 1); - if (ret != 1) - ksft_exit_fail_msg("write_nointr() failed\n"); + ASSERT_EQ(ret, 1) { + TH_LOG("write_nointr() failed"); + } ret = read_nointr(syncfds[0], &buf, 1); - if (ret != 1) - ksft_exit_fail_msg("read_nointr() failed\n"); + ASSERT_EQ(ret, 1) { + TH_LOG("read_nointr() failed"); + } close_prot_errno_disarm(syncfds[0]); - if (setid_userns_root()) - ksft_exit_fail_msg("setid_userns_root() failed"); + ASSERT_EQ(setid_userns_root(), 0) { + TH_LOG("setid_userns_root() failed"); + } } -static void change_idmaps(int syncfds[2], pid_t pid) +static void change_idmaps(struct __test_metadata *_metadata, int syncfds[2], pid_t pid) { int ret; char buf; @@ -326,35 +312,42 @@ static void change_idmaps(int syncfds[2], pid_t pid) close_prot_errno_disarm(syncfds[0]); ret = read_nointr(syncfds[1], &buf, 1); - if (ret != 1) - ksft_exit_fail_msg("read_nointr() failed\n"); + ASSERT_EQ(ret, 1) { + TH_LOG("read_nointr() failed"); + } snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid()); ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map)); - if (ret) - ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed"); + ASSERT_EQ(ret, 0) { + TH_LOG("write_id_mapping(UID_MAP) failed"); + } snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid()); ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map)); - if (ret) - ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed"); + ASSERT_EQ(ret, 0) { + TH_LOG("write_id_mapping(GID_MAP) failed"); + } ret = write_nointr(syncfds[1], "1", 1); - if (ret != 1) - ksft_exit_fail_msg("write_nointr() failed"); + ASSERT_EQ(ret, 1) { + TH_LOG("write_nointr() failed"); + } close_prot_errno_disarm(syncfds[1]); } +struct __test_metadata *_thread_metadata; static void *binder_version_thread(void *data) { + struct __test_metadata *_metadata = _thread_metadata; int fd = PTR_TO_INT(data); struct binder_version version = { 0 }; int ret; ret = ioctl(fd, BINDER_VERSION, &version); if (ret < 0) - ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno)); + TH_LOG("%s - Failed to open perform BINDER_VERSION request\n", + strerror(errno)); pthread_exit(data); } @@ -377,68 +370,79 @@ TEST(binderfs_stress) device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to create socket pair", strerror(errno)); + } pid = fork(); - if (pid < 0) { + ASSERT_GE(pid, 0) { + TH_LOG("%s - Failed to fork", strerror(errno)); close_prot_errno_disarm(syncfds[0]); close_prot_errno_disarm(syncfds[1]); - ksft_exit_fail_msg("%s - Failed to fork", strerror(errno)); } if (pid == 0) { int i, j, k, nthreads; pthread_attr_t attr; pthread_t threads[DEFAULT_THREADS]; - change_userns(syncfds); - change_mountns(); + change_userns(_metadata, syncfds); + change_mountns(_metadata); - if (!mkdtemp(binderfs_mntpt)) - log_exit("%s - Failed to create binderfs mountpoint\n", - strerror(errno)); + ASSERT_NE(mkdtemp(binderfs_mntpt), NULL) { + TH_LOG("%s - Failed to create binderfs mountpoint", + strerror(errno)); + } ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); - if (ret < 0) - log_exit("%s - Failed to mount binderfs\n", strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to mount binderfs", strerror(errno)); + } for (int i = 0; i < ARRAY_SIZE(fds); i++) { snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt); fd = open(device_path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - log_exit("%s - Failed to open binder-control device\n", strerror(errno)); + ASSERT_GE(fd, 0) { + TH_LOG("%s - Failed to open binder-control device", + strerror(errno)); + } memset(&device, 0, sizeof(device)); snprintf(device.name, sizeof(device.name), "%d", i); ret = ioctl(fd, BINDER_CTL_ADD, &device); close_prot_errno_disarm(fd); - if (ret < 0) - log_exit("%s - Failed to allocate new binder device\n", strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to allocate new binder device", + strerror(errno)); + } snprintf(device_path, sizeof(device_path), "%s/%d", binderfs_mntpt, i); fds[i] = open(device_path, O_RDONLY | O_CLOEXEC); - if (fds[i] < 0) - log_exit("%s - Failed to open binder device\n", strerror(errno)); + ASSERT_GE(fds[i], 0) { + TH_LOG("%s - Failed to open binder device", strerror(errno)); + } } ret = umount2(binderfs_mntpt, MNT_DETACH); - rmdir_protect_errno(binderfs_mntpt); - if (ret < 0) - log_exit("%s - Failed to unmount binderfs\n", strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to unmount binderfs", strerror(errno)); + rmdir(binderfs_mntpt); + } nthreads = get_nprocs_conf(); if (nthreads > DEFAULT_THREADS) nthreads = DEFAULT_THREADS; + _thread_metadata = _metadata; pthread_attr_init(&attr); for (k = 0; k < ARRAY_SIZE(fds); k++) { for (i = 0; i < nthreads; i++) { ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k])); if (ret) { - ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i); + TH_LOG("%s - Failed to create thread %d", + strerror(errno), i); break; } } @@ -448,7 +452,8 @@ TEST(binderfs_stress) ret = pthread_join(threads[j], &fdptr); if (ret) - ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr)); + TH_LOG("%s - Failed to join thread %d for fd %d", + strerror(errno), j, PTR_TO_INT(fdptr)); } } pthread_attr_destroy(&attr); @@ -459,11 +464,12 @@ TEST(binderfs_stress) exit(EXIT_SUCCESS); } - change_idmaps(syncfds, pid); + change_idmaps(_metadata, syncfds, pid); ret = wait_for_pid(pid); - if (ret) - ksft_exit_fail_msg("wait_for_pid() failed"); + ASSERT_EQ(ret, 0) { + TH_LOG("wait_for_pid() failed"); + } } TEST(binderfs_test_privileged) @@ -471,7 +477,7 @@ TEST(binderfs_test_privileged) if (geteuid() != 0) XFAIL(return, "Tests are not run as root. Skipping privileged tests"); - if (__do_binderfs_test() == 1) + if (__do_binderfs_test(_metadata)) XFAIL(return, "The Android binderfs filesystem is not available"); } @@ -482,31 +488,33 @@ TEST(binderfs_test_unprivileged) pid_t pid; ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds); - if (ret < 0) - ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno)); + ASSERT_EQ(ret, 0) { + TH_LOG("%s - Failed to create socket pair", strerror(errno)); + } pid = fork(); - if (pid < 0) { + ASSERT_GE(pid, 0) { close_prot_errno_disarm(syncfds[0]); close_prot_errno_disarm(syncfds[1]); - ksft_exit_fail_msg("%s - Failed to fork", strerror(errno)); + TH_LOG("%s - Failed to fork", strerror(errno)); } if (pid == 0) { - change_userns(syncfds); - if (__do_binderfs_test() == 1) + change_userns(_metadata, syncfds); + if (__do_binderfs_test(_metadata)) exit(2); exit(EXIT_SUCCESS); } - change_idmaps(syncfds, pid); + change_idmaps(_metadata, syncfds, pid); ret = wait_for_pid(pid); if (ret) { if (ret == 2) XFAIL(return, "The Android binderfs filesystem is not available"); - else - ksft_exit_fail_msg("wait_for_pid() failed"); + ASSERT_EQ(ret, 0) { + TH_LOG("wait_for_pid() failed"); + } } } diff --git a/tools/testing/selftests/firmware/settings b/tools/testing/selftests/firmware/settings new file mode 100644 index 000000000000..085e664ee093 --- /dev/null +++ b/tools/testing/selftests/firmware/settings @@ -0,0 +1,8 @@ +# The async firmware timeout is set to 1 second (but ends up being effectively +# 2 seconds). There are 3 test configs, each done with and without firmware +# present, each with 2 "nowait" functions tested 5 times. Expected time for a +# normal execution should be 2 * 3 * 2 * 2 * 5 = 120 seconds for those alone. +# Additionally, fw_fallback may take 5 seconds for internal timeouts in each +# of the 3 configs, so at least another 15 seconds are needed. Add another +# 10 seconds for each testing config: 120 + 15 + 30 +timeout=165 diff --git a/tools/testing/selftests/fpu/.gitignore b/tools/testing/selftests/fpu/.gitignore new file mode 100644 index 000000000000..d6d12ac1d9c3 --- /dev/null +++ b/tools/testing/selftests/fpu/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +test_fpu diff --git a/tools/testing/selftests/fpu/Makefile b/tools/testing/selftests/fpu/Makefile new file mode 100644 index 000000000000..ea62c176ede7 --- /dev/null +++ b/tools/testing/selftests/fpu/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0+ + +LDLIBS := -lm + +TEST_GEN_PROGS := test_fpu + +TEST_PROGS := run_test_fpu.sh + +include ../lib.mk diff --git a/tools/testing/selftests/fpu/run_test_fpu.sh b/tools/testing/selftests/fpu/run_test_fpu.sh new file mode 100755 index 000000000000..d77be93ec139 --- /dev/null +++ b/tools/testing/selftests/fpu/run_test_fpu.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Load kernel module for FPU tests + +uid=$(id -u) +if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit 1 +fi + +if ! which modprobe > /dev/null 2>&1; then + echo "$0: You need modprobe installed" + exit 4 +fi + +if ! modinfo test_fpu > /dev/null 2>&1; then + echo "$0: You must have the following enabled in your kernel:" + echo "CONFIG_TEST_FPU=m" + exit 4 +fi + +NR_CPUS=$(getconf _NPROCESSORS_ONLN) +if [ ! $NR_CPUS ]; then + NR_CPUS=1 +fi + +modprobe test_fpu + +if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then + mount -t debugfs none /sys/kernel/debug + + if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then + echo "$0: Error mounting debugfs" + exit 4 + fi +fi + +echo "Running 1000 iterations on all CPUs... " +for i in $(seq 1 1000); do + for c in $(seq 1 $NR_CPUS); do + ./test_fpu & + done +done + +rmmod test_fpu diff --git a/tools/testing/selftests/fpu/test_fpu.c b/tools/testing/selftests/fpu/test_fpu.c new file mode 100644 index 000000000000..200238522a9d --- /dev/null +++ b/tools/testing/selftests/fpu/test_fpu.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* This testcase operates with the test_fpu kernel driver. + * It modifies the FPU control register in user mode and calls the kernel + * module to perform floating point operations in the kernel. The control + * register value should be independent between kernel and user mode. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <fenv.h> +#include <unistd.h> +#include <fcntl.h> + +const char *test_fpu_path = "/sys/kernel/debug/selftest_helpers/test_fpu"; + +int main(void) +{ + char dummy[1]; + int fd = open(test_fpu_path, O_RDONLY); + + if (fd < 0) { + printf("[SKIP]\tcan't access %s: %s\n", + test_fpu_path, strerror(errno)); + return 0; + } + + if (read(fd, dummy, 1) < 0) { + printf("[FAIL]\taccess with default rounding mode failed\n"); + return 1; + } + + fesetround(FE_DOWNWARD); + if (read(fd, dummy, 1) < 0) { + printf("[FAIL]\taccess with downward rounding mode failed\n"); + return 2; + } + if (fegetround() != FE_DOWNWARD) { + printf("[FAIL]\tusermode rounding mode clobbered\n"); + return 3; + } + + /* Note: the tests up to this point are quite safe and will only return + * an error. But the exception mask setting can cause misbehaving kernel + * to crash. + */ + feclearexcept(FE_ALL_EXCEPT); + feenableexcept(FE_ALL_EXCEPT); + if (read(fd, dummy, 1) < 0) { + printf("[FAIL]\taccess with fpu exceptions unmasked failed\n"); + return 4; + } + if (fegetexcept() != FE_ALL_EXCEPT) { + printf("[FAIL]\tusermode fpu exception mask clobbered\n"); + return 5; + } + + printf("[OK]\ttest_fpu\n"); + return 0; +} diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index c82aa77958e5..ea2147248ebe 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -130,7 +130,7 @@ test_reqs() if [[ $KMOD_VERSION -le 19 ]]; then echo "$0: You need at least kmod 20" >&2 echo "kmod <= 19 is buggy, for details see:" >&2 - echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2 + echo "https://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2 exit $ksft_skip fi diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 862eee734553..8d50483fe204 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -1,11 +1,43 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * kselftest.h: kselftest framework return codes to include from - * selftests. + * kselftest.h: low-level kselftest framework to include from + * selftest programs. When possible, please use + * kselftest_harness.h instead. * * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2014 Samsung Electronics Co., Ltd. * + * Using this API consists of first counting how many tests your code + * has to run, and then starting up the reporting: + * + * ksft_print_header(); + * ksft_set_plan(total_number_of_tests); + * + * For each test, report any progress, debugging, etc with: + * + * ksft_print_msg(fmt, ...); + * + * and finally report the pass/fail/skip/xfail state of the test with one of: + * + * ksft_test_result(condition, fmt, ...); + * ksft_test_result_pass(fmt, ...); + * ksft_test_result_fail(fmt, ...); + * ksft_test_result_skip(fmt, ...); + * ksft_test_result_xfail(fmt, ...); + * ksft_test_result_error(fmt, ...); + * + * When all tests are finished, clean up and exit the program with one of: + * + * ksft_exit(condition); + * ksft_exit_pass(); + * ksft_exit_fail(); + * + * If the program wants to report details on why the entire program has + * failed, it can instead exit with a message (this is usually done when + * the program is aborting before finishing all tests): + * + * ksft_exit_fail_msg(fmt, ...); + * */ #ifndef __KSELFTEST_H #define __KSELFTEST_H @@ -74,7 +106,7 @@ static inline void ksft_print_cnts(void) if (ksft_plan != ksft_test_num()) printf("# Planned tests != run tests (%u != %u)\n", ksft_plan, ksft_test_num()); - printf("# Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n", + printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n", ksft_cnt.ksft_pass, ksft_cnt.ksft_fail, ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass, ksft_cnt.ksft_xskip, ksft_cnt.ksft_error); @@ -120,6 +152,32 @@ static inline void ksft_test_result_fail(const char *msg, ...) va_end(args); } +/** + * ksft_test_result() - Report test success based on truth of condition + * + * @condition: if true, report test success, otherwise failure. + */ +#define ksft_test_result(condition, fmt, ...) do { \ + if (!!(condition)) \ + ksft_test_result_pass(fmt, ##__VA_ARGS__);\ + else \ + ksft_test_result_fail(fmt, ##__VA_ARGS__);\ + } while (0) + +static inline void ksft_test_result_xfail(const char *msg, ...) +{ + int saved_errno = errno; + va_list args; + + ksft_cnt.ksft_xfail++; + + va_start(args, msg); + printf("ok %d # XFAIL ", ksft_test_num()); + errno = saved_errno; + vprintf(msg, args); + va_end(args); +} + static inline void ksft_test_result_skip(const char *msg, ...) { int saved_errno = errno; @@ -128,12 +186,13 @@ static inline void ksft_test_result_skip(const char *msg, ...) ksft_cnt.ksft_xskip++; va_start(args, msg); - printf("not ok %d # SKIP ", ksft_test_num()); + printf("ok %d # SKIP ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); } +/* TODO: how does "error" differ from "fail" or "skip"? */ static inline void ksft_test_result_error(const char *msg, ...) { int saved_errno = errno; @@ -156,11 +215,22 @@ static inline int ksft_exit_pass(void) static inline int ksft_exit_fail(void) { - printf("Bail out!\n"); ksft_print_cnts(); exit(KSFT_FAIL); } +/** + * ksft_exit() - Exit selftest based on truth of condition + * + * @condition: if true, exit self test with success, otherwise fail. + */ +#define ksft_exit(condition) do { \ + if (!!(condition)) \ + ksft_exit_pass(); \ + else \ + ksft_exit_fail(); \ + } while (0) + static inline int ksft_exit_fail_msg(const char *msg, ...) { int saved_errno = errno; @@ -190,18 +260,30 @@ static inline int ksft_exit_xpass(void) static inline int ksft_exit_skip(const char *msg, ...) { - if (msg) { - int saved_errno = errno; - va_list args; + int saved_errno = errno; + va_list args; + + va_start(args, msg); - va_start(args, msg); - printf("not ok %d # SKIP ", 1 + ksft_test_num()); + /* + * FIXME: several tests misuse ksft_exit_skip so produce + * something sensible if some tests have already been run + * or a plan has been printed. Those tests should use + * ksft_test_result_skip or ksft_exit_fail_msg instead. + */ + if (ksft_plan || ksft_test_num()) { + ksft_cnt.ksft_xskip++; + printf("ok %d # SKIP ", 1 + ksft_test_num()); + } else { + printf("1..0 # SKIP "); + } + if (msg) { errno = saved_errno; vprintf(msg, args); va_end(args); - } else { - ksft_print_cnts(); } + if (ksft_test_num()) + ksft_print_cnts(); exit(KSFT_SKIP); } diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 676b3a8b114d..cc9c846585f0 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -53,6 +53,10 @@ run_one() settings="$BASE_DIR/$DIR/settings" if [ -r "$settings" ] ; then while read line ; do + # Skip comments. + if echo "$line" | grep -q '^#'; then + continue + fi field=$(echo "$line" | cut -d= -f1) value=$(echo "$line" | cut -d= -f2-) eval "kselftest_$field"="$value" @@ -77,10 +81,10 @@ run_one() echo "ok $test_num $TEST_HDR_MSG") || (rc=$?; \ if [ $rc -eq $skip_rc ]; then \ - echo "not ok $test_num $TEST_HDR_MSG # SKIP" + echo "ok $test_num $TEST_HDR_MSG # SKIP" elif [ $rc -eq $timeout_rc ]; then \ echo "#" - echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT" + echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds" else echo "not ok $test_num $TEST_HDR_MSG # exit=$rc" fi) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index c9f03ef93338..4f78e4805633 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -50,7 +50,9 @@ #ifndef __KSELFTEST_HARNESS_H #define __KSELFTEST_HARNESS_H +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <asm/types.h> #include <errno.h> #include <stdbool.h> @@ -58,10 +60,13 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/mman.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include "kselftest.h" + #define TEST_TIMEOUT_DEFAULT 30 /* Utilities exposed to the test definitions */ @@ -104,26 +109,28 @@ /* Unconditional logger for internal use. */ #define __TH_LOG(fmt, ...) \ - fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \ + fprintf(TH_LOG_STREAM, "# %s:%d:%s:" fmt "\n", \ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** - * XFAIL(statement, fmt, ...) + * SKIP(statement, fmt, ...) * - * @statement: statement to run after reporting XFAIL + * @statement: statement to run after reporting SKIP * @fmt: format string * @...: optional arguments * - * This forces a "pass" after reporting a failure with an XFAIL prefix, + * This forces a "pass" after reporting why something is being skipped * and runs "statement", which is usually "return" or "goto skip". */ -#define XFAIL(statement, fmt, ...) do { \ +#define SKIP(statement, fmt, ...) do { \ + snprintf(_metadata->results->reason, \ + sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \ - ##__VA_ARGS__); \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + _metadata->results->reason); \ } \ - /* TODO: find a way to pass xfail to test runner process. */ \ _metadata->passed = 1; \ + _metadata->skip = 1; \ _metadata->trigger = 0; \ statement; \ } while (0) @@ -195,8 +202,9 @@ * * .. code-block:: c * - * FIXTURE_DATA(datatype name) + * FIXTURE_DATA(datatype_name) * + * Almost always, you want just FIXTURE() instead (see below). * This call may be used when the type of the fixture data * is needed. In general, this should not be needed unless * the *self* is being passed to a helper directly. @@ -211,7 +219,7 @@ * * .. code-block:: c * - * FIXTURE(datatype name) { + * FIXTURE(fixture_name) { * type property1; * ... * }; @@ -238,7 +246,7 @@ * * .. code-block:: c * - * FIXTURE_SETUP(fixture name) { implementation } + * FIXTURE_SETUP(fixture_name) { implementation } * * Populates the required "setup" function for a fixture. An instance of the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the @@ -264,7 +272,7 @@ * * .. code-block:: c * - * FIXTURE_TEARDOWN(fixture name) { implementation } + * FIXTURE_TEARDOWN(fixture_name) { implementation } * * Populates the required "teardown" function for a fixture. An instance of the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the @@ -285,7 +293,7 @@ * * .. code-block:: c * - * FIXTURE_VARIANT(datatype name) { + * FIXTURE_VARIANT(fixture_name) { * type property1; * ... * }; @@ -305,8 +313,8 @@ * * .. code-block:: c * - * FIXTURE_ADD(datatype name) { - * .property1 = val1; + * FIXTURE_VARIANT_ADD(fixture_name, variant_name) { + * .property1 = val1, * ... * }; * @@ -672,20 +680,53 @@ __bail(_assert, _metadata->no_print, _metadata->step)) #define __INC_STEP(_metadata) \ - if (_metadata->passed && _metadata->step < 255) \ + /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \ + if (_metadata->passed && _metadata->step < 253) \ _metadata->step++; +#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) + #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ if (_assert) __INC_STEP(_metadata); \ if (!(__exp _t __seen)) { \ - unsigned long long __exp_print = (uintptr_t)__exp; \ - unsigned long long __seen_print = (uintptr_t)__seen; \ - __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ - _expected_str, __exp_print, #_t, \ - _seen_str, __seen_print); \ + /* Report with actual signedness to avoid weird output. */ \ + switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ + case 0: { \ + unsigned long long __exp_print = (uintptr_t)__exp; \ + unsigned long long __seen_print = (uintptr_t)__seen; \ + __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ + break; \ + } \ + case 1: { \ + unsigned long long __exp_print = (uintptr_t)__exp; \ + long long __seen_print = (intptr_t)__seen; \ + __TH_LOG("Expected %s (%llu) %s %s (%lld)", \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ + break; \ + } \ + case 2: { \ + long long __exp_print = (intptr_t)__exp; \ + unsigned long long __seen_print = (uintptr_t)__seen; \ + __TH_LOG("Expected %s (%lld) %s %s (%llu)", \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ + break; \ + } \ + case 3: { \ + long long __exp_print = (intptr_t)__exp; \ + long long __seen_print = (intptr_t)__seen; \ + __TH_LOG("Expected %s (%lld) %s %s (%lld)", \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ + break; \ + } \ + } \ _metadata->passed = 0; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \ @@ -726,6 +767,10 @@ } \ } +struct __test_results { + char reason[1024]; /* Reason for test result */ +}; + struct __test_metadata; struct __fixture_variant_metadata; @@ -773,11 +818,13 @@ struct __test_metadata { struct __fixture_metadata *fixture; int termsig; int passed; + int skip; /* did SKIP get used? */ int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ bool timed_out; /* did this test timeout instead of exiting? */ __u8 step; bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ + struct __test_results *results; struct __test_metadata *prev, *next; }; @@ -813,12 +860,12 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) /* Sanity check handler execution environment. */ if (!t) { fprintf(TH_LOG_STREAM, - "no active test in SIGALRM handler!?\n"); + "# no active test in SIGALRM handler!?\n"); abort(); } if (sig != SIGALRM || sig != info->si_signo) { fprintf(TH_LOG_STREAM, - "%s: SIGALRM handler caught signal %d!?\n", + "# %s: SIGALRM handler caught signal %d!?\n", t->name, sig != SIGALRM ? sig : info->si_signo); abort(); } @@ -839,7 +886,7 @@ void __wait_for_test(struct __test_metadata *t) if (sigaction(SIGALRM, &action, &saved_action)) { t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: unable to install SIGALRM handler\n", + "# %s: unable to install SIGALRM handler\n", t->name); return; } @@ -851,7 +898,7 @@ void __wait_for_test(struct __test_metadata *t) if (sigaction(SIGALRM, &saved_action, NULL)) { t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: unable to uninstall SIGALRM handler\n", + "# %s: unable to uninstall SIGALRM handler\n", t->name); return; } @@ -860,39 +907,51 @@ void __wait_for_test(struct __test_metadata *t) if (t->timed_out) { t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: Test terminated by timeout\n", t->name); + "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; if (t->termsig != -1) { + t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: Test exited normally " - "instead of by signal (code: %d)\n", - t->name, - WEXITSTATUS(status)); - } else if (!t->passed) { - fprintf(TH_LOG_STREAM, - "%s: Test failed at step #%d\n", + "# %s: Test exited normally instead of by signal (code: %d)\n", t->name, WEXITSTATUS(status)); + } else { + switch (WEXITSTATUS(status)) { + /* Success */ + case 0: + t->passed = 1; + break; + /* SKIP */ + case 255: + t->passed = 1; + t->skip = 1; + break; + /* Other failure, assume step report. */ + default: + t->passed = 0; + fprintf(TH_LOG_STREAM, + "# %s: Test failed at step #%d\n", + t->name, + WEXITSTATUS(status)); + } } } else if (WIFSIGNALED(status)) { t->passed = 0; if (WTERMSIG(status) == SIGABRT) { fprintf(TH_LOG_STREAM, - "%s: Test terminated by assertion\n", + "# %s: Test terminated by assertion\n", t->name); } else if (WTERMSIG(status) == t->termsig) { t->passed = 1; } else { fprintf(TH_LOG_STREAM, - "%s: Test terminated unexpectedly " - "by signal %d\n", + "# %s: Test terminated unexpectedly by signal %d\n", t->name, WTERMSIG(status)); } } else { fprintf(TH_LOG_STREAM, - "%s: Test ended in some other way [%u]\n", + "# %s: Test ended in some other way [%u]\n", t->name, status); } @@ -904,25 +963,39 @@ void __run_test(struct __fixture_metadata *f, { /* reset test struct */ t->passed = 1; + t->skip = 0; t->trigger = 0; t->step = 0; t->no_print = 0; + memset(t->results->reason, 0, sizeof(t->results->reason)); - printf("[ RUN ] %s%s%s.%s\n", + ksft_print_msg(" RUN %s%s%s.%s ...\n", f->name, variant->name[0] ? "." : "", variant->name, t->name); t->pid = fork(); if (t->pid < 0) { - printf("ERROR SPAWNING TEST CHILD\n"); + ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { t->fn(t, variant); - /* return the step that failed or 0 */ - _exit(t->passed ? 0 : t->step); + if (t->skip) + _exit(255); + /* Pass is exit 0 */ + if (t->passed) + _exit(0); + /* Something else happened, report the step. */ + _exit(t->step); } else { __wait_for_test(t); } - printf("[ %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"), + ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL", f->name, variant->name[0] ? "." : "", variant->name, t->name); + + if (t->skip) + ksft_test_result_skip("%s\n", t->results->reason[0] ? + t->results->reason : "unknown"); + else + ksft_test_result(t->passed, "%s%s%s.%s\n", + f->name, variant->name[0] ? "." : "", variant->name, t->name); } static int test_harness_run(int __attribute__((unused)) argc, @@ -931,6 +1004,7 @@ static int test_harness_run(int __attribute__((unused)) argc, struct __fixture_variant_metadata no_variant = { .name = "", }; struct __fixture_variant_metadata *v; struct __fixture_metadata *f; + struct __test_results *results; struct __test_metadata *t; int ret = 0; unsigned int case_count = 0, test_count = 0; @@ -945,14 +1019,20 @@ static int test_harness_run(int __attribute__((unused)) argc, } } - /* TODO(wad) add optional arguments similar to gtest. */ - printf("[==========] Running %u tests from %u test cases.\n", + results = mmap(NULL, sizeof(*results), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + ksft_print_header(); + ksft_set_plan(test_count); + ksft_print_msg("Starting %u tests from %u test cases.\n", test_count, case_count); for (f = __fixture_list; f; f = f->next) { for (v = f->variant ?: &no_variant; v; v = v->next) { for (t = f->tests; t; t = t->next) { count++; + t->results = results; __run_test(f, v, t); + t->results = NULL; if (t->passed) pass_count++; else @@ -960,9 +1040,14 @@ static int test_harness_run(int __attribute__((unused)) argc, } } } - printf("[==========] %u / %u tests passed.\n", pass_count, count); - printf("[ %s ]\n", (ret ? "FAILED" : "PASSED")); - return ret; + munmap(results, sizeof(*results)); + + ksft_print_msg("%s: %u / %u tests passed.\n", ret ? "FAILED" : "PASSED", + pass_count, count); + ksft_exit(ret == 0); + + /* unreachable */ + return KSFT_FAIL; } static void __attribute__((constructor)) __constructor_order_first(void) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index b0556c752443..7a17ea815736 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -59,9 +59,8 @@ else all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) endif -.ONESHELL: define RUN_TESTS - @BASE_DIR="$(selfdir)"; \ + BASE_DIR="$(selfdir)"; \ . $(selfdir)/kselftest/runner.sh; \ if [ "X$(summary)" != "X" ]; then \ per_test_logging=1; \ @@ -71,22 +70,21 @@ endef run_tests: all ifdef building_out_of_srctree - @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then - @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT) + @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ + rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi - @if [ "X$(TEST_PROGS)" != "X" ]; then - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) - else - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)) + @if [ "X$(TEST_PROGS)" != "X" ]; then \ + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \ + else \ + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \ fi else - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) + @$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif define INSTALL_SINGLE_RULE $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) - $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) - $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) endef define INSTALL_RULE diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index ee64ff8df8f4..8383eb89d88a 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -8,6 +8,7 @@ # set -e TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT +CLEAR_ONCE=/sys/kernel/debug/clear_warn_once KSELFTEST_SKIP_TEST=4 # Verify we have LKDTM available in the kernel. @@ -67,6 +68,11 @@ cleanup() { } trap cleanup EXIT +# Reset WARN_ONCE counters so we trip it each time this runs. +if [ -w $CLEAR_ONCE ] ; then + echo 1 > $CLEAR_ONCE +fi + # Save existing dmesg so we can detect new content below dmesg > "$DMESG" diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 92ca32143ae5..9d266e79c6a2 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -14,6 +14,7 @@ STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING UNSET_SMEP CR4 bits went missing DOUBLE_FAULT +CORRUPT_PAC UNALIGNED_LOAD_STORE_WRITE #OVERWRITE_ALLOCATION Corrupts memory on failure #WRITE_AFTER_FREE Corrupts memory on failure diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 895ec992b2f1..9491bbaa0831 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -17,6 +17,8 @@ TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh +TEST_PROGS += rxtimestamp.sh +TEST_PROGS += devlink_port_split.py TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py new file mode 100755 index 000000000000..58bb7e9b88ce --- /dev/null +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -0,0 +1,277 @@ +#!/usr/bin/python3 +# SPDX-License-Identifier: GPL-2.0 + +from subprocess import PIPE, Popen +import json +import time +import argparse +import collections +import sys + +# +# Test port split configuration using devlink-port lanes attribute. +# The test is skipped in case the attribute is not available. +# +# First, check that all the ports with 1 lane fail to split. +# Second, check that all the ports with more than 1 lane can be split +# to all valid configurations (e.g., split to 2, split to 4 etc.) +# + + +Port = collections.namedtuple('Port', 'bus_info name') + + +def run_command(cmd, should_fail=False): + """ + Run a command in subprocess. + Return: Tuple of (stdout, stderr). + """ + + p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + stdout, stderr = p.communicate() + stdout, stderr = stdout.decode(), stderr.decode() + + if stderr != "" and not should_fail: + print("Error sending command: %s" % cmd) + print(stdout) + print(stderr) + return stdout, stderr + + +class devlink_ports(object): + """ + Class that holds information on the devlink ports, required to the tests; + if_names: A list of interfaces in the devlink ports. + """ + + def get_if_names(dev): + """ + Get a list of physical devlink ports. + Return: Array of tuples (bus_info/port, if_name). + """ + + arr = [] + + cmd = "devlink -j port show" + stdout, stderr = run_command(cmd) + assert stderr == "" + ports = json.loads(stdout)['port'] + + for port in ports: + if dev in port: + if ports[port]['flavour'] == 'physical': + arr.append(Port(bus_info=port, name=ports[port]['netdev'])) + + return arr + + def __init__(self, dev): + self.if_names = devlink_ports.get_if_names(dev) + + +def get_max_lanes(port): + """ + Get the $port's maximum number of lanes. + Return: number of lanes, e.g. 1, 2, 4 and 8. + """ + + cmd = "devlink -j port show %s" % port + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + if 'lanes' in values: + lanes = values['lanes'] + else: + lanes = 0 + return lanes + + +def get_split_ability(port): + """ + Get the $port split ability. + Return: split ability, true or false. + """ + + cmd = "devlink -j port show %s" % port.name + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + return values['splittable'] + + +def split(k, port, should_fail=False): + """ + Split $port into $k ports. + If should_fail == True, the split should fail. Otherwise, should pass. + Return: Array of sub ports after splitting. + If the $port wasn't split, the array will be empty. + """ + + cmd = "devlink port split %s count %s" % (port.bus_info, k) + stdout, stderr = run_command(cmd, should_fail=should_fail) + + if should_fail: + if not test(stderr != "", "%s is unsplittable" % port.name): + print("split an unsplittable port %s" % port.name) + return create_split_group(port, k) + else: + if stderr == "": + return create_split_group(port, k) + print("didn't split a splittable port %s" % port.name) + + return [] + + +def unsplit(port): + """ + Unsplit $port. + """ + + cmd = "devlink port unsplit %s" % port + stdout, stderr = run_command(cmd) + test(stderr == "", "Unsplit port %s" % port) + + +def exists(port, dev): + """ + Check if $port exists in the devlink ports. + Return: True is so, False otherwise. + """ + + return any(dev_port.name == port + for dev_port in devlink_ports.get_if_names(dev)) + + +def exists_and_lanes(ports, lanes, dev): + """ + Check if every port in the list $ports exists in the devlink ports and has + $lanes number of lanes after splitting. + Return: True if both are True, False otherwise. + """ + + for port in ports: + max_lanes = get_max_lanes(port) + if not exists(port, dev): + print("port %s doesn't exist in devlink ports" % port) + return False + if max_lanes != lanes: + print("port %s has %d lanes, but %s were expected" + % (port, lanes, max_lanes)) + return False + return True + + +def test(cond, msg): + """ + Check $cond and print a message accordingly. + Return: True is pass, False otherwise. + """ + + if cond: + print("TEST: %-60s [ OK ]" % msg) + else: + print("TEST: %-60s [FAIL]" % msg) + + return cond + + +def create_split_group(port, k): + """ + Create the split group for $port. + Return: Array with $k elements, which are the split port group. + """ + + return list(port.name + "s" + str(i) for i in range(k)) + + +def split_unsplittable_port(port, k): + """ + Test that splitting of unsplittable port fails. + """ + + # split to max + new_split_group = split(k, port, should_fail=True) + + if new_split_group != []: + unsplit(port.bus_info) + + +def split_splittable_port(port, k, lanes, dev): + """ + Test that splitting of splittable port passes correctly. + """ + + new_split_group = split(k, port) + + # Once the split command ends, it takes some time to the sub ifaces' + # to get their names. Use udevadm to continue only when all current udev + # events are handled. + cmd = "udevadm settle" + stdout, stderr = run_command(cmd) + assert stderr == "" + + if new_split_group != []: + test(exists_and_lanes(new_split_group, lanes/k, dev), + "split port %s into %s" % (port.name, k)) + + unsplit(port.bus_info) + + +def make_parser(): + parser = argparse.ArgumentParser(description='A test for port splitting.') + parser.add_argument('--dev', + help='The devlink handle of the device under test. ' + + 'The default is the first registered devlink ' + + 'handle.') + + return parser + + +def main(cmdline=None): + parser = make_parser() + args = parser.parse_args(cmdline) + + dev = args.dev + if not dev: + cmd = "devlink -j dev show" + stdout, stderr = run_command(cmd) + assert stderr == "" + + devs = json.loads(stdout)['dev'] + dev = list(devs.keys())[0] + + cmd = "devlink dev show %s" % dev + stdout, stderr = run_command(cmd) + if stderr != "": + print("devlink device %s can not be found" % dev) + sys.exit(1) + + ports = devlink_ports(dev) + + for port in ports.if_names: + max_lanes = get_max_lanes(port.name) + + # If max lanes is 0, do not test port splitting at all + if max_lanes == 0: + continue + + # If 1 lane, shouldn't be able to split + elif max_lanes == 1: + test(not get_split_ability(port), + "%s should not be able to split" % port.name) + split_unsplittable_port(port, max_lanes) + + # Else, splitting should pass and all the split ports should exist. + else: + lane = max_lanes + test(get_split_ability(port), + "%s should be able to split" % port.name) + while lane > 1: + split_splittable_port(port, lane, max_lanes, dev) + + lane //= 2 + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f0e6be4c09e9..75fe24bcb9cd 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -98,6 +98,11 @@ devlink_resource_size_set() check_err $? "Failed setting path $path to size $size" } +devlink_resource_occ_get() +{ + devlink_resource_get "$@" | jq '.["occ"]' +} + devlink_reload() { local still_pending diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh index 43a948feed26..dbb9fcf759e0 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -50,23 +50,6 @@ cleanup() h1_destroy } -different_speeds_get() -{ - local dev1=$1; shift - local dev2=$1; shift - local with_mode=$1; shift - local adver=$1; shift - - local -a speeds_arr - - speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) - if [[ ${#speeds_arr[@]} < 2 ]]; then - check_err 1 "cannot check different speeds. There are not enough speeds" - fi - - echo ${speeds_arr[0]} ${speeds_arr[1]} -} - same_speeds_autoneg_off() { # Check that when each of the reported speeds is forced, the links come diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh new file mode 100755 index 000000000000..4b42dfd4efd1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + autoneg + autoneg_force_mode + no_cable +" + +NUM_NETIFS=2 +source lib.sh +source ethtool_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + swp3=$NETIF_NO_CABLE +} + +ethtool_extended_state_check() +{ + local dev=$1; shift + local expected_ext_state=$1; shift + local expected_ext_substate=${1:-""}; shift + + local ext_state=$(ethtool $dev | grep "Link detected" \ + | cut -d "(" -f2 | cut -d ")" -f1) + local ext_substate=$(echo $ext_state | cut -sd "," -f2 \ + | sed -e 's/^[[:space:]]*//') + ext_state=$(echo $ext_state | cut -d "," -f1) + + [[ $ext_state == $expected_ext_state ]] + check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" + + [[ $ext_substate == $expected_ext_substate ]] + check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" +} + +autoneg() +{ + RET=0 + + ip link set dev $swp1 up + + sleep 4 + ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + + log_test "Autoneg, No partner detected" + + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + RET=0 + + ip link set dev $swp1 up + ip link set dev $swp2 up + + local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $swp1 speed $speed1 autoneg off + ethtool_set $swp2 speed $speed2 autoneg off + + sleep 4 + ethtool_extended_state_check $swp1 "Autoneg" \ + "No partner detected during force mode" + + ethtool_extended_state_check $swp2 "Autoneg" \ + "No partner detected during force mode" + + log_test "Autoneg, No partner detected during force mode" + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +no_cable() +{ + RET=0 + + ip link set dev $swp3 up + + sleep 1 + ethtool_extended_state_check $swp3 "No cable" + + log_test "No cable" + + ip link set dev $swp3 down +} + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh index 925d229a59d8..9188e624dec0 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -67,3 +67,20 @@ common_speeds_get() <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) } + +different_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local with_mode=$1; shift + local adver=$1; shift + + local -a speeds_arr + + speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) + if [[ ${#speeds_arr[@]} < 2 ]]; then + check_err 1 "cannot check different speeds. There are not enough speeds" + fi + + echo ${speeds_arr[0]} ${speeds_arr[1]} +} diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index e2adb533c8fc..b802c14d2950 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -14,6 +14,9 @@ NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +# Port that does not have a cable connected. +NETIF_NO_CABLE=eth8 + ############################################################################## # Defines diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh new file mode 100755 index 000000000000..5f20d289ee43 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the +# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the +# packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_udp_sport + test_udp_dport + test_tcp_sport + test_tcp_dport +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +do_test_pedit_l4port_one() +{ + local pedit_locus=$1; shift + local pedit_prot=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + local saddr=$1; shift + local daddr=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + RET=0 + + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345 + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + + log_test "$pedit_locus pedit $pedit_action" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_pedit_l4port() +{ + local locus=$1; shift + local prot=$1; shift + local pedit_port=$1; shift + local flower_port=$1; shift + local port + + for port in 1 11111 65535; do + do_test_pedit_l4port_one "$locus" "$prot" \ + "$prot $pedit_port set $port" \ + ip "ip_proto $prot $flower_port $port" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_udp_sport() +{ + do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port + do_test_pedit_l4port "dev $swp2 egress" udp sport src_port +} + +test_udp_dport() +{ + do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port + do_test_pedit_l4port "dev $swp2 egress" udp dport dst_port +} + +test_tcp_sport() +{ + do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port + do_test_pedit_l4port "dev $swp2 egress" tcp sport src_port +} + +test_tcp_dport() +{ + do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port + do_test_pedit_l4port "dev $swp2 egress" tcp dport dst_port +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh new file mode 100755 index 000000000000..e714bae473fb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -0,0 +1,492 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends one stream of traffic from H1 through a TBF shaper, to a RED +# within TBF shaper on $swp3. The two shapers have the same configuration, and +# thus the resulting stream should fill all available bandwidth on the latter +# shaper. A second stream is sent from H2 also via $swp3, and used to inject +# additional traffic. Since all available bandwidth is taken, this traffic has +# to go to backlog. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | TBF 10Mbps | | | | +# +-----|--------------------+ +-----|--------------------+ +# | | +# +-----|------------------------------------------------|--------------------+ +# | SW | | | +# | +--|------------------------------------------------|----------------+ | +# | | + $swp1 + $swp2 | | +# | | BR | | +# | | | | +# | | + $swp3 | | +# | | | TBF 10Mbps / RED | | +# | +--------------------------------|-----------------------------------+ | +# | | | +# +-----------------------------------|---------------------------------------+ +# | +# +-----|--------------------+ +# | H3 | | +# | + $h1 | +# | 192.0.2.3/28 | +# | | +# +--------------------------+ + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_nodrop_test + red_test + red_qevent_test + ecn_qevent_test +" + +NUM_NETIFS=6 +CHECK_TC="yes" +source lib.sh + +BACKLOG=30000 +PKTSZ=1400 + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + mtu_set $h1 10000 + tc qdisc replace dev $h1 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M +} + +h1_destroy() +{ + tc qdisc del dev $h1 root + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + mtu_set $h2 10000 +} + +h2_destroy() +{ + mtu_restore $h2 + simple_if_fini $h2 192.0.2.2/28 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.3/28 + mtu_set $h3 10000 +} + +h3_destroy() +{ + mtu_restore $h3 + simple_if_fini $h3 192.0.2.3/28 +} + +switch_create() +{ + ip link add dev br up type bridge + ip link set dev $swp1 up master br + ip link set dev $swp2 up master br + ip link set dev $swp3 up master br + + mtu_set $swp1 10000 + mtu_set $swp2 10000 + mtu_set $swp3 10000 + + tc qdisc replace dev $swp3 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M + ip link add name _drop_test up type dummy +} + +switch_destroy() +{ + ip link del dev _drop_test + tc qdisc del dev $swp3 root + + mtu_restore $h3 + mtu_restore $h2 + mtu_restore $h1 + + ip link set dev $swp3 down nomaster + ip link set dev $swp2 down nomaster + ip link set dev $swp1 down nomaster + ip link del dev br +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3_mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 " from host 1" + ping_test $h2 192.0.2.3 " from host 2" +} + +get_qdisc_backlog() +{ + qdisc_stats_get $swp3 11: .backlog +} + +get_nmarked() +{ + qdisc_stats_get $swp3 11: .marked +} + +get_qdisc_npackets() +{ + qdisc_stats_get $swp3 11: .packets +} + +get_nmirrored() +{ + link_stats_get _drop_test tx packets +} + +send_packets() +{ + local proto=$1; shift + local pkts=$1; shift + + $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@" +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 10 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. +build_backlog() +{ + local size=$1; shift + local proto=$1; shift + + local i=0 + + while :; do + local cur=$(get_qdisc_backlog) + local diff=$((size - cur)) + local pkts=$(((diff + PKTSZ - 1) / PKTSZ)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + send_packets $proto $pkts "$@" + sleep 1 + done +} + +check_marking() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmarked_0=$(get_nmarked) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmarked_1=$(get_nmarked) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +check_mirroring() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmirrored_0=$(get_nmirrored) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmirrored_1=$(get_nmirrored) + + local nmirrored_d=$((nmirrored_1 - nmirrored_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmirrored_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +ecn_test_common() +{ + local name=$1; shift + local limit=$1; shift + local backlog + local pct + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "$name backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. + RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "$name backlog > limit" +} + +do_ecn_test() +{ + local limit=$1; shift + local name=ECN + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "$name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local limit=$1; shift + local name="ECN nodrop" + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "$name backlog > limit: UDP not dropped" + + stop_traffic + sleep 1 +} + +do_red_test() +{ + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog > limit" + + stop_traffic + sleep 1 +} + +do_red_qevent_test() +{ + local limit=$1; shift + local backlog + local base + local now + local pct + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t udp -q & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + # Push to the queue until it's at the limit. The configured limit is + # rounded by the qdisc, so this is the best we can do to get to the real + # limit. + build_backlog $((3 * limit / 2)) udp >/dev/null + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now >= base + 100)) + check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen" + + tc filter del block 10 pref 1234 handle 102 matchall + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now == base)) + check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" + + log_test "RED early_dropped packets mirrored" + + stop_traffic + sleep 1 +} + +do_ecn_qevent_test() +{ + local limit=$1; shift + local name=ECN + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring "== 0") + check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0." + + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95." + + tc filter del block 10 pref 1234 handle 102 matchall + + log_test "ECN marked packets mirrored" + + stop_traffic + sleep 1 +} + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc replace dev $swp3 parent 1:1 handle 11: red \ + limit 1M avpkt $PKTSZ probability 1 \ + min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 1:1 +} + +ecn_test() +{ + install_qdisc ecn + do_ecn_test $BACKLOG + uninstall_qdisc +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test $BACKLOG + uninstall_qdisc +} + +red_test() +{ + install_qdisc + do_red_test $BACKLOG + uninstall_qdisc +} + +red_qevent_test() +{ + install_qdisc qevent early_drop block 10 + do_red_qevent_test $BACKLOG + uninstall_qdisc +} + +ecn_qevent_test() +{ + install_qdisc ecn qevent mark block 10 + do_ecn_qevent_test $BACKLOG + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh new file mode 100755 index 000000000000..160f9cccdfb7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test tc-police action. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | | +# | | default via 192.0.2.2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | | +# | 198.51.100.2/24 203.0.113.2/24 | +# | + $rp2 + $rp3 | +# | | | | +# +----|-----------------------------------------|----------------------------+ +# | | +# +----|----------------------------+ +----|----------------------------+ +# | | default via 198.51.100.2 | | | default via 203.0.113.2 | +# | | | | | | +# | | 198.51.100.1/24 | | | 203.0.113.1/24 | +# | + $h2 | | + $h3 | +# | H2 (vrf) | | H3 (vrf) | +# +---------------------------------+ +---------------------------------+ + +ALL_TESTS=" + police_rx_test + police_tx_test + police_shared_test + police_rx_mirror_test + police_tx_mirror_test +" +NUM_NETIFS=6 +source tc_common.sh +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 +} + +h3_create() +{ + simple_if_init $h3 203.0.113.1/24 + + ip -4 route add default vrf v$h3 nexthop via 203.0.113.2 + + tc qdisc add dev $h3 clsact +} + +h3_destroy() +{ + tc qdisc del dev $h3 clsact + + ip -4 route del default vrf v$h3 nexthop via 203.0.113.2 + + simple_if_fini $h3 203.0.113.1/24 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + ip link set dev $rp3 up + + __addr_add_del $rp1 add 192.0.2.2/24 + __addr_add_del $rp2 add 198.51.100.2/24 + __addr_add_del $rp3 add 203.0.113.2/24 + + tc qdisc add dev $rp1 clsact + tc qdisc add dev $rp2 clsact +} + +router_destroy() +{ + tc qdisc del dev $rp2 clsact + tc qdisc del dev $rp1 clsact + + __addr_add_del $rp3 del 203.0.113.2/24 + __addr_add_del $rp2 del 198.51.100.2/24 + __addr_add_del $rp1 del 192.0.2.2/24 + + ip link set dev $rp3 down + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +police_common_test() +{ + local test_name=$1; shift + + RET=0 + + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .bytes) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) + + local er=$((80 * 1000 * 1000)) + local nr=$(rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_rx_test() +{ + # Rule to police traffic destined to $h2 on ingress of $rp1 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police rate 80mbit burst 16k conform-exceed drop/ok + + police_common_test "police on rx" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +police_tx_test() +{ + # Rule to police traffic destined to $h2 on egress of $rp2 + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police rate 80mbit burst 16k conform-exceed drop/ok + + police_common_test "police on tx" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower +} + +police_shared_common_test() +{ + local dport=$1; shift + local test_name=$1; shift + + RET=0 + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=$dport -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .bytes) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) + + local er=$((80 * 1000 * 1000)) + local nr=$(rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null +} + +police_shared_test() +{ + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp src_port 12345 \ + action drop + + # Rule to police traffic destined to $h2 on ingress of $rp1 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police rate 80mbit burst 16k conform-exceed drop/ok \ + index 10 + + # Rule to police a different flow destined to $h2 on egress of $rp2 + # using same policer + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 22222 \ + action police index 10 + + police_shared_common_test 54321 "police with shared policer - rx" + + police_shared_common_test 22222 "police with shared policer - tx" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_mirror_common_test() +{ + local pol_if=$1; shift + local dir=$1; shift + local test_name=$1; shift + + RET=0 + + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + # Rule to measure bandwidth of mirrored traffic on ingress of $h3 + tc filter add dev $h3 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + # Rule to police traffic destined to $h2 and mirror to $h3 + tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police rate 80mbit burst 16k conform-exceed drop/pipe \ + action mirred egress mirror dev $rp3 + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .bytes) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) + + local er=$((80 * 1000 * 1000)) + local nr=$(rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + local t0=$(tc_rule_stats_get $h3 1 ingress .bytes) + sleep 10 + local t1=$(tc_rule_stats_get $h3 1 ingress .bytes) + + local er=$((80 * 1000 * 1000)) + local nr=$(rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null + tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower + tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_rx_mirror_test() +{ + police_mirror_common_test $rp1 ingress "police rx and mirror" +} + +police_tx_mirror_test() +{ + police_mirror_common_test $rp2 egress "police tx and mirror" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f50976ee7d44..aa254aefc2c3 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,7 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh new file mode 100755 index 000000000000..39edce4f541c --- /dev/null +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns="ns1-$rndh" +ksft_skip=4 +test_cnt=1 +ret=0 +pids=() + +flush_pids() +{ + # mptcp_connect in join mode will sleep a bit before completing, + # give it some time + sleep 1.1 + + for pid in ${pids[@]}; do + [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 + done + pids=() +} + +cleanup() +{ + ip netns del $ns + for pid in ${pids[@]}; do + [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi +ss -h | grep -q MPTCP +if [ $? -ne 0 ];then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip +fi + +__chk_nr() +{ + local condition="$1" + local expected=$2 + local msg nr + + shift 2 + msg=$* + nr=$(ss -inmHMN $ns | $condition) + + printf "%-50s" "$msg" + if [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + +chk_msk_nr() +{ + __chk_nr "grep -c token:" $* +} + +chk_msk_fallback_nr() +{ + __chk_nr "grep -c fallback" $* +} + +chk_msk_remote_key_nr() +{ + __chk_nr "grep -c remote_key" $* +} + + +trap cleanup EXIT +ip netns add $ns +ip -n $ns link set dev lo up + +echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & +sleep 0.1 +pids[0]=$! +chk_msk_nr 0 "no msk on netns creation" + +echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & +sleep 0.1 +pids[1]=$! +chk_msk_nr 2 "after MPC handshake " +chk_msk_remote_key_nr 2 "....chk remote_key" +chk_msk_fallback_nr 0 "....chk no fallback" +flush_pids + + +echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & +pids[0]=$! +sleep 0.1 +echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & +pids[1]=$! +sleep 0.1 +chk_msk_fallback_nr 1 "check fallback" +flush_pids + +NR_CLIENTS=100 +for I in `seq 1 $NR_CLIENTS`; do + echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & + pids[$((I*2))]=$! +done +sleep 0.1 + +for I in `seq 1 $NR_CLIENTS`; do + echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & + pids[$((I*2 + 1))]=$! +done +sleep 1.5 + +chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +flush_pids + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cedee5b952ba..cad6f73a5fd0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -11,6 +11,7 @@ #include <stdio.h> #include <stdlib.h> #include <strings.h> +#include <signal.h> #include <unistd.h> #include <sys/poll.h> @@ -36,6 +37,7 @@ extern int optind; static int poll_timeout = 10 * 1000; static bool listen_mode; +static bool quit; enum cfg_mode { CFG_MODE_POLL, @@ -52,11 +54,12 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static int cfg_wait; static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] connect_address\n"); + "[-l] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); fprintf(stderr, "\t-t num -- set poll timeout to num\n"); fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); @@ -65,9 +68,15 @@ static void die_usage(void) fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); + fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); } +static void handle_signal(int nr) +{ + quit = true; +} + static const char *getxinfo_strerr(int err) { if (err == EAI_SYSTEM) @@ -418,8 +427,8 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); close(peerfd); return 0; @@ -812,11 +821,12 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) { + while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; break; case 'l': listen_mode = true; @@ -850,6 +860,9 @@ static void parse_opts(int argc, char **argv) case 'R': cfg_rcvbuf = parse_int(optarg); break; + case 'w': + cfg_wait = atoi(optarg)*1000000; + break; } } @@ -865,6 +878,7 @@ int main(int argc, char *argv[]) { init_rng(); + signal(SIGUSR1, handle_signal); parse_opts(argc, argv); if (tcpulp_audit) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index acf02e156d20..57d75b7f6220 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:f:t" ret=0 sin="" sout="" @@ -21,6 +21,8 @@ testmode="" sndbuf=0 rcvbuf=0 options_log=true +do_tcp=0 +filesize=0 if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -40,9 +42,11 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-f: size of file to transfer in bytes (default random)" echo -e "\t-S: set sndbuf value (default: use kernel default)" echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" + echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" } while getopts "$optstring" option;do @@ -94,6 +98,12 @@ while getopts "$optstring" option;do "m") testmode="$OPTARG" ;; + "f") + filesize="$OPTARG" + ;; + "t") + do_tcp=$((do_tcp+1)) + ;; "?") usage $0 exit 1 @@ -186,6 +196,9 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 +# use TCP syn cookies, even if no flooding was detected. +ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -385,14 +398,23 @@ do_transfer() capuser="-Z $SUDO_USER" fi - local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap" + local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" + + ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - local cappid=$! + ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! sleep 1 fi + local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) + local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) + local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) + local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & local spid=$! @@ -413,7 +435,8 @@ do_transfer() if $capture; then sleep 1 - kill $cappid + kill ${cappid_listener} + kill ${cappid_connector} fi local duration @@ -435,6 +458,45 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? + local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) + local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) + + local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) + local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + + expect_synrx=$((stat_synrx_last_l)) + expect_ackrx=$((stat_ackrx_last_l)) + + cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) + cookies=${cookies##*=} + + if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then + expect_synrx=$((stat_synrx_last_l+1)) + expect_ackrx=$((stat_ackrx_last_l+1)) + fi + if [ $cookies -eq 2 ];then + if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance" + fi + if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance" + fi + else + if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed" + fi + if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed" + fi + fi + + if [ $expect_synrx -ne $stat_synrx_now_l ] ;then + echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + fi + if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then + echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then echo "$duration [ OK ]" cat "$capout" @@ -449,20 +511,25 @@ make_file() { local name=$1 local who=$2 + local SIZE=$filesize + local ksize + local rem - local SIZE TSIZE - SIZE=$((RANDOM % (1024 * 8))) - TSIZE=$((SIZE * 1024)) + if [ $SIZE -eq 0 ]; then + local MAXSIZE=$((1024 * 1024 * 8)) + local MINSIZE=$((1024 * 256)) - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE)) + fi - SIZE=$((RANDOM % 1024)) - SIZE=$((SIZE + 128)) - TSIZE=$((TSIZE + SIZE)) - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + ksize=$((SIZE / 1024)) + rem=$((SIZE - (ksize * 1024))) + + dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null + dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" - echo "Created $name (size $TSIZE) containing data sent by $who" + echo "Created $name (size $(du -b "$name")) containing data sent by $who" } run_tests_lo() @@ -497,9 +564,11 @@ run_tests_lo() return 1 fi - # don't bother testing fallback tcp except for loopback case. - if [ ${listener_ns} != ${connector_ns} ]; then - return 0 + if [ $do_tcp -eq 0 ]; then + # don't bother testing fallback tcp except for loopback case. + if [ ${listener_ns} != ${connector_ns} ]; then + return 0 + fi fi do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} @@ -516,6 +585,15 @@ run_tests_lo() return 1 fi + if [ $do_tcp -gt 1 ] ;then + do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + fi + return 0 } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index dd42c2f692d0..f39c1129ce5f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -72,6 +72,15 @@ reset() init } +reset_with_cookies() +{ + reset + + for netns in "$ns1" "$ns2";do + ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2 + done +} + for arg in "$@"; do if [ "$arg" = "-c" ]; then capture=1 @@ -138,7 +147,7 @@ do_transfer() capuser="-Z $SUDO_USER" fi - capfile="mp_join-${listener_ns}.pcap" + capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") echo "Capturing traffic for test $TEST_COUNT into $capfile" ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & @@ -227,7 +236,7 @@ chk_join_nr() local count local dump_stats - printf "%-36s %s" "$msg" "syn" + printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$syn_nr" ]; then @@ -354,4 +363,57 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +# single subflow, syncookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow with syn cookies" 1 1 1 + +# multiple subflows with syn cookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows with syn cookies" 2 2 2 + +# multiple subflows limited by server +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflows limited by server w cookies" 2 2 1 + +# test signal address with cookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address with syn cookies" 1 1 1 + +# test cookie with subflow and signal +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflow and signal w cookies" 2 2 2 + +# accept and use add_addr with additional subflows +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflows and signal w. cookies" 3 3 3 + exit $ret diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 4b02933cab8a..bdc03a2097e8 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -125,9 +125,8 @@ static int do_setcpu(int cpu) CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (sched_setaffinity(0, sizeof(mask), &mask)) - error(1, 0, "setaffinity %d", cpu); - - if (cfg_verbose) + fprintf(stderr, "cpu: unable to pin, may increase variance.\n"); + else if (cfg_verbose) fprintf(stderr, "cpu: %u\n", cpu); return 0; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 77c09cd339c3..6bbf69a28e12 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -59,6 +59,45 @@ # Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of # VXLAN # +# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception +# Set up three namespaces, A, B, and C, with routing between A and B over +# R1. R2 is unused in these tests. A has a veth connection to C, and is +# connected to B via a VXLAN endpoint, which is directly bridged to C. +# MTU on the B-R1 link is lower than other MTUs. +# +# Check that both C and A are able to communicate with B over the VXLAN +# tunnel, and that PMTU exceptions with the correct values are created. +# +# segment a_r1 segment b_r1 b_r1: 4000 +# .--------------R1--------------. everything +# C---veth A B else: 5000 +# ' bridge | +# '---- - - - - - VXLAN - - - - - - - ' +# +# - pmtu_ipv{4,6}_br_geneve{4,6}_exception +# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel +# instead. +# +# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception +# Set up two namespaces, B, and C, with routing between the init namespace +# and B over R1. A and R2 are unused in these tests. The init namespace +# has a veth connection to C, and is connected to B via a VXLAN endpoint, +# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link +# is lower than other MTUs. +# +# Check that C is able to communicate with B over the VXLAN tunnel, and +# that PMTU exceptions with the correct values are created. +# +# segment a_r1 segment b_r1 b_r1: 4000 +# .--------------R1--------------. everything +# C---veth init B else: 5000 +# '- ovs | +# '---- - - - - - VXLAN - - - - - - - ' +# +# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception +# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel +# instead. +# # - pmtu_ipv{4,6}_fou{4,6}_exception # Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation # (FoU) over IPv4/IPv6, instead of VXLAN @@ -147,6 +186,22 @@ tests=" pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1 pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1 pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1 + pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1 + pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1 + pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1 + pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1 + pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1 + pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1 + pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1 + pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1 + pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1 + pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1 + pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1 + pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1 + pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1 + pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1 + pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1 + pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1 pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1 pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1 pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1 @@ -173,10 +228,12 @@ tests=" NS_A="ns-A" NS_B="ns-B" +NS_C="ns-C" NS_R1="ns-R1" NS_R2="ns-R2" ns_a="ip netns exec ${NS_A}" ns_b="ip netns exec ${NS_B}" +ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" @@ -239,9 +296,11 @@ routes_nh=" veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" +veth4_c_addr="192.168.2.10" veth4_mask="24" veth6_a_addr="fd00:1::a" veth6_b_addr="fd00:1::b" +veth6_c_addr="fd00:2::c" veth6_mask="64" tunnel4_a_addr="192.168.2.1" @@ -428,7 +487,7 @@ setup_ip6ip6() { } setup_namespaces() { - for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns add ${n} || return 1 # Disable DAD, so that we don't have to wait to use the @@ -484,6 +543,7 @@ setup_vxlan_or_geneve() { a_addr="${2}" b_addr="${3}" opts="${4}" + br_if_a="${5}" if [ "${type}" = "vxlan" ]; then opts="${opts} ttl 64 dstport 4789" @@ -497,10 +557,16 @@ setup_vxlan_or_geneve() { run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} - run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a - run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + if [ -n "${br_if_a}" ]; then + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a} + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a} + run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a} + else + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + fi - run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b run_cmd ${ns_a} ip link set ${type}_a up @@ -516,11 +582,27 @@ setup_vxlan4() { } setup_geneve6() { - setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" } setup_vxlan6() { - setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" +} + +setup_bridged_geneve4() { + setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0" +} + +setup_bridged_vxlan4() { + setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0" +} + +setup_bridged_geneve6() { + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0" +} + +setup_bridged_vxlan6() { + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0" } setup_xfrm() { @@ -630,6 +712,80 @@ setup_routing() { return 0 } +setup_bridge() { + run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link set br0 up + + run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C + run_cmd ${ns_c} ip link set veth_A-C netns ns-A + + run_cmd ${ns_a} ip link set veth_A-C up + run_cmd ${ns_c} ip link set veth_C-A up + run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A + run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A + run_cmd ${ns_a} ip link set veth_A-C master br0 +} + +setup_ovs_vxlan_or_geneve() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + + if [ "${type}" = "vxlan" ]; then + opts="${opts} ttl 64 dstport 4789" + opts_b="local ${b_addr}" + fi + + run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ + set interface ${type}_a type=${type} \ + options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 + + run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1 + + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + + run_cmd ${ns_b} ip link set ${type}_b up +} + +setup_ovs_geneve4() { + setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 +} + +setup_ovs_vxlan4() { + setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 +} + +setup_ovs_geneve6() { + setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 +} + +setup_ovs_vxlan6() { + setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 +} + +setup_ovs_bridge() { + run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ip link set ovs_br0 up + + run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C + run_cmd ${ns_c} ip link set veth_A-C netns 1 + + run_cmd ip link set veth_A-C up + run_cmd ${ns_c} ip link set veth_C-A up + run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A + run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A + run_cmd ovs-vsctl add-port ovs_br0 veth_A-C + + # Move veth_A-R1 to init + run_cmd ${ns_a} ip link set veth_A-R1 netns 1 + run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1 + run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1 + run_cmd ip link set veth_A-R1 up + run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2 + run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -657,9 +813,14 @@ cleanup() { done tcpdump_pids= - for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done + + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null + ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null + ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null } mtu() { @@ -892,6 +1053,177 @@ test_pmtu_ipv6_geneve6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 } +test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing bridge bridged_${type}4 || return 2 + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing bridge bridged_${type}6 || return 2 + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "${ns_a}" br0 "${ns_a}" veth-A-C \ + "${ns_c}" veth_C-A + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_a}" br0 $((${ll_mtu} + 1000)) + mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000)) + mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + + run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1 + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1 + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface" +} + +test_pmtu_ipv4_br_vxlan4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4 +} + +test_pmtu_ipv6_br_vxlan4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_br_geneve4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_br_geneve4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4 +} + +test_pmtu_ipv4_br_vxlan6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6 +} + +test_pmtu_ipv6_br_vxlan6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_br_geneve6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_br_geneve6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6 +} + +test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + if [ "${type}" = "vxlan" ]; then + tun_a="vxlan_sys_4789" + elif [ "${type}" = "geneve" ]; then + tun_a="genev_sys_6081" + fi + + trace "" "${tun_a}" "${ns_b}" ${type}_b \ + "" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "" ovs_br0 "" veth-A-C \ + "${ns_c}" veth_C-A + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "" ovs_br0 $((${ll_mtu} + 1000)) + mtu "" veth_A-C $((${ll_mtu} + 1000)) + mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "" ${tun_a} $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + + run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1 + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface" +} + +test_pmtu_ipv4_ovs_vxlan4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4 +} + +test_pmtu_ipv6_ovs_vxlan4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_ovs_geneve4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_ovs_geneve4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4 +} + +test_pmtu_ipv4_ovs_vxlan6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6 +} + +test_pmtu_ipv6_ovs_vxlan6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_ovs_geneve6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_ovs_geneve6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6 +} + test_pmtu_ipvX_over_fouY_or_gueY() { inner_family=${1} outer_family=${2} diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdbf4b3125b6..7c38a909f8b8 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -5,7 +5,6 @@ # set -e devdummy="test-dummy0" -ret=0 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -66,7 +65,7 @@ kci_test_bridge() devbr="test-br0" vlandev="testbr-vlan1" - ret=0 + local ret=0 ip link add name "$devbr" type bridge check_err $? @@ -113,7 +112,7 @@ kci_test_gre() rem=10.42.42.1 loc=10.0.0.1 - ret=0 + local ret=0 ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 check_err $? ip link set $gredev up @@ -149,7 +148,7 @@ kci_test_gre() kci_test_tc() { dev=lo - ret=0 + local ret=0 tc qdisc add dev "$dev" root handle 1: htb check_err $? @@ -184,7 +183,7 @@ kci_test_tc() kci_test_polrouting() { - ret=0 + local ret=0 ip rule add fwmark 1 lookup 100 check_err $? ip route add local 0.0.0.0/0 dev lo table 100 @@ -207,7 +206,7 @@ kci_test_route_get() { local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) - ret=0 + local ret=0 ip route get 127.0.0.1 > /dev/null check_err $? @@ -290,7 +289,7 @@ kci_test_promote_secondaries() kci_test_addrlabel() { - ret=0 + local ret=0 ip addrlabel add prefix dead::/64 dev lo label 1 check_err $? @@ -330,7 +329,7 @@ kci_test_addrlabel() kci_test_ifalias() { - ret=0 + local ret=0 namewant=$(uuidgen) syspathname="/sys/class/net/$devdummy/ifalias" @@ -385,7 +384,7 @@ kci_test_ifalias() kci_test_vrf() { vrfname="test-vrf" - ret=0 + local ret=0 ip link show type vrf 2>/dev/null if [ $? -ne 0 ]; then @@ -425,7 +424,7 @@ kci_test_vrf() kci_test_encap_vxlan() { - ret=0 + local ret=0 vxlan="test-vxlan0" vlan="test-vlan0" testns="$1" @@ -511,7 +510,7 @@ kci_test_encap_vxlan() kci_test_encap_fou() { - ret=0 + local ret=0 name="test-fou" testns="$1" @@ -548,7 +547,7 @@ kci_test_encap_fou() kci_test_encap() { testns="testns" - ret=0 + local ret=0 ip netns add "$testns" if [ $? -ne 0 ]; then @@ -565,15 +564,18 @@ kci_test_encap() check_err $? kci_test_encap_vxlan "$testns" + check_err $? kci_test_encap_fou "$testns" + check_err $? ip netns del "$testns" + return $ret } kci_test_macsec() { msname="test_macsec0" - ret=0 + local ret=0 ip macsec help 2>&1 | grep -q "^Usage: ip macsec" if [ $? -ne 0 ]; then @@ -631,7 +633,7 @@ kci_test_macsec() #------------------------------------------------------------------- kci_test_ipsec() { - ret=0 + local ret=0 algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128" srcip=192.168.123.1 dstip=192.168.123.2 @@ -731,7 +733,7 @@ kci_test_ipsec() #------------------------------------------------------------------- kci_test_ipsec_offload() { - ret=0 + local ret=0 algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128" srcip=192.168.123.3 dstip=192.168.123.4 @@ -841,7 +843,7 @@ kci_test_gretap() { testns="testns" DEV_NS=gretap00 - ret=0 + local ret=0 ip netns add "$testns" if [ $? -ne 0 ]; then @@ -891,7 +893,7 @@ kci_test_ip6gretap() { testns="testns" DEV_NS=ip6gretap00 - ret=0 + local ret=0 ip netns add "$testns" if [ $? -ne 0 ]; then @@ -941,7 +943,7 @@ kci_test_erspan() { testns="testns" DEV_NS=erspan00 - ret=0 + local ret=0 ip link help erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then @@ -1006,7 +1008,7 @@ kci_test_ip6erspan() { testns="testns" DEV_NS=ip6erspan00 - ret=0 + local ret=0 ip link help ip6erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then @@ -1077,7 +1079,7 @@ kci_test_fdb_get() test_mac=de:ad:be:ef:13:37 localip="10.0.2.2" dstip="10.0.2.3" - ret=0 + local ret=0 bridge fdb help 2>&1 |grep -q 'bridge fdb get' if [ $? -ne 0 ];then @@ -1125,7 +1127,7 @@ kci_test_neigh_get() dstmac=de:ad:be:ef:13:37 dstip=10.0.2.4 dstip6=dead::2 - ret=0 + local ret=0 ip neigh help 2>&1 |grep -q 'ip neigh get' if [ $? -ne 0 ];then @@ -1175,6 +1177,7 @@ kci_test_neigh_get() kci_test_rtnl() { + local ret=0 kci_add_dummy if [ $ret -ne 0 ];then echo "FAIL: cannot add dummy interface" @@ -1182,27 +1185,48 @@ kci_test_rtnl() fi kci_test_polrouting + check_err $? kci_test_route_get + check_err $? kci_test_addrlft + check_err $? kci_test_promote_secondaries + check_err $? kci_test_tc + check_err $? kci_test_gre + check_err $? kci_test_gretap + check_err $? kci_test_ip6gretap + check_err $? kci_test_erspan + check_err $? kci_test_ip6erspan + check_err $? kci_test_bridge + check_err $? kci_test_addrlabel + check_err $? kci_test_ifalias + check_err $? kci_test_vrf + check_err $? kci_test_encap + check_err $? kci_test_macsec + check_err $? kci_test_ipsec + check_err $? kci_test_ipsec_offload + check_err $? kci_test_fdb_get + check_err $? kci_test_neigh_get + check_err $? kci_del_dummy + return $ret } #check for needed privileges @@ -1221,4 +1245,4 @@ done kci_test_rtnl -exit $ret +exit $? diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index bcb79ba1f214..e4613ce4ed69 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -44,6 +44,7 @@ struct test_case { struct options sockopt; struct tstamps expected; bool enabled; + bool warn_on_fail; }; struct sof_flag { @@ -67,44 +68,44 @@ static struct socket_type socket_types[] = { static struct test_case test_cases[] = { { {}, {} }, { - { so_timestamp: 1 }, - { tstamp: true } + { .so_timestamp = 1 }, + { .tstamp = true } }, { - { so_timestampns: 1 }, - { tstampns: true } + { .so_timestampns = 1 }, + { .tstampns = true } }, { - { so_timestamp: 1, so_timestampns: 1 }, - { tstampns: true } + { .so_timestamp = 1, .so_timestampns = 1 }, + { .tstampns = true } }, { - { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE }, + { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE }, {} }, { /* Loopback device does not support hw timestamps. */ - { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE }, + { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE }, {} }, { - { so_timestamping: SOF_TIMESTAMPING_SOFTWARE }, - {} + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE }, + .warn_on_fail = true }, { - { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE + { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE }, {} }, { - { so_timestamping: SOF_TIMESTAMPING_SOFTWARE + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, - { swtstamp: true } + { .swtstamp = true } }, { - { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE + { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, - { tstamp: true, swtstamp: true } + { .tstamp = true, .swtstamp = true } }, }; @@ -115,6 +116,9 @@ static struct option long_options[] = { { "tcp", no_argument, 0, 't' }, { "udp", no_argument, 0, 'u' }, { "ip", no_argument, 0, 'i' }, + { "strict", no_argument, 0, 'S' }, + { "ipv4", no_argument, 0, '4' }, + { "ipv6", no_argument, 0, '6' }, { NULL, 0, NULL, 0 }, }; @@ -270,37 +274,55 @@ void config_so_flags(int rcv, struct options o) error(1, errno, "Failed to set SO_TIMESTAMPING"); } -bool run_test_case(struct socket_type s, struct test_case t) +bool run_test_case(struct socket_type *s, int test_num, char ip_version, + bool strict) { - int port = (s.type == SOCK_RAW) ? 0 : next_port++; + union { + struct sockaddr_in6 addr6; + struct sockaddr_in addr4; + struct sockaddr addr_un; + } addr; int read_size = op_size; - struct sockaddr_in addr; + int src, dst, rcv, port; + socklen_t addr_size; bool failed = false; - int src, dst, rcv; - src = socket(AF_INET, s.type, s.protocol); + port = (s->type == SOCK_RAW) ? 0 : next_port++; + memset(&addr, 0, sizeof(addr)); + if (ip_version == '4') { + addr.addr4.sin_family = AF_INET; + addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.addr4.sin_port = htons(port); + addr_size = sizeof(addr.addr4); + if (s->type == SOCK_RAW) + read_size += 20; /* for IPv4 header */ + } else { + addr.addr6.sin6_family = AF_INET6; + addr.addr6.sin6_addr = in6addr_loopback; + addr.addr6.sin6_port = htons(port); + addr_size = sizeof(addr.addr6); + } + printf("Starting testcase %d over ipv%c...\n", test_num, ip_version); + src = socket(addr.addr_un.sa_family, s->type, + s->protocol); if (src < 0) error(1, errno, "Failed to open src socket"); - dst = socket(AF_INET, s.type, s.protocol); + dst = socket(addr.addr_un.sa_family, s->type, + s->protocol); if (dst < 0) error(1, errno, "Failed to open dst socket"); - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr.sin_port = htons(port); - - if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0) + if (bind(dst, &addr.addr_un, addr_size) < 0) error(1, errno, "Failed to bind to port %d", port); - if (s.type == SOCK_STREAM && (listen(dst, 1) < 0)) + if (s->type == SOCK_STREAM && (listen(dst, 1) < 0)) error(1, errno, "Failed to listen"); - if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0) + if (connect(src, &addr.addr_un, addr_size) < 0) error(1, errno, "Failed to connect"); - if (s.type == SOCK_STREAM) { + if (s->type == SOCK_STREAM) { rcv = accept(dst, NULL, NULL); if (rcv < 0) error(1, errno, "Failed to accept"); @@ -309,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t) rcv = dst; } - config_so_flags(rcv, t.sockopt); + config_so_flags(rcv, test_cases[test_num].sockopt); usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */ do_send(src); - if (s.type == SOCK_RAW) - read_size += 20; /* for IP header */ - failed = do_recv(rcv, read_size, t.expected); + failed = do_recv(rcv, read_size, test_cases[test_num].expected); close(rcv); close(src); + if (failed) { + printf("FAILURE in testcase %d over ipv%c ", test_num, + ip_version); + print_test_case(&test_cases[test_num]); + if (!strict && test_cases[test_num].warn_on_fail) + failed = false; + } return failed; } @@ -327,6 +354,9 @@ int main(int argc, char **argv) { bool all_protocols = true; bool all_tests = true; + bool cfg_ipv4 = false; + bool cfg_ipv6 = false; + bool strict = false; int arg_index = 0; int failures = 0; int s, t, opt; @@ -362,6 +392,15 @@ int main(int argc, char **argv) all_protocols = false; socket_types[0].enabled = true; break; + case 'S': + strict = true; + break; + case '4': + cfg_ipv4 = true; + break; + case '6': + cfg_ipv6 = true; + break; default: error(1, 0, "Failed to parse parameters."); } @@ -375,13 +414,14 @@ int main(int argc, char **argv) for (t = 0; t < ARRAY_SIZE(test_cases); t++) { if (!all_tests && !test_cases[t].enabled) continue; - - printf("Starting testcase %d...\n", t); - if (run_test_case(socket_types[s], test_cases[t])) { - failures++; - printf("FAILURE in test case "); - print_test_case(&test_cases[t]); - } + if (cfg_ipv4 || !cfg_ipv6) + if (run_test_case(&socket_types[s], t, '4', + strict)) + failures++; + if (cfg_ipv6 || !cfg_ipv4) + if (run_test_case(&socket_types[s], t, '6', + strict)) + failures++; } } if (!failures) diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh new file mode 100755 index 000000000000..91631e88bf46 --- /dev/null +++ b/tools/testing/selftests/net/rxtimestamp.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./rxtimestamp $@ diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 011b0da6b033..490a8cca708a 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -64,6 +64,7 @@ static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; static int cfg_delay_snd; static int cfg_delay_ack; +static int cfg_delay_tolerance_usec = 500; static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_busy_poll; @@ -152,11 +153,12 @@ static void validate_key(int tskey, int tstype) static void validate_timestamp(struct timespec *cur, int min_delay) { - int max_delay = min_delay + 500 /* processing time upper bound */; int64_t cur64, start64; + int max_delay; cur64 = timespec_to_us64(cur); start64 = timespec_to_us64(&ts_usr); + max_delay = min_delay + cfg_delay_tolerance_usec; if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", @@ -683,6 +685,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -S N: usec to sleep before reading error queue\n" + " -t N: tolerance (usec) for timestamp validation\n" " -u: use udp\n" " -v: validate SND delay (usec)\n" " -V: validate ACK delay (usec)\n" @@ -697,7 +700,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -760,6 +763,9 @@ static void parse_opt(int argc, char **argv) case 'S': cfg_sleep_usec = strtoul(optarg, NULL, 10); break; + case 't': + cfg_delay_tolerance_usec = strtoul(optarg, NULL, 10); + break; case 'u': proto_count++; cfg_proto = SOCK_DGRAM; diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh new file mode 100755 index 000000000000..18b982d611de --- /dev/null +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -0,0 +1,396 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is designed for testing the new VRF strict_mode functionality. + +ret=0 + +# identifies the "init" network namespace which is often called root network +# namespace. +INIT_NETNS_NAME="init" + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +ip_expand_args() +{ + local nsname=$1 + local nsarg="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + nsarg="-netns ${nsname}" + fi + + echo "${nsarg}" +} + +vrf_count() +{ + local nsname=$1 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -o link show type vrf | wc -l +} + +count_vrf_by_table_id() +{ + local nsname=$1 + local tableid=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l +} + +add_vrf() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null +} + +add_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +add_vrf_and_check_fail() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +del_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link del ${vrfname} + log_test $? 0 "${nsname}: remove vrf ${vrfname}" +} + +config_vrf_and_check() +{ + local nsname=$1 + local addr=$2 + local vrfname=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link set dev ${vrfname} up && \ + ip ${nsarg} addr add ${addr} dev ${vrfname} + log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}" +} + +read_strict_mode() +{ + local nsname=$1 + local rval + local rc=0 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \ + grep -E "^[0-1]$")" &> /dev/null + if [ $? -ne 0 ]; then + # set errors + rval=255 + rc=1 + fi + + # on success, rval can be only 0 or 1; on error, rval is equal to 255 + echo ${rval} + return ${rc} +} + +read_strict_mode_compare_and_check() +{ + local nsname=$1 + local expected=$2 + local res + + res="$(read_strict_mode ${nsname})" + log_test ${res} ${expected} "${nsname}: check strict_mode=${res}" +} + +set_strict_mode() +{ + local nsname=$1 + local val=$2 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null +} + +enable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 1 +} + +disable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 0 +} + +disable_strict_mode_and_check() +{ + local nsname=$1 + + disable_strict_mode ${nsname} + log_test $? 0 "${nsname}: disable strict_mode (=0)" +} + +enable_strict_mode_and_check() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 0 "${nsname}: enable strict_mode (=1)" +} + +enable_strict_mode_and_check_fail() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 1 "${nsname}: CANNOT enable strict_mode" +} + +strict_mode_check_default() +{ + local nsname=$1 + local strictmode + local vrfcnt + + vrfcnt=$(vrf_count ${nsname}) + strictmode=$(read_strict_mode ${nsname}) + log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs" +} + +setup() +{ + modprobe vrf + + ip netns add testns + ip netns exec testns ip link set lo up +} + +cleanup() +{ + ip netns del testns 2>/dev/null + + ip link del vrf100 2>/dev/null + ip link del vrf101 2>/dev/null + ip link del vrf102 2>/dev/null + + echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null +} + +vrf_strict_mode_tests_init() +{ + vrf_strict_mode_check_support init + + strict_mode_check_default init + + add_vrf_and_check init vrf100 100 + config_vrf_and_check init 172.16.100.1/24 vrf100 + + enable_strict_mode_and_check init + + add_vrf_and_check_fail init vrf101 100 + + disable_strict_mode_and_check init + + add_vrf_and_check init vrf101 100 + config_vrf_and_check init 172.16.101.1/24 vrf101 + + enable_strict_mode_and_check_fail init + + del_vrf_and_check init vrf101 + + enable_strict_mode_and_check init + + add_vrf_and_check init vrf102 102 + config_vrf_and_check init 172.16.102.1/24 vrf102 + + # the strict_modle is enabled in the init +} + +vrf_strict_mode_tests_testns() +{ + vrf_strict_mode_check_support testns + + strict_mode_check_default testns + + enable_strict_mode_and_check testns + + add_vrf_and_check testns vrf100 100 + config_vrf_and_check testns 10.0.100.1/24 vrf100 + + add_vrf_and_check_fail testns vrf101 100 + + add_vrf_and_check_fail testns vrf102 100 + + add_vrf_and_check testns vrf200 200 + + disable_strict_mode_and_check testns + + add_vrf_and_check testns vrf101 100 + + add_vrf_and_check testns vrf102 100 + + #the strict_mode is disabled in the testns +} + +vrf_strict_mode_tests_mix() +{ + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 + + del_vrf_and_check testns vrf101 + + del_vrf_and_check testns vrf102 + + disable_strict_mode_and_check init + + enable_strict_mode_and_check testns + + enable_strict_mode_and_check init + enable_strict_mode_and_check init + + disable_strict_mode_and_check testns + disable_strict_mode_and_check testns + + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 +} + +vrf_strict_mode_tests() +{ + log_section "VRF strict_mode test on init network namespace" + vrf_strict_mode_tests_init + + log_section "VRF strict_mode test on testns network namespace" + vrf_strict_mode_tests_testns + + log_section "VRF strict_mode test mixing init and testns network namespaces" + vrf_strict_mode_tests_mix +} + +vrf_strict_mode_check_support() +{ + local nsname=$1 + local output + local rc + + output="$(lsmod | grep '^vrf' | awk '{print $1}')" + if [ -z "${output}" ]; then + modinfo vrf || return $? + fi + + # we do not care about the value of the strict_mode; we only check if + # the strict_mode parameter is available or not. + read_strict_mode ${nsname} &>/dev/null; rc=$? + log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available" + + return ${rc} +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 0 +fi + +cleanup &> /dev/null + +setup +vrf_strict_mode_tests +cleanup + +print_log_test_results + +exit $ret diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a179f0dca8ce..a374e10ef506 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,7 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh + nft_queue.sh nft_meta.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh new file mode 100755 index 000000000000..d250b84dd5bc --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# check iif/iifname/oifgroup/iiftype match. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +cleanup() +{ + ip netns del "$ns0" +} + +ip netns add "$ns0" +ip -net "$ns0" link set lo up +ip -net "$ns0" addr add 127.0.0.1 dev lo + +trap cleanup EXIT + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table inet filter { + counter iifcount {} + counter iifnamecount {} + counter iifgroupcount {} + counter iiftypecount {} + counter infproto4count {} + counter il4protocounter {} + counter imarkcounter {} + + counter oifcount {} + counter oifnamecount {} + counter oifgroupcount {} + counter oiftypecount {} + counter onfproto4count {} + counter ol4protocounter {} + counter oskuidcounter {} + counter oskgidcounter {} + counter omarkcounter {} + + chain input { + type filter hook input priority 0; policy accept; + + meta iif lo counter name "iifcount" + meta iifname "lo" counter name "iifnamecount" + meta iifgroup "default" counter name "iifgroupcount" + meta iiftype "loopback" counter name "iiftypecount" + meta nfproto ipv4 counter name "infproto4count" + meta l4proto icmp counter name "il4protocounter" + meta mark 42 counter name "imarkcounter" + } + + chain output { + type filter hook output priority 0; policy accept; + meta oif lo counter name "oifcount" counter + meta oifname "lo" counter name "oifnamecount" + meta oifgroup "default" counter name "oifgroupcount" + meta oiftype "loopback" counter name "oiftypecount" + meta nfproto ipv4 counter name "onfproto4count" + meta l4proto icmp counter name "ol4protocounter" + meta skuid 0 counter name "oskuidcounter" + meta skgid 0 counter name "oskgidcounter" + meta mark 42 counter name "omarkcounter" + } +} +EOF + +if [ $? -ne 0 ]; then + echo "SKIP: Could not add test ruleset" + exit $ksft_skip +fi + +ret=0 + +check_one_counter() +{ + local cname="$1" + local want="packets $2" + local verbose="$3" + + cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want") + if [ $? -ne 0 ];then + echo "FAIL: $cname, want \"$want\", got" + ret=1 + ip netns exec "$ns0" nft list counter inet filter $counter + fi +} + +check_lo_counters() +{ + local want="$1" + local verbose="$2" + local counter + + for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \ + oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \ + il4protocounter \ + ol4protocounter \ + ; do + check_one_counter "$counter" "$want" "$verbose" + done +} + +check_lo_counters "0" false +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null + +check_lo_counters "2" true + +check_one_counter oskuidcounter "1" true +check_one_counter oskgidcounter "1" true +check_one_counter imarkcounter "1" true +check_one_counter omarkcounter "1" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta iif/oif counters at expected values" +fi + +exit $ret diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c index 73d532556d17..7d84097ad45c 100644 --- a/tools/testing/selftests/pid_namespace/regression_enomem.c +++ b/tools/testing/selftests/pid_namespace/regression_enomem.c @@ -11,7 +11,6 @@ #include <syscall.h> #include <sys/wait.h> -#include "../kselftest.h" #include "../kselftest_harness.h" #include "../pidfd/pidfd.h" diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 8d728eda783d..a2c80914e3dc 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -22,6 +22,10 @@ #define P_PIDFD 3 #endif +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + #ifndef CLONE_PIDFD #define CLONE_PIDFD 0x00001000 #endif diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 84b65ecccb04..7758c98be015 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -18,7 +18,6 @@ #include <linux/kcmp.h> #include "pidfd.h" -#include "../kselftest.h" #include "../kselftest_harness.h" /* diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 9418108eae13..7dca1aa4672d 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -20,7 +20,6 @@ #include "pidfd.h" #include "../clone3/clone3_selftests.h" -#include "../kselftest.h" #include "../kselftest_harness.h" enum { @@ -32,6 +31,7 @@ enum { PIDFD_NS_NET, PIDFD_NS_CGROUP, PIDFD_NS_PIDCLD, + PIDFD_NS_TIME, PIDFD_NS_MAX }; @@ -47,6 +47,7 @@ const struct ns_info { [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, }; FIXTURE(current_nsset) @@ -83,9 +84,49 @@ pid_t create_child(int *pidfd, unsigned flags) return sys_clone3(&args, sizeof(struct clone_args)); } +static bool switch_timens(void) +{ + int fd, ret; + + if (unshare(CLONE_NEWTIME)) + return false; + + fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC); + if (fd < 0) + return false; + + ret = setns(fd, CLONE_NEWTIME); + close(fd); + return ret == 0; +} + +static ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + FIXTURE_SETUP(current_nsset) { int i, proc_fd, ret; + int ipc_sockets[2]; + char c; for (i = 0; i < PIDFD_NS_MAX; i++) { self->nsfds[i] = -EBADF; @@ -130,6 +171,9 @@ FIXTURE_SETUP(current_nsset) TH_LOG("%m - Failed to open pidfd for process %d", self->pid); } + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + /* Create tasks that will be stopped. */ self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWNS | @@ -139,10 +183,27 @@ FIXTURE_SETUP(current_nsset) EXPECT_GE(self->child_pid1, 0); if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (!switch_timens()) + _exit(EXIT_FAILURE); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + pause(); _exit(EXIT_SUCCESS); } + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWCGROUP | CLONE_NEWIPC | @@ -151,10 +212,24 @@ FIXTURE_SETUP(current_nsset) EXPECT_GE(self->child_pid2, 0); if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (!switch_timens()) + _exit(EXIT_FAILURE); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + pause(); _exit(EXIT_SUCCESS); } + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + for (i = 0; i < PIDFD_NS_MAX; i++) { char p[100]; diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 7aff2d3b42c0..c585aaa2acd8 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -8,6 +8,7 @@ #include <sched.h> #include <signal.h> #include <stdio.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <syscall.h> @@ -27,6 +28,8 @@ #define MAX_EVENTS 5 +static bool have_pidfd_send_signal; + static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) { size_t stack_size = 1024; @@ -56,6 +59,13 @@ static int test_pidfd_send_signal_simple_success(void) int pidfd, ret; const char *test_name = "pidfd_send_signal send SIGUSR1"; + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC); if (pidfd < 0) ksft_exit_fail_msg( @@ -86,6 +96,13 @@ static int test_pidfd_send_signal_exited_fail(void) pid_t pid; const char *test_name = "pidfd_send_signal signal exited process"; + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + pid = fork(); if (pid < 0) ksft_exit_fail_msg("%s test: Failed to create new process\n", @@ -137,16 +154,34 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) pid_t pid1; const char *test_name = "pidfd_send_signal signal recycled pid"; + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + ret = unshare(CLONE_NEWPID); - if (ret < 0) + if (ret < 0) { + if (errno == EPERM) { + ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n", + test_name); + return 0; + } ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n", test_name); + } ret = unshare(CLONE_NEWNS); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: Failed to unshare mount namespace\n", - test_name); + if (ret < 0) { + if (errno == EPERM) { + ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n", + test_name); + return 0; + } + ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n", + test_name); + } ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); if (ret < 0) @@ -325,15 +360,17 @@ static int test_pidfd_send_signal_syscall_support(void) ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0); if (ret < 0) { - if (errno == ENOSYS) - ksft_exit_skip( + if (errno == ENOSYS) { + ksft_test_result_skip( "%s test: pidfd_send_signal() syscall not supported\n", test_name); - + return 0; + } ksft_exit_fail_msg("%s test: Failed to send signal\n", test_name); } + have_pidfd_send_signal = true; close(pidfd); ksft_test_result_pass( "%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n", @@ -521,7 +558,7 @@ static void test_pidfd_poll_leader_exit(int use_waitpid) int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(4); + ksft_set_plan(8); test_pidfd_poll_exec(0); test_pidfd_poll_exec(1); diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index da7a9dda9490..f7911aaeb007 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -35,6 +35,8 @@ #define CLOCK_INVALID -1 #endif +#define NSEC_PER_SEC 1000000000LL + /* clock_adjtime is not available in GLIBC < 2.14 */ #if !__GLIBC_PREREQ(2, 14) #include <sys/syscall.h> @@ -132,6 +134,8 @@ static void usage(char *progname) " 1 - external time stamp\n" " 2 - periodic output\n" " -p val enable output with a period of 'val' nanoseconds\n" + " -H val set output phase to 'val' nanoseconds (requires -p)\n" + " -w val set output pulse width to 'val' nanoseconds (requires -p)\n" " -P val enable or disable (val=1|0) the system clock PPS\n" " -s set the ptp clock time from the system time\n" " -S set the system time from the ptp clock time\n" @@ -169,7 +173,6 @@ int main(int argc, char *argv[]) int list_pins = 0; int pct_offset = 0; int n_samples = 0; - int perout = -1; int pin_index = -1, pin_func; int pps = -1; int seconds = 0; @@ -177,10 +180,13 @@ int main(int argc, char *argv[]) int64_t t1, t2, tp; int64_t interval, offset; + int64_t perout_phase = -1; + int64_t pulsewidth = -1; + int64_t perout = -1; progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) { switch (c) { case 'c': capabilities = 1; @@ -197,6 +203,9 @@ int main(int argc, char *argv[]) case 'g': gettime = 1; break; + case 'H': + perout_phase = atoll(optarg); + break; case 'i': index = atoi(optarg); break; @@ -215,7 +224,7 @@ int main(int argc, char *argv[]) } break; case 'p': - perout = atoi(optarg); + perout = atoll(optarg); break; case 'P': pps = atoi(optarg); @@ -233,6 +242,9 @@ int main(int argc, char *argv[]) settime = 3; seconds = atoi(optarg); break; + case 'w': + pulsewidth = atoi(optarg); + break; case 'z': flagtest = 1; break; @@ -391,6 +403,16 @@ int main(int argc, char *argv[]) } } + if (pulsewidth >= 0 && perout < 0) { + puts("-w can only be specified together with -p"); + return -1; + } + + if (perout_phase >= 0 && perout < 0) { + puts("-H can only be specified together with -p"); + return -1; + } + if (perout >= 0) { if (clock_gettime(clkid, &ts)) { perror("clock_gettime"); @@ -398,11 +420,24 @@ int main(int argc, char *argv[]) } memset(&perout_request, 0, sizeof(perout_request)); perout_request.index = index; - perout_request.start.sec = ts.tv_sec + 2; - perout_request.start.nsec = 0; - perout_request.period.sec = 0; - perout_request.period.nsec = perout; - if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perout_request.period.sec = perout / NSEC_PER_SEC; + perout_request.period.nsec = perout % NSEC_PER_SEC; + perout_request.flags = 0; + if (pulsewidth >= 0) { + perout_request.flags |= PTP_PEROUT_DUTY_CYCLE; + perout_request.on.sec = pulsewidth / NSEC_PER_SEC; + perout_request.on.nsec = pulsewidth % NSEC_PER_SEC; + } + if (perout_phase >= 0) { + perout_request.flags |= PTP_PEROUT_PHASE; + perout_request.phase.sec = perout_phase / NSEC_PER_SEC; + perout_request.phase.nsec = perout_phase % NSEC_PER_SEC; + } else { + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + } + + if (ioctl(fd, PTP_PEROUT_REQUEST2, &perout_request)) { perror("PTP_PEROUT_REQUEST"); } else { puts("periodic output request okay"); diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 93e80a42249a..d6e5ce084b1c 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -32,11 +32,11 @@ if test -z "$TORTURE_TRUST_MAKE" then make clean > $resdir/Make.clean 2>&1 fi -make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1 +make $TORTURE_KMAKE_ARG $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1 mv .config .config.sav sh $T/upd.sh < .config.sav > .config cp .config .config.new -yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err +yes '' | make $TORTURE_KMAKE_ARG oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err # verify new config matches specification. configcheck.sh .config $c diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh new file mode 100755 index 000000000000..0e4c0b2eb7f0 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Scan standard input for error messages, dumping any found to standard +# output. +# +# Usage: console-badness.sh +# +# Copyright (C) 2020 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -v 'ODEBUG: ' | +grep -v 'This means that this is a DEBUG kernel and it is' | +grep -v 'Warning: unable to open an initial console' diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 12810229fddc..51f3464b96d3 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -215,9 +215,6 @@ identify_qemu_args () { then echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC echo -netdev bridge,br=br0,id=net0 - elif test -n "$TORTURE_QEMU_INTERACTIVE" - then - echo -net nic -net user fi ;; esac @@ -234,7 +231,7 @@ identify_qemu_args () { # Returns the number of virtual CPUs available to the aggregate of the # guest OSes. identify_qemu_vcpus () { - lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://' + lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://' -e 's/[ ]*//g' } # print_bug @@ -275,3 +272,21 @@ specify_qemu_cpus () { esac fi } + +# specify_qemu_net qemu-args +# +# Appends a string containing "-net none" to qemu-args, unless the incoming +# qemu-args already contains "-smp" or unless the TORTURE_QEMU_INTERACTIVE +# environment variable is set, in which case the string that is be added is +# instead "-net nic -net user". +specify_qemu_net () { + if echo $1 | grep -q -e -net + then + echo $1 + elif test -n "$TORTURE_QEMU_INTERACTIVE" + then + echo $1 -net nic -net user + else + echo $1 -net none + fi +} diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 30cb5b27d32e..188b864bc4bf 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -46,6 +46,12 @@ do exit 0; fi + # Check for stop request. + if test -f "$TORTURE_STOPFILE" + then + exit 1; + fi + # Set affinity to randomly selected online CPU if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 | sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'` diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 18d6518504ee..115e1822b26f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -9,6 +9,12 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> +if test -f "$TORTURE_STOPFILE" +then + echo "kvm-build.sh early exit due to run STOP request" + exit 1 +fi + config_template=${1} if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh new file mode 100755 index 000000000000..6e65c134e5f1 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -0,0 +1,108 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Run a group of kvm.sh tests on the specified commits. This currently +# unconditionally does three-minute runs on each scenario in CFLIST, +# taking advantage of all available CPUs and trusting the "make" utility. +# In the short term, adjustments can be made by editing this script and +# CFLIST. If some adjustments appear to have ongoing value, this script +# might grow some command-line arguments. +# +# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ... +# +# This script considers its arguments one at a time. If more elaborate +# specification of commits is needed, please use "git rev-list" to +# produce something that this simple script can understand. The reason +# for retaining the simplicity is that it allows the user to more easily +# see which commit came from which branch. +# +# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res" +# directory. The calls to kvm.sh create the usual entries, but this script +# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own +# directory numbered in run order, that is, "0001", "0002", and so on. +# For successful runs, the large build artifacts are removed. Doing this +# reduces the disk space required by about two orders of magnitude for +# successful runs. +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +if ! git status > /dev/null 2>&1 +then + echo '!!!' This script needs to run in a git archive. 1>&2 + echo '!!!' Giving up. 1>&2 + exit 1 +fi + +# Remember where we started so that we can get back and the end. +curcommit="`git status | head -1 | awk '{ print $NF }'`" + +nfail=0 +ntry=0 +resdir="tools/testing/selftests/rcutorture/res" +ds="`date +%Y.%m.%d-%H.%M.%S`-group" +if ! test -e $resdir +then + mkdir $resdir || : +fi +mkdir $resdir/$ds +echo Results directory: $resdir/$ds + +KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM +PATH=${KVM}/bin:$PATH; export PATH +. functions.sh +cpus="`identify_qemu_vcpus`" +echo Using up to $cpus CPUs. + +# Each pass through this loop does one command-line argument. +for gitbr in $@ +do + echo ' --- git branch ' $gitbr + + # Each pass through this loop tests one commit. + for i in `git rev-list "$gitbr"` + do + ntry=`expr $ntry + 1` + idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null` + echo ' --- commit ' $i from branch $gitbr + date + mkdir $resdir/$ds/$idir + echo $gitbr > $resdir/$ds/$idir/gitbr + echo $i >> $resdir/$ds/$idir/gitbr + + # Test the specified commit. + git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1 + echo git checkout return code: $? "(Commit $ntry: $i)" + kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1 + ret=$? + echo kvm.sh return code $ret for commit $i from branch $gitbr + + # Move the build products to their resting place. + runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`" + mv $runresdir $resdir/$ds/$idir + rrd="`echo $runresdir | sed -e 's,^.*/,,'`" + echo Run results: $resdir/$ds/$idir/$rrd + if test "$ret" -ne 0 + then + # Failure, so leave all evidence intact. + nfail=`expr $nfail + 1` + else + # Success, so remove large files to save about 1GB. + ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers ) + fi + done +done +date + +# Go back to the original commit. +git checkout "$curcommit" + +if test $nfail -ne 0 +then + echo '!!! ' $nfail failures in $ntry 'runs!!!' + exit 1 +else + echo No failures in $ntry runs. + exit 0 +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh new file mode 100755 index 000000000000..35a463dddffe --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for refscale performance measurements. +# +# Usage: kvm-recheck-refscale.sh resdir +# +# Copyright (C) IBM Corporation, 2016 +# +# Authors: Paul E. McKenney <paulmck@linux.ibm.com> + +i="$1" +if test -d "$i" -a -r "$i" +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH +. functions.sh + +configfile=`echo $i | sed -e 's/^.*\///'` + +sed -e 's/^\[[^]]*]//' < $i/console.log | tr -d '\015' | +awk -v configfile="$configfile" ' +/^[ ]*Runs Time\(ns\) *$/ { + if (dataphase + 0 == 0) { + dataphase = 1; + # print configfile, $0; + } + next; +} + +/[^ ]*[0-9][0-9]* [0-9][0-9]*\.[0-9][0-9]*$/ { + if (dataphase == 1) { + # print $0; + readertimes[++n] = $2; + sum += $2; + } + next; +} + +{ + if (dataphase == 1) + dataphase == 2; + next; +} + +END { + print configfile " results:"; + newNR = asort(readertimes); + if (newNR <= 0) { + print "No refscale records found???" + exit; + } + medianidx = int(newNR / 2); + if (newNR == medianidx * 2) + medianvalue = (readertimes[medianidx - 1] + readertimes[medianidx]) / 2; + else + medianvalue = readertimes[medianidx]; + points = "Points:"; + for (i = 1; i <= newNR; i++) + points = points " " readertimes[i]; + print points; + print "Average reader duration: " sum / newNR " nanoseconds"; + print "Minimum reader duration: " readertimes[1]; + print "Median reader duration: " medianvalue; + print "Maximum reader duration: " readertimes[newNR]; + print "Computed from refscale printk output."; +}' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 736f04749b90..840a4679a0d7 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -31,6 +31,7 @@ do head -1 $resdir/log fi TORTURE_SUITE="`cat $i/../TORTURE_SUITE`" + configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags kvm-recheck-${TORTURE_SUITE}.sh $i if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137 @@ -43,7 +44,8 @@ do then echo QEMU killed fi - configcheck.sh $i/.config $i/ConfigFragment + configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1 + cat $T if test -r $i/Make.oldconfig.err then cat $i/Make.oldconfig.err @@ -55,15 +57,15 @@ do cat $i/Warnings fi else - if test -f "$i/qemu-cmd" - then - print_bug qemu failed - echo " $i" - elif test -f "$i/buildonly" + if test -f "$i/buildonly" then echo Build-only run, no boot/test configcheck.sh $i/.config $i/ConfigFragment parse-build.sh $i/Make.out $configfile + elif test -f "$i/qemu-cmd" + then + print_bug qemu failed + echo " $i" else print_bug Build failed echo " $i" @@ -72,7 +74,11 @@ do done if test -f "$rd/kcsan.sum" then - if test -s "$rd/kcsan.sum" + if grep -q CONFIG_KCSAN=y $T + then + echo "Compiler or architecture does not support KCSAN!" + echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'? + elif test -s "$rd/kcsan.sum" then echo KCSAN summary in $rd/kcsan.sum else diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 6ff611c630d1..e07779a62634 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -124,7 +124,6 @@ seconds=$4 qemu_args=$5 boot_args=$6 -cd $KVM kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` if test -z "$TORTURE_BUILDONLY" then @@ -141,6 +140,7 @@ then cpu_count=$TORTURE_ALLOTED_CPUS fi qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" +qemu_args="`specify_qemu_net "$qemu_args"`" # Generate architecture-specific and interaction-specific qemu arguments qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`" @@ -152,6 +152,7 @@ qemu_append="`identify_qemu_append "$QEMU"`" boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" +echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -159,9 +160,16 @@ then touch $resdir/buildonly exit 0 fi + +# Decorate qemu-cmd with redirection, backgrounding, and PID capture +sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd +echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd + +# In case qemu refuses to run... echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log -echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd -( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & + +# Attempt to run qemu +( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 sleep 10 # Give qemu's pid a chance to reach the file if test -s "$resdir/qemu_pid" @@ -181,7 +189,7 @@ do kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 then - if test $kruntime -ge $seconds + if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE" then break; fi @@ -210,10 +218,19 @@ then fi if test $commandcompleted -eq 0 -a -n "$qemu_pid" then - echo Grace period for qemu job at pid $qemu_pid + if ! test -f "$TORTURE_STOPFILE" + then + echo Grace period for qemu job at pid $qemu_pid + fi oldline="`tail $resdir/console.log`" while : do + if test -f "$TORTURE_STOPFILE" + then + echo "PID $qemu_pid killed due to run STOP request" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + break + fi kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if kill -0 $qemu_pid > /dev/null 2>&1 then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh new file mode 100755 index 000000000000..c45a953ef393 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Transform a qemu-cmd file to allow reuse. +# +# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out +# +# bzImage: Kernel and initrd from the same prior kvm.sh run. +# console.log: File into which to place console output. +# +# The original qemu-cmd file is provided on standard input. +# The transformed qemu-cmd file is on standard output. +# The transformation assumes that the qemu command is confined to a +# single line. It also assumes no whitespace in filenames. +# +# Copyright (C) 2020 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +image="$1" +if test -z "$image" +then + echo Need kernel image file. + exit 1 +fi +consolelog="$2" +if test -z "$consolelog" +then + echo "Need console log file name." + exit 1 +fi + +awk -v image="$image" -v consolelog="$consolelog" ' +{ + line = ""; + for (i = 1; i <= NF; i++) { + if (line == "") + line = $i; + else + line = line " " $i; + if ($i == "-serial") { + i++; + line = line " file:" consolelog; + } + if ($i == "-kernel") { + i++; + line = line " " image; + } + } + print line; +}' diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index c279cf9cb010..e655983b7429 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -73,6 +73,10 @@ usage () { while test $# -gt 0 do case "$1" in + --allcpus) + cpus=$TORTURE_ALLOTED_CPUS + max_cpus=$TORTURE_ALLOTED_CPUS + ;; --bootargs|--bootarg) checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' TORTURE_BOOTARGS="$2" @@ -180,13 +184,14 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--' TORTURE_SUITE=$2 shift - if test "$TORTURE_SUITE" = rcuperf + if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale then - # If you really want jitter for rcuperf, specify - # it after specifying rcuperf. (But why?) + # If you really want jitter for refscale or + # rcuperf, specify it after specifying the rcuperf + # or the refscale. (But why jitter in these cases?) jitter=0 fi ;; @@ -333,6 +338,8 @@ then mkdir -p "$resdir" || : fi mkdir $resdir/$ds +TORTURE_RESDIR="$resdir/$ds"; export TORTURE_RESDIR +TORTURE_STOPFILE="$resdir/$ds/STOP"; export TORTURE_STOPFILE echo Results directory: $resdir/$ds echo $scriptname $args touch $resdir/$ds/log @@ -497,3 +504,7 @@ fi # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier # Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop # Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y" +# Control buffer size: --bootargs trace_buf_size=3k +# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops +# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu +# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 4bf62d7b1cbc..71a9f43a3918 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,8 +33,8 @@ then fi cat /dev/null > $file.diags -# Check for proper termination, except that rcuperf runs don't indicate this. -if test "$TORTURE_SUITE" != rcuperf +# Check for proper termination, except for rcuperf and refscale. +if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale then # check for abject failure @@ -44,11 +44,23 @@ then tail -1 | awk ' { - for (i=NF-8;i<=NF;i++) + normalexit = 1; + for (i=NF-8;i<=NF;i++) { + if (i <= 0 || i !~ /^[0-9]*$/) { + bangstring = $0; + gsub(/^\[[^]]*] /, "", bangstring); + print bangstring; + normalexit = 0; + exit 0; + } sum+=$i; + } } - END { print sum }'` - print_bug $title FAILURE, $nerrs instances + END { + if (normalexit) + print sum " instances" + }'` + print_bug $title FAILURE, $nerrs exit fi @@ -104,10 +116,7 @@ then fi fi | tee -a $file.diags -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | -grep -v 'ODEBUG: ' | -grep -v 'This means that this is a DEBUG kernel and it is' | -grep -v 'Warning: unable to open an initial console' > $T.diags +console-badness.sh < $file > $T.diags if test -s $T.diags then print_warning "Assertion failure in $file $title" diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFLIST b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST new file mode 100644 index 000000000000..4d62eb4a39f9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST @@ -0,0 +1,2 @@ +NOPREEMPT +PREEMPT diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon new file mode 100644 index 000000000000..a98b58b54bb1 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_REF_SCALE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT new file mode 100644 index 000000000000..1cd25b7314e3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -0,0 +1,18 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_PREEMPT_RCU=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT new file mode 100644 index 000000000000..d10bc694f42c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT @@ -0,0 +1,18 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh new file mode 100644 index 000000000000..321e82641287 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# Copyright (C) IBM Corporation, 2015 +# +# Authors: Paul E. McKenney <paulmck@linux.ibm.com> + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 refscale.shutdown=1 \ + refscale.verbose=1 +} diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config index db1e11b08c8a..64c19d8eba79 100644 --- a/tools/testing/selftests/seccomp/config +++ b/tools/testing/selftests/seccomp/config @@ -1,2 +1,3 @@ CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5838c8697ec3..91f5a89cadac 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -18,9 +18,9 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) { - pid_t pid, ret; - unsigned long long i; struct timespec start, finish; + unsigned long long i; + pid_t pid, ret; pid = getpid(); assert(clock_gettime(clk_id, &start) == 0); @@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) assert(clock_gettime(clk_id, &finish) == 0); i = finish.tv_sec - start.tv_sec; - i *= 1000000000; + i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu\n", + printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", finish.tv_sec, finish.tv_nsec, start.tv_sec, start.tv_nsec, - i); + i, (double)i / 1000000000.0); return i; } unsigned long long calibrate(void) { - unsigned long long i; - - printf("Calibrating reasonable sample size...\n"); + struct timespec start, finish; + unsigned long long i, samples, step = 9973; + pid_t pid, ret; + int seconds = 15; - for (i = 5; ; i++) { - unsigned long long samples = 1 << i; + printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); - /* Find something that takes more than 5 seconds to run. */ - if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) - return samples; - } + samples = 0; + pid = getpid(); + assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0); + do { + for (i = 0; i < step; i++) { + ret = syscall(__NR_getpid); + assert(pid == ret); + } + assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0); + + samples += step; + i = finish.tv_sec - start.tv_sec; + i *= 1000000000ULL; + i += finish.tv_nsec - start.tv_nsec; + } while (i < 1000000000ULL); + + return samples * seconds; } int main(int argc, char *argv[]) @@ -68,32 +81,55 @@ int main(int argc, char *argv[]) }; long ret; unsigned long long samples; - unsigned long long native, filtered; + unsigned long long native, filter1, filter2; + + printf("Current BPF sysctl settings:\n"); + system("sysctl net.core.bpf_jit_enable"); + system("sysctl net.core.bpf_jit_harden"); if (argc > 1) samples = strtoull(argv[1], NULL, 0); else samples = calibrate(); - printf("Benchmarking %llu samples...\n", samples); + printf("Benchmarking %llu syscalls...\n", samples); + /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; printf("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); + /* One filter */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); - filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW: %llu ns\n", filtered); + filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1); + + if (filter1 == native) + printf("No overhead measured!? Try running again with more samples.\n"); + + /* Two filters */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + assert(ret == 0); + + filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2); + + /* Calculations */ + printf("Estimated total seccomp overhead for 1 filter: %llu ns\n", + filter1 - native); + + printf("Estimated total seccomp overhead for 2 filters: %llu ns\n", + filter2 - native); - printf("Estimated seccomp overhead per syscall: %llu ns\n", - filtered - native); + printf("Estimated seccomp per-filter overhead: %llu ns\n", + filter2 - filter1); - if (filtered == native) - printf("Trying running again with more samples.\n"); + printf("Estimated seccomp entry overhead: %llu ns\n", + filter1 - native - (filter2 - filter1)); return 0; } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 252140a52553..5267b9fb7c0f 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -45,12 +45,19 @@ #include <sys/socket.h> #include <sys/ioctl.h> #include <linux/kcmp.h> +#include <sys/resource.h> #include <unistd.h> #include <sys/syscall.h> #include <poll.h> #include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 @@ -167,7 +174,9 @@ struct seccomp_metadata { #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#endif +#ifndef SECCOMP_RET_USER_NOTIF #define SECCOMP_RET_USER_NOTIF 0x7fc00000U #define SECCOMP_IOC_MAGIC '!' @@ -180,7 +189,7 @@ struct seccomp_metadata { #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) -#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) struct seccomp_notif { __u64 id; @@ -203,6 +212,39 @@ struct seccomp_notif_sizes { }; #endif +#ifndef SECCOMP_IOCTL_NOTIF_ADDFD +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; +#endif + +struct seccomp_notif_addfd_small { + __u64 id; + char weird[4]; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ + SECCOMP_IOW(3, struct seccomp_notif_addfd_small) + +struct seccomp_notif_addfd_big { + union { + struct seccomp_notif_addfd addfd; + char buf[sizeof(struct seccomp_notif_addfd) + 8]; + }; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ + SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) + #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 @@ -236,6 +278,40 @@ int seccomp(unsigned int op, unsigned int flags, void *args) #define SIBLING_EXIT_FAILURE 0xbadface #define SIBLING_EXIT_NEWPRIVS 0xbadfeed +static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) +{ +#ifdef __NR_kcmp + errno = 0; + return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); +#else + errno = ENOSYS; + return -1; +#endif +} + +/* Have TH_LOG report actual location filecmp() is used. */ +#define filecmp(pid1, pid2, fd1, fd2) ({ \ + int _ret; \ + \ + _ret = __filecmp(pid1, pid2, fd1, fd2); \ + if (_ret != 0) { \ + if (_ret < 0 && errno == ENOSYS) { \ + TH_LOG("kcmp() syscall missing (test is less accurate)");\ + _ret = 0; \ + } \ + } \ + _ret; }) + +TEST(kcmp) +{ + int ret; + + ret = __filecmp(getpid(), getpid(), 1, 1); + EXPECT_EQ(ret, 0); + if (ret != 0 && errno == ENOSYS) + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); +} + TEST(mode_strict_support) { long ret; @@ -1470,6 +1546,7 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata, return tracer_pid; } + void teardown_trace_fixture(struct __test_metadata *_metadata, pid_t tracer) { @@ -1750,7 +1827,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret); } -void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, +void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret; @@ -1827,6 +1904,24 @@ FIXTURE(TRACE_syscall) { pid_t tracer, mytid, mypid, parent; }; +FIXTURE_VARIANT(TRACE_syscall) { + /* + * All of the SECCOMP_RET_TRACE behaviors can be tested with either + * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. + * This indicates if we should use SECCOMP_RET_TRACE (false), or + * ptrace (true). + */ + bool use_ptrace; +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { + .use_ptrace = true, +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { + .use_ptrace = false, +}; + FIXTURE_SETUP(TRACE_syscall) { struct sock_filter filter[] = { @@ -1842,12 +1937,11 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; - - memset(&self->prog, 0, sizeof(self->prog)); - self->prog.filter = malloc(sizeof(filter)); - ASSERT_NE(NULL, self->prog.filter); - memcpy(self->prog.filter, filter, sizeof(filter)); - self->prog.len = (unsigned short)ARRAY_SIZE(filter); + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; /* Prepare some testable syscall results. */ self->mytid = syscall(__NR_gettid); @@ -1865,60 +1959,48 @@ FIXTURE_SETUP(TRACE_syscall) ASSERT_NE(self->parent, self->mypid); /* Launch tracer. */ - self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, - false); -} + self->tracer = setup_trace_fixture(_metadata, + variant->use_ptrace ? tracer_ptrace + : tracer_seccomp, + NULL, variant->use_ptrace); -FIXTURE_TEARDOWN(TRACE_syscall) -{ - teardown_trace_fixture(_metadata, self->tracer); - if (self->prog.filter) - free(self->prog.filter); -} + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); -TEST_F(TRACE_syscall, ptrace_syscall_redirected) -{ - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); + if (variant->use_ptrace) + return; - /* Tracer will redirect getpid to getppid. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); } -TEST_F(TRACE_syscall, ptrace_syscall_errno) +FIXTURE_TEARDOWN(TRACE_syscall) { - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer should skip the open syscall, resulting in ESRCH. */ - EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); } -TEST_F(TRACE_syscall, ptrace_syscall_faked) +TEST(negative_ENOSYS) { - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); + /* + * There should be no difference between an "internal" skip + * and userspace asking for syscall "-1". + */ + errno = 0; + EXPECT_EQ(-1, syscall(-1)); + EXPECT_EQ(errno, ENOSYS); + /* And no difference for "still not valid but not -1". */ + errno = 0; + EXPECT_EQ(-1, syscall(-101)); + EXPECT_EQ(errno, ENOSYS); +} - /* Tracer should skip the gettid syscall, resulting fake pid. */ - EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); +TEST_F(TRACE_syscall, negative_ENOSYS) +{ + negative_ENOSYS(_metadata); } TEST_F(TRACE_syscall, syscall_allowed) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getppid works as expected (no changes). */ EXPECT_EQ(self->parent, syscall(__NR_getppid)); EXPECT_NE(self->mypid, syscall(__NR_getppid)); @@ -1926,14 +2008,6 @@ TEST_F(TRACE_syscall, syscall_allowed) TEST_F(TRACE_syscall, syscall_redirected) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getpid has been redirected to getppid as expected. */ EXPECT_EQ(self->parent, syscall(__NR_getpid)); EXPECT_NE(self->mypid, syscall(__NR_getpid)); @@ -1941,33 +2015,17 @@ TEST_F(TRACE_syscall, syscall_redirected) TEST_F(TRACE_syscall, syscall_errno) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* openat has been skipped and an errno return. */ + /* Tracer should skip the open syscall, resulting in ESRCH. */ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_faked) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* gettid has been skipped and an altered return value stored. */ + /* Tracer skips the gettid syscall and store altered return value. */ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } -TEST_F(TRACE_syscall, skip_after_RET_TRACE) +TEST_F(TRACE_syscall, skip_after) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -1982,14 +2040,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) }; long ret; - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ + /* Install additional "errno on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); @@ -1999,69 +2050,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) EXPECT_EQ(EPERM, errno); } -TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should die. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); -} - -TEST_F(TRACE_syscall, skip_after_ptrace) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should see EPERM. */ - EXPECT_EQ(-1, syscall(__NR_getpid)); - EXPECT_EQ(EPERM, errno); -} - -TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) +TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -2076,15 +2065,7 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) }; long ret; - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ + /* Install additional "death on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); @@ -3069,7 +3050,7 @@ TEST(get_metadata) /* Only real root can get metadata. */ if (geteuid()) { - XFAIL(return, "get_metadata requires real root"); + SKIP(return, "get_metadata requires real root"); return; } @@ -3112,7 +3093,7 @@ TEST(get_metadata) ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); EXPECT_EQ(sizeof(md), ret) { if (errno == EINVAL) - XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); + SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); } EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); @@ -3128,7 +3109,7 @@ skip: ASSERT_EQ(0, kill(pid, SIGKILL)); } -static int user_trap_syscall(int nr, unsigned int flags) +static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, @@ -3174,7 +3155,7 @@ TEST(user_notification_basic) /* Check that we get -ENOSYS with no listener attached */ if (pid == 0) { - if (user_trap_syscall(__NR_getppid, 0) < 0) + if (user_notif_syscall(__NR_getppid, 0) < 0) exit(1); ret = syscall(__NR_getppid); exit(ret >= 0 || errno != ENOSYS); @@ -3191,13 +3172,13 @@ TEST(user_notification_basic) EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); /* Check that the basic notification machinery works */ - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* Installing a second listener in the chain should EBUSY */ - EXPECT_EQ(user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER), + EXPECT_EQ(user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER), -1); EXPECT_EQ(errno, EBUSY); @@ -3258,15 +3239,20 @@ TEST(user_notification_with_tsync) int ret; unsigned int flags; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + /* these were exclusive */ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | SECCOMP_FILTER_FLAG_TSYNC; - ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags)); + ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); ASSERT_EQ(EINVAL, errno); /* but now they're not */ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; - ret = user_trap_syscall(__NR_getppid, flags); + ret = user_notif_syscall(__NR_getppid, flags); close(ret); ASSERT_LE(0, ret); } @@ -3284,8 +3270,8 @@ TEST(user_notification_kill_in_middle) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* @@ -3338,8 +3324,8 @@ TEST(user_notification_signal) ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); - listener = user_trap_syscall(__NR_gettid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_gettid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3408,8 +3394,8 @@ TEST(user_notification_closed_listener) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* @@ -3440,10 +3426,13 @@ TEST(user_notification_child_pid_ns) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { + if (errno == EINVAL) + SKIP(return, "kernel missing CLONE_NEWUSER support"); + }; - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3482,8 +3471,8 @@ TEST(user_notification_sibling_pid_ns) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3505,7 +3494,10 @@ TEST(user_notification_sibling_pid_ns) } /* Create the sibling ns, and sibling in it. */ - ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWPID), 0) { + if (errno == EPERM) + SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + } ASSERT_EQ(errno, 0); pid2 = fork(); @@ -3547,8 +3539,8 @@ TEST(user_notification_fault_recv) ASSERT_EQ(unshare(CLONE_NEWUSER), 0); - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3585,16 +3577,6 @@ TEST(seccomp_get_notif_sizes) EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); } -static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) -{ -#ifdef __NR_kcmp - return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); -#else - errno = ENOSYS; - return -1; -#endif -} - TEST(user_notification_continue) { pid_t pid; @@ -3609,7 +3591,7 @@ TEST(user_notification_continue) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3619,20 +3601,14 @@ TEST(user_notification_continue) int dup_fd, pipe_fds[2]; pid_t self; - ret = pipe(pipe_fds); - if (ret < 0) - exit(1); + ASSERT_GE(pipe(pipe_fds), 0); dup_fd = dup(pipe_fds[0]); - if (dup_fd < 0) - exit(1); + ASSERT_GE(dup_fd, 0); + EXPECT_NE(pipe_fds[0], dup_fd); self = getpid(); - - ret = filecmp(self, self, pipe_fds[0], dup_fd); - if (ret) - exit(2); - + ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); exit(0); } @@ -3673,7 +3649,7 @@ TEST(user_notification_continue) resp.val = 0; EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { if (errno == EINVAL) - XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); + SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); } skip: @@ -3681,15 +3657,342 @@ skip: EXPECT_EQ(true, WIFEXITED(status)); EXPECT_EQ(0, WEXITSTATUS(status)) { if (WEXITSTATUS(status) == 2) { - XFAIL(return, "Kernel does not support kcmp() syscall"); + SKIP(return, "Kernel does not support kcmp() syscall"); return; } } } +TEST(user_notification_filter_empty) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int listener; + + listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + _exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + +static void *do_thread(void *data) +{ + return NULL; +} + +TEST(user_notification_filter_empty_threaded) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t pid1, pid2; + int listener, status; + pthread_t thread; + + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + pid1 = fork(); + if (pid1 < 0) + _exit(EXIT_FAILURE); + + if (pid1 == 0) + _exit(EXIT_SUCCESS); + + pid2 = fork(); + if (pid2 < 0) + _exit(EXIT_FAILURE); + + if (pid2 == 0) + _exit(EXIT_SUCCESS); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + +TEST(user_notification_addfd) +{ + pid_t pid; + long ret; + int status, listener, memfd, fd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif_addfd_small small = {}; + struct seccomp_notif_addfd_big big = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) + exit(1); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + } + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + addfd.srcfd = memfd; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0x0; + + /* Verify bad newfd_flags cannot be set */ + addfd.newfd_flags = ~O_CLOEXEC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd_flags = O_CLOEXEC; + + /* Verify bad flags cannot be set */ + addfd.flags = 0xff; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.flags = 0; + + /* Verify that remote_fd cannot be set without setting flags */ + addfd.newfd = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd = 0; + + /* Verify small size cannot be set */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); + EXPECT_EQ(errno, EINVAL); + + /* Verify we can't send bits filled in unknown buffer area */ + memset(&big, 0xAA, sizeof(big)); + big.addfd = addfd; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); + EXPECT_EQ(errno, E2BIG); + + + /* Verify we can set an arbitrary remote fd */ + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + /* + * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), + * 4(listener), so the newly allocated fd should be 5. + */ + EXPECT_EQ(fd, 5); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Verify we can set an arbitrary remote fd with large size */ + memset(&big, 0x0, sizeof(big)); + big.addfd = addfd; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); + EXPECT_EQ(fd, 6); + + /* Verify we can set a specific remote fd */ + addfd.newfd = 42; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + EXPECT_EQ(fd, 42); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Resume syscall */ + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* + * This sets the ID of the ADD FD to the last request plus 1. The + * notification ID increments 1 per notification. + */ + addfd.id = req.id + 1; + + /* This spins until the underlying notification is generated */ + while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && + errno != -EINPROGRESS) + nanosleep(&delay, NULL); + + memset(&req, 0, sizeof(req)); + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + ASSERT_EQ(addfd.id, req.id); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + +TEST(user_notification_addfd_rlimit) +{ + pid_t pid; + long ret; + int status, listener, memfd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + const struct rlimit lim = { + .rlim_cur = 0, + .rlim_max = 0, + }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); + + addfd.srcfd = memfd; + addfd.newfd_flags = O_CLOEXEC; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0; + + /* Should probably spot check /proc/sys/fs/file-nr */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + + addfd.newfd = 100; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EBADF); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + /* * TODO: - * - add microbenchmarks * - expand NNP testing * - better arch-specific TRACE and TRAP handlers. * - endianness checking when appropriate @@ -3697,7 +4000,6 @@ skip: * - arch value testing (x86 modes especially) * - verify that FILTER_FLAG_LOG filters generate log messages * - verify that RET_LOG generates log messages - * - ... */ TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings new file mode 100644 index 000000000000..ba4d85f74cd6 --- /dev/null +++ b/tools/testing/selftests/seccomp/settings @@ -0,0 +1 @@ +timeout=90 diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index ad0f8df2ca0a..8934a3766d20 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -71,7 +71,7 @@ void my_usr1(int sig, siginfo_t *si, void *u) swapcontext(&sc, &uc); ksft_print_msg("%s\n", p->msg); if (!p->flag) { - ksft_exit_skip("[RUN]\tAborting\n"); + ksft_exit_fail_msg("[RUN]\tAborting\n"); exit(EXIT_FAILURE); } } @@ -144,7 +144,7 @@ int main(void) err = sigaltstack(&stk, NULL); if (err) { if (errno == EINVAL) { - ksft_exit_skip( + ksft_test_result_skip( "[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); /* * If test cases for the !SS_AUTODISARM variant were diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 3824b66f41a0..414a617db993 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -86,9 +86,9 @@ int main(void) int err; ksft_print_header(); - ksft_set_plan(3 + 7); sync_api_supported(); + ksft_set_plan(3 + 7); ksft_print_msg("[RUN]\tTesting sync framework\n"); diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/Makefile index be5a5e542804..91fee5c43274 100644 --- a/tools/testing/selftests/tc-testing/bpf/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -APIDIR := ../../../../include/uapi +top_srcdir = $(abspath ../../../..) +APIDIR := $(top_scrdir)/include/uapi TEST_GEN_FILES = action.o -top_srcdir = ../../../../.. KSFT_KHDR_INSTALL := 1 -include ../../lib.mk +include ../lib.mk CLANG ?= clang LLC ?= llc @@ -28,3 +28,6 @@ $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ + +TEST_PROGS += ./tdc.sh +TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/action.c index c32b99b80e19..c32b99b80e19 100644 --- a/tools/testing/selftests/tc-testing/bpf/action.c +++ b/tools/testing/selftests/tc-testing/action.c diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh new file mode 100755 index 000000000000..7fe38c76db44 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./tdc.py -c actions --nobuildebpf +./tdc.py -c qdisc diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index 080709cc4297..cd4a27ee1466 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -24,7 +24,7 @@ NAMES = { # Name of the namespace to use 'NS': 'tcut', # Directory containing eBPF test programs - 'EBPFDIR': './bpf' + 'EBPFDIR': './' } diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c index f83391aa42cf..5cebfb356345 100644 --- a/tools/testing/selftests/uevent/uevent_filtering.c +++ b/tools/testing/selftests/uevent/uevent_filtering.c @@ -19,7 +19,6 @@ #include <sys/wait.h> #include <unistd.h> -#include "../kselftest.h" #include "../kselftest_harness.h" #define __DEV_FULL "/sys/devices/virtual/mem/full/uevent" diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 79db22604019..91d38a29956b 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -881,8 +881,9 @@ TEST_F(hmm, migrate) } /* - * Migrate anonymous memory to device private memory and fault it back to system - * memory. + * Migrate anonymous memory to device private memory and fault some of it back + * to system memory, then try migrating the resulting mix of system and device + * private memory to the device. */ TEST_F(hmm, migrate_fault) { @@ -924,8 +925,17 @@ TEST_F(hmm, migrate_fault) for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); - /* Fault pages back to system memory and check them. */ - for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + /* Fault half the pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) + ASSERT_EQ(ptr[i], i); + + /* Migrate memory to the device again. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); hmm_buffer_free(buffer); @@ -1292,6 +1302,82 @@ TEST_F(hmm2, snapshot) } /* + * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that + * should be mapped by a large page table entry. + */ +TEST_F(hmm, compound) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int *ptr; + unsigned char *m; + int ret; + long pagesizes[4]; + int n, idx; + unsigned long i; + + /* Skip test if we can't allocate a hugetlbfs page. */ + + n = gethugepagesizes(pagesizes, 4); + if (n <= 0) + return; + for (idx = 0; --n > 0; ) { + if (pagesizes[n] < pagesizes[idx]) + idx = n; + } + size = ALIGN(TWOMEG, pagesizes[idx]); + npages = size >> self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->ptr = get_hugepage_region(size, GHR_STRICT); + if (buffer->ptr == NULL) { + free(buffer); + return; + } + + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + /* Initialize the pages the device will snapshot in buffer->ptr. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + for (i = 0; i < npages; ++i) + ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE | + HMM_DMIRROR_PROT_PMD); + + /* Make the region read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + for (i = 0; i < npages; ++i) + ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | + HMM_DMIRROR_PROT_PMD); + + free_hugepage_region(buffer->ptr); + buffer->ptr = NULL; + hmm_buffer_free(buffer); +} + +/* * Test two devices reading the same memory (double mapped). */ TEST_F(hmm2, double_map) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index fc19addcb5c8..fdbb602ecf32 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt) + * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) * * There are examples in here of: * * how to set protection keys on memory diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d2796ea98c5a..6703c7906b71 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vdso test_vsyscall mov_ss_trap \ - syscall_arg_fault + syscall_arg_fault fsgsbase_restore TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 15a329da59fa..998319553523 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -285,7 +285,8 @@ static unsigned short load_gs(void) /* 32-bit set_thread_area */ long ret; asm volatile ("int $0x80" - : "=a" (ret) : "a" (243), "b" (low_desc) + : "=a" (ret), "+m" (*low_desc) + : "a" (243), "b" (low_desc) : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); @@ -489,11 +490,28 @@ static void test_ptrace_write_gsbase(void) * selector value is changed or not by the GSBASE write in * a ptracer. */ - if (gs == 0 && base == 0xFF) { - printf("[OK]\tGS was reset as expected\n"); - } else { + if (gs != *shared_scratch) { nerrs++; - printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base); + printf("[FAIL]\tGS changed to %lx\n", gs); + + /* + * On older kernels, poking a nonzero value into the + * base would zero the selector. On newer kernels, + * this behavior has changed -- poking the base + * changes only the base and, if FSGSBASE is not + * available, this may have no effect once the tracee + * is resumed. + */ + if (gs == 0) + printf("\tNote: this is expected behavior on older kernels.\n"); + } else if (have_fsgsbase && (base != 0xFF)) { + nerrs++; + printf("[FAIL]\tGSBASE changed to %lx\n", base); + } else { + printf("[OK]\tGS remained 0x%hx", *shared_scratch); + if (have_fsgsbase) + printf(" and GSBASE changed to 0xFF"); + printf("\n"); } } diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c new file mode 100644 index 000000000000..6fffadc51579 --- /dev/null +++ b/tools/testing/selftests/x86/fsgsbase_restore.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fsgsbase_restore.c, test ptrace vs fsgsbase + * Copyright (c) 2020 Andy Lutomirski + * + * This test case simulates a tracer redirecting tracee execution to + * a function and then restoring tracee state using PTRACE_GETREGS and + * PTRACE_SETREGS. This is similar to what gdb does when doing + * 'p func()'. The catch is that this test has the called function + * modify a segment register. This makes sure that ptrace correctly + * restores segment state when using PTRACE_SETREGS. + * + * This is not part of fsgsbase.c, because that test is 64-bit only. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <err.h> +#include <sys/user.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <asm/ldt.h> +#include <sys/mman.h> +#include <stddef.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <stdint.h> + +#define EXPECTED_VALUE 0x1337f00d + +#ifdef __x86_64__ +# define SEG "%gs" +#else +# define SEG "%fs" +#endif + +static unsigned int dereference_seg_base(void) +{ + int ret; + asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret)); + return ret; +} + +static void init_seg(void) +{ + unsigned int *target = mmap( + NULL, sizeof(unsigned int), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + if (target == MAP_FAILED) + err(1, "mmap"); + + *target = EXPECTED_VALUE; + + printf("\tsegment base address = 0x%lx\n", (unsigned long)target); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = (unsigned int)(uintptr_t)target, + .limit = sizeof(unsigned int) - 1, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { + printf("\tusing LDT slot 0\n"); + asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7)); + } else { + /* No modify_ldt for us (configured out, perhaps) */ + + struct user_desc *low_desc = mmap( + NULL, sizeof(desc), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + memcpy(low_desc, &desc, sizeof(desc)); + + low_desc->entry_number = -1; + + /* 32-bit set_thread_area */ + long ret; + asm volatile ("int $0x80" + : "=a" (ret), "+m" (*low_desc) + : "a" (243), "b" (low_desc) +#ifdef __x86_64__ + : "r8", "r9", "r10", "r11" +#endif + ); + memcpy(&desc, low_desc, sizeof(desc)); + munmap(low_desc, sizeof(desc)); + + if (ret != 0) { + printf("[NOTE]\tcould not create a segment -- can't test anything\n"); + exit(0); + } + printf("\tusing GDT slot %d\n", desc.entry_number); + + unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3); + asm volatile ("mov %0, %" SEG :: "rm" (sel)); + } +} + +static void tracee_zap_segment(void) +{ + /* + * The tracer will redirect execution here. This is meant to + * work like gdb's 'p func()' feature. The tricky bit is that + * we modify a segment register in order to make sure that ptrace + * can correctly restore segment registers. + */ + printf("\tTracee: in tracee_zap_segment()\n"); + + /* + * Write a nonzero selector with base zero to the segment register. + * Using a null selector would defeat the test on AMD pre-Zen2 + * CPUs, as such CPUs don't clear the base when loading a null + * selector. + */ + unsigned short sel; + asm volatile ("mov %%ss, %0\n\t" + "mov %0, %" SEG + : "=rm" (sel)); + + pid_t pid = getpid(), tid = syscall(SYS_gettid); + + printf("\tTracee is going back to sleep\n"); + syscall(SYS_tgkill, pid, tid, SIGSTOP); + + /* Should not get here. */ + while (true) { + printf("[FAIL]\tTracee hit unreachable code\n"); + pause(); + } +} + +int main() +{ + printf("\tSetting up a segment\n"); + init_seg(); + + unsigned int val = dereference_seg_base(); + if (val != EXPECTED_VALUE) { + printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE); + return 1; + } + printf("[OK]\tThe segment points to the right place.\n"); + + pid_t chld = fork(); + if (chld < 0) + err(1, "fork"); + + if (chld == 0) { + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0); + + if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) + err(1, "PTRACE_TRACEME"); + + pid_t pid = getpid(), tid = syscall(SYS_gettid); + + printf("\tTracee will take a nap until signaled\n"); + syscall(SYS_tgkill, pid, tid, SIGSTOP); + + printf("\tTracee was resumed. Will re-check segment.\n"); + + val = dereference_seg_base(); + if (val != EXPECTED_VALUE) { + printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE); + exit(1); + } + + printf("[OK]\tThe segment points to the right place.\n"); + exit(0); + } + + int status; + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + struct user_regs_struct regs; + + if (ptrace(PTRACE_GETREGS, chld, NULL, ®s) != 0) + err(1, "PTRACE_GETREGS"); + +#ifdef __x86_64__ + printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base); +#else + printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs); +#endif + + struct user_regs_struct regs2 = regs; +#ifdef __x86_64__ + regs2.rip = (unsigned long)tracee_zap_segment; + regs2.rsp -= 128; /* Don't clobber the redzone. */ +#else + regs2.eip = (unsigned long)tracee_zap_segment; +#endif + + printf("\tTracer: redirecting tracee to tracee_zap_segment()\n"); + if (ptrace(PTRACE_SETREGS, chld, NULL, ®s2) != 0) + err(1, "PTRACE_GETREGS"); + if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0) + err(1, "PTRACE_GETREGS"); + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + printf("\tTracer: restoring tracee state\n"); + if (ptrace(PTRACE_SETREGS, chld, NULL, ®s) != 0) + err(1, "PTRACE_GETREGS"); + if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0) + err(1, "PTRACE_GETREGS"); + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); + + if (WIFSIGNALED(status)) { + printf("[FAIL]\tTracee crashed\n"); + return 1; + } + + if (!WIFEXITED(status)) { + printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status); + return 1; + } + + int exitcode = WEXITSTATUS(status); + if (exitcode != 0) { + printf("[FAIL]\tTracee reported failure\n"); + return 1; + } + + printf("[OK]\tAll is well.\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index 5b7abebbcbb9..bff474b5efc6 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -53,6 +53,7 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) if (ax != -EFAULT && ax != -ENOSYS) { printf("[FAIL]\tAX had the wrong value: 0x%lx\n", (unsigned long)ax); + printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); n_errs++; } else { printf("[OK]\tSeems okay\n"); @@ -207,5 +208,30 @@ int main() } set_eflags(get_eflags() & ~X86_EFLAGS_TF); +#ifdef __x86_64__ + printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n"); + + if (sigsetjmp(jmpbuf, 1) == 0) { + sigtrap_consecutive_syscalls = 0; + + asm volatile ("wrgsbase %%rax\n\t" + :: "a" (0xffffffffffff0000UL)); + + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + set_eflags(get_eflags() & ~X86_EFLAGS_TF); +#endif + return 0; } |