diff options
author | Joe Damato <jdamato@fastly.com> | 2024-11-09 06:02:35 +0100 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-11-12 03:45:06 +0100 |
commit | 347fcdc414f98998df1c5969e4612e4da67d6852 (patch) | |
tree | 3e940685ca620e9270621f6a0c8b8de6a50eb4e7 /tools/testing/selftests/net | |
parent | eventpoll: Control irq suspension for prefer_busy_poll (diff) | |
download | linux-347fcdc414f98998df1c5969e4612e4da67d6852.tar.xz linux-347fcdc414f98998df1c5969e4612e4da67d6852.zip |
selftests: net: Add busy_poll_test
Add an epoll busy poll test using netdevsim.
This test consists of:
- busy_poller (via busy_poller.c)
- busy_poll_test.sh which loads netdevsim, sets up network namespaces,
and runs busy_poller to receive data and socat to send data.
The selftest tests two different scenarios:
- busy poll (the pre-existing version in the kernel)
- busy poll with suspend enabled (what this series adds)
The data transmitted is a 1MiB temporary file generated from /dev/urandom,
and the test is considered passing if the md5sum of the input file to
socat matches the md5sum of the output file from busy_poller.
netdevsim was chosen instead of veth due to netdevsim's support for
netdev-genl.
For now, this test uses the functionality that netdevsim provides. In the
future, perhaps netdevsim can be extended to emulate device IRQs to more
thoroughly test all pre-existing kernel options (like defer_hard_irqs)
and suspend.
Signed-off-by: Joe Damato <jdamato@fastly.com>
Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241109050245.191288-6-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 9 | ||||
-rwxr-xr-x | tools/testing/selftests/net/busy_poll_test.sh | 165 | ||||
-rw-r--r-- | tools/testing/selftests/net/busy_poller.c | 346 |
4 files changed, 521 insertions, 0 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index a78debbd1fe7..48973e78d46b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,6 +2,7 @@ bind_bhash bind_timewait bind_wildcard +busy_poller cmsg_sender diag_uid epoll_busy_poll diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9322b904ad00..2b2a5ec7fa6a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -97,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh +TEST_PROGS += busy_poll_test.sh + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := busy_poller +TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh @@ -107,6 +112,10 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +# YNL build +YNL_GENS := netdev +include ynl.mk + $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh new file mode 100755 index 000000000000..7db292ec4884 --- /dev/null +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 
+ +# busy poll config +MAX_EVENTS=8 +BUSY_POLL_USECS=0 +BUSY_POLL_BUDGET=16 +PREFER_BUSY_POLL=1 + +# IRQ deferral config +NAPI_DEFER_HARD_IRQS=100 +GRO_FLUSH_TIMEOUT=50000 +SUSPEND_TIMEOUT=20000000 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + # ensure the server has 1 queue + ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +test_busypoll() +{ + suspend_value=${1:-0} + tmp_file=$(mktemp) + out_file=$(mktemp) + + # fill a test file with random data + dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null + + timeout -k 1s 30s ip netns exec nssv ./busy_poller \ + -p${SERVER_PORT} \ + -b${SERVER_IP} \ + -m${MAX_EVENTS} \ + -u${BUSY_POLL_USECS} \ + -P${PREFER_BUSY_POLL} \ + -g${BUSY_POLL_BUDGET} \ + -i${NSIM_SV_IFIDX} \ + -s${suspend_value} \ + -o${out_file}& + + wait_local_port_listen nssv ${SERVER_PORT} tcp + + ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT} + + wait + + tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ') + out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ') + + if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then + res=0 + else + echo "md5sum mismatch" + echo "input file md5sum: ${tmp_file_md5sum}"; + echo "output file md5sum: ${out_file_md5sum}"; + res=1 + fi + + rm $out_file $tmp_file + + return $res +} + +test_busypoll_with_suspend() +{ + test_busypoll ${SUSPEND_TIMEOUT} + + 
return $? +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +test_busypoll +if [ $? -ne 0 ]; then + echo "test_busypoll failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_suspend +if [ $? -ne 0 ]; then + echo "test_busypoll_with_suspend failed" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c new file mode 100644 index 000000000000..99b0e8c17fca --- /dev/null +++ b/tools/testing/selftests/net/busy_poller.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> + +#include "netdev-user.h" + +/* The below ifdef blob is required because: + * + * - sys/epoll.h does not (yet) have the ioctl definitions included. So, + * systems with older glibcs will not have them available. 
However, + * sys/epoll.h does include the type definition for epoll_data, which is + * needed by the user program (e.g. epoll_event.data.fd) + * + * - linux/eventpoll.h does not define the epoll_data type, it is simply an + * opaque __u64. It does, however, include the ioctl definition. + * + * Including both headers is impossible (types would be redefined), so I've + * opted instead to take sys/epoll.h, and include the blob below. + * + * Someday, when glibc is globally up to date, the blob below can be removed. + */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +static uint32_t cfg_port = 8000; +static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; +static char *cfg_outfile; +static int cfg_max_events = 8; +static int cfg_ifindex; + +/* busy poll params */ +static uint32_t cfg_busy_poll_usecs; +static uint32_t cfg_busy_poll_budget; +static uint32_t cfg_prefer_busy_poll; + +/* IRQ params */ +static uint32_t cfg_defer_hard_irqs; +static uint64_t cfg_gro_flush_timeout; +static uint64_t cfg_irq_suspend_timeout; + +static void usage(const char *filepath) +{ + error(1, 0, + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + switch (c) { + case 'u': + cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_usecs == ULONG_MAX || + cfg_busy_poll_usecs > UINT32_MAX) + error(1, 
ERANGE, "busy_poll_usecs too large"); + break; + case 'P': + cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); + if (cfg_prefer_busy_poll == ULONG_MAX || + cfg_prefer_busy_poll > 1) + error(1, ERANGE, + "prefer busy poll should be 0 or 1"); + break; + case 'g': + cfg_busy_poll_budget = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_budget == ULONG_MAX || + cfg_busy_poll_budget > UINT16_MAX) + error(1, ERANGE, + "busy poll budget must be [0, UINT16_MAX]"); + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + if (cfg_port > UINT16_MAX) + error(1, ERANGE, "port must be <= 65535"); + break; + case 'b': + ret = inet_aton(optarg, &cfg_bind_addr); + if (ret == 0) + error(1, errno, + "bind address %s invalid", optarg); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + case 'm': + cfg_max_events = strtol(optarg, NULL, 0); + + if (cfg_max_events == LONG_MIN || + cfg_max_events == LONG_MAX || + cfg_max_events <= 0) + error(1, ERANGE, + "max events must be > 0 and < LONG_MAX"); + break; + case 'd': + cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); + + if (cfg_defer_hard_irqs == ULONG_MAX || + cfg_defer_hard_irqs > INT32_MAX) + error(1, ERANGE, + "defer_hard_irqs must be <= INT32_MAX"); + break; + case 'r': + cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); + + if (cfg_gro_flush_timeout == ULLONG_MAX) + error(1, ERANGE, + "gro_flush_timeout must be < ULLONG_MAX"); + break; + case 's': + cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); + + if (cfg_irq_suspend_timeout == ULLONG_MAX) + error(1, ERANGE, + "irq_suspend_timeout must be < ULLONG_MAX"); + break; + case 'i': + cfg_ifindex = strtoul(optarg, NULL, 0); + if (cfg_ifindex == ULONG_MAX) + error(1, ERANGE, + "ifindex must be < ULONG_MAX"); + break; + } + } + + if (!cfg_ifindex) + usage(argv[0]); + + if (optind != argc) + usage(argv[0]); +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + + ev.events = 
events; + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) + error(1, errno, "epoll_ctl add fd: %d", fd); +} + +static void setnonblock(int sockfd) +{ + int flags; + + flags = fcntl(sockfd, F_GETFL, 0); + + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) + error(1, errno, "unable to set socket to nonblocking mode"); +} + +static void write_chunk(int fd, char *buf, ssize_t buflen) +{ + ssize_t remaining = buflen; + char *buf_offset = buf; + ssize_t writelen = 0; + ssize_t write_result; + + while (writelen < buflen) { + write_result = write(fd, buf_offset, remaining); + if (write_result == -1) + error(1, errno, "unable to write data to outfile"); + + writelen += write_result; + remaining -= write_result; + buf_offset += write_result; + } +} + +static void setup_queue(void) +{ + struct netdev_napi_get_list *napi_list = NULL; + struct netdev_napi_get_req_dump *req = NULL; + struct netdev_napi_set_req *set_req = NULL; + struct ynl_sock *ys; + struct ynl_error yerr; + uint32_t napi_id; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) + error(1, 0, "YNL: %s", yerr.msg); + + req = netdev_napi_get_req_dump_alloc(); + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); + napi_list = netdev_napi_get_dump(ys, req); + + /* assume there is 1 NAPI configured and take the first */ + if (napi_list->obj._present.id) + napi_id = napi_list->obj.id; + else + error(1, 0, "napi ID not present?"); + + set_req = netdev_napi_set_req_alloc(); + netdev_napi_set_req_set_id(set_req, napi_id); + netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); + netdev_napi_set_req_set_gro_flush_timeout(set_req, + cfg_gro_flush_timeout); + netdev_napi_set_req_set_irq_suspend_timeout(set_req, + cfg_irq_suspend_timeout); + + if (netdev_napi_set(ys, set_req)) + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); + + netdev_napi_get_list_free(napi_list); + netdev_napi_get_req_dump_free(req); + netdev_napi_set_req_free(set_req); + 
ynl_sock_destroy(ys); +} + +static void run_poller(void) +{ + struct epoll_event events[cfg_max_events]; + struct epoll_params epoll_params = {0}; + struct sockaddr_in server_addr; + int i, epfd, nfds; + ssize_t readlen; + int outfile_fd; + char buf[1024]; + int sockfd; + int conn; + int val; + + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); + if (outfile_fd == -1) + error(1, errno, "unable to open outfile: %s", cfg_outfile); + + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) + error(1, errno, "unable to create listen socket"); + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(cfg_port); + server_addr.sin_addr = cfg_bind_addr; + + /* these values are range checked during parse_opts, so casting is safe + * here + */ + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; + epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.__pad = 0; + + val = 1; + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) + error(1, errno, "poller setsockopt reuseaddr"); + + setnonblock(sockfd); + + if (bind(sockfd, (struct sockaddr *)&server_addr, + sizeof(struct sockaddr_in))) + error(0, errno, "poller bind to port: %d\n", cfg_port); + + if (listen(sockfd, 1)) + error(1, errno, "poller listen"); + + epfd = epoll_create1(0); + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) + error(1, errno, "unable to set busy poll params"); + + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); + + for (;;) { + nfds = epoll_wait(epfd, events, cfg_max_events, -1); + for (i = 0; i < nfds; i++) { + if (events[i].data.fd == sockfd) { + conn = accept(sockfd, NULL, NULL); + if (conn == -1) + error(1, errno, + "accepting incoming connection failed"); + + setnonblock(conn); + epoll_ctl_add(epfd, conn, + EPOLLIN | EPOLLET | EPOLLRDHUP | + EPOLLHUP); + } else if (events[i].events & EPOLLIN) { + for (;;) { + readlen = 
read(events[i].data.fd, buf, + sizeof(buf)); + if (readlen > 0) + write_chunk(outfile_fd, buf, + readlen); + else + break; + } + } else { + /* spurious event ? */ + } + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { + epoll_ctl(epfd, EPOLL_CTL_DEL, + events[i].data.fd, NULL); + close(events[i].data.fd); + close(outfile_fd); + return; + } + } + } +} + +int main(int argc, char *argv[]) +{ + parse_opts(argc, argv); + setup_queue(); + run_poller(); + return 0; +} |