diff options
Diffstat (limited to 'tools')
101 files changed, 2572 insertions, 945 deletions
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index a00a53e15ab7..1d111350197f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -57,6 +57,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -155,6 +156,15 @@ * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR @@ -178,6 +188,8 @@ #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 4798f9d18fe8..9de35df1afc3 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -26,6 +26,6 @@ #ifndef __NR_setns #define __NR_setns 346 #endif -#ifdef __NR_seccomp +#ifndef __NR_seccomp #define __NR_seccomp 354 #endif diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp new file mode 100644 index 000000000000..88a3d1bdd9f6 --- /dev/null +++ b/tools/build/feature/test-llvm.cpp @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) + +#if NUM_VERSION < 0x030900 +# error "LLVM version too low" +#endif +int main() +{ + llvm::errs() << "Hello World!\n"; + llvm::llvm_shutdown(); + return 0; +} diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 27f5e7dfc2f7..264eeb9c46a9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1171,12 +1171,79 @@ static int process_ip_string(FILE *f, char *ip_string, int type) return 0; } +/* + * Only IPv4 subnet strings needs to be converted to plen + * For IPv6 the subnet is already privided in plen format + */ +static int kvp_subnet_to_plen(char *subnet_addr_str) +{ + int plen = 0; + struct in_addr subnet_addr4; + + /* + * Convert subnet address to binary representation + */ + if (inet_pton(AF_INET, subnet_addr_str, &subnet_addr4) == 1) { + uint32_t subnet_mask = ntohl(subnet_addr4.s_addr); + + while (subnet_mask & 0x80000000) { + plen++; + subnet_mask <<= 1; + } + } else { + return -1; + } + + return plen; +} + +static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, + int is_ipv6) +{ + char addr[INET6_ADDRSTRLEN]; + char subnet_addr[INET6_ADDRSTRLEN]; + int error, i = 0; + int ip_offset = 0, subnet_offset = 0; + int plen; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + + while (parse_ip_val_buffer(ip_string, &ip_offset, addr, + (MAX_IP_ADDR_SIZE * 2)) && + parse_ip_val_buffer(subnet, + &subnet_offset, + subnet_addr, + (MAX_IP_ADDR_SIZE * + 2))) { + if (!is_ipv6) + plen = kvp_subnet_to_plen((char *)subnet_addr); + else + plen = atoi(subnet_addr); + + if (plen < 0) + return plen; + + error = fprintf(f, "address%d=%s/%d\n", ++i, (char *)addr, + plen); + if (error < 0) + return error; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + } + + return 0; +} + static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { int error = 0; - char if_file[PATH_MAX]; - FILE *file; + char if_filename[PATH_MAX]; + char nm_filename[PATH_MAX]; + FILE *ifcfg_file, *nmfile; char cmd[PATH_MAX]; + int is_ipv6 = 0; char *mac_addr; int str_len; @@ -1197,7 +1264,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * in a given distro to configure the interface and so are free * ignore information that may not be relevant. * - * Here is the format of the ip configuration file: + * Here is the ifcfg format of the ip configuration file: * * HWADDR=macaddr * DEVICE=interface name @@ -1220,6 +1287,32 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as * IPV6NETMASK. * + * Here is the keyfile format of the ip configuration file: + * + * [ethernet] + * mac-address=macaddr + * [connection] + * interface-name=interface name + * + * [ipv4] + * method=<protocol> (where <protocol> is "auto" if DHCP is configured + * or "manual" if no boot-time protocol should be used) + * + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * + * [ipv6] + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * * The host can specify multiple ipv4 and ipv6 addresses to be * configured for the interface. Furthermore, the configuration * needs to be persistent. A subsequent GET call on the interface @@ -1227,14 +1320,29 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * call. */ - snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC, - "/ifcfg-", if_name); + /* + * We are populating both ifcfg and nmconnection files + */ + snprintf(if_filename, sizeof(if_filename), "%s%s%s", KVP_CONFIG_LOC, + "/ifcfg-", if_name); - file = fopen(if_file, "w"); + ifcfg_file = fopen(if_filename, "w"); - if (file == NULL) { + if (!ifcfg_file) { syslog(LOG_ERR, "Failed to open config file; error: %d %s", - errno, strerror(errno)); + errno, strerror(errno)); + return HV_E_FAIL; + } + + snprintf(nm_filename, sizeof(nm_filename), "%s%s%s%s", KVP_CONFIG_LOC, + "/", if_name, ".nmconnection"); + + nmfile = fopen(nm_filename, "w"); + + if (!nmfile) { + syslog(LOG_ERR, "Failed to open config file; error: %d %s", + errno, strerror(errno)); + fclose(ifcfg_file); return HV_E_FAIL; } @@ -1248,14 +1356,31 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) goto setval_error; } - error = kvp_write_file(file, "HWADDR", "", mac_addr); - free(mac_addr); + error = kvp_write_file(ifcfg_file, "HWADDR", "", mac_addr); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(ifcfg_file, "DEVICE", "", if_name); + if (error < 0) + goto setmac_error; + + error = fprintf(nmfile, "\n[connection]\n"); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(nmfile, "interface-name", "", if_name); if (error) - goto setval_error; + goto setmac_error; - error = kvp_write_file(file, "DEVICE", "", if_name); + error = fprintf(nmfile, "\n[ethernet]\n"); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(nmfile, "mac-address", "", mac_addr); if (error) - goto setval_error; + goto setmac_error; + + free(mac_addr); /* * The dhcp_enabled flag is only for IPv4. In the case the host only @@ -1263,47 +1388,91 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * proceed to parse and pass the IPv6 information to the * disto-specific script hv_set_ifconfig. */ + + /* + * First populate the ifcfg file format + */ if (new_val->dhcp_enabled) { - error = kvp_write_file(file, "BOOTPROTO", "", "dhcp"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "dhcp"); if (error) goto setval_error; - } else { - error = kvp_write_file(file, "BOOTPROTO", "", "none"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "none"); if (error) goto setval_error; } - /* - * Write the configuration for ipaddress, netmask, gateway and - * name servers. - */ - - error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR); + error = process_ip_string(ifcfg_file, (char *)new_val->ip_addr, + IPADDR); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->sub_net, NETMASK); + error = process_ip_string(ifcfg_file, (char *)new_val->sub_net, + NETMASK); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY); + error = process_ip_string(ifcfg_file, (char *)new_val->gate_way, + GATEWAY); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->dns_addr, DNS); + error = process_ip_string(ifcfg_file, (char *)new_val->dns_addr, DNS); if (error) goto setval_error; - fclose(file); + if (new_val->addr_family == ADDR_FAMILY_IPV6) { + error = fprintf(nmfile, "\n[ipv6]\n"); + if (error < 0) + goto setval_error; + is_ipv6 = 1; + } else { + error = fprintf(nmfile, "\n[ipv4]\n"); + if (error < 0) + goto setval_error; + } + + /* + * Now we populate the keyfile format + */ + + if (new_val->dhcp_enabled) { + error = kvp_write_file(nmfile, "method", "", "auto"); + if (error < 0) + goto setval_error; + } else { + error = kvp_write_file(nmfile, "method", "", "manual"); + if (error < 0) + goto setval_error; + } + + /* + * Write the configuration for ipaddress, netmask, gateway and + * name services + */ + error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr, + (char *)new_val->sub_net, is_ipv6); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + + fclose(nmfile); + fclose(ifcfg_file); /* * Now that we have populated the configuration file, * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", - "hv_set_ifconfig", if_file); + str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", + "hv_set_ifconfig", if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. @@ -1316,14 +1485,16 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (system(cmd)) { syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s", - cmd, errno, strerror(errno)); + cmd, errno, strerror(errno)); return HV_E_FAIL; } return 0; - +setmac_error: + free(mac_addr); setval_error: syslog(LOG_ERR, "Failed to write config file"); - fclose(file); + fclose(ifcfg_file); + fclose(nmfile); return error; } diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index d10fe35b7f25..ae5a7a8249a2 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -18,12 +18,12 @@ # # This example script is based on a RHEL environment. # -# Here is the format of the ip configuration file: +# Here is the ifcfg format of the ip configuration file: # # HWADDR=macaddr # DEVICE=interface name # BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured -# or "none" if no boot-time protocol should be used) +# or "none" if no boot-time protocol should be used) # # IPADDR0=ipaddr1 # IPADDR1=ipaddr2 @@ -41,6 +41,32 @@ # tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as # IPV6NETMASK. # +# Here is the keyfile format of the ip configuration file: +# +# [ethernet] +# mac-address=macaddr +# [connection] +# interface-name=interface name +# +# [ipv4] +# method=<protocol> (where <protocol> is "auto" if DHCP is configured +# or "manual" if no boot-time protocol should be used) +# +# address1=ipaddr1/plen +# address=ipaddr2/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1; +# +# [ipv6] +# address1=ipaddr1/plen +# address2=ipaddr1/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1;dns2 +# # The host can specify multiple ipv4 and ipv6 addresses to be # configured for the interface. Furthermore, the configuration # needs to be persistent. A subsequent GET call on the interface @@ -48,18 +74,19 @@ # call. # - - echo "IPV6INIT=yes" >> $1 echo "NM_CONTROLLED=no" >> $1 echo "PEERDNS=yes" >> $1 echo "ONBOOT=yes" >> $1 - cp $1 /etc/sysconfig/network-scripts/ +chmod 600 $2 +interface=$(echo $2 | awk -F - '{ print $2 }') +filename="${2##*/}" + +sed '/\[connection\]/a autoconnect=true' $2 > /etc/NetworkManager/system-connections/${filename} -interface=$(echo $1 | awk -F - '{ print $2 }') /sbin/ifdown $interface 2>/dev/null /sbin/ifup $interface 2>/dev/null diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 71e54b1e3796..2f882d5cb30f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -38,7 +38,7 @@ asm( \ ____BTF_ID(symbol) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index a03d9bba5151..f3c82ab5b14c 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -11,8 +11,6 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) @@ -29,7 +27,7 @@ static inline void *phys_to_virt(unsigned long address) return __va(address); } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); static inline void totalram_pages_inc(void) { diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h index 102fd9217f1f..f6bc226af0c1 100644 --- a/tools/include/linux/seq_file.h +++ b/tools/include/linux/seq_file.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H #define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H +struct seq_file; + #endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fd6c1cb585db..abe087c53b4b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_fchmodat2 452 +__SYSCALL(__NR_fchmodat2, sys_fchmodat2) + #undef __NR_syscalls -#define __NR_syscalls 452 +#define __NR_syscalls 453 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index a87bbbbca2d4..794c1d857677 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -673,8 +673,11 @@ struct drm_gem_open { * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT * and &DRM_PRIME_CAP_EXPORT. * - * PRIME buffers are exposed as dma-buf file descriptors. See - * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. */ #define DRM_CAP_PRIME 0x5 /** @@ -682,6 +685,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_IMPORT 0x1 /** @@ -689,6 +694,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_EXPORT 0x2 /** @@ -756,15 +763,14 @@ struct drm_gem_open { /** * DRM_CAP_SYNCOBJ * - * If set to 1, the driver supports sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ 0x13 /** * DRM_CAP_SYNCOBJ_TIMELINE * * If set to 1, the driver supports timeline operations on sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 @@ -909,6 +915,27 @@ struct drm_syncobj_timeline_wait { __u32 pad; }; +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + struct drm_syncobj_array { __u64 handles; @@ -1169,6 +1196,8 @@ extern "C" { */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. @@ -1180,25 +1209,50 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/* - * Header for events written back to userspace on the drm fd. The - * type defines the type of event, the length specifies the total - * length of the event (including the header), and user_data is - * typically a 64 bit value passed with the ioctl that triggered the - * event. A read on the drm fd will always only return complete - * events, that is, if for example the read buffer is 100 bytes, and - * there are two 64 byte events pending, only one will be returned. +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). * - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and - * up are chipset specific. + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. */ struct drm_event { __u32 type; __u32 length; }; +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ #define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8790b3962e4b..0448700890f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1962,7 +1962,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Description diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h new file mode 100644 index 000000000000..dbfc9b37fcae --- /dev/null +++ b/tools/include/uapi/linux/seccomp.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_SECCOMP_H +#define _UAPI_LINUX_SECCOMP_H + +#include <linux/compiler.h> +#include <linux/types.h> + + +/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */ +#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */ +#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ +#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ + +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 + +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +/* Received notifications wait in killable state (only respond to fatal signals) */ +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) + +/* + * All BPF programs must return a 32-bit value. + * The bottom 16-bits are for optional return data. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). + * + * The ordering ensures that a min_t() over composed return values always + * selects the least permissive choice. + */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +/** + * struct seccomp_data - the format the BPF program executes over. + * @nr: the system call number + * @arch: indicates system call convention as an AUDIT_ARCH_* value + * as defined in <linux/audit.h>. + * @instruction_pointer: at the time of the system call. + * @args: up to 6 system call arguments always stored as 64-bit values + * regardless of the architecture. + */ +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; + +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +/* + * Valid flags for struct seccomp_notif_resp + * + * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution! + * If set by the process supervising the syscalls of another process the + * syscall will continue. This is problematic because of an inherent TOCTOU. + * An attacker can exploit the time while the supervised process is waiting on + * a response from the supervising process to rewrite syscall arguments which + * are passed as pointers of the intercepted syscall. + * It should be absolutely clear that this means that the seccomp notifier + * _cannot_ be used to implement a security policy! It should only ever be used + * in scenarios where a more privileged process supervises the syscalls of a + * lesser privileged process to get around kernel-enforced security + * restrictions when the privileged process deems this safe. In other words, + * in order to continue a syscall the supervising process should be sure that + * another security mechanism or the kernel itself will sufficiently block + * syscalls if arguments are rewritten to something unsafe. + * + * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF + * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the + * same syscall, the most recently added filter takes precedence. This means + * that the new SECCOMP_RET_USER_NOTIF filter can override any + * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all + * such filtered syscalls to be executed by sending the response + * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally + * be overriden by SECCOMP_USER_NOTIF_FLAG_CONTINUE. + */ +#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0) + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ + +/** + * struct seccomp_notif_addfd + * @id: The ID of the seccomp notification + * @flags: SECCOMP_ADDFD_FLAG_* + * @srcfd: The local fd number + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. + * @newfd_flags: The O_* flags the remote FD should have applied + */ +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) + +#endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index 3a8d8499fab6..2cb2518500cb 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -16,19 +16,19 @@ static const char * const devlink_op_strmap[] = { [3] = "get", [7] = "port-get", - [DEVLINK_CMD_SB_GET] = "sb-get", - [DEVLINK_CMD_SB_POOL_GET] = "sb-pool-get", - [DEVLINK_CMD_SB_PORT_POOL_GET] = "sb-port-pool-get", - [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = "sb-tc-pool-bind-get", + [13] = "sb-get", + [17] = "sb-pool-get", + [21] = "sb-port-pool-get", + [25] = "sb-tc-pool-bind-get", [DEVLINK_CMD_PARAM_GET] = "param-get", [DEVLINK_CMD_REGION_GET] = "region-get", [DEVLINK_CMD_INFO_GET] = "info-get", [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_TRAP_GET] = "trap-get", - [DEVLINK_CMD_TRAP_GROUP_GET] = "trap-group-get", - [DEVLINK_CMD_TRAP_POLICER_GET] = "trap-policer-get", - [DEVLINK_CMD_RATE_GET] = "rate-get", - [DEVLINK_CMD_LINECARD_GET] = "linecard-get", + [63] = "trap-get", + [67] = "trap-group-get", + [71] = "trap-policer-get", + [76] = "rate-get", + [80] = "linecard-get", [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", }; @@ -838,7 +838,7 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_GET; + yrs.rsp_cmd = 13; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -876,7 +876,7 @@ devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_get_list); yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_GET; + yds.rsp_cmd = 13; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); @@ -987,7 +987,7 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yrs.rsp_cmd = 17; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1026,7 +1026,7 @@ devlink_sb_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yds.rsp_cmd = 17; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); @@ -1147,7 +1147,7 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yrs.rsp_cmd = 21; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1187,7 +1187,7 @@ devlink_sb_port_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yds.rsp_cmd = 21; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); @@ -1316,7 +1316,7 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yrs.rsp_cmd = 25; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1356,7 +1356,7 @@ devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yds.rsp_cmd = 25; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); @@ -2183,7 +2183,7 @@ devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yrs.rsp_cmd = 63; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2223,7 +2223,7 @@ devlink_trap_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_get_list); yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yds.rsp_cmd = 63; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); @@ -2336,7 +2336,7 @@ devlink_trap_group_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yrs.rsp_cmd = 67; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2376,7 +2376,7 @@ devlink_trap_group_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yds.rsp_cmd = 67; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); @@ -2483,7 +2483,7 @@ devlink_trap_policer_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yrs.rsp_cmd = 71; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2523,7 +2523,7 @@ devlink_trap_policer_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yds.rsp_cmd = 71; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); @@ -2642,7 +2642,7 @@ devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RATE_GET; + yrs.rsp_cmd = 76; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2682,7 +2682,7 @@ devlink_rate_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_rate_get_list); yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_RATE_GET; + yds.rsp_cmd = 76; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); @@ -2786,7 +2786,7 @@ devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yrs.rsp_cmd = 80; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2825,7 +2825,7 @@ devlink_linecard_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_linecard_get_list); yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yds.rsp_cmd = 80; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index cfda2511badf..cb5e757f6621 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -366,3 +366,4 @@ 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat +452 n64 fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 8c0b08b7a80e..20e50586e8a2 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -538,3 +538,4 @@ 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a6935af2235c..0122cc156952 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 227538b0ce80..1d6eee30eceb 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -373,6 +373,8 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 64 map_shadow_stack sys_map_shadow_stack # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c index b04ebcde4036..a01c40131493 100644 --- a/tools/perf/bench/sched-seccomp-notify.c +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -9,7 +9,7 @@ #include <sys/syscall.h> #include <sys/ioctl.h> #include <linux/time64.h> -#include <linux/seccomp.h> +#include <uapi/linux/seccomp.h> #include <sys/prctl.h> #include <unistd.h> diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 4314c9197850..e21caadda7c1 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ FILES=( "include/uapi/linux/perf_event.h" "include/uapi/linux/prctl.h" "include/uapi/linux/sched.h" + "include/uapi/linux/seccomp.h" "include/uapi/linux/stat.h" "include/uapi/linux/usbdevice_fs.h" "include/uapi/linux/vhost.h" diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 72f263d49121..4083b1abeaab 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -289,6 +289,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -314,7 +323,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 38e593d92920..32ff619e881c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -308,6 +308,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -333,7 +342,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index a7e88332276d..72ba4a9239c6 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -991,7 +991,7 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) } } free(cpuid); - if (!pmu) + if (!pmu || !table) return table; for (i = 0; i < table->num_pmus; i++) { diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 0e9ec65d92ae..3e673f25d5fd 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -413,10 +413,10 @@ def has_event(event: Event) -> Function: # pylint: disable=invalid-name return Function('has_event', event) -def strcmp_cpuid_str(event: str) -> Function: +def strcmp_cpuid_str(cpuid: Event) -> Function: # pylint: disable=redefined-builtin # pylint: disable=invalid-name - return Function('strcmp_cpuid_str', event) + return Function('strcmp_cpuid_str', cpuid) class Metric: """An individual metric that will specifiable on the perf command line.""" diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c deleted file mode 100644 index 9887ae09242d..000000000000 --- a/tools/perf/util/bpf-prologue.c +++ /dev/null @@ -1,508 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-prologue.c - * - * Copyright (C) 2015 He Kuang <hekuang@huawei.com> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - */ - -#include <bpf/libbpf.h> -#include "debug.h" -#include "bpf-loader.h" -#include "bpf-prologue.h" -#include "probe-finder.h" -#include <errno.h> -#include <stdlib.h> -#include <dwarf-regs.h> -#include <linux/filter.h> - -#define BPF_REG_SIZE 8 - -#define JMP_TO_ERROR_CODE -1 -#define JMP_TO_SUCCESS_CODE -2 -#define JMP_TO_USER_CODE -3 - -struct bpf_insn_pos { - struct bpf_insn *begin; - struct bpf_insn *end; - struct bpf_insn *pos; -}; - -static inline int -pos_get_cnt(struct bpf_insn_pos *pos) -{ - return pos->pos - pos->begin; -} - -static int -append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) -{ - if (!pos->pos) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - if (pos->pos + 1 >= pos->end) { - pr_err("bpf prologue: prologue too long\n"); - pos->pos = NULL; - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - } - - *(pos->pos)++ = new_insn; - return 0; -} - -static int -check_pos(struct bpf_insn_pos *pos) -{ - if (!pos->pos || pos->pos >= pos->end) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - return 0; -} - -/* - * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM - * instruction (BPF_{B,H,W,DW}). - */ -static int -argtype_to_ldx_size(const char *type) -{ - int arg_size = type ? atoi(&type[1]) : 64; - - switch (arg_size) { - case 8: - return BPF_B; - case 16: - return BPF_H; - case 32: - return BPF_W; - case 64: - default: - return BPF_DW; - } -} - -static const char * -insn_sz_to_str(int insn_sz) -{ - switch (insn_sz) { - case BPF_B: - return "BPF_B"; - case BPF_H: - return "BPF_H"; - case BPF_W: - return "BPF_W"; - case BPF_DW: - return "BPF_DW"; - default: - return "UNKNOWN"; - } -} - -/* Give it a shorter name */ -#define ins(i, p) append_insn((i), (p)) - -/* - * Give a register name (in 'reg'), generate instruction to - * load register into an eBPF register rd: - * 'ldd target_reg, offset(ctx_reg)', where: - * ctx_reg is pre initialized to pointer of 'struct pt_regs'. - */ -static int -gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, - const char *reg, int target_reg) -{ - int offset = regs_query_register_offset(reg); - - if (offset < 0) { - pr_err("bpf: prologue: failed to get register %s\n", - reg); - return offset; - } - ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); - - return check_pos(pos); -} - -/* - * Generate a BPF_FUNC_probe_read function call. - * - * src_base_addr_reg is a register holding base address, - * dst_addr_reg is a register holding dest address (on stack), - * result is: - * - * *[dst_addr_reg] = *([src_base_addr_reg] + offset) - * - * Arguments of BPF_FUNC_probe_read: - * ARG1: ptr to stack (dest) - * ARG2: size (8) - * ARG3: unsafe ptr (src) - */ -static int -gen_read_mem(struct bpf_insn_pos *pos, - int src_base_addr_reg, - int dst_addr_reg, - long offset, - int probeid) -{ - /* mov arg3, src_base_addr_reg */ - if (src_base_addr_reg != BPF_REG_ARG3) - ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); - /* add arg3, #offset */ - if (offset) - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); - - /* mov arg2, #reg_size */ - ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); - - /* mov arg1, dst_addr_reg */ - if (dst_addr_reg != BPF_REG_ARG1) - ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); - - /* Call probe_read */ - ins(BPF_EMIT_CALL(probeid), pos); - /* - * Error processing: if read fail, goto error code, - * will be relocated. Target should be the start of - * error processing code. - */ - ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), - pos); - - return check_pos(pos); -} - -/* - * Each arg should be bare register. Fetch and save them into argument - * registers (r3 - r5). - * - * BPF_REG_1 should have been initialized with pointer to - * 'struct pt_regs'. - */ -static int -gen_prologue_fastpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int i, err = 0; - - for (i = 0; i < nargs; i++) { - err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, - BPF_PROLOGUE_START_ARG_REG + i); - if (err) - goto errout; - } - - return check_pos(pos); -errout: - return err; -} - -/* - * Slow path: - * At least one argument has the form of 'offset($rx)'. - * - * Following code first stores them into stack, then loads all of then - * to r2 - r5. - * Before final loading, the final result should be: - * - * low address - * BPF_REG_FP - 24 ARG3 - * BPF_REG_FP - 16 ARG2 - * BPF_REG_FP - 8 ARG1 - * BPF_REG_FP - * high address - * - * For each argument (described as: offn(...off2(off1(reg)))), - * generates following code: - * - * r7 <- fp - * r7 <- r7 - stack_offset // Ideal code should initialize r7 using - * // fp before generating args. However, - * // eBPF won't regard r7 as stack pointer - * // if it is generated by minus 8 from - * // another stack pointer except fp. - * // This is why we have to set r7 - * // to fp for each variable. - * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() - * (r7) <- r3 // skip following instructions for bare reg - * r3 <- r3 + off1 . // skip if off1 == 0 - * r2 <- 8 \ - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * r3 <- (r7) - * r3 <- r3 + off2 . // skip if off2 == 0 - * r2 <- 8 \ // r2 may be broken by probe_read, so set again - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * ... - */ -static int -gen_prologue_slowpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int err, i, probeid; - - for (i = 0; i < nargs; i++) { - struct probe_trace_arg *arg = &args[i]; - const char *reg = arg->value; - struct probe_trace_arg_ref *ref = NULL; - int stack_offset = (i + 1) * -8; - - pr_debug("prologue: fetch arg %d, base reg is %s\n", - i, reg); - - /* value of base register is stored into ARG3 */ - err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, - BPF_REG_ARG3); - if (err) { - pr_err("prologue: failed to get offset of register %s\n", - reg); - goto errout; - } - - /* Make r7 the stack pointer. */ - ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); - /* r7 += -8 */ - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); - /* - * Store r3 (base register) onto stack - * Ensure fp[offset] is set. - * fp is the only valid base register when storing - * into stack. We are not allowed to use r7 as base - * register here. - */ - ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, - stack_offset), pos); - - ref = arg->ref; - probeid = BPF_FUNC_probe_read_kernel; - while (ref) { - pr_debug("prologue: arg %d: offset %ld\n", - i, ref->offset); - - if (ref->user_access) - probeid = BPF_FUNC_probe_read_user; - - err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, - ref->offset, probeid); - if (err) { - pr_err("prologue: failed to generate probe_read function call\n"); - goto errout; - } - - ref = ref->next; - /* - * Load previous result into ARG3. Use - * BPF_REG_FP instead of r7 because verifier - * allows FP based addressing only. - */ - if (ref) - ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, - BPF_REG_FP, stack_offset), pos); - } - } - - /* Final pass: read to registers */ - for (i = 0; i < nargs; i++) { - int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW; - - pr_debug("prologue: load arg %d, insn_sz is %s\n", - i, insn_sz_to_str(insn_sz)); - ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i, - BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); - } - - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); - - return check_pos(pos); -errout: - return err; -} - -static int -prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, - struct bpf_insn *success_code, struct bpf_insn *user_code) -{ - struct bpf_insn *insn; - - if (check_pos(pos)) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - for (insn = pos->begin; insn < pos->pos; insn++) { - struct bpf_insn *target; - u8 class = BPF_CLASS(insn->code); - u8 opcode; - - if (class != BPF_JMP) - continue; - opcode = BPF_OP(insn->code); - if (opcode == BPF_CALL) - continue; - - switch (insn->off) { - case JMP_TO_ERROR_CODE: - target = error_code; - break; - case JMP_TO_SUCCESS_CODE: - target = success_code; - break; - case JMP_TO_USER_CODE: - target = user_code; - break; - default: - pr_err("bpf prologue: internal error: relocation failed\n"); - return -BPF_LOADER_ERRNO__PROLOGUE; - } - - insn->off = target - (insn + 1); - } - return 0; -} - -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space) -{ - struct bpf_insn *success_code = NULL; - struct bpf_insn *error_code = NULL; - struct bpf_insn *user_code = NULL; - struct bpf_insn_pos pos; - bool fastpath = true; - int err = 0, i; - - if (!new_prog || !new_cnt) - return -EINVAL; - - if (cnt_space > BPF_MAXINSNS) - cnt_space = BPF_MAXINSNS; - - pos.begin = new_prog; - pos.end = new_prog + cnt_space; - pos.pos = new_prog; - - if (!nargs) { - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), - &pos); - - if (check_pos(&pos)) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; - } - - if (nargs > BPF_PROLOGUE_MAX_ARGS) { - pr_warning("bpf: prologue: %d arguments are dropped\n", - nargs - BPF_PROLOGUE_MAX_ARGS); - nargs = BPF_PROLOGUE_MAX_ARGS; - } - - /* First pass: validation */ - for (i = 0; i < nargs; i++) { - struct probe_trace_arg_ref *ref = args[i].ref; - - if (args[i].value[0] == '@') { - /* TODO: fetch global variable */ - pr_err("bpf: prologue: global %s%+ld not support\n", - args[i].value, ref ? ref->offset : 0); - return -ENOTSUP; - } - - while (ref) { - /* fastpath is true if all args has ref == NULL */ - fastpath = false; - - /* - * Instruction encodes immediate value using - * s32, ref->offset is long. On systems which - * can't fill long in s32, refuse to process if - * ref->offset too large (or small). - */ -#ifdef __LP64__ -#define OFFSET_MAX ((1LL << 31) - 1) -#define OFFSET_MIN ((1LL << 31) * -1) - if (ref->offset > OFFSET_MAX || - ref->offset < OFFSET_MIN) { - pr_err("bpf: prologue: offset out of bound: %ld\n", - ref->offset); - return -BPF_LOADER_ERRNO__PROLOGUEOOB; - } -#endif - ref = ref->next; - } - } - pr_debug("prologue: pass validation\n"); - - if (fastpath) { - /* If all variables are registers... */ - pr_debug("prologue: fast path\n"); - err = gen_prologue_fastpath(&pos, args, nargs); - if (err) - goto errout; - } else { - pr_debug("prologue: slow path\n"); - - /* Initialization: move ctx to a callee saved register. */ - ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); - - err = gen_prologue_slowpath(&pos, args, nargs); - if (err) - goto errout; - /* - * start of ERROR_CODE (only slow pass needs error code) - * mov r2 <- 1 // r2 is error number - * mov r3 <- 0 // r3, r4... should be touched or - * // verifier would complain - * mov r4 <- 0 - * ... - * goto usercode - */ - error_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), - &pos); - - for (i = 0; i < nargs; i++) - ins(BPF_ALU64_IMM(BPF_MOV, - BPF_PROLOGUE_START_ARG_REG + i, - 0), - &pos); - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), - &pos); - } - - /* - * start of SUCCESS_CODE: - * mov r2 <- 0 - * goto usercode // skip - */ - success_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); - - /* - * start of USER_CODE: - * Restore ctx to r1 - */ - user_code = pos.pos; - if (!fastpath) { - /* - * Only slow path needs restoring of ctx. In fast path, - * register are loaded directly from r1. - */ - ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); - err = prologue_relocate(&pos, error_code, success_code, - user_code); - if (err) - goto errout; - } - - err = check_pos(&pos); - if (err) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; -errout: - return err; -} diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 90ce22f9c1a9..939ec769bf4a 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -23,7 +23,9 @@ #define MAX_CPUS 4096 // FIXME: These should come from system headers +#ifndef bool typedef char bool; +#endif typedef int pid_t; typedef long long int __s64; typedef __s64 time64_t; diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 1dbf27822ee2..4a1dc21b0450 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -282,13 +282,21 @@ static struct perf_event_attr *dlfilter__attr(void *ctx) return &d->evsel->core.attr; } +static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, __u32 len) +{ + u64 offset = map__map_ip(map, ip); + + if (ip + len >= map__end(map)) + len = map__end(map) - ip; + + return dso__data_read_offset(map__dso(map), machine, offset, buf, len); +} + static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) { struct dlfilter *d = (struct dlfilter *)ctx; struct addr_location *al; struct addr_location a; - struct map *map; - u64 offset; __s32 ret; if (!d->ctx_valid) @@ -298,27 +306,17 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) if (!al) return -1; - map = al->map; - - if (map && ip >= map__start(map) && ip < map__end(map) && + if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) && machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) - goto have_map; + return code_read(ip, al->map, d->machine, buf, len); addr_location__init(&a); + thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) { - ret = -1; - goto out; - } + ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1; - map = a.map; -have_map: - offset = map__map_ip(map, ip); - if (ip + len >= map__end(map)) - len = map__end(map) - ip; - ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); -out: addr_location__exit(&a); + return ret; } diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 0a5bf1937a7c..c12f8320e668 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -80,16 +80,6 @@ struct hashmap { size_t sz; }; -#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ - .hash_fn = (hash_fn), \ - .equal_fn = (equal_fn), \ - .ctx = (ctx), \ - .buckets = NULL, \ - .cap = 0, \ - .cap_bits = 0, \ - .sz = 0, \ -} - void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, hashmap_equal_fn equal_fn, void *ctx); struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d85602aa4b9f..d515ba8a0e16 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -295,7 +295,7 @@ static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *al len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.scale", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -330,7 +330,7 @@ static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *ali len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.unit", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -364,7 +364,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.per-pkg", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -385,7 +385,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.snapshot", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -520,7 +520,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, pmu_name = pe->pmu; } - alias = malloc(sizeof(*alias)); + alias = zalloc(sizeof(*alias)); if (!alias) return -ENOMEM; diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..f6c6e5474c3a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -20,4 +20,8 @@ void memblock_free_pages(struct page *page, unsigned long pfn, { } +static inline void accept_memory(phys_addr_t start, phys_addr_t end) +{ +} + #endif diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c index 7b0909e8b759..d3d58851864e 100644 --- a/tools/testing/memblock/mmzone.c +++ b/tools/testing/memblock/mmzone.c @@ -11,7 +11,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) return NULL; } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end) +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid) { } diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 411647094cc3..57bf2688edfd 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include "basic_api.h" #include <string.h> #include <linux/memblock.h> -#include "basic_api.h" #define EXPECTED_MEMBLOCK_REGIONS 128 #define FUNC_ADD "memblock_add" diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 4f23302ee677..b5ec59aa62d7 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -5,6 +5,7 @@ #include <stdlib.h> #include <assert.h> #include <linux/types.h> +#include <linux/seq_file.h> #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/printk.h> diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 42806add0114..1a21d6beebc6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -92,7 +92,7 @@ endif TARGETS += tmpfs TARGETS += tpm2 TARGETS += tty -TARGETS += uevents +TARGETS += uevent TARGETS += user TARGETS += user_events TARGETS += vDSO diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index d7aafe5a1993..2f1685a3eae1 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -431,7 +431,6 @@ long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) { snd_config_t *cfg; - long l; int ret; if (!root) diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index c95d63e553f4..21e482b23f50 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -188,7 +188,7 @@ static int wait_for_event(struct ctl_data *ctl, int timeout) { unsigned short revents; snd_ctl_event_t *event; - int count, err; + int err; unsigned int mask = 0; unsigned int ev_id; @@ -430,7 +430,6 @@ static bool strend(const char *haystack, const char *needle) static void test_ctl_name(struct ctl_data *ctl) { bool name_ok = true; - bool check; ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem, ctl->name); @@ -863,7 +862,6 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, snd_ctl_elem_value_t *val) { int err; - long val_read; /* Ideally this will fail... */ err = snd_ctl_elem_write(ctl->card->handle, val); @@ -883,8 +881,7 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl) { - int err, i; - long val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -994,8 +991,7 @@ static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl) static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) { - int err, i; - unsigned int val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -1027,7 +1023,6 @@ static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) static void test_ctl_write_invalid(struct ctl_data *ctl) { bool pass; - int err; /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index 2f5e3c462194..c0a39818c5a4 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -257,7 +257,7 @@ static void find_pcms(void) static void test_pcm_time(struct pcm_data *data, enum test_class class, const char *test_name, snd_config_t *pcm_cfg) { - char name[64], key[128], msg[256]; + char name[64], msg[256]; const int duration_s = 2, margin_ms = 100; const int duration_ms = duration_s * 1000; const char *cs; @@ -567,7 +567,7 @@ int main(void) { struct card_data *card; struct pcm_data *pcm; - snd_config_t *global_config, *cfg, *pcm_cfg; + snd_config_t *global_config, *cfg; int num_pcm_tests = 0, num_tests, num_std_pcm_tests; int ret; void *thread_ret; diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index 357adc722cba..a52ecd43dbe3 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -313,7 +313,6 @@ TEST_F(pcmtest, ni_playback) { */ TEST_F(pcmtest, reset_ioctl) { snd_pcm_t *handle; - unsigned char *it; int test_res; struct pcmtest_test_params *params = &self->params; diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 7f768d335698..3babaf3eee5c 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,14 +1,8 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 fexit_sleep # The test never returns. The remaining tests cannot start. -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3 +kprobe_multi_bench_attach # needs CONFIG_FPROBE +kprobe_multi_test # needs CONFIG_FPROBE module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 1c7584e8dd9e..e41eb33b2704 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,6 +4,7 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b650b2e617b8..2e70a6048278 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -20,7 +20,6 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y CONFIG_BPFILTER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index a53c254c6058..4aabeaa525d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -185,6 +185,8 @@ static void test_cubic(void) do_test("bpf_cubic", NULL); + ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 3b77d8a422db..261228eb68e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -24,6 +24,7 @@ void test_empty_skb(void) int *ifindex; int err; int ret; + int lwt_egress_ret; /* expected retval at lwt/egress */ bool success_on_tc; } tests[] = { /* Empty packets are always rejected. */ @@ -57,6 +58,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &veth_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, .success_on_tc = true, }, { @@ -70,6 +72,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &ipip_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, }, /* ETH_HLEN+1-sized packet should be redirected. */ @@ -79,6 +82,7 @@ void test_empty_skb(void) .data_in = eth_hlen_pp, .data_size_in = sizeof(eth_hlen_pp), .ifindex = &veth_ifindex, + .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */ }, { .msg = "ipip ETH_HLEN+1 packet ingress", @@ -108,8 +112,12 @@ void test_empty_skb(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { bpf_object__for_each_program(prog, bpf_obj->obj) { - char buf[128]; + bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL; bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + int expected_ret; + char buf[128]; + + expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret; tattr.data_in = tests[i].data_in; tattr.data_size_in = tests[i].data_size_in; @@ -128,7 +136,7 @@ void test_empty_skb(void) if (at_tc && tests[i].success_on_tc) ASSERT_GE(bpf_obj->bss->ret, 0, buf); else - ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf); } } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 179fe300534f..4041cfa670eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -3,6 +3,7 @@ #include "kprobe_multi.skel.h" #include "trace_helpers.h" #include "kprobe_multi_empty.skel.h" +#include "kprobe_multi_override.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -453,6 +454,40 @@ cleanup: } } +static void test_attach_override(void) +{ + struct kprobe_multi_override *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_multi_override__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + /* The test_override calls bpf_override_return so it should fail + * to attach to bpf_fentry_test1 function, which is not on error + * injection list. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) { + bpf_link__destroy(link); + goto cleanup; + } + + /* The should_fail_bio function is on error injection list, + * attach should succeed. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "should_fail_bio", NULL); + if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio")) + goto cleanup; + + bpf_link__destroy(link); + +cleanup: + kprobe_multi_override__destroy(skel); +} + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -480,4 +515,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("attach_override")) + test_attach_override(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 064cc5e8d9ad..dda7060e86a0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -475,6 +475,55 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_peek(void) +{ + int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail; + struct test_sockmap_pass_prog *pass; + char snd[256] = "0123456789"; + char rcv[256] = "0"; + + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + s = socket_loopback(AF_INET, SOCK_STREAM); + if (!ASSERT_GT(s, -1, "socket_loopback(s)")) + goto out; + + err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pairs(s)")) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + + sent = xsend(p1, snd, sizeof(snd), 0); + ASSERT_EQ(sent, sizeof(snd), "xsend(p1)"); + recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK); + ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)"); + recvd = recv(c1, rcv, sizeof(rcv), 0); + ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_pass_prog__destroy(pass); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -515,4 +564,6 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict msg_f_peek")) + test_sockmap_skb_verdict_peek(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 6c93215be8a3..67f985f7d215 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -45,7 +45,7 @@ static inline __u32 ifindex_from_link_fd(int fd) return link_info.tcx.ifindex; } -static inline void __assert_mprog_count(int target, int expected, bool miniq, int ifindex) +static inline void __assert_mprog_count(int target, int expected, int ifindex) { __u32 count = 0, attach_flags = 0; int err; @@ -53,20 +53,22 @@ static inline void __assert_mprog_count(int target, int expected, bool miniq, in err = bpf_prog_query(ifindex, target, 0, &attach_flags, NULL, &count); ASSERT_EQ(count, expected, "count"); - if (!expected && !miniq) - ASSERT_EQ(err, -ENOENT, "prog_query"); - else - ASSERT_EQ(err, 0, "prog_query"); + ASSERT_EQ(err, 0, "prog_query"); } static inline void assert_mprog_count(int target, int expected) { - __assert_mprog_count(target, expected, false, loopback); + __assert_mprog_count(target, expected, loopback); } static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected) { - __assert_mprog_count(target, expected, false, ifindex); + __assert_mprog_count(target, expected, ifindex); +} + +static inline void tc_skel_reset_all_seen(struct test_tc_link *skel) +{ + memset(skel->bss, 0, sizeof(*skel->bss)); } #endif /* TC_HELPERS */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 74fc1fe9ee26..bc9841144685 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -65,6 +65,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -97,6 +98,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -187,6 +189,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -194,9 +197,6 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -246,6 +246,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -342,6 +343,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -349,9 +351,6 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, .relative_fd = bpf_program__fd(skel->progs.tc1), @@ -401,6 +400,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -502,6 +502,7 @@ static void test_tc_links_revision_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -581,22 +582,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -707,16 +706,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_REPLACE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -781,16 +777,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "link_detach")) goto cleanup; @@ -812,16 +805,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1); if (!ASSERT_OK(err, "link_update_self")) goto cleanup; @@ -843,6 +833,7 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1254,6 +1245,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1261,9 +1253,6 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, ); @@ -1311,6 +1300,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1411,6 +1401,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1418,9 +1409,6 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, ); @@ -1468,6 +1456,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1637,38 +1626,33 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4); if (!ASSERT_OK(err, "link_update")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__detach(skel->links.tc6); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); @@ -1758,22 +1742,20 @@ static void test_tc_links_ingress(int target, bool chain_tc_old, assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 7a2ecd4eca5d..ca506d2fcf58 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -59,6 +59,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -83,6 +84,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -163,6 +165,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -219,6 +222,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -313,6 +317,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -369,6 +374,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -514,6 +520,7 @@ static void test_tc_opts_revision_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -608,22 +615,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_prog_detach_opts(fd2, loopback, target, &optd); if (!ASSERT_OK(err, "prog_detach")) goto cleanup_detach; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -635,7 +640,7 @@ cleanup_detach: if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, chain_tc_old, loopback); + assert_mprog_count(target, 0); cleanup: if (tc_attached) { tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; @@ -730,16 +735,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]"); ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd2, @@ -767,16 +769,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE | BPF_F_BEFORE, .replace_prog_fd = fd3, @@ -805,6 +804,7 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1084,6 +1084,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1124,6 +1125,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1222,6 +1224,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1262,6 +1265,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -2250,7 +2254,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old) BPF_TC_INGRESS : BPF_TC_EGRESS; err = bpf_tc_hook_create(&tc_hook); ASSERT_OK(err, "bpf_tc_hook_create"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); } err = bpf_prog_detach_opts(0, loopback, target, &optd); ASSERT_EQ(err, -ENOENT, "prog_detach"); @@ -2316,16 +2320,13 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd3, @@ -2339,21 +2340,19 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - cleanup_opts: err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); @@ -2378,3 +2377,313 @@ void serial_test_tc_opts_chain_mixed(void) test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); } + +static int generate_dummy_prog(void) +{ + const struct bpf_insn prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, opts); + const size_t log_buf_sz = 256; + char *log_buf; + int fd = -1; + + log_buf = malloc(log_buf_sz); + if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) + return fd; + opts.log_buf = log_buf; + opts.log_size = log_buf_sz; + + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL", + prog_insns, prog_insn_cnt, &opts); + ASSERT_STREQ(log_buf, "", "log_0"); + ASSERT_GE(fd, 0, "prog_fd"); + free(log_buf); + return fd; +} + +static void test_tc_opts_max_target(int target, int flags, bool relative) +{ + int err, ifindex, i, prog_fd, last_fd = -1; + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + const int max_progs = 63; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + for (i = 0; i < max_progs; i++) { + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count_ifindex(ifindex, target, i + 1); + if (i == max_progs - 1 && relative) + last_fd = prog_fd; + else + close(prog_fd); + } + + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + opta.flags = flags; + if (last_fd > 0) + opta.relative_fd = last_fd; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_64_attach"); + assert_mprog_count_ifindex(ifindex, target, max_progs); + close(prog_fd); +cleanup: + if (last_fd > 0) + close(last_fd); + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_opts_max(void) +{ + test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false); +} + +static void test_tc_opts_query_target(int target) +{ + const size_t attr_size = offsetofend(union bpf_attr, query); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + union bpf_attr attr; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + /* Test 1: Double query via libbpf API */ + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids, NULL, "prog_ids"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + /* Test 2: Double query via bpf_attr & bpf(2) directly */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query(void) +{ + test_tc_opts_query_target(BPF_TCX_INGRESS); + test_tc_opts_query_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_query_attach_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct test_tc_link *skel; + __u32 prog_ids[2]; + __u32 fd1, id1; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + id1 = id_from_prog_fd(fd1); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 0, "count"); + ASSERT_EQ(optq.revision, 1, "revision"); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = optq.revision, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query_attach(void) +{ + test_tc_opts_query_attach_target(BPF_TCX_INGRESS); + test_tc_opts_query_attach_target(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c new file mode 100644 index 000000000000..0cca4e8ae38e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <pthread.h> +#include <stdbool.h> +#include <bpf/btf.h> +#include <test_progs.h> + +#include "test_bpf_ma.skel.h" + +void test_test_bpf_ma(void) +{ + struct test_bpf_ma *skel; + struct btf *btf; + int i, err; + + skel = test_bpf_ma__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + btf = bpf_object__btf(skel->obj); + if (!ASSERT_OK_PTR(btf, "btf")) + goto out; + + for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { + char name[32]; + int id; + + snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, "bin_data")) + goto out; + skel->rodata->data_btf_ids[i] = id; + } + + err = test_bpf_ma__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + err = test_bpf_ma__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_OK(skel->bss->err, "test error"); +out: + test_bpf_ma__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 290c21dbe65a..ce2c61d62fc6 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include "timer.skel.h" +#include "timer_failure.skel.h" static int timer(struct timer *timer_skel) { @@ -49,10 +50,11 @@ void serial_test_timer(void) timer_skel = timer__open_and_load(); if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) - goto cleanup; + return; err = timer(timer_skel); ASSERT_OK(err, "timer"); -cleanup: timer__destroy(timer_skel); + + RUN_TESTS(timer_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c new file mode 100644 index 000000000000..7dd18c6d06c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <net/if.h> +#include <test_progs.h> +#include <network_helpers.h> + +#define LOCAL_NETNS "xdp_dev_bound_only_netns" + +static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags) +{ + struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.prog_flags = flags; + opts.prog_ifindex = ifindex; + return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +/* A test case for bpf_offload_netdev->offload handling bug: + * - create a veth device (does not support offload); + * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag + * (such programs are not offloaded); + * - create a device bound XDP program without flags (such programs are offloaded). + * This might lead to 'BUG: kernel NULL pointer dereference'. + */ +void test_xdp_dev_bound_only_offdev(void) +{ + struct nstoken *tok = NULL; + __u32 ifindex; + int fd1 = -1; + int fd2 = -1; + + SYS(out, "ip netns add " LOCAL_NETNS); + tok = open_netns(LOCAL_NETNS); + if (!ASSERT_OK_PTR(tok, "open_netns")) + goto out; + SYS(out, "ip link add eth42 type veth"); + ifindex = if_nametoindex("eth42"); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) { + perror("if_nametoindex"); + goto out; + } + fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) { + perror("load_dummy_prog #1"); + goto out; + } + /* Program with ifindex is considered offloaded, however veth + * does not support offload => error should be reported. + */ + fd2 = load_dummy_prog("dummy2", ifindex, 0); + ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)"); + +out: + close(fd1); + close(fd2); + close_netns(tok); + /* eth42 was added inside netns, removing the netns will + * also remove eth42 veth pair. + */ + SYS_NOFAIL("ip netns del " LOCAL_NETNS); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index d9660e7200e2..c997e3e3d3fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -490,6 +490,8 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) } } +int bpf_cubic_acked_called = 0; + void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) { @@ -497,6 +499,7 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, struct bictcp *ca = inet_csk_ca(sk); __u32 delay; + bpf_cubic_acked_called = 1; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c new file mode 100644 index 000000000000..28f8487c9059 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi") +int test_override(struct pt_regs *ctx) +{ + bpf_override_return(ctx, 123); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c new file mode 100644 index 000000000000..ecde41ae0fc8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct generic_map_value { + void *data; +}; + +char _license[] SEC("license") = "GPL"; + +const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + +int err = 0; +int pid = 0; + +#define DEFINE_ARRAY_WITH_KPTR(_size) \ + struct bin_data_##_size { \ + char data[_size - sizeof(void *)]; \ + }; \ + struct map_value_##_size { \ + struct bin_data_##_size __kptr * data; \ + /* To emit BTF info for bin_data_xx */ \ + struct bin_data_##_size not_used; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct map_value_##_size); \ + __uint(max_entries, 128); \ + } array_##_size SEC(".maps"); + +static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + new = bpf_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) { + err = 2; + return; + } + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_obj_drop(old); + err = 3; + return; + } + } + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 4; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) { + err = 5; + return; + } + bpf_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ + batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + +DEFINE_ARRAY_WITH_KPTR(8); +DEFINE_ARRAY_WITH_KPTR(16); +DEFINE_ARRAY_WITH_KPTR(32); +DEFINE_ARRAY_WITH_KPTR(64); +DEFINE_ARRAY_WITH_KPTR(96); +DEFINE_ARRAY_WITH_KPTR(128); +DEFINE_ARRAY_WITH_KPTR(192); +DEFINE_ARRAY_WITH_KPTR(256); +DEFINE_ARRAY_WITH_KPTR(512); +DEFINE_ARRAY_WITH_KPTR(1024); +DEFINE_ARRAY_WITH_KPTR(2048); +DEFINE_ARRAY_WITH_KPTR(4096); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int test_bpf_mem_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free 128 8-bytes objects in batch to trigger freeing. + */ + CALL_BATCH_ALLOC_FREE(8, 128, 0); + CALL_BATCH_ALLOC_FREE(16, 128, 1); + CALL_BATCH_ALLOC_FREE(32, 128, 2); + CALL_BATCH_ALLOC_FREE(64, 128, 3); + CALL_BATCH_ALLOC_FREE(96, 128, 4); + CALL_BATCH_ALLOC_FREE(128, 128, 5); + CALL_BATCH_ALLOC_FREE(192, 128, 6); + CALL_BATCH_ALLOC_FREE(256, 128, 7); + CALL_BATCH_ALLOC_FREE(512, 64, 8); + CALL_BATCH_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_ALLOC_FREE(4096, 8, 11); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c new file mode 100644 index 000000000000..226d33b5a05c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer_map SEC(".maps"); + +static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +{ + if (bpf_get_smp_processor_id() % 2) + return 1; + else + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__failure __msg("should have been in (0x0; 0x0)") +int BPF_PROG2(test_ret_1, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + timer = bpf_map_lookup_elem(&timer_map, &key); + if (timer) { + bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_start(timer, 1000, 0); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 31f1c935cd07..98107e0452d3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1880,7 +1880,7 @@ int main(int argc, char **argv) } } - get_unpriv_disabled(); + unpriv_disabled = get_unpriv_disabled(); if (unpriv && unpriv_disabled) { printf("Cannot run as unprivileged user with sysctl %s.\n", UNPRIV_SYSCTL); diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 20839f8e43f2..71ec34bf1501 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc new file mode 100644 index 000000000000..bc9514428dba --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test failure of registering kprobe on non unique symbol +# requires: kprobe_events + +SYMBOL='name_show' + +# We skip this test on kernel where SYMBOL is unique or does not exist. +if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then + exit_unsupported +fi + +! echo "p:test_non_unique ${SYMBOL}" > kprobe_events diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 112bc1da732a..ce33d306c2cb 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4fd042112526..25bc61dac5fb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -68,6 +68,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; @@ -908,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) !kvm_cpu_has(feature.anti_feature); } +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index c4a69d8aeb68..74627514c4d4 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -200,6 +200,13 @@ repeat: ++fmt; } + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + /* default base */ base = 10; diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 3e36019eeb4a..5d7f28b02d73 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -387,7 +387,7 @@ char *strdup_printf(const char *fmt, ...) char *str; va_start(ap, fmt); - vasprintf(&str, fmt, ap); + TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed"); va_end(ap); return str; diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c index 7168e25c194e..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 20eb2e730800..8698d1ab60d0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1088,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1097,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1108,6 @@ int main(int argc, char *argv[]) pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index d8ecacd03ecf..9f99ea42f45f 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -12,19 +12,37 @@ #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) +static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; + bool filter_reg(__u64 reg) { + switch (reg & ~REG_MASK) { /* - * Some ISA extensions are optional and not present on all host, - * but they can't be disabled through ISA_EXT registers when present. - * So, to make life easy, just filtering out these kind of registers. + * Same set of ISA_EXT registers are not present on all host because + * ISA_EXT registers are visible to the KVM user space based on the + * ISA extensions available on the host. Also, disabling an ISA + * extension using corresponding ISA_EXT register does not affect + * the visibility of the ISA_EXT register itself. + * + * Based on above, we should filter-out all ISA_EXT registers. */ - switch (reg & ~REG_MASK) { + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: @@ -32,6 +50,15 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: return true; + /* AIA registers are always available when Ssaia can't be disabled */ + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA]; default: break; } @@ -50,24 +77,27 @@ static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) unsigned long value; ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value); - if (ret) { - printf("Failed to get ext %d", ext); - return false; - } - - return !!value; + return (ret) ? false : !!value; } void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { + unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; struct vcpu_reg_sublist *s; + int rc; + + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) + __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]); /* * Disable all extensions which were enabled by default * if they were available in the risc-v host. */ - for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) - __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + if (rc && isa_ext_state[i]) + isa_ext_cant_disable[i] = true; + } for_each_sublist(c, s) { if (!s->feature) @@ -506,10 +536,6 @@ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index e446d76d1c0c..6c1278562090 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 7f36c32fa760..18ac5c1952a3 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 0560149e66ed..7cbb409801ee 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4c4925a8ab45..88b58aab7207 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + xsetbv(0, xgetbv(0) | supported_xcr0); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. + */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { @@ -153,10 +230,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -209,6 +287,34 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. + */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index 5b669818e39a..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 05898ad9f4d9..9ec9ab60b63e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index a5cb4b09a46c..0899019a7fcb 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then fi if [[ $cgroup2 ]]; then - cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path @@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb" >$cgroup_path/cgroup.subtree_control else - cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index bf2d2a684edf..14d26075c863 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -20,7 +20,7 @@ fi if [[ $cgroup2 ]]; then - CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT @@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control else - CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8b017070960d..4a2881d43989 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh +TEST_PROGS += netns-name.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e7d2a530618a..66d0db7a2614 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2437,6 +2437,9 @@ ipv4_mpath_list_test() run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2449,7 +2452,7 @@ ipv4_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2494,7 +2497,7 @@ ipv6_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index df9143538708..1c6457e54625 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -41,61 +41,6 @@ cleanup() done } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -trap cleanup EXIT - -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done - -echo "INFO: preparing interfaces." -# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. -# -# ns1eth1 ----- ns2eth1 -# hsr1 hsr2 -# ns1eth2 ns2eth2 -# | | -# ns3eth1 ns3eth2 -# \ / -# hsr3 -# -# Interfaces -ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" -ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" -ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - -# HSRv0. -ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 -ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 -ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 - -# IP for HSR -ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 -ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad -ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 -ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad -ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 -ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad - -# All Links up -ip -net "$ns1" link set ns1eth1 up -ip -net "$ns1" link set ns1eth2 up -ip -net "$ns1" link set hsr1 up - -ip -net "$ns2" link set ns2eth1 up -ip -net "$ns2" link set ns2eth2 up -ip -net "$ns2" link set hsr2 up - -ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth2 up -ip -net "$ns3" link set hsr3 up - # $1: IP address is_v6() { @@ -164,93 +109,168 @@ stop_if_error() fi } - -echo "INFO: Initial validation ping." -# Each node has to be able each one. -do_ping "$ns1" 100.64.0.2 -do_ping "$ns2" 100.64.0.1 -do_ping "$ns3" 100.64.0.1 -stop_if_error "Initial validation failed." - -do_ping "$ns1" 100.64.0.3 -do_ping "$ns2" 100.64.0.3 -do_ping "$ns3" 100.64.0.2 - -do_ping "$ns1" dead:beef:1::2 -do_ping "$ns1" dead:beef:1::3 -do_ping "$ns2" dead:beef:1::1 -do_ping "$ns2" dead:beef:1::2 -do_ping "$ns3" dead:beef:1::1 -do_ping "$ns3" dead:beef:1::2 - -stop_if_error "Initial validation failed." +do_complete_ping_test() +{ + echo "INFO: Initial validation ping." + # Each node has to be able each one. + do_ping "$ns1" 100.64.0.2 + do_ping "$ns2" 100.64.0.1 + do_ping "$ns3" 100.64.0.1 + stop_if_error "Initial validation failed." + + do_ping "$ns1" 100.64.0.3 + do_ping "$ns2" 100.64.0.3 + do_ping "$ns3" 100.64.0.2 + + do_ping "$ns1" dead:beef:1::2 + do_ping "$ns1" dead:beef:1::3 + do_ping "$ns2" dead:beef:1::1 + do_ping "$ns2" dead:beef:1::2 + do_ping "$ns3" dead:beef:1::1 + do_ping "$ns3" dead:beef:1::2 + + stop_if_error "Initial validation failed." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. -WAIT=5 -while [ ${WAIT} -gt 0 ] -do - grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table - if [ $? -ne 0 ] - then - break - fi - sleep 1 - let WAIT = WAIT - 1 -done + WAIT=5 + while [ ${WAIT} -gt 0 ] + do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" + done # Just a safety delay in case the above check didn't handle it. -sleep 1 + sleep 1 + + echo "INFO: Longer ping test." + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" dead:beef:1::2 + do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" dead:beef:1::3 -echo "INFO: Longer ping test." -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" dead:beef:1::2 -do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" dead:beef:1::3 + stop_if_error "Longer ping test failed." -stop_if_error "Longer ping test failed." + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" dead:beef:1::1 + do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" dead:beef:1::2 + stop_if_error "Longer ping test failed." -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" dead:beef:1::1 -do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" dead:beef:1::2 -stop_if_error "Longer ping test failed." + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" dead:beef:1::1 + do_ping_long "$ns3" 100.64.0.2 + do_ping_long "$ns3" dead:beef:1::2 + stop_if_error "Longer ping test failed." -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" dead:beef:1::1 -do_ping_long "$ns3" 100.64.0.2 -do_ping_long "$ns3" dead:beef:1::2 -stop_if_error "Longer ping test failed." + echo "INFO: Cutting one link." + do_ping_long "$ns1" 100.64.0.3 & -echo "INFO: Cutting one link." -do_ping_long "$ns1" 100.64.0.3 & + sleep 3 + ip -net "$ns3" link set ns3eth1 down + wait -sleep 3 -ip -net "$ns3" link set ns3eth1 down -wait + ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth1 up + stop_if_error "Failed with one link down." -stop_if_error "Failed with one link down." + echo "INFO: Delay the link and drop a few packages." + tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms + tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% -echo "INFO: Delay the link and drop a few packages." -tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms -tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" 100.64.0.3 + stop_if_error "Failed with delay and packetloss." -stop_if_error "Failed with delay and packetloss." + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" 100.64.0.3 + stop_if_error "Failed with delay and packetloss." -stop_if_error "Failed with delay and packetloss." + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" 100.64.0.2 + stop_if_error "Failed with delay and packetloss." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv}." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# + # Interfaces + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" + ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + + # HSRv0/1 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + + # IP for HSR + ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 + ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + + # All Links up + ip -net "$ns1" link set ns1eth1 up + ip -net "$ns1" link set ns1eth2 up + ip -net "$ns1" link set hsr1 up + + ip -net "$ns2" link set ns2eth1 up + ip -net "$ns2" link set ns2eth2 up + ip -net "$ns2" link set hsr2 up + + ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth2 up + ip -net "$ns3" link set hsr3 up +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 0 +do_complete_ping_test +cleanup + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" 100.64.0.2 -stop_if_error "Failed with delay and packetloss." +setup_hsr_interfaces 1 +do_complete_ping_test -echo "INFO: All good." exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index ee1f89a872b3..dc895b7b94e1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1432,7 +1432,9 @@ chk_rst_nr() count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip - elif [ $count -lt $rst_tx ]; then + # accept more rst than expected except if we don't expect any + elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } || + { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then fail_test "got $count MP_RST[s] TX expected $rst_tx" else print_ok @@ -1442,7 +1444,9 @@ chk_rst_nr() count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip - elif [ "$count" -lt "$rst_rx" ]; then + # accept more rst than expected except if we don't expect any + elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } || + { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then fail_test "got $count MP_RST[s] RX expected $rst_rx" else print_ok @@ -2305,6 +2309,7 @@ remove_tests() chk_join_nr 1 1 1 chk_rm_tx_nr 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # multiple subflows, remove @@ -2317,6 +2322,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_rm_nr 2 2 + chk_rst_nr 0 0 fi # single address, remove @@ -2329,6 +2335,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + chk_rst_nr 0 0 fi # subflow and signal, remove @@ -2342,6 +2349,7 @@ remove_tests() chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # subflows and signal, remove @@ -2356,6 +2364,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + chk_rst_nr 0 0 fi # addresses remove @@ -2370,6 +2379,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert + chk_rst_nr 0 0 fi # invalid addresses remove @@ -2384,6 +2394,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert + chk_rst_nr 0 0 fi # subflows and signal, flush @@ -2398,6 +2409,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult + chk_rst_nr 0 0 fi # subflows flush @@ -2417,6 +2429,7 @@ remove_tests() else chk_rm_nr 3 3 fi + chk_rst_nr 0 0 fi # addresses flush @@ -2431,6 +2444,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert simult + chk_rst_nr 0 0 fi # invalid addresses flush @@ -2445,6 +2459,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert + chk_rst_nr 0 0 fi # remove id 0 subflow @@ -2456,6 +2471,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # remove id 0 address @@ -2468,6 +2484,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + chk_rst_nr 0 0 invert fi } diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh new file mode 100755 index 000000000000..7d3d3fc99461 --- /dev/null +++ b/tools/testing/selftests/net/netns-name.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -o pipefail + +NS=netns-name-test +DEV=dummy-dev0 +DEV2=dummy-dev1 +ALT_NAME=some-alt-name + +RET_CODE=0 + +cleanup() { + ip netns del $NS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +ip netns add $NS + +# +# Test basic move without a rename +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 || + fail "Can't perform a netns move" +ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip link del $DEV || fail + +# +# Test move with a conflict +# +ip link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 2> /dev/null && + fail "Performed a netns move with a name conflict" +ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $NS link del $DEV || fail +ip link del $DEV || fail + +# +# Test move with a conflict and rename +# +ip link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 name $DEV2 || + fail "Can't perform a netns move with rename" +ip link del $DEV2 || fail +ip link del $DEV || fail + +# +# Test dup alt-name with netns move +# +ip link add name $DEV type dummy || fail +ip link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link add name $DEV2 type dummy || fail +ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail + +ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null && + fail "Moved with alt-name dup" + +ip link del $DEV || fail +ip -netns $NS link del $DEV2 || fail + +# +# Test creating alt-name in one net-ns and using in another +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link set dev $DEV netns 1 || fail +ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && + fail "Can still find alt-name after move" +ip link del $DEV || fail + +echo -ne "$(basename $0) \t\t\t\t" +if [ $RET_CODE -eq 0 ]; then + echo "[ OK ]" +else + echo "[ FAIL ]" +fi +exit $RET_CODE diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 9c2012d70b08..f8499d4c87f3 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -3,6 +3,8 @@ # # OVS kernel module self tests +trap ovs_exit_sig EXIT TERM INT ERR + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -142,6 +144,12 @@ ovs_add_flow () { return 0 } +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + ovs_drop_record_and_run () { local sbx=$1 shift @@ -198,6 +206,17 @@ test_drop_reason() { ip netns exec server ip addr add 172.31.110.20/24 dev s1 ip netns exec server ip link set s1 up + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + # Allow ARP ovs_add_flow "test_drop_reason" dropreason \ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 @@ -525,7 +544,7 @@ run_test() { fi if python3 ovs-dpctl.py -h 2>&1 | \ - grep "Need to install the python" >/dev/null 2>&1; then + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 912dc8c49085..b97e621face9 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -28,8 +28,10 @@ try: from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + except ModuleNotFoundError: - print("Need to install the python pyroute2 package.") + print("Need to install the python pyroute2 package >= 0.6.") sys.exit(0) @@ -1117,12 +1119,14 @@ class ovskey(nla): "src", lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ( "dst", "dst", - lambda x: str(ipaddress.IPv6Address(x)), + lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), @@ -1904,6 +1908,32 @@ class OvsFlow(GenericNetlinkSocket): raise ne return reply + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -1998,6 +2028,12 @@ def main(argv): nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + parser = argparse.ArgumentParser() parser.add_argument( "-v", @@ -2060,6 +2096,9 @@ def main(argv): addflcmd.add_argument("flow", help="Flow specification") addflcmd.add_argument("acts", help="Flow actions") + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + args = parser.parse_args() if args.verbose > 0: @@ -2143,6 +2182,11 @@ def main(argv): flow = OvsFlow.ovs_flow_msg() flow.parse(args.flow, args.acts, rep["dpifindex"]) ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." % args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) return 0 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 297d972558fb..464853a7f982 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -613,11 +613,11 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } while (recvs++ < sends) { - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); } free(mem); @@ -646,9 +646,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4cb887b57413..4b2928e1c19d 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue connect_close +audit_logread diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3686bfa6c58d..ef90aca4cc96 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ + conntrack_sctp_collision.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include <linux/audit.h> +#include <linux/netlink.h> + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 4faf2ce021d9..7c42b1b2c69b 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 000000000000..a924e595cfd8 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + ip net add $CLIENT_NS + ip net add $SERVER_NS + ip net add $ROUTER_NS + ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS + ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS + + ip -n $SERVER_NS link set link0 up + ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0 + ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n $ROUTER_NS link set link1 up + ip -n $ROUTER_NS link set link2 up + ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1 + ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2 + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + + ip -n $CLIENT_NS link set link3 up + ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3 + ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb + tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms + + # simulate the ctstate check on OVS nf_conntrack + ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe sctp + ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null + ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null + ip net del "$CLIENT_NS" + ip net del "$SERVER_NS" + ip net del "$ROUTER_NS" +} + +do_test() { + ip net exec $SERVER_NS ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec $CLIENT_NS ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 000000000000..99ed5bd6e840 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,245 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +# give other scripts a chance to finish - audit_logread sees all activity +sleep 1 + +logfile=$(mktemp) +rulefile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? -eq 0 ] && { echo "OK"; return; } + echo "FAIL" + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) +} + +nft flush ruleset + +# adding tables, chains and rules + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add counter t2 c$i" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add quota t2 q$i { 10 bytes }" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +# resetting rules + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +# resetting sets and elements + +elem=(22 ,80 ,443) +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# resetting counters + +do_test 'nft reset counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t2' \ +'table=t2 family=2 entries=342 op=nft_reset_obj +table=t2 family=2 entries=158 op=nft_reset_obj' + +do_test 'nft reset counters' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=341 op=nft_reset_obj +table=t2 family=2 entries=159 op=nft_reset_obj' + +# resetting quotas + +do_test 'nft reset quota t1 q1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t2' \ +'table=t2 family=2 entries=315 op=nft_reset_obj +table=t2 family=2 entries=185 op=nft_reset_obj' + +do_test 'nft reset quotas' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=314 op=nft_reset_obj +table=t2 family=2 entries=186 op=nft_reset_obj' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +exit $RC diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c new file mode 100644 index 000000000000..21bb1cfd8a85 --- /dev/null +++ b/tools/testing/selftests/netfilter/sctp_collision.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> + +int main(int argc, char *argv[]) +{ + struct sockaddr_in saddr = {}, daddr = {}; + int sd, ret, len = sizeof(daddr); + struct timeval tv = {25, 0}; + char buf[] = "hello"; + + if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", + argv[0]); + return -1; + } + + sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + if (sd < 0) { + printf("Failed to create sd\n"); + return -1; + } + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = inet_addr(argv[2]); + saddr.sin_port = htons(atoi(argv[3])); + + ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + printf("Failed to bind to address\n"); + goto out; + } + + ret = listen(sd, 5); + if (ret < 0) { + printf("Failed to listen on port\n"); + goto out; + } + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = inet_addr(argv[4]); + daddr.sin_port = htons(atoi(argv[5])); + + /* make test shorter than 25s */ + ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + if (ret < 0) { + printf("Failed to setsockopt SO_RCVTIMEO\n"); + goto out; + } + + if (!strcmp(argv[1], "server")) { + sleep(1); /* wait a bit for client's INIT */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + printf("Server: sent! %d\n", ret); + } + + if (!strcmp(argv[1], "client")) { + usleep(300000); /* wait a bit for server's listening */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */ + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + printf("Client: rcvd! %d\n", ret); + } + ret = 0; +out: + close(sd); + return ret; +} diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 843ba56d8e49..254d676a2689 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index b16c13688b88..ee71ce52cb6a 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -267,6 +267,7 @@ static const char g_smaps_rollup[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index 11e0f0568923..c333263f2b27 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -5,11 +5,11 @@ # Additional include paths needed by kselftest.h and local headers CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. -TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup +TEST_GEN_FILES := mmap_default mmap_bottomup -TEST_PROGS := testcases/run_mmap.sh +TEST_PROGS := run_mmap.sh include ../../lib.mk -$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h +$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c index b29379f7e478..1757d19ca89b 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <sys/mman.h> -#include <testcases/mmap_test.h> +#include <mmap_test.h> #include "../../kselftest_harness.h" diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c index d1accb91b726..c63c60b9397e 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <sys/mman.h> -#include <testcases/mmap_test.h> +#include <mmap_test.h> #include "../../kselftest_harness.h" diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 9b8434f62f57..9b8434f62f57 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh index ca5ad7c48bad..ca5ad7c48bad 100755 --- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh +++ b/tools/testing/selftests/riscv/mm/run_mmap.sh diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 22374d29ffdd..f5575ef2007c 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -47,7 +47,7 @@ static int change_event(bool enable) return ret; } -static int reg_enable(long *enable, int size, int bit) +static int reg_enable(void *enable, int size, int bit) { struct user_reg reg = {0}; int fd = open(data_file, O_RDWR); @@ -69,7 +69,7 @@ static int reg_enable(long *enable, int size, int bit) return ret; } -static int reg_disable(long *enable, int bit) +static int reg_disable(void *enable, int bit) { struct user_unreg reg = {0}; int fd = open(data_file, O_RDWR); @@ -90,17 +90,21 @@ static int reg_disable(long *enable, int bit) } FIXTURE(user) { - long check; + int check; + long check_long; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); change_event(false); self->check = 0; + self->check_long = 0; } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); } TEST_F(user, enablement) { @@ -134,9 +138,9 @@ TEST_F(user, bit_sizes) { #if BITS_PER_LONG == 8 /* Allow 0-64 bits for 64-bit */ - ASSERT_EQ(0, reg_enable(&self->check, sizeof(long), 63)); - ASSERT_NE(0, reg_enable(&self->check, sizeof(long), 64)); - ASSERT_EQ(0, reg_disable(&self->check, 63)); + ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63)); + ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64)); + ASSERT_EQ(0, reg_disable(&self->check_long, 63)); #endif /* Disallowed sizes (everything beside 4 and 8) */ @@ -198,7 +202,7 @@ static int clone_check(void *check) for (i = 0; i < 10; ++i) { usleep(100000); - if (*(long *)check) + if (*(int *)check) return 0; } diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index 32c827a52d7d..a85980190bea 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -144,13 +144,16 @@ do { \ FIXTURE(user) { int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + wait_for_delete(); } diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index 6a260caeeddc..dcd7509fe2e0 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -204,10 +204,11 @@ FIXTURE(user) { int data_fd; int enable_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); self->status_fd = open(status_file, O_RDONLY); ASSERT_NE(-1, self->status_fd); @@ -219,6 +220,8 @@ FIXTURE_SETUP(user) { } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->status_fd); close(self->data_fd); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index f893398cda05..5288e768b207 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -111,16 +111,19 @@ static int clear(int *check) FIXTURE(user) { int data_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { - USER_EVENT_FIXTURE_SETUP(return); + USER_EVENT_FIXTURE_SETUP(return, self->umount); self->data_fd = open(data_file, O_RDWR); ASSERT_NE(-1, self->data_fd); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->data_fd); if (clear(&self->check) != 0) diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h index 690378942f82..e1c3c063c031 100644 --- a/tools/testing/selftests/user_events/user_events_selftests.h +++ b/tools/testing/selftests/user_events/user_events_selftests.h @@ -11,13 +11,19 @@ #include "../kselftest.h" -static inline bool tracefs_enabled(char **message, bool *fail) +static inline void tracefs_unmount(void) +{ + umount("/sys/kernel/tracing"); +} + +static inline bool tracefs_enabled(char **message, bool *fail, bool *umount) { struct stat buf; int ret; *message = ""; *fail = false; + *umount = false; /* Ensure tracefs is installed */ ret = stat("/sys/kernel/tracing", &buf); @@ -37,6 +43,8 @@ static inline bool tracefs_enabled(char **message, bool *fail) return false; } + *umount = true; + ret = stat("/sys/kernel/tracing/README", &buf); } @@ -49,13 +57,14 @@ static inline bool tracefs_enabled(char **message, bool *fail) return true; } -static inline bool user_events_enabled(char **message, bool *fail) +static inline bool user_events_enabled(char **message, bool *fail, bool *umount) { struct stat buf; int ret; *message = ""; *fail = false; + *umount = false; if (getuid() != 0) { *message = "Must be run as root"; @@ -63,7 +72,7 @@ static inline bool user_events_enabled(char **message, bool *fail) return false; } - if (!tracefs_enabled(message, fail)) + if (!tracefs_enabled(message, fail, umount)) return false; /* Ensure user_events is installed */ @@ -85,10 +94,10 @@ static inline bool user_events_enabled(char **message, bool *fail) return true; } -#define USER_EVENT_FIXTURE_SETUP(statement) do { \ +#define USER_EVENT_FIXTURE_SETUP(statement, umount) do { \ char *message; \ bool fail; \ - if (!user_events_enabled(&message, &fail)) { \ + if (!user_events_enabled(&message, &fail, &(umount))) { \ if (fail) { \ TH_LOG("Setup failed due to: %s", message); \ ASSERT_FALSE(fail); \ @@ -97,4 +106,9 @@ static inline bool user_events_enabled(char **message, bool *fail) } \ } while (0) +#define USER_EVENT_FIXTURE_TEARDOWN(umount) do { \ + if ((umount)) \ + tracefs_unmount(); \ +} while (0) + #endif /* _USER_EVENTS_SELFTESTS_H */ diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index e0ffe69c271c..7093fd5333be 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -159,6 +159,7 @@ static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, taa_data->thread_nmi_sum = 0; taa_data->thread_irq_sum = 0; taa_data->thread_softirq_sum = 0; + taa_data->thread_thread_sum = 0; taa_data->thread_blocking_duration = 0; taa_data->timer_irq_start_time = 0; taa_data->timer_irq_duration = 0; @@ -337,7 +338,23 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor taa_data->timer_irq_start_time = start; taa_data->timer_irq_duration = duration; - taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + /* + * We are dealing with two different clock sources: the + * external clock source that timerlat uses as a reference + * and the clock used by the tracer. There are also two + * moments: the time reading the clock and the timer in + * which the event is placed in the buffer (the trace + * event timestamp). If the processor is slow or there + * is some hardware noise, the difference between the + * timestamp and the external clock read can be longer + * than the IRQ handler delay, resulting in a negative + * time. If so, set IRQ start delay as 0. In the end, + * it is less relevant than the noise. + */ + if (expected_start < taa_data->timer_irq_start_time) + taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + else + taa_data->timer_irq_start_delay = 0; /* * not exit from idle. @@ -528,7 +545,7 @@ static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_rec static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, int irq_thresh, int thread_thresh) { - unsigned long long exp_irq_ts; + long long exp_irq_ts; int total; int irq; @@ -545,12 +562,15 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, /* * Expected IRQ arrival time using the trace clock as the base. + * + * TODO: Add a list of previous IRQ, and then run the list backwards. */ exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; - - if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) - printf(" Previous IRQ interference: \t\t up to %9.2f us\n", - ns_to_usf(taa_data->prev_irq_duration)); + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) + printf(" Previous IRQ interference: \t\t up to %9.2f us\n", + ns_to_usf(taa_data->prev_irq_duration)); + } /* * The delay that the IRQ suffered before starting. diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c index 05e310696dd5..01dbf9a6b5a5 100644 --- a/tools/tracing/rtla/src/timerlat_u.c +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -45,7 +45,7 @@ static int timerlat_u_main(int cpu, struct timerlat_u_params *params) retval = sched_setaffinity(gettid(), sizeof(set), &set); if (retval == -1) { - err_msg("Error setting user thread affinity\n"); + debug_msg("Error setting user thread affinity %d, is the CPU online?\n", cpu); exit(1); } @@ -193,7 +193,9 @@ void *timerlat_u_dispatcher(void *data) procs_count--; } } - break; + + if (!procs_count) + break; } sleep(1); |